org.apache.pig.impl.util.Utils#getSchemaFromString

Source File: TestProjectStarRangeInUdf.java From spork with Apache License 2.0

6 votes

/**
 * Runs TOBAG over a star projection mixed with a positional range on an
 * input loaded without a schema, then checks both the resulting schema of
 * "f" and its sorted output.
 */
@Test
public void testProjMixExpand1NoSchema() throws IOException {
    final String query =
        "  l1 = load '" + INP_FILE_5FIELDS + "';"
        + "f = foreach l1 generate TOBAG(*, $0 .. $2) as tt;";

    // Parse a placeholder schema, then overwrite the innermost field so that
    // it carries no alias and has the NULL data type.
    Schema expectedSchema = Utils.getSchemaFromString("tt : {(NullALias)}");
    expectedSchema.getField(0).schema.getField(0).schema.getField(0).alias = null;
    expectedSchema.getField(0).schema.getField(0).schema.getField(0).type = DataType.NULL;

    compileAndCompareSchema(expectedSchema, query, "f");
    Iterator<Tuple> actualRows = pigServer.openIterator("f");

    String[] expectedRowStrings = {
        "({('10'),('20'),('30'),('40'),('50'),('10'),('20'),('30')})",
        "({('11'),('21'),('31'),('41'),('51'),('11'),('21'),('31')})",
    };
    List<Tuple> expectedRows =
        Util.getTuplesFromConstantTupleStringAsByteArray(expectedRowStrings);
    Util.checkQueryOutputsAfterSort(actualRows, expectedRows);
}

Source File: FixedWidthLoader.java From spork with Apache License 2.0

6 votes

/**
 * Stores the record reader and split index, restores the schema that was
 * saved in the UDF context, and counts how many fields are flagged as
 * required.
 *
 * @param reader record reader kept for later use in getNext()
 * @param split  split whose index is recorded
 * @throws IOException if no schema string is present in the UDF properties
 */
@Override
public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
    // Keep the reader around; getNext() uses it
    this.reader = reader;
    splitIndex = split.getSplitIndex();

    // The schema string is read back from the UDF context properties
    Properties udfProps = UDFContext.getUDFContext()
            .getUDFProperties(this.getClass(), new String[] { udfContextSignature });

    String serializedSchema = udfProps.getProperty(SCHEMA_SIGNATURE);
    if (serializedSchema == null) {
        throw new IOException("Could not find schema in UDF context");
    }
    schema = new ResourceSchema(Utils.getSchemaFromString(serializedSchema));

    requiredFields = (boolean[]) ObjectSerializer.deserialize(
            udfProps.getProperty(REQUIRED_FIELDS_SIGNATURE));
    if (requiredFields == null) {
        // Nothing to count; numRequiredFields is left untouched
        return;
    }

    int requiredCount = 0;
    for (boolean isRequired : requiredFields) {
        if (isRequired) {
            requiredCount++;
        }
    }
    numRequiredFields = requiredCount;
}

Source File: TestProjectStarExpander.java From spork with Apache License 2.0

6 votes

/**
 * Test projecting multiple * in one generate: the first star is re-aliased
 * to (aa, bb, cc) and the second keeps the load aliases.
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testProjectStarMulti() throws IOException, ParserException {
    PigServer server = new PigServer(ExecType.LOCAL);
    final String script =
        "  l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int);"
        + "f = foreach l1 generate * as (aa, bb, cc), *;";

    Util.registerMultiLineQuery(server, script);

    // Schema check: three re-aliased columns followed by the original three
    Schema expectedSchema = Utils.getSchemaFromString(
            "aa : int, bb : int, cc : int, a : int, b : int, c : int");
    Schema actualSchema = server.dumpSchema("f");
    assertEquals("Checking expected schema", expectedSchema, actualSchema);

    // Data check: every row's first three values appear twice
    String[] expectedRowStrings = {
        "(10,20,30,10,20,30)",
        "(11,21,31,11,21,31)",
    };
    List<Tuple> expectedRows = Util.getTuplesFromConstantTupleStrings(expectedRowStrings);
    Iterator<Tuple> actualRows = server.openIterator("f");
    Util.checkQueryOutputsAfterSort(actualRows, expectedRows);
}

Source File: TestPigServer.java From spork with Apache License 2.0

6 votes

/**
 * Registers a script whose nested FOREACH block references the alias C, and
 * asserts that the dumped schema of B equals the given schema string.
 *
 * @param useScalar         when true, C is also defined inside the nested block
 * @param expectedSchemaStr schema string that B is expected to have
 * @throws IOException propagated from the Pig server calls
 */
private void registerScalarScript(boolean useScalar, String expectedSchemaStr) throws IOException {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("A = load 'adata' AS (a: int, b: int);");
    // C defined at the top level of the script
    server.registerQuery("C = FOREACH A GENERATE *;");

    StringBuilder nestedForeach = new StringBuilder("B = FOREACH (GROUP A BY a) { ");
    if (useScalar) {
        // Redefine C inside the nested block
        nestedForeach.append("C = FILTER A BY b % 2 == 0; ");
    }
    nestedForeach.append("D = FILTER A BY b % 2 == 1;")
            .append("GENERATE group AS a, A.b AS every, C.b AS even, D.b AS odd;")
            .append("};");
    server.registerQuery(nestedForeach.toString());

    Schema actual = server.dumpSchema("B");
    Schema expected = Utils.getSchemaFromString(expectedSchemaStr);
    assertEquals(expected, actual);
}

Source File: TestSchema.java From spork with Apache License 2.0

5 votes

/**
 * Merges two bag schemas (with the second schema's aliases taking
 * precedence) and checks the merged result against the second schema.
 */
@Test
// See PIG-730
public void testMergeSchemaWithTwoLevelAccess() throws Exception {
    // Build the two input schemas
    Schema left = Utils.getSchemaFromString("a:{t:(a0:int, a1:int)}");
    Schema right = Utils.getSchemaFromString("b:{t:(b0:int, b1:int)}");

    // NOTE(review): both calls target the first schema, so the second call
    // undoes the first. The second may have been intended for the other
    // schema -- confirm against PIG-730 before changing.
    left.getField(0).schema.setTwoLevelAccessRequired(true);
    left.getField(0).schema.setTwoLevelAccessRequired(false);

    Schema merged = Schema.mergeSchema(left, right, true);
    assertEquals(merged, right);
}

Source File: TestTextDataParser.java From spork with Apache License 2.0

5 votes

/**
 * Converts the text "[key1#0.1f]" to a map using a map[float] field schema
 * and checks the key, the value's runtime type, and its string form.
 */
@Test
public void testMapFloatValueType() throws Exception{
    final String mapText = "[key1#0.1f]";
    Schema mapSchema = Utils.getSchemaFromString("m:map[float]");
    ResourceFieldSchema fieldSchema = new ResourceSchema(mapSchema).getFields()[0];

    Map<String, Object> parsed = ps.getLoadCaster().bytesToMap(mapText.getBytes(), fieldSchema);

    String firstKey = parsed.keySet().iterator().next();
    Object parsedValue = parsed.get("key1");

    assertEquals("key1", firstKey);
    assertTrue(parsedValue instanceof Float);
    assertEquals("0.1", String.valueOf(parsedValue));
}

Source File: TestPigSchemaConverter.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Converts a Pig schema string with the schema converter and compares the
 * result with the parsed expected message type; then filters the converted
 * schema by the same Pig schema and checks that its string form is unchanged.
 *
 * @param pigSchemaString Pig schema to convert
 * @param schemaString    expected message type, in parseable text form
 */
private void testConversion(String pigSchemaString, String schemaString) throws Exception {
  final String description = "converting "+pigSchemaString+" to "+schemaString;

  Schema parsedPigSchema = Utils.getSchemaFromString(pigSchemaString);
  MessageType converted = pigSchemaConverter.convert(parsedPigSchema);
  MessageType expected = MessageTypeParser.parseMessageType(schemaString);
  assertEquals(description, expected, converted);

  // Filtering by the full Pig schema should leave the textual form as it was
  MessageType afterFilter = pigSchemaConverter.filter(converted, parsedPigSchema, null);
  assertEquals(description+" and filtering", converted.toString(), afterFilter.toString());
}

Source File: TestPigServer.java From spork with Apache License 2.0

5 votes

/**
 * Checks that the schema dumped for a LIMIT relation matches the three
 * typed fields declared on the load.
 */
@Test
public void testDescribeLimit() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = limit a 10;");

    Schema actual = server.dumpSchema("b");
    Schema expected = Utils.getSchemaFromString("field1: int,field2: float,field3: chararray");

    assertEquals(expected, actual);
}

Source File: TestPigServer.java From spork with Apache License 2.0

5 votes

/**
 * Checks that the schema dumped for a FILTER relation matches the three
 * typed fields declared on the load.
 */
@Test
public void testDescribeFilter() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = filter a by field1 > 10;");

    Schema actual = server.dumpSchema("b");
    Schema expected = Utils.getSchemaFromString("field1: int,field2: float,field3: chararray");

    assertEquals(expected, actual);
}

Source File: PigSerializationEventConverterTest.java From elasticsearch-hadoop with Apache License 2.0

5 votes

/**
 * Builds a ResourceSchema from a schema string, rethrowing any failure as
 * an unchecked exception.
 *
 * @param schema schema string to parse
 * @return the resource schema built from the parsed string
 * @throws RuntimeException wrapping whatever exception the parse or construction raised
 */
private ResourceSchema createSchema(String schema) {
    try {
        ResourceSchema built = new ResourceSchema(Utils.getSchemaFromString(schema));
        return built;
    } catch (Exception failure) {
        throw new RuntimeException(failure);
    }
}

Source File: DBStorage.java From spork with Apache License 2.0

5 votes

/**
 * Initialise the database connection and prepared statement here.
 * <p>
 * Opens a JDBC connection (with credentials only when both user and
 * password are set), turns auto-commit off, prepares the insert statement,
 * resets the row counter, and restores the schema from the UDF context
 * when one was stored there.
 * <p>
 * If any JDBC step fails after the connection was opened, the connection
 * is closed before the error is propagated so that it is not left open.
 *
 * @param writer record writer supplied by the framework (not used here)
 * @throws IOException if no insert query was configured or a JDBC call fails
 */
@SuppressWarnings("unchecked")
@Override
public void prepareToWrite(RecordWriter writer)
    throws IOException {
  ps = null;
  con = null;
  if (insertQuery == null) {
    throw new IOException("SQL Insert command not specified");
  }
  try {
    if (user == null || pass == null) {
      con = DriverManager.getConnection(jdbcURL);
    } else {
      con = DriverManager.getConnection(jdbcURL, user, pass);
    }
    con.setAutoCommit(false);
    ps = con.prepareStatement(insertQuery);
  } catch (SQLException e) {
    // Log with the cause so the underlying JDBC failure is not lost
    log.error("Unable to connect to JDBC @" + jdbcURL, e);
    // setAutoCommit/prepareStatement may fail after the connection was
    // opened; release it here rather than leaving it open.
    if (con != null) {
      try {
        con.close();
      } catch (SQLException closeFailure) {
        log.error("Unable to close JDBC connection after failed initialisation", closeFailure);
      }
      con = null;
    }
    throw new IOException("JDBC Error", e);
  }
  count = 0;

  // Try to get the schema from the UDFContext object.
  UDFContext udfc = UDFContext.getUDFContext();
  Properties p =
      udfc.getUDFProperties(this.getClass(), new String[]{udfcSignature});
  String strSchema = p.getProperty(SCHEMA_SIGNATURE);
  if (strSchema != null) {
      // Parse the schema from the string stored in the properties object.
      schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));
  }
}

Source File: TestPigServer.java From spork with Apache License 2.0

5 votes

/**
 * Checks that the schema dumped for a CROSS of two loads lists the fields
 * of both inputs, each prefixed with its relation alias.
 */
@Test
public void testDescribeCross() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = load 'b' as (field4, field5: double, field6: chararray );");
    server.registerQuery("c = cross a, b;");

    Schema actual = server.dumpSchema("c");
    Schema expected = Utils.getSchemaFromString(
            "a::field1: int,a::field2: float,a::field3: chararray,b::field4: bytearray,b::field5: double,b::field6: chararray");

    assertEquals(expected, actual);
}

Source File: TestPigServer.java From spork with Apache License 2.0

5 votes

/**
 * Checks that the schema dumped for a COGROUP of two loads consists of the
 * group key followed by one bag per input relation.
 */
@Test
public void testDescribeCogroup() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = load 'b' as (field4, field5: double, field6: chararray );");
    server.registerQuery("c = cogroup a by field1, b by field4;");

    Schema actual = server.dumpSchema("c");
    Schema expected = Utils.getSchemaFromString(
            "group:int,a:{(field1:int,field2:float,field3:chararray)},b:{(field4:bytearray,field5:double,field6:chararray)}");

    assertEquals(expected, actual);
}

Source File: TestUnionOnSchema.java From spork with Apache License 2.0

5 votes

/**
 * Test UNION ONSCHEMA where a common column has additional 'namespace' part
 *  in the column name in both the inputs.
 * <p>
 * Both union inputs are built by flattening the bags of a cogroup, in
 * opposite order; the test checks the schema and the sorted output of the
 * union.
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaScopedColumnNameBothInp2() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    // NOTE(review): cg2 is defined but f2 iterates over cg1; f2 may have been
    // meant to read cg2. The query is left unchanged -- confirm intent.
    String query =
        "   l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); " 
        + " l2 = load '" + INP_FILE_2NUMS + "' as (i : int, x : chararray); " 
        + " cg1 = cogroup l1 by i, l2 by i; "
        + " f1 = foreach cg1 generate group as gkey, flatten(l1), flatten(l2); "
        + " cg2 = cogroup l2 by i, l1 by i; "
        + " f2 = foreach cg1 generate group as gkey, flatten(l2), flatten(l1); "
        + "u = union onschema f1, f2; " ; 
    Util.registerMultiLineQuery(pig, query);
            
    Schema sch = pig.dumpSchema("u");
    Schema expectedSch = 
        Utils.getSchemaFromString("gkey: int, l1::i: int, l1::j: int, l2::i: int, l2::x: chararray");
    // Expected value goes first so a failure message labels the two sides correctly
    assertEquals("Checking expected schema", expectedSch, sch);

    Iterator<Tuple> it = pig.openIterator("u");
    List<Tuple> expectedRes = 
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(1,1,2,1,'2')",
                        "(5,5,3,5,'3')",
                        "(1,1,2,1,'2')",
                        "(5,5,3,5,'3')",
                });
    Util.checkQueryOutputsAfterSort(it, expectedRes);
    
}

Source File: TestPigServer.java From spork with Apache License 2.0

5 votes

/**
 * Checks that the schema dumped for an ORDER BY relation matches the three
 * typed fields declared on the load.
 */
@Test
public void testDescribeSort() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = order a by * desc;");

    Schema actual = server.dumpSchema("b");
    Schema expected = Utils.getSchemaFromString("field1: int,field2: float,field3: chararray");

    assertEquals(expected, actual);
}

Source File: TestTupleRecordConsumer.java From parquet-mr with Apache License 2.0

4 votes

/**
 * Parses a Pig schema string and converts it with a new PigSchemaConverter.
 *
 * @param pigSchemaString Pig schema in string form
 * @return the message type produced by the converter
 * @throws ParserException if the schema string cannot be parsed
 */
private MessageType getMessageType(String pigSchemaString) throws ParserException {
  final Schema parsedPigSchema = Utils.getSchemaFromString(pigSchemaString);
  PigSchemaConverter converter = new PigSchemaConverter();
  return converter.convert(parsedPigSchema);
}

Source File: GroovyEvalFunc.java From spork with Apache License 2.0

4 votes

/**
 * Binds this function to a uniquely named method of the Groovy class
 * loaded from the given script path.
 * <p>
 * The class is looked up in scriptClasses by path and loaded through the
 * Groovy script engine when absent. The method's annotations are then
 * inspected for an output schema (function or literal), and for a
 * non-static method an invocation target is chosen: the supplied one, or a
 * new instance of the script class.
 *
 * @param path       location of the Groovy script
 * @param namespace  namespace of the function; empty string for none
 * @param methodName name of the method to bind; must be unique in the class
 * @param target     instance to invoke a non-static method on, or null to create one
 * @throws IOException if the script cannot be loaded, the method is missing
 *                     or ambiguous, or the class cannot be instantiated
 */
public GroovyEvalFunc(String path, String namespace, String methodName, Object target) throws IOException {
  String fqmn = "".equals(namespace) ? methodName : namespace + ScriptEngine.NAMESPACE_SEPARATOR + methodName;

  //
  // Resolve the script class, loading it on a miss
  //

  Class scriptClass = scriptClasses.get(path);
  if (null == scriptClass) {
    String scriptUri = new File(path).toURI().toString();
    try {
      scriptClass = GroovyScriptEngine.getEngine().loadScriptByName(scriptUri);
    } catch (ScriptException se) {
      throw new IOException(se);
    } catch (ResourceException re) {
      throw new IOException(re);
    }
  }
  scriptClasses.put(path, scriptClass);

  //
  // Locate the method by name; overloads are rejected below
  //

  int matches = 0;
  for (Method candidate : scriptClass.getMethods()) {
    if (!candidate.getName().equals(methodName)) {
      continue;
    }
    this.method = candidate;
    matches++;
  }

  if (null == this.method) {
    throw new IOException("Method " + methodName + " was not found in '" + path + "'");
  }

  if (matches > 1) {
    throw new IOException("There are " + matches + " methods with name '" + methodName + "', please make sure method names are unique within the Groovy class.");
  }

  //
  // Extract schema: the first schema-related annotation found wins
  //

  for (Annotation declared : this.method.getAnnotations()) {
    Class<? extends Annotation> kind = declared.annotationType();
    if (kind.equals(OutputSchemaFunction.class)) {
      this.schemaFunction = new GroovyEvalFuncObject(path, namespace, ((OutputSchemaFunction) declared).value());
      break;
    }
    if (kind.equals(OutputSchema.class)) {
      this.schema = Utils.getSchemaFromString(((OutputSchema) declared).value());
      break;
    }
  }

  //
  // Static methods need no invocation target. Non-static methods use
  // the target passed to the constructor, or a new instance otherwise.
  //

  boolean needsTarget = !Modifier.isStatic(this.method.getModifiers());
  if (needsTarget && null != target) {
    this.invocationTarget = target;
  } else if (needsTarget) {
    try {
      this.invocationTarget = scriptClass.newInstance();
    } catch (InstantiationException ie) {
      throw new IOException(ie);
    } catch (IllegalAccessException iae) {
      throw new IOException(iae);
    }
  }
}

Source File: PigSchemaSaveTest.java From elasticsearch-hadoop with Apache License 2.0

4 votes

/**
 * Serializes a parsed schema to Base64, deserializes it again, and checks
 * that the string form is the same before and after.
 */
@Test
public void testSchemaSerializationPlusBase64() throws Exception {
    Schema original = Utils.getSchemaFromString("name:bytearray,links:{(missing:chararray)}");

    String encoded = IOUtils.serializeToBase64(original);
    Schema restored = IOUtils.deserializeFromBase64(encoded);

    assertEquals(original.toString(), restored.toString());
}

Source File: TestPlanGeneration.java From spork with Apache License 2.0

4 votes

/**
 * Creates a loader whose schema is parsed from the given string.
 *
 * @param schemaString schema in string form
 * @throws ParserException if the string cannot be parsed
 */
public SchemaLoader(String schemaString) throws ParserException {
    this.schema = Utils.getSchemaFromString(schemaString);
}

Source File: EvalFunc.java From spork with Apache License 2.0

3 votes

/**
 * Report the schema of the output of this UDF.  Pig will make use of
 * this in error checking, optimization, and planning.  The schema
 * of input data to this UDF is provided.
 * <p>
 * This default implementation reads the {@link OutputSchema} annotation on
 * the concrete class and parses its value. When the annotation is absent it
 * returns <code>null</code> (no known output schema). A value that cannot be
 * parsed is reported as a {@link RuntimeException} wrapping the parser error.
 *
 * @param input Schema of the input
 * @return Schema of the output, or <code>null</code> when not annotated
 */
public Schema outputSchema(Schema input) {
    OutputSchema declared = this.getClass().getAnnotation(OutputSchema.class);
    if (declared == null) {
        return null;
    }
    try {
        return Utils.getSchemaFromString(declared.value());
    } catch (ParserException e) {
        throw new RuntimeException(e);
    }
}

Java Code Examples for org.apache.pig.impl.util.Utils#getSchemaFromString()