Java Code Examples for org.apache.spark.sql.types.DataTypes#createStructField()
The following examples show how to use
org.apache.spark.sql.types.DataTypes#createStructField().
Example 1
Source File: From envelope with Apache License 2.0 | 6 votes |
private StructType addFieldNameUnderscores(StructType without) { List<StructField> withFields = Lists.newArrayList(); for (StructField withoutField : without.fields()) { String withName = "_" +; if (Arrays.asList(without.fieldNames()).contains(withName)) { throw new RuntimeException("Can not append raw field '" + withName + "' because that " + "field already exists as a result of the translation"); } StructField withField = DataTypes.createStructField( withName, withoutField.dataType(), withoutField.nullable(), withoutField.metadata()); withFields.add(withField); } return DataTypes.createStructType(withFields); }
Example 2
Source File: From envelope with Apache License 2.0 | 6 votes |
/**
 * Appends two single values ("field4" and "field5") to a three-field row and checks
 * that the resulting row has five fields with the expected values under each name.
 */
@Test
public void testAppendWithSchema() {
  StructField stringField = DataTypes.createStructField("field1", DataTypes.StringType, true);
  StructField intField = DataTypes.createStructField("field2", DataTypes.IntegerType, true);
  StructField floatField = DataTypes.createStructField("field3", DataTypes.FloatType, true);
  StructType baseSchema =
      DataTypes.createStructType(Lists.newArrayList(stringField, intField, floatField));
  Row original = new RowWithSchema(baseSchema, "hello", 1, 2.0);

  // Append one value at a time; each call also extends the schema.
  Row withField4 = RowUtils.append(original, "field4", DataTypes.BooleanType, false, true);
  Row withField5 = RowUtils.append(withField4, "field5", DataTypes.StringType, false, "world");

  assertEquals(withField5.length(), 5);
  assertEquals(withField5.getAs("field1"), "hello");
  assertEquals(withField5.getAs("field2"), 1);
  assertEquals(withField5.getAs("field3"), 2.0);
  assertEquals(withField5.getAs("field4"), true);
  assertEquals(withField5.getAs("field5"), "world");
}
Example 3
Source File: From envelope with Apache License 2.0 | 6 votes |
@Before public void before() { field = DataTypes.createStructField("time", DataTypes.TimestampType, true); schema = DataTypes.createStructType(Lists.newArrayList(field)); tm = new TimestampTimeModel(); tm.configure(ConfigFactory.empty()); tm.configureFieldNames(Lists.newArrayList(; Timestamp firstTs = new Timestamp(1000L); firstTs.setNanos(1000); Timestamp secondTs = new Timestamp(2000L); secondTs.setNanos(100); Timestamp thirdTs = new Timestamp(2000L); thirdTs.setNanos(101); first = new RowWithSchema(schema, firstTs); second = new RowWithSchema(schema, secondTs); third = new RowWithSchema(schema, thirdTs); }
Example 4
Source File: From envelope with Apache License 2.0 | 6 votes |
/**
 * Appends one three-field row to another and checks that the result equals a
 * six-field row holding the fields and values of both, base row first.
 */
@Test
public void testAppendRow() {
  // Left-hand side of the append.
  StructField field1 = DataTypes.createStructField("field1", DataTypes.StringType, true);
  StructField field2 = DataTypes.createStructField("field2", DataTypes.IntegerType, true);
  StructField field3 = DataTypes.createStructField("field3", DataTypes.FloatType, true);
  StructType leftSchema = DataTypes.createStructType(Lists.newArrayList(field1, field2, field3));
  Row leftRow = new RowWithSchema(leftSchema, "hello", 1, 1.0);

  // Right-hand side of the append.
  StructField field4 = DataTypes.createStructField("field4", DataTypes.StringType, true);
  StructField field5 = DataTypes.createStructField("field5", DataTypes.IntegerType, true);
  StructField field6 = DataTypes.createStructField("field6", DataTypes.FloatType, true);
  StructType rightSchema = DataTypes.createStructType(Lists.newArrayList(field4, field5, field6));
  Row rightRow = new RowWithSchema(rightSchema, "world", -1, -1.0);

  Row actual = RowUtils.append(leftRow, rightRow);

  StructType combinedSchema = DataTypes.createStructType(
      Lists.newArrayList(field1, field2, field3, field4, field5, field6));
  Row expected = new RowWithSchema(combinedSchema, "hello", 1, 1.0, "world", -1, -1.0);

  assertEquals(expected, actual);
}
Example 5
Source File: From sparkResearch with Apache License 2.0 | 5 votes |
public static void main(String[] args) {
SparkSession sparkSession = SparkSession.builder()
.appName("spark app")
JavaRDD<String> javaRDD = sparkSession.sparkContext().textFile("URL", 1).toJavaRDD();
String schema = "name age";
List<StructField> structFieldList = new ArrayList<>();
for (String fieldName : schema.split(" ")) {
StructField structField = DataTypes.createStructField(fieldName, DataTypes.StringType, true);
StructType structType = DataTypes.createStructType(structFieldList);
JavaRDD<Row> rowJavaRDD = Function<String, Row>() {
public Row call(String v1) {
String[] attirbutes = v1.split(",");
return RowFactory.create(attirbutes[0], attirbutes[1].trim());
Dataset<Row> dataset = sparkSession.createDataFrame(rowJavaRDD, structType);
Dataset<Row> result = sparkSession.sql("select * from user");;
Example 6
Source File: From envelope with Apache License 2.0 | 5 votes |
/**
 * Takes the subset {"field1", "field3"} of a three-field schema and checks that
 * exactly those two fields come back, in that order.
 */
@Test
public void testSubsetSchemaSomeFields() {
  StructField first = DataTypes.createStructField("field1", DataTypes.StringType, true);
  StructField second = DataTypes.createStructField("field2", DataTypes.IntegerType, true);
  StructField third = DataTypes.createStructField("field3", DataTypes.FloatType, true);
  StructType fullSchema = DataTypes.createStructType(Lists.newArrayList(first, second, third));

  StructType subset = SchemaUtils.subsetSchema(fullSchema, Lists.newArrayList("field1", "field3"));

  StructField[] subsetFields = subset.fields();
  assertEquals(subsetFields.length, 2);
  assertEquals(subsetFields[0].name(), "field1");
  assertEquals(subsetFields[1].name(), "field3");
}
Example 7
Source File: From mmtf-spark with Apache License 2.0 | 5 votes |
/** * Converts a JavaRDD<Row> to a Dataset<Row>. This method only * supports simple data types and all data need to be not null. * * @param data JavaRDD of Row objects * @param colNames names of the columns in a row * @return */ public static Dataset<Row> getDataset(JavaRDD<Row> data, String...colNames) { // create the schema for the dataset Row row = data.first(); int length = row.length(); if (length != colNames.length) { throw new IllegalArgumentException("colNames length does not match row length"); } StructField[] sf = new StructField[length]; for (int i = 0; i < row.size(); i++) { Object o = row.get(i); // TODO add more types if (o instanceof String) { sf[i] = DataTypes.createStructField(colNames[i], DataTypes.StringType, false); } else if (o instanceof Integer) { sf[i] = DataTypes.createStructField(colNames[i], DataTypes.IntegerType, false); } else if (o instanceof Long) { sf[i] = DataTypes.createStructField(colNames[i], DataTypes.LongType, false); } else if (o instanceof Float) { sf[i] = DataTypes.createStructField(colNames[i], DataTypes.FloatType, false); } else if (o instanceof Double) { sf[i] = DataTypes.createStructField(colNames[i], DataTypes.DoubleType, false); } else if (o instanceof Boolean) { sf[i] = DataTypes.createStructField(colNames[i], DataTypes.BooleanType, false); } else { System.out.println("Data type not implemented yet"); } } StructType schema = new StructType(sf); // convert JavaRDD to Dataset SparkSession spark = SparkSession.builder().getOrCreate(); return spark.createDataFrame(data, schema); }
Example 8
Source File: From envelope with Apache License 2.0 | 5 votes |
/**
 * Subsets a three-field row against an empty schema and checks that the
 * resulting row has no fields.
 */
@Test
public void testSubsetRowNoFields() {
  StructType fullSchema = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("field1", DataTypes.StringType, true),
      DataTypes.createStructField("field2", DataTypes.IntegerType, true),
      DataTypes.createStructField("field3", DataTypes.FloatType, true)));
  StructType emptySchema = DataTypes.createStructType(Lists.<StructField>newArrayList());

  Row fullRow = new RowWithSchema(fullSchema, "hello", 1, 2.0);
  Row emptyRow = RowUtils.subsetRow(fullRow, emptySchema);

  assertEquals(emptyRow.length(), 0);
}
Example 9
Source File: From envelope with Apache License 2.0 | 5 votes |
/**
 * Subtracts an empty list of field names from a three-field schema and checks
 * that all three fields remain, in their original order.
 */
@Test
public void testSubtractSchemaNoFields() {
  StructField first = DataTypes.createStructField("field1", DataTypes.StringType, true);
  StructField second = DataTypes.createStructField("field2", DataTypes.IntegerType, true);
  StructField third = DataTypes.createStructField("field3", DataTypes.FloatType, true);
  StructType fullSchema = DataTypes.createStructType(Lists.newArrayList(first, second, third));

  StructType remaining = SchemaUtils.subtractSchema(fullSchema, Lists.<String>newArrayList());

  StructField[] remainingFields = remaining.fields();
  assertEquals(remainingFields.length, 3);
  assertEquals(remainingFields[0].name(), "field1");
  assertEquals(remainingFields[1].name(), "field2");
  assertEquals(remainingFields[2].name(), "field3");
}
Example 10
Source File: From mmtf-spark with Apache License 2.0 | 5 votes |
/**
 * Returns a schema to create Spark Datasets. This schema must match the
 * order in which the data are returned by the {@code getAsObject()} method.
 *
 * <p>Six nullable string columns (atom, element, group, groupNum, type, chain)
 * are followed by one nullable float column (nbFactor); each column name has
 * {@code index} appended.
 *
 * @param index
 *            an integer index to label an interaction center
 * @return schema to represent an interaction center in a Spark Dataset.
 */
public static StructField[] getStructFields(int index) {
	final boolean nullable = true;
	final String[] stringColumns = {"atom", "element", "group", "groupNum", "type", "chain"};

	StructField[] fields = new StructField[stringColumns.length + 1];
	for (int i = 0; i < stringColumns.length; i++) {
		fields[i] = DataTypes.createStructField(stringColumns[i] + index, DataTypes.StringType, nullable);
	}
	// The only non-string column comes last.
	fields[stringColumns.length] =
			DataTypes.createStructField("nbFactor" + index, DataTypes.FloatType, nullable);
	return fields;
}
Example 11
Source File: From envelope with Apache License 2.0 | 5 votes |
/**
 * Subsets a row against its own full schema and checks that the result
 * equals the original row.
 */
@Test
public void testSubsetRowAllFields() {
  StructType fullSchema = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("field1", DataTypes.StringType, true),
      DataTypes.createStructField("field2", DataTypes.IntegerType, true),
      DataTypes.createStructField("field3", DataTypes.FloatType, true)));

  Row original = new RowWithSchema(fullSchema, "hello", 1, 2.0);
  Row subset = RowUtils.subsetRow(original, fullSchema);

  assertEquals(original, subset);
}
Example 12
Source File: From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Creates the three-column schema: a nullable long "conglomerateId", a nullable
 * string "key" and a nullable binary "value", in that order.
 *
 * @return the schema built from those three fields
 */
private StructType createSchema() {
    List<StructField> columns = new ArrayList<>();
    columns.add(DataTypes.createStructField("conglomerateId", DataTypes.LongType, true));
    columns.add(DataTypes.createStructField("key", DataTypes.StringType, true));
    columns.add(DataTypes.createStructField("value", DataTypes.BinaryType, true));
    return DataTypes.createStructType(columns);
}
Example 13
Source File: From jpmml-evaluator-spark with GNU Affero General Public License v3.0 | 5 votes |
public StructField init(Evaluator evaluator){
TargetField field = getField();
DataType dataType = field.getDataType();
return DataTypes.createStructField(getColumnName(), SchemaUtil.translateDataType(dataType), false);
Example 14
Source File: From envelope with Apache License 2.0 | 4 votes |
public void configureFieldNames(List<String> fieldNames) {
this.field = DataTypes.createStructField(fieldNames.get(0), DataTypes.StringType, true);
Example 15
Source File: From envelope with Apache License 2.0 | 4 votes |
public void configureFieldNames(List<String> fieldNames) {
this.field = DataTypes.createStructField(fieldNames.get(0), DataTypes.LongType, true);
Example 16
Source File: From envelope with Apache License 2.0 | 4 votes |
public void configureFieldNames(List<String> fieldNames) {
this.field = DataTypes.createStructField(fieldNames.get(0), DataTypes.StringType, true);
Example 17
Source File: From spliceengine with GNU Affero General Public License v3.0 | 4 votes |
public StructField getStructField(String columnName) {
return DataTypes.createStructField(columnName, DataTypes.BooleanType, true);
Example 18
Source File: From spliceengine with GNU Affero General Public License v3.0 | 4 votes |
public StructField getStructField(String columnName) {
return DataTypes.createStructField(columnName, DataTypes.ByteType, true);
Example 19
Source File: From spliceengine with GNU Affero General Public License v3.0 | 4 votes |
public StructField getStructField(String columnName) {
return DataTypes.createStructField(columnName, DataTypes.TimestampType, true);
Example 20
Source File: From spliceengine with GNU Affero General Public License v3.0 | 4 votes |
public StructField getStructField(String columnName) {
return DataTypes.createStructField(columnName, DataTypes.IntegerType, true);