Java Code Examples for org.apache.spark.sql.types.StructType#fields()
The following examples show how to use
org.apache.spark.sql.types.StructType#fields().
Each example comes from an open-source project; the source file, project, and license are noted above each listing.
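As a quick orientation before the project examples: StructType#fields() returns the schema's columns as a StructField[] array, and each StructField exposes name(), dataType(), nullable(), and metadata(). The following minimal sketch (written for this page, not taken from any of the projects below) shows the typical iteration pattern:

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class StructTypeFieldsDemo {
    public static void main(String[] args) {
        // Build a simple two-column schema.
        StructType schema = DataTypes.createStructType(new StructField[] {
            DataTypes.createStructField("id", DataTypes.LongType, false),
            DataTypes.createStructField("name", DataTypes.StringType, true) });

        // fields() returns the schema's columns as a StructField[].
        for (StructField field : schema.fields()) {
            System.out.println(field.name() + ": " + field.dataType().simpleString()
                + " (nullable=" + field.nullable() + ")");
        }
    }
}

Most of the examples below follow this same pattern: fetch the array once, then inspect each field's name and data type.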
Example 1
Source File: MLContextUtil.java From systemds with Apache License 2.0

/**
 * Examine the DataFrame schema to determine whether the data appears to be
 * a matrix.
 *
 * @param df
 *            the DataFrame
 * @return {@code true} if the DataFrame appears to be a matrix,
 *         {@code false} otherwise
 */
public static boolean doesDataFrameLookLikeMatrix(Dataset<Row> df) {
    StructType schema = df.schema();
    StructField[] fields = schema.fields();
    if (fields == null) {
        return true;
    }
    for (StructField field : fields) {
        DataType dataType = field.dataType();
        if ((dataType != DataTypes.DoubleType) && (dataType != DataTypes.IntegerType)
                && (dataType != DataTypes.LongType)
                && (!(dataType instanceof org.apache.spark.ml.linalg.VectorUDT))
                && (!(dataType instanceof org.apache.spark.mllib.linalg.VectorUDT))) {
            // uncomment if we support arrays of doubles for matrices
            // if (dataType instanceof ArrayType) {
            //     ArrayType arrayType = (ArrayType) dataType;
            //     if (arrayType.elementType() == DataTypes.DoubleType) {
            //         continue;
            //     }
            // }
            return false;
        }
    }
    return true;
}
Example 2
Source File: UnaryTransformer.java From ambiverse-nlu with Apache License 2.0

@Override
public StructType transformSchema(StructType structType) {
    String inputCol = getInputCol();
    String outputCol = getOutputCol();
    DataType inputType = structType.apply(inputCol).dataType();
    this.validateInputType(inputType);
    List<String> names = Arrays.asList(structType.fieldNames());
    Cond.require(!names.contains(outputCol),
        "The output column " + outputCol + " already exists in this schema!");
    List<StructField> fields = new ArrayList<>();
    for (int i = 0; i < structType.fields().length; i++) {
        fields.add(structType.fields()[i]);
    }
    DataType dt = getOutputDataType();
    fields.add(DataTypes.createStructField(outputCol, dt, isOutputDataTypeNullable()));
    return DataTypes.createStructType(fields);
}
Example 3
Source File: DBClientWrapper.java From spark-data-sources with MIT License

public static edb.common.Row sparkToDBRow(org.apache.spark.sql.Row row, StructType type) {
    edb.common.Row dbRow = new edb.common.Row();
    StructField[] fields = type.fields();
    for (int i = 0; i < type.size(); i++) {
        StructField sf = fields[i];
        if (sf.dataType() == DataTypes.StringType) {
            dbRow.addField(new edb.common.Row.StringField(sf.name(), row.getString(i)));
        } else if (sf.dataType() == DataTypes.DoubleType) {
            dbRow.addField(new edb.common.Row.DoubleField(sf.name(), row.getDouble(i)));
        } else if (sf.dataType() == DataTypes.LongType) {
            dbRow.addField(new edb.common.Row.Int64Field(sf.name(), row.getLong(i)));
        } else {
            // TODO: type leakage
        }
    }
    return dbRow;
}
Example 4
Source File: Reader.java From iceberg with Apache License 2.0

PartitionRowConverter(Schema partitionSchema, PartitionSpec spec) {
    StructType partitionType = SparkSchemaUtil.convert(partitionSchema);
    StructField[] fields = partitionType.fields();

    this.types = new DataType[fields.length];
    this.positions = new int[types.length];
    this.javaTypes = new Class<?>[types.length];
    this.reusedRow = new GenericInternalRow(types.length);

    List<PartitionField> partitionFields = spec.fields();
    for (int rowIndex = 0; rowIndex < fields.length; rowIndex += 1) {
        this.types[rowIndex] = fields[rowIndex].dataType();

        int sourceId = partitionSchema.columns().get(rowIndex).fieldId();
        for (int specIndex = 0; specIndex < partitionFields.size(); specIndex += 1) {
            PartitionField field = spec.fields().get(specIndex);
            if (field.sourceId() == sourceId && "identity".equals(field.transform().toString())) {
                positions[rowIndex] = specIndex;
                javaTypes[rowIndex] = spec.javaClasses()[specIndex];
                break;
            }
        }
    }
}
Example 5
Source File: SchemaConverter.java From geowave with Apache License 2.0

public static SimpleFeatureType schemaToFeatureType(final StructType schema, final String typeName) {
    final SimpleFeatureTypeBuilder typeBuilder = new SimpleFeatureTypeBuilder();
    typeBuilder.setName(typeName);
    typeBuilder.setNamespaceURI(BasicFeatureTypes.DEFAULT_NAMESPACE);
    try {
        typeBuilder.setCRS(CRS.decode("EPSG:4326", true));
    } catch (final FactoryException e) {
        LOGGER.error(e.getMessage(), e);
    }

    final AttributeTypeBuilder attrBuilder = new AttributeTypeBuilder();

    for (final StructField field : schema.fields()) {
        final AttributeDescriptor attrDesc = attrDescFromStructField(attrBuilder, field);
        typeBuilder.add(attrDesc);
    }

    return typeBuilder.buildFeatureType();
}
Example 6
Source File: ExternalTableUtils.java From spliceengine with GNU Affero General Public License v3.0

public static void setPartitionColumnTypes(StructType dataSchema, int[] baseColumnMap, StructType tableSchema) {
    int ncolumns = dataSchema.fields().length;
    int nPartitions = baseColumnMap.length;
    for (int i = 0; i < baseColumnMap.length; ++i) {
        String name = dataSchema.fields()[ncolumns - i - 1].name();
        org.apache.spark.sql.types.DataType type =
            tableSchema.fields()[baseColumnMap[nPartitions - i - 1]].dataType();
        boolean nullable = tableSchema.fields()[baseColumnMap[nPartitions - i - 1]].nullable();
        Metadata metadata = tableSchema.fields()[baseColumnMap[nPartitions - i - 1]].metadata();
        StructField field = new StructField(name, type, nullable, metadata);
        dataSchema.fields()[ncolumns - i - 1] = field;
    }
}
Example 7
Source File: ColumnUtils.java From net.jgp.labs.spark with Apache License 2.0

public static Metadata getMetadata(Dataset<Row> df, String colName) {
    StructType schema = df.schema();
    StructField[] fields = schema.fields();
    for (StructField field : fields) {
        // TODO check on case
        if (field.name().compareTo(colName) == 0) {
            return field.metadata();
        }
    }
    return null;
}
Example 8
Source File: UnsafeFixedWidthAggregationMap.java From indexr with Apache License 2.0

/**
 * @return true if UnsafeFixedWidthAggregationMap supports aggregation buffers with the given
 *         schema, false otherwise.
 */
public static boolean supportsAggregationBufferSchema(StructType schema) {
    for (StructField field : schema.fields()) {
        if (!UnsafeRow.isMutable(field.dataType())) {
            return false;
        }
    }
    return true;
}
Example 9
Source File: DataFrames.java From DataVec with Apache License 2.0

/**
 * Create a datavec schema from a struct type
 *
 * @param structType the struct type to create the schema from
 * @return the created schema
 */
public static Schema fromStructType(StructType structType) {
    Schema.Builder builder = new Schema.Builder();
    StructField[] fields = structType.fields();
    String[] fieldNames = structType.fieldNames();
    for (int i = 0; i < fields.length; i++) {
        String name = fields[i].dataType().typeName().toLowerCase();
        switch (name) {
            case "double":
                builder.addColumnDouble(fieldNames[i]);
                break;
            case "float":
                builder.addColumnFloat(fieldNames[i]);
                break;
            case "long":
                builder.addColumnLong(fieldNames[i]);
                break;
            case "int":
            case "integer":
                builder.addColumnInteger(fieldNames[i]);
                break;
            case "string":
                builder.addColumnString(fieldNames[i]);
                break;
            default:
                throw new RuntimeException("Unknown type: " + name);
        }
    }
    return builder.build();
}
Example 10
Source File: Reader.java From iceberg with Apache License 2.0

StructLikeInternalRow(StructType struct) {
    this.types = new DataType[struct.size()];

    StructField[] fields = struct.fields();
    for (int i = 0; i < fields.length; i += 1) {
        types[i] = fields[i].dataType();
    }
}
Example 11
Source File: MLContextTest.java From systemds with Apache License 2.0

@Test
public void testOutputDataFrameOfVectorsDML() {
    System.out.println("MLContextTest - output DataFrame of vectors DML");

    String s = "m=matrix('1 2 3 4',rows=2,cols=2);";
    Script script = dml(s).out("m");
    MLResults results = ml.execute(script);
    Dataset<Row> df = results.getDataFrame("m", true);
    Dataset<Row> sortedDF = df.sort(RDDConverterUtils.DF_ID_COLUMN);

    // verify column types
    StructType schema = sortedDF.schema();
    StructField[] fields = schema.fields();
    StructField idColumn = fields[0];
    StructField vectorColumn = fields[1];
    Assert.assertTrue(idColumn.dataType() instanceof DoubleType);
    Assert.assertTrue(vectorColumn.dataType() instanceof VectorUDT);

    List<Row> list = sortedDF.collectAsList();

    Row row1 = list.get(0);
    Assert.assertEquals(1.0, row1.getDouble(0), 0.0);
    Vector v1 = (DenseVector) row1.get(1);
    double[] arr1 = v1.toArray();
    Assert.assertArrayEquals(new double[] { 1.0, 2.0 }, arr1, 0.0);

    Row row2 = list.get(1);
    Assert.assertEquals(2.0, row2.getDouble(0), 0.0);
    Vector v2 = (DenseVector) row2.get(1);
    double[] arr2 = v2.toArray();
    Assert.assertArrayEquals(new double[] { 3.0, 4.0 }, arr2, 0.0);
}
Example 12
Source File: FrameRDDConverterUtils.java From systemds with Apache License 2.0

/**
 * Obtain column vector from DataFrame schema
 *
 * @param dfschema schema as StructType
 * @param containsID if true, contains ID column
 * @return 0-based column index of vector column, -1 if no vector.
 */
private static int getColVectFromDFSchema(StructType dfschema, boolean containsID) {
    int off = containsID ? 1 : 0;
    for (int i = off; i < dfschema.fields().length; i++) {
        StructField structType = dfschema.apply(i);
        if (structType.dataType() instanceof VectorUDT)
            return i - off;
    }
    return -1;
}
Example 13
Source File: MLContextConversionUtil.java From systemds with Apache License 2.0

/**
 * If the MatrixFormat of the DataFrame has not been explicitly specified,
 * attempt to determine the proper MatrixFormat.
 *
 * @param dataFrame
 *            the Spark {@code DataFrame}
 * @param matrixMetadata
 *            the matrix metadata, if available
 */
public static void determineMatrixFormatIfNeeded(Dataset<Row> dataFrame, MatrixMetadata matrixMetadata) {
    if (matrixMetadata == null) {
        return;
    }
    MatrixFormat matrixFormat = matrixMetadata.getMatrixFormat();
    if (matrixFormat != null) {
        return;
    }

    StructType schema = dataFrame.schema();
    boolean hasID = false;
    try {
        schema.fieldIndex(RDDConverterUtils.DF_ID_COLUMN);
        hasID = true;
    } catch (IllegalArgumentException iae) {
    }

    StructField[] fields = schema.fields();
    MatrixFormat mf = null;
    if (hasID) {
        if (fields[1].dataType() instanceof VectorUDT) {
            mf = MatrixFormat.DF_VECTOR_WITH_INDEX;
        } else {
            mf = MatrixFormat.DF_DOUBLES_WITH_INDEX;
        }
    } else {
        if (fields[0].dataType() instanceof VectorUDT) {
            mf = MatrixFormat.DF_VECTOR;
        } else {
            mf = MatrixFormat.DF_DOUBLES;
        }
    }

    if (mf == null) {
        throw new MLContextException("DataFrame format not recognized as an accepted SystemDS MatrixFormat");
    }
    matrixMetadata.setMatrixFormat(mf);
}
Example 14
Source File: DBClientWrapper.java From spark-data-sources with MIT License

public static Schema sparkToDbSchema(StructType st) {
    Schema schema = new Schema();
    for (StructField sf : st.fields()) {
        if (sf.dataType() == DataTypes.StringType) {
            schema.addColumn(sf.name(), Schema.ColumnType.STRING);
        } else if (sf.dataType() == DataTypes.DoubleType) {
            schema.addColumn(sf.name(), Schema.ColumnType.DOUBLE);
        } else if (sf.dataType() == DataTypes.LongType) {
            schema.addColumn(sf.name(), Schema.ColumnType.INT64);
        } else {
            // TODO: type leakage
        }
    }
    return schema;
}
Example 15
Source File: ExternalTableUtils.java From spliceengine with GNU Affero General Public License v3.0

public static void checkSchema(StructType tableSchema,
                               StructType dataSchema,
                               int[] partitionColumnMap,
                               String location) throws StandardException {
    StructField[] tableFields = tableSchema.fields();
    StructField[] dataFields = dataSchema.fields();
    if (tableFields.length != dataFields.length) {
        throw StandardException.newException(SQLState.INCONSISTENT_NUMBER_OF_ATTRIBUTE,
                tableFields.length, dataFields.length, location);
    }

    StructField[] partitionedTableFields = new StructField[tableSchema.fields().length];
    Set<Integer> partitionColumns = new HashSet<>();
    for (int pos : partitionColumnMap) {
        partitionColumns.add(pos);
    }
    int index = 0;
    for (int i = 0; i < tableFields.length; ++i) {
        if (!partitionColumns.contains(i)) {
            partitionedTableFields[index++] = tableFields[i];
        }
    }

    for (int i = 0; i < tableFields.length - partitionColumnMap.length; ++i) {
        String tableFieldTypeName = partitionedTableFields[i].dataType().typeName();
        String dataFieldTypeName = dataFields[i].dataType().typeName();
        if (!tableFieldTypeName.equals(dataFieldTypeName)) {
            throw StandardException.newException(SQLState.INCONSISTENT_DATATYPE_ATTRIBUTES,
                    tableFields[i].name(), tableFields[i].dataType().toString(),
                    dataFields[i].name(), dataFields[i].dataType().toString(), location);
        }
    }
}
Example 16
Source File: SqlResultsWriter.java From geowave with Apache License 2.0

public void writeResults(String typeName) {
    if (typeName == null) {
        typeName = DEFAULT_TYPE_NAME;
        LOGGER.warn(
            "Using default type name (adapter id): '" + DEFAULT_TYPE_NAME + "' for SQL output");
    }

    final StructType schema = results.schema();
    final SimpleFeatureType featureType = SchemaConverter.schemaToFeatureType(schema, typeName);
    final SimpleFeatureBuilder sfBuilder = new SimpleFeatureBuilder(featureType);
    final FeatureDataAdapter featureAdapter = new FeatureDataAdapter(featureType);

    final DataStore featureStore = outputDataStore.createDataStore();
    final Index featureIndex =
        new SpatialDimensionalityTypeProvider().createIndex(new SpatialOptions());

    featureStore.addType(featureAdapter, featureIndex);

    try (Writer writer = featureStore.createWriter(featureAdapter.getTypeName())) {
        final List<Row> rows = results.collectAsList();
        for (int r = 0; r < rows.size(); r++) {
            final Row row = rows.get(r);
            for (int i = 0; i < schema.fields().length; i++) {
                final StructField field = schema.apply(i);
                final Object rowObj = row.apply(i);
                if (rowObj != null) {
                    if (field.name().equals("geom")) {
                        final Geometry geom = (Geometry) rowObj;
                        sfBuilder.set("geom", geom);
                    } else if (field.dataType() == DataTypes.TimestampType) {
                        final long millis = ((Timestamp) rowObj).getTime();
                        final Date date = new Date(millis);
                        sfBuilder.set(field.name(), date);
                    } else {
                        sfBuilder.set(field.name(), rowObj);
                    }
                }
            }
            final SimpleFeature sf = sfBuilder.buildFeature("result-" + nf.format(r));
            writer.write(sf);
        }
    }
}
Example 17
Source File: SparkTypeToType.java From iceberg with Apache License 2.0

SparkTypeToType(StructType root) {
    this.root = root;
    // the root struct's fields use the first ids
    this.nextId = root.fields().length;
}
Example 18
Source File: SchemaIntrospectionApp.java From net.jgp.labs.spark with Apache License 2.0

private void start() {
    SparkSession spark = SparkSession.builder()
        .appName("Array to Dataframe (Dataset<Row>)")
        .master("local")
        .getOrCreate();

    StructType schema = DataTypes.createStructType(new StructField[] {
        DataTypes.createStructField("id", DataTypes.IntegerType, false),
        DataTypes.createStructField("value-s", DataTypes.StringType, false),
        DataTypes.createStructField("value-d", DataTypes.DoubleType, false),
        DataTypes.createStructField(
            "array",
            DataTypes.createArrayType(DataTypes.StringType, false),
            false),
        DataTypes.createStructField(
            "struct",
            DataTypes.createStructType(new StructField[] {
                DataTypes.createStructField("sid", DataTypes.IntegerType, false),
                DataTypes.createStructField("svalue", DataTypes.StringType, false) }),
            false),
        DataTypes.createStructField(
            "array-struct",
            DataTypes.createArrayType(
                DataTypes.createStructType(new StructField[] {
                    DataTypes.createStructField("asid", DataTypes.IntegerType, false),
                    DataTypes.createStructField("asvalue", DataTypes.StringType, false) })),
            false) });

    List<Row> rows = new ArrayList<>();
    for (int x = 0; x < 10; x++) {
        List<Row> subrows = new ArrayList<>();
        for (int y = 1000; y < 1003; y++) {
            subrows.add(RowFactory.create(y, "Sub " + y));
        }
        Row str = RowFactory.create(x * 5000, "Struct #" + x);
        String[] array = new String[] { "v" + (x * 100), "v" + (x * 100 + 1) };
        rows.add(
            RowFactory.create(x, "Value " + x, x / 4.0, array, str, subrows));
    }

    Dataset<Row> df = spark.createDataFrame(rows, schema);
    df.show(false);
    df.printSchema();

    StructType readSchema = df.schema();

    String[] fieldNames = readSchema.fieldNames();
    int i = 0;
    for (String fieldName : fieldNames) {
        log.info("Field #{}: '{}'", i++, fieldName);
    }
    log.info("Catalog: '{}'", readSchema.catalogString());

    StructField[] fields = readSchema.fields();
    i = 0;
    for (StructField field : fields) {
        log.info("DDL for field #{}: '{}'", i++, field.toDDL());
    }
}
Example 19
Source File: SparkRowConverterTest.java From bunsen with Apache License 2.0

/**
 * Recursively walks the schema to ensure there are no struct fields that are empty.
 */
private void checkNoEmptyStructs(StructType schema, String fieldName) {
    Assert.assertNotEquals("Struct field " + fieldName + " is empty",
        0,
        schema.fields().length);

    for (StructField field : schema.fields()) {
        if (field.dataType() instanceof StructType) {
            checkNoEmptyStructs((StructType) field.dataType(), field.name());
        } else if (field.dataType() instanceof ArrayType) {
            ArrayType arrayType = (ArrayType) field.dataType();
            if (arrayType.elementType() instanceof StructType) {
                if (!field.name().equals("contained")) {
                    checkNoEmptyStructs((StructType) arrayType.elementType(), field.name());
                }
            }
        }
    }
}