Java Code Examples for org.apache.spark.sql.types.StructField#dataType()
The following examples show how to use
org.apache.spark.sql.types.StructField#dataType().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MLContextUtil.java From systemds with Apache License 2.0 | 7 votes |
/** * Examine the DataFrame schema to determine whether the data appears to be * a matrix. * * @param df * the DataFrame * @return {@code true} if the DataFrame appears to be a matrix, * {@code false} otherwise */ public static boolean doesDataFrameLookLikeMatrix(Dataset<Row> df) { StructType schema = df.schema(); StructField[] fields = schema.fields(); if (fields == null) { return true; } for (StructField field : fields) { DataType dataType = field.dataType(); if ((dataType != DataTypes.DoubleType) && (dataType != DataTypes.IntegerType) && (dataType != DataTypes.LongType) && (!(dataType instanceof org.apache.spark.ml.linalg.VectorUDT)) && (!(dataType instanceof org.apache.spark.mllib.linalg.VectorUDT))) { // uncomment if we support arrays of doubles for matrices // if (dataType instanceof ArrayType) { // ArrayType arrayType = (ArrayType) dataType; // if (arrayType.elementType() == DataTypes.DoubleType) { // continue; // } // } return false; } } return true; }
Example 2
Source File: DBClientWrapper.java From spark-data-sources with MIT License | 6 votes |
public static edb.common.Row sparkToDBRow(org.apache.spark.sql.Row row, StructType type) { edb.common.Row dbRow = new edb.common.Row(); StructField[] fields = type.fields(); for (int i = 0; i < type.size(); i++) { StructField sf = fields[i]; if (sf.dataType() == DataTypes.StringType) { dbRow.addField(new edb.common.Row.StringField(sf.name(), row.getString(i))); } else if (sf.dataType() == DataTypes.DoubleType) { dbRow.addField(new edb.common.Row.DoubleField(sf.name(), row.getDouble(i))); } else if (sf.dataType() == DataTypes.LongType) { dbRow.addField(new edb.common.Row.Int64Field(sf.name(), row.getLong(i))); } else { // TODO: type leakage } } return dbRow; }
Example 3
Source File: SchemaConverter.java From geowave with Apache License 2.0 | 6 votes |
/**
 * Builds an attribute descriptor for a single schema field.
 *
 * A field named "geom" is always bound to {@code Geometry} and marked
 * non-nillable. Otherwise the binding class is chosen from the field's Spark
 * data type (string, double, float, long, integer, boolean, timestamp).
 *
 * @param attrBuilder the builder used to create the descriptor
 * @param field       the schema field to describe
 * @return the descriptor, or {@code null} when the field's data type is not
 *         one of the handled types
 */
private static AttributeDescriptor attrDescFromStructField(
    final AttributeTypeBuilder attrBuilder,
    final StructField field) {
  final String fieldName = field.name();
  if (fieldName.equals("geom")) {
    return attrBuilder.binding(Geometry.class).nillable(false).buildDescriptor("geom");
  }

  if (field.dataType() == DataTypes.StringType) {
    return attrBuilder.binding(String.class).buildDescriptor(fieldName);
  }
  if (field.dataType() == DataTypes.DoubleType) {
    return attrBuilder.binding(Double.class).buildDescriptor(fieldName);
  }
  if (field.dataType() == DataTypes.FloatType) {
    return attrBuilder.binding(Float.class).buildDescriptor(fieldName);
  }
  if (field.dataType() == DataTypes.LongType) {
    return attrBuilder.binding(Long.class).buildDescriptor(fieldName);
  }
  if (field.dataType() == DataTypes.IntegerType) {
    return attrBuilder.binding(Integer.class).buildDescriptor(fieldName);
  }
  if (field.dataType() == DataTypes.BooleanType) {
    return attrBuilder.binding(Boolean.class).buildDescriptor(fieldName);
  }
  if (field.dataType() == DataTypes.TimestampType) {
    // Timestamps are exposed as java.util.Date on the feature side.
    return attrBuilder.binding(Date.class).buildDescriptor(fieldName);
  }

  return null;
}
Example 4
Source File: SimpleFeatureMapper.java From geowave with Apache License 2.0 | 6 votes |
/**
 * Maps a feature to a Spark row that follows {@code schema}.
 *
 * Attributes are read by position. Null attributes leave a null slot in the
 * row. Values in timestamp columns are converted from {@code Date} to
 * {@code Timestamp}; all other values are stored unchanged.
 *
 * @param feature the feature whose attributes are copied
 * @return a row holding the converted attribute values
 */
@Override
public Row call(final SimpleFeature feature) throws Exception {
  final int fieldCount = schema.size();
  final Object[] values = new Serializable[fieldCount];

  for (int idx = 0; idx < fieldCount; idx++) {
    final Object attribute = feature.getAttribute(idx);
    if (attribute == null) {
      continue;
    }

    final StructField column = schema.apply(idx);
    if (column.name().equals("geom")) {
      values[idx] = attribute;
    } else if (column.dataType() == DataTypes.TimestampType) {
      final long millis = ((Date) attribute).getTime();
      values[idx] = new Timestamp(millis);
    } else if (column.dataType() != null) {
      values[idx] = attribute;
    } else {
      LOGGER.error("Unexpected attribute in field(" + column.name() + "): " + attribute);
    }
  }

  return new GenericRowWithSchema(values, schema);
}
Example 5
Source File: MLContextUtil.java From systemds with Apache License 2.0 | 6 votes |
/** * Examine the DataFrame schema to determine whether the data appears to be * a matrix. * * @param df * the DataFrame * @return {@code true} if the DataFrame appears to be a matrix, * {@code false} otherwise */ public static boolean doesDataFrameLookLikeMatrix(Dataset<Row> df) { StructType schema = df.schema(); StructField[] fields = schema.fields(); if (fields == null) { return true; } for (StructField field : fields) { DataType dataType = field.dataType(); if ((dataType != DataTypes.DoubleType) && (dataType != DataTypes.IntegerType) && (dataType != DataTypes.LongType) && (!(dataType instanceof org.apache.spark.ml.linalg.VectorUDT)) && (!(dataType instanceof org.apache.spark.mllib.linalg.VectorUDT))) { // uncomment if we support arrays of doubles for matrices // if (dataType instanceof ArrayType) { // ArrayType arrayType = (ArrayType) dataType; // if (arrayType.elementType() == DataTypes.DoubleType) { // continue; // } // } return false; } } return true; }
Example 6
Source File: IndexRUtil.java From indexr with Apache License 2.0 | 6 votes |
/**
 * Translates a list of Spark fields into a {@code SegmentSchema}.
 *
 * @param sparkSchema the Spark fields, converted in list order
 * @param isIndexed   consulted once per field name for the indexed flag
 * @return a segment schema with one column per Spark field
 * @throws IllegalStateException if a field's data type has no mapping
 */
public static SegmentSchema sparkSchemaToIndexRSchema(List<StructField> sparkSchema, IsIndexed isIndexed) {
    final List<ColumnSchema> converted = new ArrayList<>();
    for (final StructField field : sparkSchema) {
        final SQLType sqlType = resolveSqlType(field);
        converted.add(new ColumnSchema(field.name(), sqlType, isIndexed.apply(field.name())));
    }
    return new SegmentSchema(converted);
}

/** Maps the Spark data type of {@code field} to its {@code SQLType}, or throws if unsupported. */
private static SQLType resolveSqlType(StructField field) {
    if (field.dataType() instanceof IntegerType) {
        return SQLType.INT;
    }
    if (field.dataType() instanceof LongType) {
        return SQLType.BIGINT;
    }
    if (field.dataType() instanceof FloatType) {
        return SQLType.FLOAT;
    }
    if (field.dataType() instanceof DoubleType) {
        return SQLType.DOUBLE;
    }
    if (field.dataType() instanceof StringType) {
        return SQLType.VARCHAR;
    }
    if (field.dataType() instanceof DateType) {
        return SQLType.DATE;
    }
    if (field.dataType() instanceof TimestampType) {
        return SQLType.DATETIME;
    }
    throw new IllegalStateException("Unsupported type: " + field.dataType());
}
Example 7
Source File: SchemaConverterTest.java From bunsen with Apache License 2.0 | 5 votes |
/** * Returns the type of a nested field. */ DataType getField(DataType dataType, boolean isNullable, String... names) { StructType schema = dataType instanceof ArrayType ? (StructType) ((ArrayType) dataType).elementType() : (StructType) dataType; StructField field = Arrays.stream(schema.fields()) .filter(sf -> sf.name().equalsIgnoreCase(names[0])) .findFirst() .get(); DataType child = field.dataType(); // Recurse through children if there are more names. if (names.length == 1) { // Check the nullability. Assert.assertEquals("Unexpected nullability of field " + field.name(), isNullable, field.nullable()); return child; } else { return getField(child, isNullable, Arrays.copyOfRange(names, 1, names.length)); } }
Example 8
Source File: DbPersistorSQLServer.java From rdf2x with Apache License 2.0 | 5 votes |
@Override public void writeDataFrame(String name, DataFrame df) { for (StructField field : df.schema().fields()) { String column = field.name(); // convert booleans to integers to avoid error in Spark 1.6.2 // "Cannot specify a column width on data type bit." if (field.dataType() == DataTypes.BooleanType) { df = df.withColumn(column + TMP_SUFFIX, df.col(column).cast(DataTypes.IntegerType)) .drop(column) .withColumnRenamed(column + TMP_SUFFIX, column); } } super.writeDataFrame(name, df); }
Example 9
Source File: SparkMLEncoder.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Creates a data field for the named schema column, choosing the operational
 * type and data type from the column's Spark data type.
 *
 * @param name the field name; its value is used to look up the schema column
 * @return the data field created for the column
 * @throws IllegalArgumentException if the column is not of string, integral,
 *         double or boolean type
 */
public DataField createDataField(FieldName name){
	org.apache.spark.sql.types.DataType sparkDataType = getSchema().apply(name.getValue()).dataType();

	if(sparkDataType instanceof StringType){
		return createDataField(name, OpType.CATEGORICAL, DataType.STRING);
	}

	if(sparkDataType instanceof IntegralType){
		return createDataField(name, OpType.CONTINUOUS, DataType.INTEGER);
	}

	if(sparkDataType instanceof DoubleType){
		return createDataField(name, OpType.CONTINUOUS, DataType.DOUBLE);
	}

	if(sparkDataType instanceof BooleanType){
		return createDataField(name, OpType.CATEGORICAL, DataType.BOOLEAN);
	}

	throw new IllegalArgumentException("Expected string, integral, double or boolean data type, got " + sparkDataType.typeName() + " data type");
}
Example 10
Source File: TypeCastStep.java From bpmn.ai with BSD 3-Clause "New" or "Revised" License | 5 votes |
private DataType getCurrentDataType(List<StructField> datasetFields, String column) { // search current datatype for(StructField sf : datasetFields) { if(sf.name().equals(column)) { return sf.dataType(); } } return null; }
Example 11
Source File: SupportedFieldTypesValidation.java From envelope with Apache License 2.0 | 5 votes |
/**
 * Checks that every field of the schema configured at {@code path} has a
 * supported data type.
 *
 * A field passes when its data type is contained in {@code validationTypes}.
 * A decimal field also passes when {@code validationTypes} contains a
 * default-constructed {@code DecimalType}.
 *
 * @param config the configuration holding the schema definition
 * @return an INVALID result naming the first unsupported field type, or a
 *         VALID result if all field types are supported
 */
@Override
public ValidationResult validate(Config config) {
  final StructField[] schemaFields = ComponentFactory.create(
      Schema.class, config.getConfig(this.path), true).getSchema().fields();

  for (StructField field : schemaFields) {
    if (validationTypes.contains(field.dataType())) {
      continue;
    }

    final boolean decimalAccepted =
        field.dataType() instanceof DecimalType && validationTypes.contains(new DecimalType());
    if (decimalAccepted) {
      continue;
    }

    return new ValidationResult(this, Validity.INVALID,
        "Schema field type " + field.dataType().simpleString() +
        " is not supported by this component type.");
  }

  return new ValidationResult(this, Validity.VALID,
      "Schema field types are valid for this component type.");
}
Example 12
Source File: TestHelpers.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Asserts that two internal rows hold equal values for every field of a
 * struct.
 *
 * The field count of {@code actual} is first checked against the struct;
 * each field is then compared through the type-aware {@code assertEquals}
 * overload, with the field name appended to {@code context}.
 *
 * @param context  prefix identifying the compared value in failure messages
 * @param struct   the struct type describing both rows
 * @param expected the row holding the expected values
 * @param actual   the row holding the values under test
 */
private static void assertEquals(String context, StructType struct,
                                 InternalRow expected, InternalRow actual) {
  Assert.assertEquals("Should have correct number of fields", struct.size(), actual.numFields());

  final StructField[] structFields = struct.fields();
  final int fieldCount = actual.numFields();
  for (int pos = 0; pos < fieldCount; pos += 1) {
    final StructField current = structFields[pos];
    final DataType fieldType = current.dataType();
    final String fieldContext = context + "." + current.name();
    assertEquals(fieldContext, fieldType, expected.get(pos, fieldType), actual.get(pos, fieldType));
  }
}
Example 13
Source File: FrameRDDConverterUtils.java From systemds with Apache License 2.0 | 5 votes |
/**
 * Finds the first vector column in a DataFrame schema.
 *
 * @param dfschema schema as StructType
 * @param containsID if true, the first column is an ID column and is skipped
 * @return position of the first {@code VectorUDT} column, counted from the
 *         first non-ID column; -1 if there is no vector column
 */
private static int getColVectFromDFSchema(StructType dfschema, boolean containsID) {
	final int firstDataCol = containsID ? 1 : 0;
	final int numCols = dfschema.fields().length;
	for( int pos = firstDataCol; pos < numCols; pos++ ) {
		final StructField column = dfschema.apply(pos);
		if( column.dataType() instanceof VectorUDT ) {
			return pos - firstDataCol;
		}
	}
	return -1;
}
Example 14
Source File: FrameRDDConverterUtils.java From systemds with Apache License 2.0 | 5 votes |
/**
 * Locates the vector column of a DataFrame schema.
 *
 * @param dfschema schema as StructType
 * @param containsID if true, the schema starts with an ID column that is
 *        skipped and not counted
 * @return index of the first {@code VectorUDT} column relative to the first
 *         non-ID column, or -1 if the schema has no vector column
 */
private static int getColVectFromDFSchema(StructType dfschema, boolean containsID) {
	int skipped = 0;
	if( containsID ) {
		skipped = 1;
	}
	int found = -1;
	for( int col = skipped; col < dfschema.fields().length; col++ ) {
		if( dfschema.apply(col).dataType() instanceof VectorUDT ) {
			found = col - skipped;
			break;
		}
	}
	return found;
}
Example 15
Source File: DBClientWrapper.java From spark-data-sources with MIT License | 5 votes |
public static Schema sparkToDbSchema(StructType st) { Schema schema = new Schema(); for (StructField sf: st.fields()) { if (sf.dataType() == DataTypes.StringType) { schema.addColumn(sf.name(), Schema.ColumnType.STRING); } else if (sf.dataType() == DataTypes.DoubleType) { schema.addColumn(sf.name(), Schema.ColumnType.DOUBLE); } else if (sf.dataType() == DataTypes.LongType) { schema.addColumn(sf.name(), Schema.ColumnType.INT64); } else { // TODO: type leakage } } return schema; }
Example 16
Source File: InputTranslatorCompatibilityValidation.java From envelope with Apache License 2.0 | 4 votes |
/**
 * Checks that the configured input and translator can work together.
 *
 * If either component cannot be instantiated the check is skipped and a
 * VALID result is returned. Otherwise the pair is INVALID when the translator
 * uses a provided schema that the input does not declare, or when the input
 * declares a schema lacking a field (same name and data type) that the
 * translator expects.
 *
 * @param config the configuration holding the input and translator settings
 * @return the validation result describing the outcome
 */
@Override
public ValidationResult validate(Config config) {
  Input input;
  Translator translator;
  try {
    input = ComponentFactory.create(Input.class, config.getConfig(DataStep.INPUT_TYPE), false);
    translator = ComponentFactory.create(
        Translator.class, config.getConfig(StreamingStep.TRANSLATOR_PROPERTY), false);
  }
  catch (Exception e) {
    // Instantiation problems are not this validation's concern.
    return new ValidationResult(this, Validity.VALID,
        "Could not instantiate input and/or translator, so will not check if they" +
            " are compatible.");
  }

  String inputClass = input.getClass().getSimpleName();
  String translatorClass = translator.getClass().getSimpleName();

  if (!(input instanceof DeclaresProvidingSchema)) {
    if (translator instanceof UsesProvidedSchema) {
      return new ValidationResult(this, Validity.INVALID,
          inputClass + " is not compatible with " + translatorClass +
          " because " + translatorClass + " requires " + inputClass + " to declare the schema that" +
          " it provides, but " + inputClass + " does not do so.");
    }
    return new ValidationResult(this, Validity.VALID, "Input and translator are compatible");
  }

  for (StructField expected : translator.getExpectingSchema().fields()) {
    boolean provided = false;
    for (StructField offered : ((DeclaresProvidingSchema) input).getProvidingSchema().fields()) {
      if (!expected.name().equals(offered.name())) {
        continue;
      }
      if (expected.dataType().equals(offered.dataType())) {
        provided = true;
      }
    }

    if (!provided) {
      return new ValidationResult(this, Validity.INVALID,
          inputClass + " is not compatible with " + translatorClass + " because " +
          inputClass + " does not provide expected " + "field '" + expected.name() +
          "' with data type '" + expected.dataType() + "'");
    }
  }

  return new ValidationResult(this, Validity.VALID, "Input and translator are compatible");
}
Example 17
Source File: ColumnExploder.java From jpmml-evaluator-spark with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Returns the schema of the struct column within the given schema.
 *
 * @param schema the schema containing the column named by {@code getStructCol()}
 * @return that column's data type, cast to {@code StructType}
 */
private StructType getStructSchema(StructType schema){
	return (StructType)schema.apply(getStructCol()).dataType();
}
Example 18
Source File: SqlResultsWriter.java From geowave with Apache License 2.0 | 4 votes |
/**
 * Writes the collected rows of {@code results} to the output data store as
 * simple features.
 *
 * A feature type is derived from the result schema, registered with a
 * spatial index, and one feature per row is written. Null cell values are
 * skipped; the "geom" column is set as a {@code Geometry}, and timestamp
 * columns are converted to {@code Date}.
 *
 * @param typeName the feature type name; when {@code null}, the default type
 *        name is used and a warning is logged
 */
public void writeResults(String typeName) {
  final String resolvedTypeName;
  if (typeName != null) {
    resolvedTypeName = typeName;
  } else {
    resolvedTypeName = DEFAULT_TYPE_NAME;
    LOGGER.warn(
        "Using default type name (adapter id): '" + DEFAULT_TYPE_NAME + "' for SQL output");
  }

  final StructType schema = results.schema();
  final SimpleFeatureType featureType =
      SchemaConverter.schemaToFeatureType(schema, resolvedTypeName);

  final SimpleFeatureBuilder sfBuilder = new SimpleFeatureBuilder(featureType);

  final FeatureDataAdapter featureAdapter = new FeatureDataAdapter(featureType);

  final DataStore featureStore = outputDataStore.createDataStore();
  final Index featureIndex =
      new SpatialDimensionalityTypeProvider().createIndex(new SpatialOptions());
  featureStore.addType(featureAdapter, featureIndex);

  try (Writer writer = featureStore.createWriter(featureAdapter.getTypeName())) {
    final List<Row> rows = results.collectAsList();
    final int columnCount = schema.fields().length;

    for (int rowIdx = 0; rowIdx < rows.size(); rowIdx++) {
      final Row row = rows.get(rowIdx);

      for (int col = 0; col < columnCount; col++) {
        final StructField column = schema.apply(col);
        final Object value = row.apply(col);
        if (value == null) {
          continue;
        }

        if (column.name().equals("geom")) {
          sfBuilder.set("geom", (Geometry) value);
        } else if (column.dataType() == DataTypes.TimestampType) {
          final Timestamp timestamp = (Timestamp) value;
          sfBuilder.set(column.name(), new Date(timestamp.getTime()));
        } else {
          sfBuilder.set(column.name(), value);
        }
      }

      // The feature id carries the formatted row index.
      final String featureId = "result-" + nf.format(rowIdx);
      writer.write(sfBuilder.buildFeature(featureId));
    }
  }
}
Example 19
Source File: SparkRowConverterTest.java From bunsen with Apache License 2.0 | 3 votes |
/**
 * Asserts that a struct has at least one field, then applies the same check
 * to every nested struct.
 *
 * Nested structs are reached either directly or as the element type of an
 * array field. Array fields named "contained" are not descended into.
 *
 * @param schema    the struct to check
 * @param fieldName the name reported in the failure message for this struct
 */
private void checkNoEmptyStructs(StructType schema, String fieldName) {

  Assert.assertNotEquals("Struct field " + fieldName + " is empty",
      0,
      schema.fields().length);

  for (StructField field : schema.fields()) {

    if (field.dataType() instanceof StructType) {
      checkNoEmptyStructs((StructType) field.dataType(), field.name());
      continue;
    }

    if (!(field.dataType() instanceof ArrayType)) {
      continue;
    }

    ArrayType arrayType = (ArrayType) field.dataType();
    boolean structElements = arrayType.elementType() instanceof StructType;

    if (structElements && !field.name().equals("contained")) {
      checkNoEmptyStructs((StructType) arrayType.elementType(), field.name());
    }
  }
}