org.kitesdk.data.ValidationException Java Examples
The following examples show how to use
org.kitesdk.data.ValidationException.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestColumnMappingParser.java From kite with Apache License 2.0 | 6 votes |
/**
 * Checks that both the {@code occ} and {@code version} builder methods match
 * the "occVersion" JSON mapping, and that an "occVersion" mapping without a
 * source is rejected with a {@link ValidationException}.
 */
@Test
public void testOCCVersionMapping() {
  final String occVersionJson = "[ {\"source\": \"s\", \"type\": \"occVersion\"} ]";

  checkParser(new ColumnMapping.Builder().occ("s").build(), occVersionJson);
  checkParser(new ColumnMapping.Builder().version("s").build(), occVersionJson);

  Runnable parseWithoutSource = new Runnable() {
    @Override
    public void run() {
      ColumnMappingParser.parse("[ {\"type\": \"occVersion\"} ]");
    }
  };
  TestHelpers.assertThrows("Should reject missing source",
      ValidationException.class, parseWithoutSource);
}
Example #2
Source File: TestPartitionStrategyParser.java From kite with Apache License 2.0 | 6 votes |
/**
 * For each listed partitioner type, parses a strategy entry that carries only
 * the type and expects a {@link ValidationException}.
 */
@Test
public void testMissingSource() {
  String[] partitionerTypes = {
      "identity", "hash", "year", "month", "day", "hour", "minute", "dateFormat"
  };
  for (int i = 0; i < partitionerTypes.length; i++) {
    // the JSON has a type but no "source" entry
    final String json = "[ {\"type\": \"" + partitionerTypes[i] + "\"} ]";
    Runnable parseWithoutSource = new Runnable() {
      @Override
      public void run() {
        PartitionStrategyParser.parse(json);
      }
    };
    TestHelpers.assertThrows("Should reject missing source",
        ValidationException.class, parseWithoutSource);
  }
}
Example #3
Source File: TestFileSystemDataset.java From kite with Apache License 2.0 | 6 votes |
/**
 * Builds two datasets with the same namespace, name, schema and location but
 * different formats (Avro vs. Parquet) and merges one into the other; the
 * test passes only if a {@link ValidationException} is thrown.
 */
@Test(expected = ValidationException.class)
public void testCannotMergeDatasetsWithDifferentFormats() throws IOException {
  DatasetDescriptor avroDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .format(Formats.AVRO)
      .location(testDirectory)
      .build();
  DatasetDescriptor parquetDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .format(Formats.PARQUET)
      .location(testDirectory)
      .build();

  FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(avroDescriptor)
      .type(Record.class)
      .build();
  FileSystemDataset<Record> dsUpdate = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(parquetDescriptor)
      .type(Record.class)
      .build();

  ds.merge(dsUpdate);
}
Example #4
Source File: TestFileSystemDataset.java From kite with Apache License 2.0 | 6 votes |
/**
 * Builds two datasets with the same namespace, name and location but
 * different schemas ({@code STRING_SCHEMA} vs. {@code USER_SCHEMA}) and merges
 * one into the other; the test passes only if a {@link ValidationException}
 * is thrown.
 */
@Test(expected = ValidationException.class)
public void testCannotMergeDatasetsWithDifferentSchemas() throws IOException {
  DatasetDescriptor stringDescriptor = new DatasetDescriptor.Builder()
      .schema(STRING_SCHEMA)
      .location(testDirectory)
      .build();
  DatasetDescriptor userDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .location(testDirectory)
      .build();

  FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(stringDescriptor)
      .type(Record.class)
      .build();
  FileSystemDataset<Record> dsUpdate = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(userDescriptor)
      .type(Record.class)
      .build();

  ds.merge(dsUpdate);
}
Example #5
Source File: TestCompatibilityChecks.java From kite with Apache License 2.0 | 6 votes |
/**
 * Starts from a strategy with a provided partition named "part" and expects
 * {@code Compatibility.checkStrategyUpdate} to throw a
 * {@link ValidationException} when the update uses an identity partition
 * named "other".
 */
@Test
public void testProvidedPartitionNameUpdate() {
  final PartitionStrategy provided = new PartitionStrategy.Builder()
      .provided("part", "string")
      .build();

  Runnable renamingUpdate = new Runnable() {
    @Override
    public void run() {
      // built inside run() so any failure here is also seen by assertThrows
      PartitionStrategy renamed = new PartitionStrategy.Builder()
          .identity("s", "other")
          .build();
      Compatibility.checkStrategyUpdate(provided, renamed, PROVIDED_TEST_SCHEMA);
    }
  };

  TestHelpers.assertThrows("Should not allow changing the partition name",
      ValidationException.class, renamingUpdate);
}
Example #6
Source File: SchemaUtil.java From kite with Apache License 2.0 | 6 votes |
/**
 * Returns the nested {@link Schema} for the given field name.
 * <p>
 * The name is split into levels with {@code NAME_SPLITTER}; each level is
 * looked up as a field of the schema reached so far, after passing that
 * schema through {@code unwrapNullable}. Both the "is a record" and the
 * "field exists" conditions are validated with
 * {@code ValidationException.check}.
 *
 * @param schema a record Schema
 * @param name a String field name
 * @return the nested Schema for the field
 */
public static Schema fieldSchema(Schema schema, String name) {
  Schema nested = unwrapNullable(schema);
  List<String> levels = Lists.newArrayList();
  for (String level : NAME_SPLITTER.split(name)) {
    levels.add(level);
    // Validate the schema we are about to descend into. Checking the root
    // schema here (as before) rejected an unwrapped nullable root and never
    // validated intermediate levels before calling getField on them.
    ValidationException.check(Schema.Type.RECORD == nested.getType(),
        "Cannot get schema for %s: %s is not a record schema: %s",
        name, NAME_JOINER.join(levels), nested.toString(true));
    Schema.Field field = nested.getField(level);
    ValidationException.check(field != null,
        "Cannot get schema for %s: %s is not a field",
        name, NAME_JOINER.join(levels));
    nested = unwrapNullable(field.schema());
  }
  return nested;
}
Example #7
Source File: SchemaUtil.java From kite with Apache License 2.0 | 6 votes |
/** * Builds a Schema for the FieldPartitioner using the given Schema to * determine types not fixed by the FieldPartitioner. * * @param fp a FieldPartitioner * @param schema an entity Schema that will be partitioned * @return a Schema for the field partitioner */ public static Schema partitionFieldSchema(FieldPartitioner<?, ?> fp, Schema schema) { if (fp instanceof IdentityFieldPartitioner) { // copy the schema directly from the entity to preserve annotations return fieldSchema(schema, fp.getSourceName()); } else { Class<?> fieldType = getPartitionType(fp, schema); if (fieldType == Integer.class) { return Schema.create(Schema.Type.INT); } else if (fieldType == Long.class) { return Schema.create(Schema.Type.LONG); } else if (fieldType == String.class) { return Schema.create(Schema.Type.STRING); } else { throw new ValidationException( "Cannot encode partition " + fp.getName() + " with type " + fp.getSourceType() ); } } }
Example #8
Source File: TestCompatibilityChecks.java From kite with Apache License 2.0 | 6 votes |
/**
 * Starts from a strategy whose "part" partition is an identity partition (not
 * a provided one) and expects {@code Compatibility.checkStrategyUpdate} to
 * throw a {@link ValidationException} when the update replaces it with a
 * dateFormat partition of the same name.
 */
@Test
public void testUpdateNonProvided() {
  final PartitionStrategy provided = new PartitionStrategy.Builder()
      .identity("s", "part")
      .build();

  Runnable replacingUpdate = new Runnable() {
    @Override
    public void run() {
      // built inside run() so any failure here is also seen by assertThrows
      PartitionStrategy replacement = new PartitionStrategy.Builder()
          .dateFormat("l", "part", "yyyy-MM-dd")
          .build();
      Compatibility.checkStrategyUpdate(provided, replacement, PROVIDED_TEST_SCHEMA);
    }
  };

  TestHelpers.assertThrows("Should not allow replacing if not provided",
      ValidationException.class, replacingUpdate);
}
Example #9
Source File: PartitionedDatasetWriter.java From kite with Apache License 2.0 | 6 votes |
/**
 * Opens this writer: verifies it is still in the NEW state, validates that
 * the dataset's format is supported, creates the bounded cache of
 * per-partition writers and moves the state to OPEN.
 */
@Override
public void initialize() {
  boolean isNew = state.equals(ReaderWriterState.NEW);
  Preconditions.checkState(isNew,
      "Unable to open a writer from state:%s", state);

  DatasetDescriptor datasetDescriptor = view.getDataset().getDescriptor();
  boolean formatSupported = FileSystemWriter.isSupportedFormat(datasetDescriptor);
  ValidationException.check(formatSupported,
      "Not a supported format: %s", datasetDescriptor.getFormat());

  LOG.debug("Opening partitioned dataset writer w/strategy:{}",
      partitionStrategy);

  // at most maxWriters entries; evicted writers go to DatasetWriterCloser
  cachedWriters = CacheBuilder.newBuilder()
      .maximumSize(maxWriters)
      .removalListener(new DatasetWriterCloser<E>())
      .build(createCacheLoader());

  state = ReaderWriterState.OPEN;
}
Example #10
Source File: TestFileSystemDatasetRepository.java From kite with Apache License 2.0 | 6 votes |
@Test public void testUpdateFailsWithFormatChange() { Dataset<Record> dataset = repo.create(NAMESPACE, NAME, new DatasetDescriptor.Builder(testDescriptor) .format(Formats.AVRO) .build()); DatasetDescriptor changed = new DatasetDescriptor.Builder(dataset.getDescriptor()) .format(Formats.PARQUET) .build(); try { repo.update(NAMESPACE, NAME, changed); Assert.fail("Should fail due to format change"); } catch (ValidationException e) { // expected } Assert.assertEquals( Formats.AVRO, repo.load(NAMESPACE, NAME).getDescriptor().getFormat()); }
Example #11
Source File: TestFileSystemDatasetRepository.java From kite with Apache License 2.0 | 6 votes |
@Test public void testUpdateFailsWithLocationChange() { ensureCreated(); Dataset<Record> dataset = repo.load(NAMESPACE, NAME); URI location = dataset.getDescriptor().getLocation(); DatasetDescriptor changed = new DatasetDescriptor.Builder(dataset.getDescriptor()) .location(new Path(testDirectory, "newDataLocation").toUri()) .build(); try { repo.update(NAMESPACE, NAME, changed); Assert.fail("Should fail due to data location change"); } catch (ValidationException ex) { // expected } Assert.assertEquals( location, repo.load(NAMESPACE, NAME).getDescriptor().getLocation()); }
Example #12
Source File: AvroEntitySerDe.java From kite with Apache License 2.0 | 6 votes |
/**
 * Serializes a single column value to bytes using the datum writer
 * registered for the named field.
 *
 * @param fieldName   name of the field in the Avro schema
 * @param columnValue the value to serialize
 * @return the bytes written by the column encoder
 * @throws ValidationException if the schema has no such field, or no datum
 *                             writer is registered for it
 */
@Override
public byte[] serializeColumnValueToBytes(String fieldName, Object columnValue) {
  Field field = avroSchema.getAvroSchema().getField(fieldName);
  DatumWriter<Object> datumWriter = fieldDatumWriters.get(fieldName);
  if (field == null) {
    throw new ValidationException("Invalid field name " + fieldName
        + " for schema " + avroSchema.toString());
  }
  if (datumWriter == null) {
    throw new ValidationException("No datum writer for field name: "
        + fieldName);
  }

  ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
  Encoder encoder = getColumnEncoder(field.schema(), byteOut);
  // Use the writer that was just null-checked instead of looking it up again.
  AvroUtils.writeAvroEntity(columnValue, encoder, datumWriter);
  return byteOut.toByteArray();
}
Example #13
Source File: TestCreateColumnMappingCommand.java From kite with Apache License 2.0 | 6 votes |
@Test public void testMissingKeyPartition() throws Exception { // does not include an identity partition for email command.partitionStrategyFile = "resource:test-partitions/email-hash-part.json"; command.partitions = Lists.newArrayList( "email:key" ); TestHelpers.assertThrows("Should reject missing partitioner", ValidationException.class, new Callable() { @Override public Object call() throws Exception { command.run(); return null; } }); }
Example #14
Source File: AvroEntitySerDe.java From kite with Apache License 2.0 | 6 votes |
/**
 * Deserializes a single column value from bytes using the datum reader
 * registered for the named field.
 *
 * @param fieldName name of the field in the Avro schema
 * @param bytes     the serialized column value
 * @return the value read by {@code AvroUtils.readAvroEntity}
 * @throws ValidationException if the schema has no such field, or no datum
 *                             reader is registered for it
 */
@Override
public Object deserializeColumnValueFromBytes(String fieldName, byte[] bytes) {
  Field schemaField = avroSchema.getAvroSchema().getField(fieldName);
  DatumReader<Object> reader = fieldDatumReaders.get(fieldName);

  if (null == schemaField) {
    throw new ValidationException("Invalid field name " + fieldName
        + " for schema " + avroSchema.toString());
  }
  if (null == reader) {
    throw new ValidationException("No datum reader for field name: "
        + fieldName);
  }

  Decoder decoder =
      getColumnDecoder(schemaField.schema(), new ByteArrayInputStream(bytes));
  return AvroUtils.readAvroEntity(decoder, reader);
}
Example #15
Source File: AvroEntitySerDe.java From kite with Apache License 2.0 | 6 votes |
/**
 * Converts the raw bytes of a keyAsColumn column key back into a
 * {@link CharSequence}.
 * <p>
 * MAP fields yield a {@code String} when the field schema's
 * "avro.java.string" property equals "String", otherwise a {@code Utf8};
 * RECORD fields always yield a {@code String}.
 *
 * @param fieldName      name of the keyAsColumn field in the Avro schema
 * @param columnKeyBytes the serialized column key
 * @return the decoded column key
 * @throws ValidationException if the schema has no such field, or the field
 *                             is neither a MAP nor a RECORD
 */
@Override
public CharSequence deserializeKeyAsColumnKeyFromBytes(String fieldName,
    byte[] columnKeyBytes) {
  Field field = avroSchema.getAvroSchema().getField(fieldName);
  if (field == null) {
    throw new ValidationException("Invalid field name " + fieldName
        + " for schema " + avroSchema.toString());
  }

  Schema.Type schemaType = field.schema().getType();

  if (schemaType == Schema.Type.RECORD) {
    // NOTE(review): new String(byte[]) decodes with the platform default
    // charset; confirm this matches how the key bytes were written.
    return new String(columnKeyBytes);
  }

  if (schemaType != Schema.Type.MAP) {
    throw new ValidationException("Unsupported type for keyAsColumn: "
        + schemaType);
  }

  String stringProp = field.schema().getProp("avro.java.string");
  boolean wantsJavaString = "String".equals(stringProp);
  if (wantsJavaString) {
    return new String(columnKeyBytes);
  }
  return new Utf8(columnKeyBytes);
}
Example #16
Source File: EntitySerDe.java From kite with Apache License 2.0 | 6 votes |
/**
 * Serialize an entity's field value to a PutAction.
 * <p>
 * Dispatches on the field's mapping type: COLUMN and COUNTER values go
 * through {@code serializeColumn}, KEY_AS_COLUMN through
 * {@code serializeKeyAsColumn}, and OCC_VERSION through
 * {@code serializeOCCColumn}.
 *
 * @param keyBytes
 *          The bytes of the serialized key (needed to construct a PutAction).
 * @param fieldMapping
 *          The FieldMapping that specifies this field's mapping type and
 *          field name.
 * @param fieldValue
 *          The value of the field to serialize.
 * @return The PutAction wrapping the Put built for {@code keyBytes}.
 * @throws ValidationException
 *           if the mapping type is none of the handled types.
 */
public PutAction serialize(byte[] keyBytes, FieldMapping fieldMapping,
    Object fieldValue) {
  Put put = new Put(keyBytes);
  PutAction putAction = new PutAction(put);
  String fieldName = fieldMapping.getFieldName();

  MappingType mappingType = fieldMapping.getMappingType();
  boolean plainColumn = mappingType == MappingType.COLUMN
      || mappingType == MappingType.COUNTER;

  if (plainColumn) {
    serializeColumn(fieldName, fieldMapping.getFamily(),
        fieldMapping.getQualifier(), fieldValue, put);
    return putAction;
  }
  if (mappingType == MappingType.KEY_AS_COLUMN) {
    serializeKeyAsColumn(fieldName, fieldMapping.getFamily(),
        fieldMapping.getPrefix(), fieldValue, put);
    return putAction;
  }
  if (mappingType == MappingType.OCC_VERSION) {
    serializeOCCColumn(fieldValue, putAction);
    return putAction;
  }

  throw new ValidationException(
      "Invalid field mapping for field with name: "
          + fieldMapping.getFieldName());
}
Example #17
Source File: EntitySerDe.java From kite with Apache License 2.0 | 6 votes |
/**
 * Deserialize an entity field from the HBase Result.
 * <p>
 * Dispatches on the field's mapping type: COLUMN and COUNTER go through
 * {@code deserializeColumn}, KEY_AS_COLUMN through
 * {@code deserializeKeyAsColumn}, and OCC_VERSION through
 * {@code deserializeOCCColumn}.
 *
 * @param fieldMapping
 *          The FieldMapping that specifies this field's mapping type and
 *          field name.
 * @param result
 *          The HBase Result that represents a row in HBase.
 * @return The field Object we deserialized from the Result.
 * @throws ValidationException
 *           if the mapping type is none of the handled types.
 */
public Object deserialize(FieldMapping fieldMapping, Result result) {
  String fieldName = fieldMapping.getFieldName();
  MappingType mappingType = fieldMapping.getMappingType();

  boolean plainColumn = mappingType == MappingType.COLUMN
      || mappingType == MappingType.COUNTER;
  if (plainColumn) {
    return deserializeColumn(fieldMapping.getFieldName(),
        fieldMapping.getFamily(), fieldMapping.getQualifier(), result);
  }
  if (mappingType == MappingType.KEY_AS_COLUMN) {
    return deserializeKeyAsColumn(fieldMapping.getFieldName(),
        fieldMapping.getFamily(), fieldMapping.getPrefix(), result);
  }
  if (mappingType == MappingType.OCC_VERSION) {
    return deserializeOCCColumn(result);
  }

  throw new ValidationException(
      "Invalid field mapping for field with name: " + fieldName);
}
Example #18
Source File: SchemaTool.java From kite with Apache License 2.0 | 5 votes |
/** * Prepare managed schema for this entitySchema */ private boolean prepareManagedSchema(String tableName, String entitySchemaString) { String entityName = getEntityNameFromSchemaString(entitySchemaString); AvroEntitySchema entitySchema = parser .parseEntitySchema(entitySchemaString); AvroKeySchema keySchema = parser.parseKeySchema(entitySchemaString); // Verify there are no ambiguities with the managed schemas if (schemaManager.hasManagedSchema(tableName, entityName)) { KeySchema currentKeySchema = schemaManager .getKeySchema(tableName, entityName); if (!keySchema.equals(currentKeySchema)) { String msg = "Migrating schema with different keys. Current: " + currentKeySchema .getRawSchema() + " New: " + keySchema.getRawSchema(); LOG.error(msg); throw new ValidationException(msg); } if (!schemaManager .hasSchemaVersion(tableName, entityName, entitySchema)) { LOG.info("Migrating Schema: (" + tableName + ", " + entityName + ")"); schemaManager.migrateSchema(tableName, entityName, entitySchemaString); } else { LOG.info("Schema hasn't changed, not migrating: (" + tableName + ", " + entityName + ")"); return false; } } else { LOG.info("Creating Schema: (" + tableName + ", " + entityName + ")"); parser.parseEntitySchema(entitySchemaString).getColumnMappingDescriptor() .getRequiredColumnFamilies(); schemaManager.createSchema(tableName, entityName, entitySchemaString, "org.kitesdk.data.hbase.avro.AvroKeyEntitySchemaParser", "org.kitesdk.data.hbase.avro.AvroKeySerDe", "org.kitesdk.data.hbase.avro.AvroEntitySerDe"); } return true; }
Example #19
Source File: AvroEntityComposer.java From kite with Apache License 2.0 | 5 votes |
/**
 * Builds the value of a keyAsColumn field from its column key/value pairs.
 * <p>
 * For a MAP field the pairs are copied into a new {@link HashMap}. For a
 * RECORD field each pair is put into a record builder obtained from
 * {@code kacRecordBuilderFactories} (key converted with {@code toString()})
 * and the built record is returned.
 *
 * @param fieldName         name of the keyAsColumn field in the Avro schema
 * @param keyAsColumnValues the column key/value pairs for the field
 * @return a new map (MAP field) or the built record (RECORD field)
 * @throws ValidationException if the schema has no such field, or the field
 *                             is neither a MAP nor a RECORD
 */
@Override
public Object buildKeyAsColumnField(String fieldName,
    Map<CharSequence, Object> keyAsColumnValues) {
  Schema schema = avroSchema.getAvroSchema();
  Field field = schema.getField(fieldName);
  if (field == null) {
    throw new ValidationException("No field named " + fieldName
        + " in schema " + schema);
  }

  Schema.Type fieldType = field.schema().getType();

  if (fieldType == Schema.Type.MAP) {
    // hand back a copy rather than the caller's map
    return new HashMap<CharSequence, Object>(keyAsColumnValues);
  }

  if (fieldType == Schema.Type.RECORD) {
    AvroRecordBuilder<E> recordBuilder =
        kacRecordBuilderFactories.get(fieldName).getBuilder();
    for (Entry<CharSequence, Object> pair : keyAsColumnValues.entrySet()) {
      String recordFieldName = pair.getKey().toString();
      recordBuilder.put(recordFieldName, pair.getValue());
    }
    return recordBuilder.build();
  }

  throw new ValidationException(
      "Only MAP or RECORD type valid for keyAsColumn fields. Found "
          + fieldType);
}
Example #20
Source File: AvroEntityComposer.java From kite with Apache License 2.0 | 5 votes |
/**
 * Extracts the column key/value pairs from the value of a keyAsColumn field.
 * <p>
 * For a MAP field the value is cast to a map and copied into a new
 * {@link HashMap}. For a RECORD field the value is treated as an
 * {@code IndexedRecord} and each of its schema fields becomes one entry,
 * keyed by the field name.
 *
 * @param fieldName  name of the keyAsColumn field in the Avro schema
 * @param fieldValue the field's value (a map or a record)
 * @return a new map of column keys to values
 * @throws ValidationException if the schema has no such field, or the field
 *                             is neither a MAP nor a RECORD
 */
@SuppressWarnings("unchecked")
@Override
public Map<CharSequence, Object> extractKeyAsColumnValues(String fieldName,
    Object fieldValue) {
  Schema schema = avroSchema.getAvroSchema();
  Field field = schema.getField(fieldName);
  if (field == null) {
    throw new ValidationException("No field named " + fieldName
        + " in schema " + schema);
  }

  Schema.Type fieldType = field.schema().getType();

  if (fieldType == Schema.Type.MAP) {
    Map<CharSequence, Object> source = (Map<CharSequence, Object>) fieldValue;
    return new HashMap<CharSequence, Object>(source);
  }

  if (fieldType == Schema.Type.RECORD) {
    IndexedRecord record = (IndexedRecord) fieldValue;
    Map<CharSequence, Object> extracted = new HashMap<CharSequence, Object>();
    for (Field recordField : record.getSchema().getFields()) {
      Object value = record.get(recordField.pos());
      extracted.put(recordField.name(), value);
    }
    return extracted;
  }

  throw new ValidationException(
      "Only MAP or RECORD type valid for keyAsColumn fields. Found "
          + fieldType);
}
Example #21
Source File: AvroEntityComposer.java From kite with Apache License 2.0 | 5 votes |
@Override public Object extractField(E entity, String fieldName) { // make sure the field is a direct child of the schema ValidationException.check( accessor.getReadSchema().getField(fieldName) != null, "No field named %s in schema %s", fieldName, accessor.getReadSchema()); return accessor.get(entity, fieldName); }
Example #22
Source File: TestConstraints.java From kite with Apache License 2.0 | 5 votes |
/**
 * Expects a {@link ValidationException} when a constraint is added on
 * "prescription", the field name this test treats as unknown.
 */
@Test
public void testRejectsNonSchemaOrPartitionFields() {
  Runnable constrainUnknownField = new Runnable() {
    @Override
    public void run() {
      emptyConstraints.with("prescription", 34);
    }
  };
  TestHelpers.assertThrows("Should reject unknown field name",
      ValidationException.class, constrainUnknownField);
}
Example #23
Source File: TestPartitionStrategyParser.java From kite with Apache License 2.0 | 5 votes |
/**
 * Checks that a complete "dateFormat" entry matches the builder's
 * {@code dateFormat} strategy, and that entries missing either "format" or
 * "name" are rejected with a {@link ValidationException}.
 */
@Test
public void testDateFormat() {
  final String completeJson = "[ {\"type\": \"dateFormat\", " +
      "\"source\": \"time\", " +
      "\"name\": \"date\", " +
      "\"format\": \"yyyyMMdd\"} ]";
  final String withoutFormat = "[ {\"type\": \"dateFormat\", " +
      "\"source\": \"time\", " +
      "\"name\": \"date\"} ]";
  final String withoutName = "[ {\"type\": \"dateFormat\", " +
      "\"source\": \"time\", " +
      "\"format\": \"yyyyMMdd\"} ]";

  PartitionStrategy expected = new PartitionStrategy.Builder()
      .dateFormat("time", "date", "yyyyMMdd")
      .build();
  checkParser(expected, completeJson);

  TestHelpers.assertThrows("Should reject missing format",
      ValidationException.class, new Runnable() {
        @Override
        public void run() {
          PartitionStrategyParser.parse(withoutFormat);
        }
      });

  TestHelpers.assertThrows("Should reject missing name",
      ValidationException.class, new Runnable() {
        @Override
        public void run() {
          PartitionStrategyParser.parse(withoutName);
        }
      });
}
Example #24
Source File: TestPartitionStrategyParser.java From kite with Apache License 2.0 | 5 votes |
/**
 * Checks that "range" entries (with and without an explicit name) match the
 * builder's {@code fixedSizeRange} strategies, and that a missing or
 * non-numeric "size" is rejected with a {@link ValidationException}.
 */
@Test
public void testFixedSizedRange() {
  PartitionStrategy unnamed =
      new PartitionStrategy.Builder().fixedSizeRange("id", 64).build();
  checkParser(unnamed,
      "[ {\"type\": \"range\", \"source\": \"id\", \"size\": 64} ]");

  PartitionStrategy named =
      new PartitionStrategy.Builder().fixedSizeRange("id", "rng", 64).build();
  checkParser(named,
      "[ {\"type\": \"range\", " +
          "\"source\": \"id\", " +
          "\"name\": \"rng\", " +
          "\"size\": 64} ]");

  final String withoutSize = "[ {\"type\": \"range\", " +
      "\"source\": \"id\", " +
      "\"name\": \"rng\"} ]";
  TestHelpers.assertThrows("Should reject missing size",
      ValidationException.class, new Runnable() {
        @Override
        public void run() {
          PartitionStrategyParser.parse(withoutSize);
        }
      });

  final String nonNumericSize = "[ {\"type\": \"range\", " +
      "\"source\": \"id\", " +
      "\"name\": \"rng\", " +
      "\"size\": \"green\"} ]";
  TestHelpers.assertThrows("Should reject invalid size",
      ValidationException.class, new Runnable() {
        @Override
        public void run() {
          PartitionStrategyParser.parse(nonNumericSize);
        }
      });
}
Example #25
Source File: TestPartitionStrategyParser.java From kite with Apache License 2.0 | 5 votes |
/**
 * Checks that "hash" entries (with and without an explicit name) match the
 * builder's {@code hash} strategies, and that a missing or non-numeric
 * "buckets" value is rejected with a {@link ValidationException}.
 */
@Test
public void testHash() {
  PartitionStrategy unnamed =
      new PartitionStrategy.Builder().hash("id", 64).build();
  checkParser(unnamed,
      "[ {\"type\": \"hash\", \"source\": \"id\", \"buckets\": 64} ]");

  PartitionStrategy named =
      new PartitionStrategy.Builder().hash("id", "h", 64).build();
  checkParser(named,
      "[ {\"type\": \"hash\", " +
          "\"source\": \"id\", " +
          "\"name\": \"h\", " +
          "\"buckets\": 64} ]");

  final String withoutBuckets = "[ {\"type\": \"hash\", " +
      "\"source\": \"id\", " +
      "\"name\": \"h\"} ]";
  TestHelpers.assertThrows("Should reject missing buckets",
      ValidationException.class, new Runnable() {
        @Override
        public void run() {
          PartitionStrategyParser.parse(withoutBuckets);
        }
      });

  final String nonNumericBuckets = "[ {\"type\": \"hash\", " +
      "\"source\": \"id\", " +
      "\"name\": \"h\", " +
      "\"buckets\": \"green\"} ]";
  TestHelpers.assertThrows("Should reject invalid buckets",
      ValidationException.class, new Runnable() {
        @Override
        public void run() {
          PartitionStrategyParser.parse(nonNumericBuckets);
        }
      });
}
Example #26
Source File: TestColumnMappingParser.java From kite with Apache License 2.0 | 5 votes |
/**
 * Expects a {@link ValidationException} when the column mapping text is not
 * well-formed JSON (a comma where a colon should follow "type").
 */
@Test
public void testInvalidJson() {
  final String malformedJson = "[ {\"type\", \"key\", \"source\": \"banana\"} ]";
  Runnable parseMalformed = new Runnable() {
    @Override
    public void run() {
      ColumnMappingParser.parse(malformedJson);
    }
  };
  TestHelpers.assertThrows("Should reject bad JSON",
      ValidationException.class, parseMalformed);
}
Example #27
Source File: TestColumnMappingParser.java From kite with Apache License 2.0 | 5 votes |
/**
 * Expects a {@link ValidationException} when the mapping array contains,
 * after one valid object entry, an element that is a JSON string, number or
 * array instead of an object.
 */
@Test
public void testNonRecordMapping() {
  // one valid mapping followed by a comma; the bad element is appended after it
  final String validEntry = "{\"type\": \"key\", \"source\": \"id\"},";

  final String withString = "[ " + validEntry + "\"cheese!\"" + " ]";
  TestHelpers.assertThrows("Should reject JSON string mapping",
      ValidationException.class, new Runnable() {
        @Override
        public void run() {
          ColumnMappingParser.parse(withString);
        }
      });

  final String withNumber = "[ " + validEntry + "34" + " ]";
  TestHelpers.assertThrows("Should reject JSON number mapping",
      ValidationException.class, new Runnable() {
        @Override
        public void run() {
          ColumnMappingParser.parse(withNumber);
        }
      });

  final String withArray = "[ " + validEntry + "[ 1, 2, 3 ]" + " ]";
  TestHelpers.assertThrows("Should reject JSON array mapping",
      ValidationException.class, new Runnable() {
        @Override
        public void run() {
          ColumnMappingParser.parse(withArray);
        }
      });
}
Example #28
Source File: TestColumnMappingParser.java From kite with Apache License 2.0 | 5 votes |
/**
 * Expects a {@link ValidationException} when a column mapping entry has a
 * source but no "type".
 */
@Test
public void testMissingType() {
  final String withoutType = "[ {\"source\": \"banana\"} ]";
  Runnable parseWithoutType = new Runnable() {
    @Override
    public void run() {
      ColumnMappingParser.parse(withoutType);
    }
  };
  TestHelpers.assertThrows("Should reject missing mapping type",
      ValidationException.class, parseWithoutType);
}
Example #29
Source File: TestColumnMappingParser.java From kite with Apache License 2.0 | 5 votes |
/**
 * Expects a {@link ValidationException} when a column mapping entry uses the
 * mapping type "cats".
 */
@Test
public void testUnknownType() {
  final String unknownTypeJson = "[ {\"type\": \"cats\", \"source\": \"banana\"} ]";
  Runnable parseUnknownType = new Runnable() {
    @Override
    public void run() {
      ColumnMappingParser.parse(unknownTypeJson);
    }
  };
  TestHelpers.assertThrows("Should reject unknown mapping type",
      ValidationException.class, parseUnknownType);
}
Example #30
Source File: TestColumnMappingParser.java From kite with Apache License 2.0 | 5 votes |
/**
 * Expects a {@link ValidationException} when the column mapping is given as a
 * single JSON object rather than a JSON array.
 */
@Test
public void testJsonObject() {
  final String bareObject = "{\"type\": \"year\", \"source\": \"banana\"}";
  Runnable parseBareObject = new Runnable() {
    @Override
    public void run() {
      ColumnMappingParser.parse(bareObject);
    }
  };
  TestHelpers.assertThrows("Should reject non-array strategy",
      ValidationException.class, parseBareObject);
}