Java Code Examples for org.apache.avro.generic.GenericData#Record
The following examples show how to use org.apache.avro.generic.GenericData#Record.
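As a quick orientation before the examples: GenericData.Record is Avro's generic, schema-driven record implementation, with fields set by name via put() and read back via get(), and no generated classes required. A minimal self-contained sketch (the inline User schema is hypothetical, chosen only for illustration):

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class GenericRecordBasics {
    public static void main(String[] args) {
        // Hypothetical schema, defined inline for illustration.
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
            + "{\"name\":\"name\",\"type\":\"string\"},"
            + "{\"name\":\"favorite_number\",\"type\":\"int\"}]}");

        // GenericData.Record is constructed from a schema; fields are
        // addressed by name (or by position via the IndexedRecord API).
        GenericRecord user = new GenericData.Record(schema);
        user.put("name", "Alyssa");
        user.put("favorite_number", 256);

        System.out.println(user.get("name"));            // Alyssa
        System.out.println(user.getSchema().getName());  // User
    }
}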
Example 1
Source File: TestJsonUtil.java From kite with Apache License 2.0

@Test
public void testSchemaInferenceMap() throws Exception {
    Schema recordSchema = SchemaBuilder.record("Test").fields()
        .requiredString("aString")
        .name("aMap").type().map().values().stringType().noDefault()
        .endRecord();
    String jsonSample = "{" +
        "\"aString\": \"triangle\"," +
        "\"aMap\": { \"left\": \"timid\", \"right\": \"dictionary\" }" +
        "}";
    JsonNode datum = JsonUtil.parse(jsonSample);
    Assert.assertEquals("Should produce expected schema",
        recordSchema, JsonUtil.inferSchemaWithMaps(datum, "Test"));

    Map<String, Object> aMap = Maps.newLinkedHashMap();
    aMap.put("left", "timid");
    aMap.put("right", "dictionary");
    GenericData.Record expected = new GenericData.Record(recordSchema);
    expected.put("aString", "triangle");
    expected.put("aMap", aMap);

    Assert.assertEquals("Should convert to record",
        expected, convertGeneric(datum, recordSchema));
}
Example 2
Source File: TestTransformCommandCluster.java From kite with Apache License 2.0

@Test
@SuppressWarnings("unchecked")
public void testCopyWithNumWriters() throws Exception {
    Assume.assumeTrue(setLocalReducerMax(getConfiguration(), 3));

    command.repoURI = repoUri;
    command.numWriters = 3;
    command.datasets = Lists.newArrayList(source, dest);

    int rc = command.run();
    Assert.assertEquals("Should return success", 0, rc);

    DatasetRepository repo = DatasetRepositories.repositoryFor("repo:" + repoUri);
    FileSystemDataset<GenericData.Record> ds = (FileSystemDataset<GenericData.Record>)
        repo.<GenericData.Record> load("default", dest);
    int size = DatasetTestUtilities.datasetSize(ds);
    Assert.assertEquals("Should contain copied records", 6, size);

    Assert.assertEquals("Should produce 3 files",
        3, Iterators.size(ds.pathIterator()));

    verify(console).info("Added {} records to \"{}\"", 6l, dest);
    verifyNoMoreInteractions(console);
}
Example 3
Source File: AggregateCombineFnTest.java From components with Apache License 2.0

@Test
public void MinDoubleAccumulatorFnTest() {
    List<GenericData.Record> testData =
        genRecords(Arrays.asList(3.3, 2.2, 10.10, 1.1, 5.5), AvroUtils._double());

    AggregateCombineFn.MinDoubleAccumulatorFn fn1 = new AggregateCombineFn.MinDoubleAccumulatorFn();
    fn1.createAccumulator();
    AggregateCombineFn.MinDoubleAccumulatorFn fn2 = new AggregateCombineFn.MinDoubleAccumulatorFn();
    fn2.createAccumulator();
    AggregateCombineFn.MinDoubleAccumulatorFn fn3 = new AggregateCombineFn.MinDoubleAccumulatorFn();
    fn3.createAccumulator();

    double delta = 0.0;
    fn1.addInput(testData.get(0));
    Assert.assertEquals(3.3, fn1.extractOutput(), delta);
    fn1.addInput(testData.get(1));
    Assert.assertEquals(2.2, fn1.getAccumulators(), delta);

    fn2.addInput(testData.get(2));
    fn2.addInput(testData.get(3));
    Assert.assertEquals(1.1, fn2.extractOutput(), delta);

    fn3.addInput(testData.get(4));
    Assert.assertEquals(5.5, fn3.extractOutput(), delta);

    fn1.mergeAccumulators(Arrays.asList(fn2.getAccumulators(), fn3.getAccumulators()));
    Assert.assertEquals(1.1, fn1.extractOutput(), delta);
}
Example 4
Source File: ValueReaders.java From iceberg with Apache License 2.0

@Override
protected GenericData.Record reuseOrCreate(Object reuse) {
    if (reuse instanceof GenericData.Record) {
        return (GenericData.Record) reuse;
    } else {
        return new GenericData.Record(recordSchema);
    }
}
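The reuse pattern above pairs naturally with Avro's own reuse hook, GenericDatumReader.read(reuse, decoder), which fills a previously allocated record instead of allocating a new one per datum. A hedged round-trip sketch under that assumption (the one-field Row schema and the value are made up for illustration):

import java.io.ByteArrayOutputStream;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class ReuseSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical single-field schema, for illustration only.
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Row\",\"fields\":"
            + "[{\"name\":\"v\",\"type\":\"string\"}]}");

        // Serialize one datum to bytes.
        GenericRecord datum = new GenericData.Record(schema);
        datum.put("v", "hello");
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        new GenericDatumWriter<GenericRecord>(schema).write(datum, encoder);
        encoder.flush();

        // Deserialize, passing a pre-allocated record as "reuse":
        // Avro fills it in place rather than allocating a fresh record.
        BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
        GenericRecord reuse = new GenericData.Record(schema);
        reuse = new GenericDatumReader<GenericRecord>(schema).read(reuse, decoder);
        System.out.println(reuse.get("v")); // hello (as a Utf8)
    }
}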
Example 5
Source File: MarketoInputReaderTest.java From components with Apache License 2.0

@Test
public void testStart() throws Exception {
    MarketoRecordResult mkto = new MarketoRecordResult();
    mkto.setSuccess(false);
    mkto.setErrors(Arrays.asList(new MarketoError("REST", "error")));
    when(client.bulkImport(any())).thenReturn(mkto);
    when(client.getLead(any(), any())).thenReturn(mkto);
    when(client.getMultipleLeads(any(), any())).thenReturn(mkto);
    when(client.getLeadActivity(any(), any())).thenReturn(mkto);
    when(client.getLeadChanges(any(), any())).thenReturn(mkto);
    when(client.describeCustomObject(any())).thenReturn(mkto);
    when(client.listCustomObjects(any())).thenReturn(mkto);
    when(client.getCustomObjects(any(), any())).thenReturn(mkto);
    try {
        assertFalse(reader.start());
        fail("Should not be here");
    } catch (Exception e) {
        // expected: the mocked client reports a failed result
    }

    IndexedRecord record = new GenericData.Record(MarketoConstants.getEmptySchema());
    mkto.setSuccess(true);
    mkto.setRecords(Arrays.asList(record));
    when(client.bulkImport(any())).thenReturn(mkto);
    when(client.getLead(any(), any())).thenReturn(mkto);
    when(client.getMultipleLeads(any(), any())).thenReturn(mkto);
    when(client.getLeadActivity(any(), any())).thenReturn(mkto);
    when(client.getLeadChanges(any(), any())).thenReturn(mkto);
    when(client.describeCustomObject(any())).thenReturn(mkto);
    when(client.listCustomObjects(any())).thenReturn(mkto);
    when(client.getCustomObjects(any(), any())).thenReturn(mkto);
    assertFalse(reader.start());
}
Example 6
Source File: SchemaRegistryProducer.java From blog with MIT License

public static void main(String[] args) {
    // Parse the schema with Avro
    Schema.Parser parser = new Schema.Parser();
    Schema schema = parser.parse(userSchema);

    // Set producer properties
    Properties properties = new Properties();
    // Kafka bootstrap servers
    properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,
        "node-160:9092,node-161:9092,node-162:9092");
    // Key serializer class
    properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    // Value serializer class
    properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class);
    // Schema registry URL
    properties.put("schema.registry.url", "http://node-160:8081");

    // Create the producer
    KafkaProducer<Object, Object> producer = new KafkaProducer<>(properties);

    // Build the message
    GenericData.Record record = new GenericData.Record(schema);
    record.put("name", "hvkcoder");
    producer.send(new ProducerRecord<>("topic01", record));

    // Close the producer
    producer.close();
}
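For context, the matching consumer side might look like the sketch below. It assumes the same topic and schema registry as the producer above and uses Confluent's KafkaAvroDeserializer to get GenericRecord values back; the group id is hypothetical and the addresses are carried over from the example, not verified:

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

import io.confluent.kafka.serializers.KafkaAvroDeserializer;
import org.apache.avro.generic.GenericRecord;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

public class SchemaRegistryConsumerSketch {
    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "node-160:9092");
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, "demo-group"); // hypothetical group id
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, KafkaAvroDeserializer.class);
        properties.put("schema.registry.url", "http://node-160:8081");

        try (KafkaConsumer<String, GenericRecord> consumer = new KafkaConsumer<>(properties)) {
            consumer.subscribe(Collections.singletonList("topic01"));
            ConsumerRecords<String, GenericRecord> records = consumer.poll(Duration.ofSeconds(1));
            for (ConsumerRecord<String, GenericRecord> record : records) {
                // The deserializer rebuilds a GenericRecord from the registered schema.
                System.out.println(record.value().get("name"));
            }
        }
    }
}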
Example 7
Source File: NetSuiteOutputWriterIT.java From components with Apache License 2.0

private static List<IndexedRecord> makeRecordRefIndexedRecords(Schema schema, List<RecordRef> refList) {
    List<IndexedRecord> indexedRecordList = new ArrayList<>(refList.size());
    for (RecordRef ref : refList) {
        GenericRecord indexedRecord = new GenericData.Record(schema);
        indexedRecord.put("InternalId", ref.getInternalId());
        indexedRecordList.add(indexedRecord);
    }
    return indexedRecordList;
}
Example 8
Source File: RegressionAdmmTrain.java From ml-ease with Apache License 2.0

private void updateLogLikBestModel(JobConf conf, int niter, Map<String, LinearModel> z, String testPath,
        boolean ignoreValue, MutableFloat bestTestLoglik, String outBasePath, int numClickReplicates)
        throws IOException {
    Map<String, Double> loglik;
    loglik = testloglik(conf, z, testPath, 1, ignoreValue);

    AvroHdfsFileWriter<GenericRecord> writer = new AvroHdfsFileWriter<GenericRecord>(conf,
        outBasePath + "/sample-test-loglik/iteration-" + niter + ".avro", SampleTestLoglik.SCHEMA$);
    DataFileWriter<GenericRecord> testRecordWriter = writer.get();

    for (String k : z.keySet()) {
        GenericData.Record valuemap = new GenericData.Record(SampleTestLoglik.SCHEMA$);
        valuemap.put("iter", niter);
        valuemap.put("testLoglik", loglik.get(k).floatValue());
        valuemap.put("lambda", k);
        testRecordWriter.append(valuemap);
        _logger.info("Sample test loglik for lambda=" + k + " is: " + String.valueOf(loglik.get(k)));

        // output best model up to now
        if (loglik.get(k) > bestTestLoglik.floatValue() && niter > 0) {
            String bestModelPath = outBasePath + "/best-model/best-iteration-" + niter + ".avro";
            FileSystem fs = FileSystem.get(conf);
            fs.delete(new Path(outBasePath + "/best-model"), true);
            LinearModelUtils.writeLinearModel(conf, bestModelPath, z.get(k), k);
            bestTestLoglik.setValue(loglik.get(k).floatValue());
        }
    }
    testRecordWriter.close();
}
Example 9
Source File: MapTypeClusterIntegrationTest.java From incubator-pinot with Apache License 2.0

private File createAvroFile() throws Exception {
    org.apache.avro.Schema avroSchema =
        org.apache.avro.Schema.createRecord("myRecord", null, null, false);
    org.apache.avro.Schema stringKeyMapAvroSchema =
        org.apache.avro.Schema.createMap(org.apache.avro.Schema.create(Type.INT));
    org.apache.avro.Schema intKeyMapAvroSchema =
        org.apache.avro.Schema.createMap(org.apache.avro.Schema.create(Type.STRING));
    List<Field> fields = Arrays.asList(
        new Field(STRING_KEY_MAP_FIELD_NAME, stringKeyMapAvroSchema, null, null),
        new Field(INT_KEY_MAP_FIELD_NAME, intKeyMapAvroSchema, null, null));
    avroSchema.setFields(fields);

    File avroFile = new File(_tempDir, "data.avro");
    try (DataFileWriter<GenericData.Record> fileWriter =
            new DataFileWriter<>(new GenericDatumWriter<>(avroSchema))) {
        fileWriter.create(avroSchema, avroFile);
        for (int i = 0; i < NUM_DOCS; i++) {
            Map<String, Integer> stringKeyMap = new HashMap<>();
            stringKeyMap.put("k1", i);
            stringKeyMap.put("k2", NUM_DOCS + i);
            Map<Integer, String> intKeyMap = new HashMap<>();
            intKeyMap.put(95, Integer.toString(i));
            intKeyMap.put(717, Integer.toString(NUM_DOCS + i));
            GenericData.Record record = new GenericData.Record(avroSchema);
            record.put(STRING_KEY_MAP_FIELD_NAME, stringKeyMap);
            record.put(INT_KEY_MAP_FIELD_NAME, intKeyMap);
            fileWriter.append(record);
        }
    }
    return avroFile;
}
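To inspect a container file like the one written above, Avro's standard DataFileReader can read it back; the schema is recovered from the file header, so none needs to be supplied. A short sketch (the file path is assumed to be wherever createAvroFile() wrote its output):

import java.io.File;
import java.io.IOException;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class ReadBackSketch {
    public static void main(String[] args) throws IOException {
        // Assumed path: the temp-dir output of the test above.
        File avroFile = new File("data.avro");
        try (DataFileReader<GenericRecord> reader =
                new DataFileReader<>(avroFile, new GenericDatumReader<>())) {
            for (GenericRecord record : reader) {
                System.out.println(record); // each record holds the two map fields
            }
        }
    }
}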
Example 10
Source File: ParquetFileReader.java From kafka-connect-fs with Apache License 2.0

@Override
protected GenericRecord nextRecord() {
    GenericRecord record;
    if (this.projection != null) {
        record = new GenericData.Record(this.projection);
        this.projection.getFields().forEach(
            field -> record.put(field.name(), currentRecord.get(field.name())));
    } else {
        record = currentRecord;
    }
    currentRecord = null;
    incrementOffset();
    return record;
}
Example 11
Source File: ClusterIntegrationTestUtils.java From incubator-pinot with Apache License 2.0

/**
 * Pushes randomly generated Avro records into a Kafka topic.
 *
 * @param avroFile Sample Avro file used to extract the Avro schema
 * @param kafkaBroker Kafka broker config
 * @param kafkaTopic Kafka topic
 * @param numKafkaMessagesToPush Number of Kafka messages to push
 * @param maxNumKafkaMessagesPerBatch Maximum number of Kafka messages per batch
 * @param header Optional Kafka message header
 * @param partitionColumn Optional partition column
 * @throws Exception
 */
@SuppressWarnings("unused")
public static void pushRandomAvroIntoKafka(File avroFile, String kafkaBroker, String kafkaTopic,
        int numKafkaMessagesToPush, int maxNumKafkaMessagesPerBatch, @Nullable byte[] header,
        @Nullable String partitionColumn) throws Exception {
    Properties properties = new Properties();
    properties.put("metadata.broker.list", kafkaBroker);
    properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
    properties.put("request.required.acks", "1");
    properties.put("partitioner.class", "kafka.producer.ByteArrayPartitioner");

    StreamDataProducer producer =
        StreamDataProvider.getStreamDataProducer(KafkaStarterUtils.KAFKA_PRODUCER_CLASS_NAME, properties);
    try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536)) {
        try (DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile)) {
            BinaryEncoder binaryEncoder = new EncoderFactory().directBinaryEncoder(outputStream, null);
            Schema avroSchema = reader.getSchema();
            GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(avroSchema);
            GenericRecord genericRecord = new GenericData.Record(avroSchema);
            while (numKafkaMessagesToPush > 0) {
                generateRandomRecord(genericRecord, avroSchema);
                outputStream.reset();
                if (header != null && 0 < header.length) {
                    outputStream.write(header);
                }
                datumWriter.write(genericRecord, binaryEncoder);
                binaryEncoder.flush();
                byte[] keyBytes = (partitionColumn == null) ? Longs.toByteArray(System.currentTimeMillis())
                    : (genericRecord.get(partitionColumn)).toString().getBytes();
                byte[] bytes = outputStream.toByteArray();
                producer.produce(kafkaTopic, keyBytes, bytes);
                numKafkaMessagesToPush--;
            }
        }
    }
}
Example 12
Source File: TestMergeContent.java From nifi with Apache License 2.0

@Test
public void testAvroConcatWithDifferentMetadataDoNotMerge() throws IOException, InterruptedException {
    final TestRunner runner = TestRunners.newTestRunner(new MergeContent());
    runner.setProperty(MergeContent.MAX_ENTRIES, "3");
    runner.setProperty(MergeContent.MIN_ENTRIES, "3");
    runner.setProperty(MergeContent.MERGE_FORMAT, MergeContent.MERGE_FORMAT_AVRO);
    runner.setProperty(MergeContent.METADATA_STRATEGY, MergeContent.METADATA_STRATEGY_DO_NOT_MERGE);

    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/TestMergeContent/user.avsc"));

    final GenericRecord user1 = new GenericData.Record(schema);
    user1.put("name", "Alyssa");
    user1.put("favorite_number", 256);
    final Map<String, String> userMeta1 = new HashMap<String, String>() {{
        put("test_metadata1", "Test 1");
    }};

    final GenericRecord user2 = new GenericData.Record(schema);
    user2.put("name", "Ben");
    user2.put("favorite_number", 7);
    user2.put("favorite_color", "red");
    final Map<String, String> userMeta2 = new HashMap<String, String>() {{
        put("test_metadata1", "Test 2"); // Test non-matching values
    }};

    final GenericRecord user3 = new GenericData.Record(schema);
    user3.put("name", "John");
    user3.put("favorite_number", 5);
    user3.put("favorite_color", "blue");
    final Map<String, String> userMeta3 = new HashMap<String, String>() {{
        put("test_metadata1", "Test 1");
        put("test_metadata2", "Test"); // Test unique
    }};

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = serializeAvroRecord(schema, user1, datumWriter, userMeta1);
    final ByteArrayOutputStream out2 = serializeAvroRecord(schema, user2, datumWriter, userMeta2);
    final ByteArrayOutputStream out3 = serializeAvroRecord(schema, user3, datumWriter, userMeta3);

    runner.enqueue(out1.toByteArray());
    runner.enqueue(out2.toByteArray());
    runner.enqueue(out3.toByteArray());

    runner.run();
    runner.assertQueueEmpty();
    runner.assertTransferCount(MergeContent.REL_MERGED, 1);
    runner.assertTransferCount(MergeContent.REL_FAILURE, 2);
    runner.assertTransferCount(MergeContent.REL_ORIGINAL, 3);

    final MockFlowFile bundle = runner.getFlowFilesForRelationship(MergeContent.REL_MERGED).get(0);
    bundle.assertAttributeEquals(CoreAttributes.MIME_TYPE.key(), "application/avro-binary");

    // create a reader for the merged content
    byte[] data = runner.getContentAsByteArray(bundle);
    final Map<String, GenericRecord> users = getGenericRecordMap(data, schema, "name");
    Assert.assertEquals(1, users.size());
    Assert.assertTrue(users.containsKey("Alyssa"));
}
Example 13
Source File: JDBCSPTestIT.java From components with Apache License 2.0

@SuppressWarnings("rawtypes")
@Test
public void test_basic_as_output_and_input() throws Exception {
    TJDBCSPDefinition definition = new TJDBCSPDefinition();
    TJDBCSPProperties properties = DBTestUtils.createCommonJDBCSPProperties(allSetting, definition);

    properties.spName.setValue("SYSCS_UTIL.SYSCS_DISABLE_LOG_ARCHIVE_MODE");
    Schema schema = DBTestUtils.createSPSchema3(tablename);
    properties.main.schema.setValue(schema);
    properties.schemaFlow.schema.setValue(schema);
    properties.spParameterTable.parameterTypes.setValue(Arrays.asList(SPParameterTable.ParameterType.IN.name()));
    properties.spParameterTable.schemaColumns.setValue(Arrays.asList("PARAMETER1"));

    JDBCSPSink sink = new JDBCSPSink();
    sink.initialize(null, properties);
    ValidationResult result = sink.validate(null);
    Assert.assertTrue(result.getStatus() == ValidationResult.Result.OK);

    WriteOperation operation = sink.createWriteOperation();
    JDBCSPWriter writer = (JDBCSPWriter) operation.createWriter(null);

    try {
        writer.open("wid");

        IndexedRecord r1 = new GenericData.Record(properties.main.schema.getValue());
        r1.put(0, 0);
        r1.put(1, "wangwei");
        writer.write(r1);

        List<IndexedRecord> writeResult = writer.getSuccessfulWrites();
        Assert.assertEquals(1, writeResult.size());
        IndexedRecord record = writeResult.get(0);
        Assert.assertEquals(Integer.valueOf(0), record.get(0));
        Assert.assertEquals("wangwei", record.get(1));
        writer.cleanWrites();
    } finally {
        writer.close();
    }
}
Example 14
Source File: SalesforceWriter.java From components with Apache License 2.0

private void handleReject(IndexedRecord input, Error[] resultErrors, String[] changedItemKeys, int batchIdx)
        throws IOException {
    String changedItemKey = null;
    if (batchIdx < changedItemKeys.length) {
        if (changedItemKeys[batchIdx] != null) {
            changedItemKey = changedItemKeys[batchIdx];
        } else {
            changedItemKey = String.valueOf(batchIdx + 1);
        }
    } else {
        changedItemKey = "Batch index out of bounds";
    }
    StringBuilder errors = SalesforceRuntime.addLog(resultErrors, changedItemKey, logWriter);
    if (exceptionForErrors) {
        if (errors.toString().length() > 0) {
            if (logWriter != null) {
                logWriter.close();
            }
            throw new IOException(errors.toString());
        }
    } else {
        rejectCount++;
        Schema outSchema = sprops.schemaReject.schema.getValue();
        if (outSchema == null || outSchema.getFields().size() == 0) {
            return;
        }
        if (input.getSchema().equals(outSchema)) {
            rejectedWrites.add(input);
        } else {
            IndexedRecord reject = null;
            if (AvroUtils.isIncludeAllFields(outSchema)) {
                Schema runtimeSchema = input.getSchema();
                List<Schema.Field> addedFields = new ArrayList<>();
                // Check whether design schema has additional field
                Schema.Field errorCodeField = outSchema.getField(TSalesforceOutputProperties.FIELD_ERROR_CODE);
                Schema.Field errorField = outSchema.getField(TSalesforceOutputProperties.FIELD_ERROR_FIELDS);
                Schema.Field errorMsgField = outSchema.getField(TSalesforceOutputProperties.FIELD_ERROR_MESSAGE);
                if (errorCodeField != null) {
                    addedFields.add(new Schema.Field(errorCodeField.name(), errorCodeField.schema(),
                        errorCodeField.doc(), errorCodeField.defaultVal()));
                }
                if (errorField != null) {
                    addedFields.add(new Schema.Field(errorField.name(), errorField.schema(),
                        errorField.doc(), errorField.defaultVal()));
                }
                if (errorMsgField != null) {
                    addedFields.add(new Schema.Field(errorMsgField.name(), errorMsgField.schema(),
                        errorMsgField.doc(), errorMsgField.defaultVal()));
                }
                if (addedFields.size() > 0) {
                    // Append additional fields to the runtime schema
                    runtimeSchema = AvroUtils.appendFields(runtimeSchema,
                        addedFields.toArray(new Schema.Field[addedFields.size()]));
                }
                reject = new GenericData.Record(runtimeSchema);
            } else {
                reject = new GenericData.Record(outSchema);
            }
            for (Schema.Field outField : reject.getSchema().getFields()) {
                Object outValue = null;
                Schema.Field inField = input.getSchema().getField(outField.name());
                if (inField != null) {
                    outValue = input.get(inField.pos());
                } else if (resultErrors.length > 0) {
                    Error error = resultErrors[0];
                    if (TSalesforceOutputProperties.FIELD_ERROR_CODE.equals(outField.name())) {
                        outValue = error.getStatusCode() != null ? error.getStatusCode().toString() : null;
                    } else if (TSalesforceOutputProperties.FIELD_ERROR_FIELDS.equals(outField.name())) {
                        StringBuffer fields = new StringBuffer();
                        for (String field : error.getFields()) {
                            fields.append(field);
                            fields.append(",");
                        }
                        if (fields.length() > 0) {
                            fields.deleteCharAt(fields.length() - 1);
                        }
                        outValue = fields.toString();
                    } else if (TSalesforceOutputProperties.FIELD_ERROR_MESSAGE.equals(outField.name())) {
                        outValue = error.getMessage();
                    }
                }
                reject.put(outField.pos(), outValue);
            }
            rejectedWrites.add(reject);
        }
        Property<OutputAction> outputAction = sprops.outputAction;
        LOGGER.info(MESSAGES.getMessage("info.rejectedRecord",
            sprops.outputAction.getPossibleValuesDisplayName(outputAction.getValue()).toLowerCase(), dataCount));
    }
}
Example 15
Source File: ConverterTest.java From xml-avro with Apache License 2.0

@Test
public void recordWithWildcardField() {
    String xsd =
        "<xs:schema xmlns:xs='http://www.w3.org/2001/XMLSchema'>" +
        " <xs:complexType name='type'>" +
        " <xs:sequence>" +
        " <xs:element name='field' type='xs:string'/>" +
        " <xs:any/>" +
        " </xs:sequence>" +
        " </xs:complexType>" +
        " <xs:element name='root' type='type'/>" +
        "</xs:schema>";

    Schema schema = Converter.createSchema(xsd);
    assertEquals(2, schema.getFields().size());
    Schema.Field wildcardField = schema.getField(Source.WILDCARD);
    assertEquals(Schema.Type.MAP, wildcardField.schema().getType());

    // Two wildcard-matched elements
    String xml =
        "<root>" +
        " <field>field</field>" +
        " <field0>field0</field0>" +
        " <field1>field1</field1>" +
        "</root>";

    GenericData.Record record = Converter.createDatum(schema, xml);
    assertEquals("field", record.get("field"));

    @SuppressWarnings("unchecked")
    java.util.Map<String, String> map = (java.util.Map<String, String>) record.get(Source.WILDCARD);
    assertEquals(2, map.size());
    assertEquals("field0", map.get("field0"));
    assertEquals("field1", map.get("field1"));

    // No wildcard-matched element
    xml = "<root><field>field</field></root>";
    record = Converter.createDatum(schema, xml);
    assertEquals("field", record.get("field"));
    assertEquals(Collections.emptyMap(), record.get(Source.WILDCARD));
}
Example 16
Source File: SimpleAvroJob.java From datafu with Apache License 2.0

public TheMapper() {
    key = new GenericData.Record(KEY_SCHEMA);
    value = new GenericData.Record(VALUE_SCHEMA);
    value.put("count", 1L);
}
Example 17
Source File: FastGenericDeserializerGeneratorTest.java From avro-util with BSD 2-Clause "Simplified" License

@Test(groups = {"deserializationTest"}, dataProvider = "Implementation")
public void shouldReadArrayOfRecords(Implementation implementation) {
    // given
    Schema recordSchema = createRecord("record",
        createPrimitiveUnionFieldSchema("field", Schema.Type.STRING));
    Schema arrayRecordSchema = Schema.createArray(recordSchema);

    GenericData.Record subRecordBuilder = new GenericData.Record(recordSchema);
    subRecordBuilder.put("field", "abc");

    GenericData.Array<GenericData.Record> recordsArray = new GenericData.Array<>(0, arrayRecordSchema);
    recordsArray.add(subRecordBuilder);
    recordsArray.add(subRecordBuilder);

    // when
    GenericData.Array<GenericRecord> array =
        implementation.decode(arrayRecordSchema, arrayRecordSchema, genericDataAsDecoder(recordsArray));

    // then
    Assert.assertEquals(2, array.size());
    Assert.assertEquals(new Utf8("abc"), array.get(0).get("field"));
    Assert.assertEquals(new Utf8("abc"), array.get(1).get("field"));

    // given
    arrayRecordSchema = Schema.createArray(createUnionSchema(recordSchema));

    subRecordBuilder = new GenericData.Record(recordSchema);
    subRecordBuilder.put("field", "abc");

    recordsArray = new GenericData.Array<>(0, arrayRecordSchema);
    recordsArray.add(subRecordBuilder);
    recordsArray.add(subRecordBuilder);

    // when
    array = implementation.decode(arrayRecordSchema, arrayRecordSchema, genericDataAsDecoder(recordsArray));

    // then
    Assert.assertEquals(2, array.size());
    Assert.assertEquals(new Utf8("abc"), array.get(0).get("field"));
    Assert.assertEquals(new Utf8("abc"), array.get(1).get("field"));
}
Example 18
Source File: ItemModelTrain.java From ml-ease with Apache License 2.0

@Override
public void reduce(Utf8 key, Iterable<RegressionPrepareOutput> values,
        AvroCollector<GenericData.Record> collector, Reporter reporter) throws IOException {
    // Prepare the data set
    LibLinearDataset dataset;
    if (_binaryFeature) {
        dataset = new LibLinearBinaryDataset(1.0, _shortFeatureIndex);
    } else {
        dataset = new LibLinearDataset(1.0);
    }
    for (RegressionPrepareOutput value : values) {
        dataset.addInstanceAvro(value);
    }
    dataset.finish();

    // First determine the prior mean for the intercept
    Map<String, Double> priorMeanMap = new HashMap<String, Double>();
    double interceptPriorMean = _interceptDefaultPriorMean;
    if (_interceptPriorMeanMap.containsKey(key.toString())) {
        interceptPriorMean = _interceptPriorMeanMap.get(key.toString());
        reporter.incrCounter("ItemModelTrainV3",
            "Found intercept prior mean in intercept prior mean map", 1);
    }
    priorMeanMap.put(LibLinearDataset.INTERCEPT_NAME, interceptPriorMean);

    // now cross product the lambdas for intercept and default
    for (float interceptLambda : _interceptLambdas)
        for (float defaultLambda : _defaultLambdas) {
            _priorVarMap.put(LibLinearDataset.INTERCEPT_NAME, 1.0 / interceptLambda);
            GenericData.Record output = new GenericData.Record(LinearModelWithVarAvro.SCHEMA$);

            // Run liblinear
            LibLinear liblinear = new LibLinear();
            liblinear.setReporter(reporter, _reportfreq);
            String option = "epsilon=" + String.valueOf(_liblinearEpsilon);
            try {
                liblinear.train(dataset, null, priorMeanMap, _priorVarMap,
                    0, 1.0 / defaultLambda, option, _computeVar);
                LinearModel model = liblinear.getLinearModel();
                output.put("key", String.valueOf(interceptLambda) + ":" + String.valueOf(defaultLambda) + "#" + key);
                output.put("model", model.toAvro(LIBLINEAR_INTERCEPT_KEY));
                if (_computeVar) {
                    LinearModel posteriorVar = new LinearModel(LIBLINEAR_INTERCEPT_KEY, liblinear.getPostVarMap());
                    output.put("posteriorVar", posteriorVar.toAvro(LIBLINEAR_INTERCEPT_KEY));
                } else {
                    output.put("posteriorVar", new LinearModel().toAvro(LIBLINEAR_INTERCEPT_KEY));
                }
            } catch (Exception e) {
                // output everything to debug
                _logger.info("Dataset size=" + dataset.y.length);
                _logger.info("Number of features=" + dataset.nFeatures());
                _logger.info("Model size=" + liblinear.getParamMap().size());
                _logger.info("bias=" + liblinear.bias);
                _logger.info("Model:");
                for (String k : liblinear.getParamMap().keySet()) {
                    _logger.info(k + " " + liblinear.getParamMap().get(k).toString());
                }
                throw new IOException("Model fitting error!", e);
            }
            collector.collect(output);
        }
}
Example 19
Source File: FastGenericDeserializerGeneratorTest.java From avro-util with BSD 2-Clause "Simplified" License

@Test(groups = {"deserializationTest"}, dataProvider = "Implementation")
public void shouldReadSubRecordComplexCollectionsField(Implementation implementation) {
    // given
    Schema subRecordSchema = createRecord("subRecord",
        createPrimitiveUnionFieldSchema("subField", Schema.Type.STRING));
    Schema recordSchema = createRecord(
        createArrayFieldSchema("recordsArrayMap", Schema.createMap(createUnionSchema(subRecordSchema))),
        createMapFieldSchema("recordsMapArray", Schema.createArray(createUnionSchema(subRecordSchema))),
        createUnionField("recordsArrayMapUnion",
            Schema.createArray(Schema.createMap(createUnionSchema(subRecordSchema)))),
        createUnionField("recordsMapArrayUnion",
            Schema.createMap(Schema.createArray(createUnionSchema(subRecordSchema)))));

    GenericData.Record subRecordBuilder = new GenericData.Record(subRecordSchema);
    subRecordBuilder.put("subField", "abc");

    GenericData.Record builder = new GenericData.Record(recordSchema);
    List<Map<String, GenericRecord>> recordsArrayMap = new ArrayList<>();
    Map<String, GenericRecord> recordMap = new HashMap<>();
    recordMap.put("1", subRecordBuilder);
    recordsArrayMap.add(recordMap);
    builder.put("recordsArrayMap", recordsArrayMap);
    builder.put("recordsArrayMapUnion", recordsArrayMap);

    Map<String, List<GenericRecord>> recordsMapArray = new HashMap<>();
    List<GenericRecord> recordList = new ArrayList<>();
    recordList.add(subRecordBuilder);
    recordsMapArray.put("1", recordList);
    builder.put("recordsMapArray", recordsMapArray);
    builder.put("recordsMapArrayUnion", recordsMapArray);

    // when
    GenericRecord record = implementation.decode(recordSchema, recordSchema, genericDataAsDecoder(builder));

    // then
    Assert.assertEquals(new Utf8("abc"),
        ((List<Map<String, GenericRecord>>) record.get("recordsArrayMap")).get(0).get(new Utf8("1")).get("subField"));
    Assert.assertEquals(new Utf8("abc"),
        ((Map<String, List<GenericRecord>>) record.get("recordsMapArray")).get(new Utf8("1")).get(0).get("subField"));
    Assert.assertEquals(new Utf8("abc"),
        ((List<Map<String, GenericRecord>>) record.get("recordsArrayMapUnion")).get(0)
            .get(new Utf8("1"))
            .get("subField"));
    Assert.assertEquals(new Utf8("abc"),
        ((Map<String, List<GenericRecord>>) record.get("recordsMapArrayUnion")).get(new Utf8("1"))
            .get(0)
            .get("subField"));
}
Example 20
Source File: TestMergeContent.java From nifi with Apache License 2.0

@Test
public void testSimpleAvroConcat() throws IOException, InterruptedException {
    final TestRunner runner = TestRunners.newTestRunner(new MergeContent());
    runner.setProperty(MergeContent.MAX_ENTRIES, "3");
    runner.setProperty(MergeContent.MIN_ENTRIES, "3");
    runner.setProperty(MergeContent.MERGE_FORMAT, MergeContent.MERGE_FORMAT_AVRO);

    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/TestMergeContent/user.avsc"));

    final GenericRecord user1 = new GenericData.Record(schema);
    user1.put("name", "Alyssa");
    user1.put("favorite_number", 256);

    final GenericRecord user2 = new GenericData.Record(schema);
    user2.put("name", "Ben");
    user2.put("favorite_number", 7);
    user2.put("favorite_color", "red");

    final GenericRecord user3 = new GenericData.Record(schema);
    user3.put("name", "John");
    user3.put("favorite_number", 5);
    user3.put("favorite_color", "blue");

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = serializeAvroRecord(schema, user1, datumWriter);
    final ByteArrayOutputStream out2 = serializeAvroRecord(schema, user2, datumWriter);
    final ByteArrayOutputStream out3 = serializeAvroRecord(schema, user3, datumWriter);

    runner.enqueue(out1.toByteArray());
    runner.enqueue(out2.toByteArray());
    runner.enqueue(out3.toByteArray());

    runner.run();
    runner.assertQueueEmpty();
    runner.assertTransferCount(MergeContent.REL_MERGED, 1);
    runner.assertTransferCount(MergeContent.REL_FAILURE, 0);
    runner.assertTransferCount(MergeContent.REL_ORIGINAL, 3);

    final MockFlowFile bundle = runner.getFlowFilesForRelationship(MergeContent.REL_MERGED).get(0);
    bundle.assertAttributeEquals(CoreAttributes.MIME_TYPE.key(), "application/avro-binary");

    // create a reader for the merged content
    byte[] data = runner.getContentAsByteArray(bundle);
    final Map<String, GenericRecord> users = getGenericRecordMap(data, schema, "name");
    Assert.assertEquals(3, users.size());
    Assert.assertTrue(users.containsKey("Alyssa"));
    Assert.assertTrue(users.containsKey("Ben"));
    Assert.assertTrue(users.containsKey("John"));
}
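The getGenericRecordMap helper is not shown on this page. A hedged sketch of what such a helper plausibly does, using only standard Avro APIs: stream the merged Avro container from the raw bytes with DataFileStream and index each record by the given field. The real helper in TestMergeContent may differ.

// Sketch only; assumed behavior for a helper like getGenericRecordMap.
private static Map<String, GenericRecord> getGenericRecordMap(byte[] data, Schema schema, String keyField)
        throws IOException {
    Map<String, GenericRecord> map = new HashMap<>();
    try (DataFileStream<GenericRecord> stream =
            new DataFileStream<>(new ByteArrayInputStream(data), new GenericDatumReader<>(schema))) {
        for (GenericRecord record : stream) {
            // Index each merged record by the chosen field, e.g. "name".
            map.put(record.get(keyField).toString(), record);
        }
    }
    return map;
}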