Java Code Examples for org.apache.avro.generic.GenericData#Record
The following examples show how to use org.apache.avro.generic.GenericData#Record.
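As a quick orientation before the examples: GenericData.Record is Avro's generic, schema-driven record implementation, with fields set by name via put() and read back via get(), and no generated classes required. A minimal self-contained sketch (the inline User schema is hypothetical, chosen only for illustration):

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class GenericRecordBasics {
    public static void main(String[] args) {
        // Hypothetical schema, defined inline for illustration.
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
            + "{\"name\":\"name\",\"type\":\"string\"},"
            + "{\"name\":\"favorite_number\",\"type\":\"int\"}]}");

        // GenericData.Record is constructed from a schema; fields are
        // addressed by name (or by position via the IndexedRecord API).
        GenericRecord user = new GenericData.Record(schema);
        user.put("name", "Alyssa");
        user.put("favorite_number", 256);

        System.out.println(user.get("name"));            // Alyssa
        System.out.println(user.getSchema().getName());  // User
    }
}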
Example 1
Source File: TestJsonUtil.java From kite with Apache License 2.0

@Test
public void testSchemaInferenceMap() throws Exception {
    Schema recordSchema = SchemaBuilder.record("Test").fields()
        .requiredString("aString")
        .name("aMap").type().map().values().stringType().noDefault()
        .endRecord();
    String jsonSample = "{" +
        "\"aString\": \"triangle\"," +
        "\"aMap\": { \"left\": \"timid\", \"right\": \"dictionary\" }" +
        "}";
    JsonNode datum = JsonUtil.parse(jsonSample);
    Assert.assertEquals("Should produce expected schema",
        recordSchema, JsonUtil.inferSchemaWithMaps(datum, "Test"));

    Map<String, Object> aMap = Maps.newLinkedHashMap();
    aMap.put("left", "timid");
    aMap.put("right", "dictionary");
    GenericData.Record expected = new GenericData.Record(recordSchema);
    expected.put("aString", "triangle");
    expected.put("aMap", aMap);

    Assert.assertEquals("Should convert to record",
        expected, convertGeneric(datum, recordSchema));
}
Example 2
Source File: TestTransformCommandCluster.java From kite with Apache License 2.0

@Test
@SuppressWarnings("unchecked")
public void testCopyWithNumWriters() throws Exception {
    Assume.assumeTrue(setLocalReducerMax(getConfiguration(), 3));

    command.repoURI = repoUri;
    command.numWriters = 3;
    command.datasets = Lists.newArrayList(source, dest);

    int rc = command.run();
    Assert.assertEquals("Should return success", 0, rc);

    DatasetRepository repo = DatasetRepositories.repositoryFor("repo:" + repoUri);
    FileSystemDataset<GenericData.Record> ds = (FileSystemDataset<GenericData.Record>)
        repo.<GenericData.Record> load("default", dest);
    int size = DatasetTestUtilities.datasetSize(ds);
    Assert.assertEquals("Should contain copied records", 6, size);

    Assert.assertEquals("Should produce 3 files",
        3, Iterators.size(ds.pathIterator()));

    verify(console).info("Added {} records to \"{}\"", 6l, dest);
    verifyNoMoreInteractions(console);
}
Example 3
Source File: AggregateCombineFnTest.java From components with Apache License 2.0

@Test
public void MinDoubleAccumulatorFnTest() {
    List<GenericData.Record> testData =
        genRecords(Arrays.asList(3.3, 2.2, 10.10, 1.1, 5.5), AvroUtils._double());

    AggregateCombineFn.MinDoubleAccumulatorFn fn1 = new AggregateCombineFn.MinDoubleAccumulatorFn();
    fn1.createAccumulator();
    AggregateCombineFn.MinDoubleAccumulatorFn fn2 = new AggregateCombineFn.MinDoubleAccumulatorFn();
    fn2.createAccumulator();
    AggregateCombineFn.MinDoubleAccumulatorFn fn3 = new AggregateCombineFn.MinDoubleAccumulatorFn();
    fn3.createAccumulator();

    double delta = 0.0;
    fn1.addInput(testData.get(0));
    Assert.assertEquals(3.3, fn1.extractOutput(), delta);
    fn1.addInput(testData.get(1));
    Assert.assertEquals(2.2, fn1.getAccumulators(), delta);

    fn2.addInput(testData.get(2));
    fn2.addInput(testData.get(3));
    Assert.assertEquals(1.1, fn2.extractOutput(), delta);

    fn3.addInput(testData.get(4));
    Assert.assertEquals(5.5, fn3.extractOutput(), delta);

    fn1.mergeAccumulators(Arrays.asList(fn2.getAccumulators(), fn3.getAccumulators()));
    Assert.assertEquals(1.1, fn1.extractOutput(), delta);
}
Example 4
Source File: ValueReaders.java From iceberg with Apache License 2.0

@Override
protected GenericData.Record reuseOrCreate(Object reuse) {
    if (reuse instanceof GenericData.Record) {
        return (GenericData.Record) reuse;
    } else {
        return new GenericData.Record(recordSchema);
    }
}
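The reuse pattern above pairs naturally with Avro's own reuse hook, GenericDatumReader.read(reuse, decoder), which fills a previously allocated record instead of allocating a new one per datum. A hedged round-trip sketch under that assumption (the one-field Row schema and the value are made up for illustration):

import java.io.ByteArrayOutputStream;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class ReuseSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical single-field schema, for illustration only.
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Row\",\"fields\":"
            + "[{\"name\":\"v\",\"type\":\"string\"}]}");

        // Serialize one datum to bytes.
        GenericRecord datum = new GenericData.Record(schema);
        datum.put("v", "hello");
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        new GenericDatumWriter<GenericRecord>(schema).write(datum, encoder);
        encoder.flush();

        // Deserialize, passing a pre-allocated record as "reuse":
        // Avro fills it in place rather than allocating a fresh record.
        BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
        GenericRecord reuse = new GenericData.Record(schema);
        reuse = new GenericDatumReader<GenericRecord>(schema).read(reuse, decoder);
        System.out.println(reuse.get("v")); // hello (as a Utf8)
    }
}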
Example 5
Source File: MarketoInputReaderTest.java From components with Apache License 2.0

@Test
public void testStart() throws Exception {
    MarketoRecordResult mkto = new MarketoRecordResult();
    mkto.setSuccess(false);
    mkto.setErrors(Arrays.asList(new MarketoError("REST", "error")));
    when(client.bulkImport(any())).thenReturn(mkto);
    when(client.getLead(any(), any())).thenReturn(mkto);
    when(client.getMultipleLeads(any(), any())).thenReturn(mkto);
    when(client.getLeadActivity(any(), any())).thenReturn(mkto);
    when(client.getLeadChanges(any(), any())).thenReturn(mkto);
    when(client.describeCustomObject(any())).thenReturn(mkto);
    when(client.listCustomObjects(any())).thenReturn(mkto);
    when(client.getCustomObjects(any(), any())).thenReturn(mkto);
    try {
        assertFalse(reader.start());
        fail("Should not be here");
    } catch (Exception e) {
        // expected: the mocked client reports a failed result
    }

    IndexedRecord record = new GenericData.Record(MarketoConstants.getEmptySchema());
    mkto.setSuccess(true);
    mkto.setRecords(Arrays.asList(record));
    when(client.bulkImport(any())).thenReturn(mkto);
    when(client.getLead(any(), any())).thenReturn(mkto);
    when(client.getMultipleLeads(any(), any())).thenReturn(mkto);
    when(client.getLeadActivity(any(), any())).thenReturn(mkto);
    when(client.getLeadChanges(any(), any())).thenReturn(mkto);
    when(client.describeCustomObject(any())).thenReturn(mkto);
    when(client.listCustomObjects(any())).thenReturn(mkto);
    when(client.getCustomObjects(any(), any())).thenReturn(mkto);
    assertFalse(reader.start());
}
Example 6
Source File: SchemaRegistryProducer.java From blog with MIT License

public static void main(String[] args) {
    // Parse the schema with Avro
    Schema.Parser parser = new Schema.Parser();
    Schema schema = parser.parse(userSchema);

    // Set producer properties
    Properties properties = new Properties();
    // Kafka bootstrap servers
    properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,
        "node-160:9092,node-161:9092,node-162:9092");
    // Key serializer class
    properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    // Value serializer class
    properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class);
    // Schema registry URL
    properties.put("schema.registry.url", "http://node-160:8081");

    // Create the producer
    KafkaProducer<Object, Object> producer = new KafkaProducer<>(properties);

    // Build the message
    GenericData.Record record = new GenericData.Record(schema);
    record.put("name", "hvkcoder");
    producer.send(new ProducerRecord<>("topic01", record));

    // Close the producer
    producer.close();
}
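For context, the matching consumer side might look like the sketch below. It assumes the same topic and schema registry as the producer above and uses Confluent's KafkaAvroDeserializer to get GenericRecord values back; the group id is hypothetical and the addresses are carried over from the example, not verified:

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

import io.confluent.kafka.serializers.KafkaAvroDeserializer;
import org.apache.avro.generic.GenericRecord;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

public class SchemaRegistryConsumerSketch {
    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "node-160:9092");
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, "demo-group"); // hypothetical group id
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, KafkaAvroDeserializer.class);
        properties.put("schema.registry.url", "http://node-160:8081");

        try (KafkaConsumer<String, GenericRecord> consumer = new KafkaConsumer<>(properties)) {
            consumer.subscribe(Collections.singletonList("topic01"));
            ConsumerRecords<String, GenericRecord> records = consumer.poll(Duration.ofSeconds(1));
            for (ConsumerRecord<String, GenericRecord> record : records) {
                // The deserializer rebuilds a GenericRecord from the registered schema.
                System.out.println(record.value().get("name"));
            }
        }
    }
}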
Example 7
Source File: NetSuiteOutputWriterIT.java From components with Apache License 2.0

private static List<IndexedRecord> makeRecordRefIndexedRecords(Schema schema, List<RecordRef> refList) {
    List<IndexedRecord> indexedRecordList = new ArrayList<>(refList.size());
    for (RecordRef ref : refList) {
        GenericRecord indexedRecord = new GenericData.Record(schema);
        indexedRecord.put("InternalId", ref.getInternalId());
        indexedRecordList.add(indexedRecord);
    }
    return indexedRecordList;
}
Example 8
Source File: RegressionAdmmTrain.java From ml-ease with Apache License 2.0

private void updateLogLikBestModel(JobConf conf, int niter, Map<String, LinearModel> z, String testPath,
        boolean ignoreValue, MutableFloat bestTestLoglik, String outBasePath, int numClickReplicates)
        throws IOException {
    Map<String, Double> loglik;
    loglik = testloglik(conf, z, testPath, 1, ignoreValue);

    AvroHdfsFileWriter<GenericRecord> writer = new AvroHdfsFileWriter<GenericRecord>(conf,
        outBasePath + "/sample-test-loglik/iteration-" + niter + ".avro", SampleTestLoglik.SCHEMA$);
    DataFileWriter<GenericRecord> testRecordWriter = writer.get();

    for (String k : z.keySet()) {
        GenericData.Record valuemap = new GenericData.Record(SampleTestLoglik.SCHEMA$);
        valuemap.put("iter", niter);
        valuemap.put("testLoglik", loglik.get(k).floatValue());
        valuemap.put("lambda", k);
        testRecordWriter.append(valuemap);
        _logger.info("Sample test loglik for lambda=" + k + " is: " + String.valueOf(loglik.get(k)));

        // output best model up to now
        if (loglik.get(k) > bestTestLoglik.floatValue() && niter > 0) {
            String bestModelPath = outBasePath + "/best-model/best-iteration-" + niter + ".avro";
            FileSystem fs = FileSystem.get(conf);
            fs.delete(new Path(outBasePath + "/best-model"), true);
            LinearModelUtils.writeLinearModel(conf, bestModelPath, z.get(k), k);
            bestTestLoglik.setValue(loglik.get(k).floatValue());
        }
    }
    testRecordWriter.close();
}
Example 9
Source File: MapTypeClusterIntegrationTest.java From incubator-pinot with Apache License 2.0

private File createAvroFile() throws Exception {
    org.apache.avro.Schema avroSchema =
        org.apache.avro.Schema.createRecord("myRecord", null, null, false);
    org.apache.avro.Schema stringKeyMapAvroSchema =
        org.apache.avro.Schema.createMap(org.apache.avro.Schema.create(Type.INT));
    org.apache.avro.Schema intKeyMapAvroSchema =
        org.apache.avro.Schema.createMap(org.apache.avro.Schema.create(Type.STRING));
    List<Field> fields = Arrays.asList(
        new Field(STRING_KEY_MAP_FIELD_NAME, stringKeyMapAvroSchema, null, null),
        new Field(INT_KEY_MAP_FIELD_NAME, intKeyMapAvroSchema, null, null));
    avroSchema.setFields(fields);

    File avroFile = new File(_tempDir, "data.avro");
    try (DataFileWriter<GenericData.Record> fileWriter =
            new DataFileWriter<>(new GenericDatumWriter<>(avroSchema))) {
        fileWriter.create(avroSchema, avroFile);
        for (int i = 0; i < NUM_DOCS; i++) {
            Map<String, Integer> stringKeyMap = new HashMap<>();
            stringKeyMap.put("k1", i);
            stringKeyMap.put("k2", NUM_DOCS + i);
            Map<Integer, String> intKeyMap = new HashMap<>();
            intKeyMap.put(95, Integer.toString(i));
            intKeyMap.put(717, Integer.toString(NUM_DOCS + i));
            GenericData.Record record = new GenericData.Record(avroSchema);
            record.put(STRING_KEY_MAP_FIELD_NAME, stringKeyMap);
            record.put(INT_KEY_MAP_FIELD_NAME, intKeyMap);
            fileWriter.append(record);
        }
    }
    return avroFile;
}
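To inspect a container file like the one written above, Avro's standard DataFileReader can read it back; the schema is recovered from the file header, so none needs to be supplied. A short sketch (the file path is assumed to be wherever createAvroFile() wrote its output):

import java.io.File;
import java.io.IOException;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class ReadBackSketch {
    public static void main(String[] args) throws IOException {
        // Assumed path: the temp-dir output of the test above.
        File avroFile = new File("data.avro");
        try (DataFileReader<GenericRecord> reader =
                new DataFileReader<>(avroFile, new GenericDatumReader<>())) {
            for (GenericRecord record : reader) {
                System.out.println(record); // each record holds the two map fields
            }
        }
    }
}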
Example 10
Source File: ParquetFileReader.java From kafka-connect-fs with Apache License 2.0

@Override
protected GenericRecord nextRecord() {
    GenericRecord record;
    if (this.projection != null) {
        record = new GenericData.Record(this.projection);
        this.projection.getFields().forEach(
            field -> record.put(field.name(), currentRecord.get(field.name())));
    } else {
        record = currentRecord;
    }
    currentRecord = null;
    incrementOffset();
    return record;
}
Example 11
Source File: ClusterIntegrationTestUtils.java From incubator-pinot with Apache License 2.0

/**
 * Pushes randomly generated Avro records into a Kafka topic.
 *
 * @param avroFile Sample Avro file used to extract the Avro schema
 * @param kafkaBroker Kafka broker config
 * @param kafkaTopic Kafka topic
 * @param numKafkaMessagesToPush Number of Kafka messages to push
 * @param maxNumKafkaMessagesPerBatch Maximum number of Kafka messages per batch
 * @param header Optional Kafka message header
 * @param partitionColumn Optional partition column
 * @throws Exception
 */
@SuppressWarnings("unused")
public static void pushRandomAvroIntoKafka(File avroFile, String kafkaBroker, String kafkaTopic,
        int numKafkaMessagesToPush, int maxNumKafkaMessagesPerBatch, @Nullable byte[] header,
        @Nullable String partitionColumn) throws Exception {
    Properties properties = new Properties();
    properties.put("metadata.broker.list", kafkaBroker);
    properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
    properties.put("request.required.acks", "1");
    properties.put("partitioner.class", "kafka.producer.ByteArrayPartitioner");

    StreamDataProducer producer =
        StreamDataProvider.getStreamDataProducer(KafkaStarterUtils.KAFKA_PRODUCER_CLASS_NAME, properties);
    try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536)) {
        try (DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile)) {
            BinaryEncoder binaryEncoder = new EncoderFactory().directBinaryEncoder(outputStream, null);
            Schema avroSchema = reader.getSchema();
            GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(avroSchema);
            GenericRecord genericRecord = new GenericData.Record(avroSchema);
            while (numKafkaMessagesToPush > 0) {
                generateRandomRecord(genericRecord, avroSchema);
                outputStream.reset();
                if (header != null && 0 < header.length) {
                    outputStream.write(header);
                }
                datumWriter.write(genericRecord, binaryEncoder);
                binaryEncoder.flush();
                byte[] keyBytes = (partitionColumn == null) ? Longs.toByteArray(System.currentTimeMillis())
                    : (genericRecord.get(partitionColumn)).toString().getBytes();
                byte[] bytes = outputStream.toByteArray();
                producer.produce(kafkaTopic, keyBytes, bytes);
                numKafkaMessagesToPush--;
            }
        }
    }
}
Example 12
Source File: TestMergeContent.java From nifi with Apache License 2.0

@Test
public void testAvroConcatWithDifferentMetadataDoNotMerge() throws IOException, InterruptedException {
    final TestRunner runner = TestRunners.newTestRunner(new MergeContent());
    runner.setProperty(MergeContent.MAX_ENTRIES, "3");
    runner.setProperty(MergeContent.MIN_ENTRIES, "3");
    runner.setProperty(MergeContent.MERGE_FORMAT, MergeContent.MERGE_FORMAT_AVRO);
    runner.setProperty(MergeContent.METADATA_STRATEGY, MergeContent.METADATA_STRATEGY_DO_NOT_MERGE);

    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/TestMergeContent/user.avsc"));

    final GenericRecord user1 = new GenericData.Record(schema);
    user1.put("name", "Alyssa");
    user1.put("favorite_number", 256);
    final Map<String, String> userMeta1 = new HashMap<String, String>() {{
        put("test_metadata1", "Test 1");
    }};

    final GenericRecord user2 = new GenericData.Record(schema);
    user2.put("name", "Ben");
    user2.put("favorite_number", 7);
    user2.put("favorite_color", "red");
    final Map<String, String> userMeta2 = new HashMap<String, String>() {{
        put("test_metadata1", "Test 2"); // Test non-matching values
    }};

    final GenericRecord user3 = new GenericData.Record(schema);
    user3.put("name", "John");
    user3.put("favorite_number", 5);
    user3.put("favorite_color", "blue");
    final Map<String, String> userMeta3 = new HashMap<String, String>() {{
        put("test_metadata1", "Test 1");
        put("test_metadata2", "Test"); // Test unique
    }};

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = serializeAvroRecord(schema, user1, datumWriter, userMeta1);
    final ByteArrayOutputStream out2 = serializeAvroRecord(schema, user2, datumWriter, userMeta2);
    final ByteArrayOutputStream out3 = serializeAvroRecord(schema, user3, datumWriter, userMeta3);

    runner.enqueue(out1.toByteArray());
    runner.enqueue(out2.toByteArray());
    runner.enqueue(out3.toByteArray());

    runner.run();
    runner.assertQueueEmpty();
    runner.assertTransferCount(MergeContent.REL_MERGED, 1);
    runner.assertTransferCount(MergeContent.REL_FAILURE, 2);
    runner.assertTransferCount(MergeContent.REL_ORIGINAL, 3);

    final MockFlowFile bundle = runner.getFlowFilesForRelationship(MergeContent.REL_MERGED).get(0);
    bundle.assertAttributeEquals(CoreAttributes.MIME_TYPE.key(), "application/avro-binary");

    // create a reader for the merged content
    byte[] data = runner.getContentAsByteArray(bundle);
    final Map<String, GenericRecord> users = getGenericRecordMap(data, schema, "name");
    Assert.assertEquals(1, users.size());
    Assert.assertTrue(users.containsKey("Alyssa"));
}
Example 13
Source File: JDBCSPTestIT.java From components with Apache License 2.0

@SuppressWarnings("rawtypes")
@Test
public void test_basic_as_output_and_input() throws Exception {
    TJDBCSPDefinition definition = new TJDBCSPDefinition();
    TJDBCSPProperties properties = DBTestUtils.createCommonJDBCSPProperties(allSetting, definition);

    properties.spName.setValue("SYSCS_UTIL.SYSCS_DISABLE_LOG_ARCHIVE_MODE");
    Schema schema = DBTestUtils.createSPSchema3(tablename);
    properties.main.schema.setValue(schema);
    properties.schemaFlow.schema.setValue(schema);
    properties.spParameterTable.parameterTypes.setValue(Arrays.asList(SPParameterTable.ParameterType.IN.name()));
    properties.spParameterTable.schemaColumns.setValue(Arrays.asList("PARAMETER1"));

    JDBCSPSink sink = new JDBCSPSink();
    sink.initialize(null, properties);
    ValidationResult result = sink.validate(null);
    Assert.assertTrue(result.getStatus() == ValidationResult.Result.OK);

    WriteOperation operation = sink.createWriteOperation();
    JDBCSPWriter writer = (JDBCSPWriter) operation.createWriter(null);

    try {
        writer.open("wid");

        IndexedRecord r1 = new GenericData.Record(properties.main.schema.getValue());
        r1.put(0, 0);
        r1.put(1, "wangwei");
        writer.write(r1);

        List<IndexedRecord> writeResult = writer.getSuccessfulWrites();
        Assert.assertEquals(1, writeResult.size());
        IndexedRecord record = writeResult.get(0);
        Assert.assertEquals(Integer.valueOf(0), record.get(0));
        Assert.assertEquals("wangwei", record.get(1));
        writer.cleanWrites();
    } finally {
        writer.close();
    }
}
Example 14
Source File: SalesforceWriter.java From components with Apache License 2.0

private void handleReject(IndexedRecord input, Error[] resultErrors, String[] changedItemKeys, int batchIdx)
        throws IOException {
    String changedItemKey = null;
    if (batchIdx < changedItemKeys.length) {
        if (changedItemKeys[batchIdx] != null) {
            changedItemKey = changedItemKeys[batchIdx];
        } else {
            changedItemKey = String.valueOf(batchIdx + 1);
        }
    } else {
        changedItemKey = "Batch index out of bounds";
    }
    StringBuilder errors = SalesforceRuntime.addLog(resultErrors, changedItemKey, logWriter);
    if (exceptionForErrors) {
        if (errors.toString().length() > 0) {
            if (logWriter != null) {
                logWriter.close();
            }
            throw new IOException(errors.toString());
        }
    } else {
        rejectCount++;
        Schema outSchema = sprops.schemaReject.schema.getValue();
        if (outSchema == null || outSchema.getFields().size() == 0) {
            return;
        }
        if (input.getSchema().equals(outSchema)) {
            rejectedWrites.add(input);
        } else {
            IndexedRecord reject = null;
            if (AvroUtils.isIncludeAllFields(outSchema)) {
                Schema runtimeSchema = input.getSchema();
                List<Schema.Field> addedFields = new ArrayList<>();
                // Check whether design schema has additional field
                Schema.Field errorCodeField = outSchema.getField(TSalesforceOutputProperties.FIELD_ERROR_CODE);
                Schema.Field errorField = outSchema.getField(TSalesforceOutputProperties.FIELD_ERROR_FIELDS);
                Schema.Field errorMsgField = outSchema.getField(TSalesforceOutputProperties.FIELD_ERROR_MESSAGE);
                if (errorCodeField != null) {
                    addedFields.add(new Schema.Field(errorCodeField.name(), errorCodeField.schema(),
                        errorCodeField.doc(), errorCodeField.defaultVal()));
                }
                if (errorField != null) {
                    addedFields.add(new Schema.Field(errorField.name(), errorField.schema(),
                        errorField.doc(), errorField.defaultVal()));
                }
                if (errorMsgField != null) {
                    addedFields.add(new Schema.Field(errorMsgField.name(), errorMsgField.schema(),
                        errorMsgField.doc(), errorMsgField.defaultVal()));
                }
                if (addedFields.size() > 0) {
                    // Append additional fields to the runtime schema
                    runtimeSchema = AvroUtils.appendFields(runtimeSchema,
                        addedFields.toArray(new Schema.Field[addedFields.size()]));
                }
                reject = new GenericData.Record(runtimeSchema);
            } else {
                reject = new GenericData.Record(outSchema);
            }
            for (Schema.Field outField : reject.getSchema().getFields()) {
                Object outValue = null;
                Schema.Field inField = input.getSchema().getField(outField.name());
                if (inField != null) {
                    outValue = input.get(inField.pos());
                } else if (resultErrors.length > 0) {
                    Error error = resultErrors[0];
                    if (TSalesforceOutputProperties.FIELD_ERROR_CODE.equals(outField.name())) {
                        outValue = error.getStatusCode() != null ? error.getStatusCode().toString() : null;
                    } else if (TSalesforceOutputProperties.FIELD_ERROR_FIELDS.equals(outField.name())) {
                        StringBuffer fields = new StringBuffer();
                        for (String field : error.getFields()) {
                            fields.append(field);
                            fields.append(",");
                        }
                        if (fields.length() > 0) {
                            fields.deleteCharAt(fields.length() - 1);
                        }
                        outValue = fields.toString();
                    } else if (TSalesforceOutputProperties.FIELD_ERROR_MESSAGE.equals(outField.name())) {
                        outValue = error.getMessage();
                    }
                }
                reject.put(outField.pos(), outValue);
            }
            rejectedWrites.add(reject);
        }
        Property<OutputAction> outputAction = sprops.outputAction;
        LOGGER.info(MESSAGES.getMessage("info.rejectedRecord",
            sprops.outputAction.getPossibleValuesDisplayName(outputAction.getValue()).toLowerCase(), dataCount));
    }
}
Example 15
Source File: ConverterTest.java From xml-avro with Apache License 2.0

@Test
public void recordWithWildcardField() {
    String xsd =
        "<xs:schema xmlns:xs='http://www.w3.org/2001/XMLSchema'>" +
        " <xs:complexType name='type'>" +
        " <xs:sequence>" +
        " <xs:element name='field' type='xs:string'/>" +
        " <xs:any/>" +
        " </xs:sequence>" +
        " </xs:complexType>" +
        " <xs:element name='root' type='type'/>" +
        "</xs:schema>";

    Schema schema = Converter.createSchema(xsd);
    assertEquals(2, schema.getFields().size());
    Schema.Field wildcardField = schema.getField(Source.WILDCARD);
    assertEquals(Schema.Type.MAP, wildcardField.schema().getType());

    // Two wildcard-matched elements
    String xml =
        "<root>" +
        " <field>field</field>" +
        " <field0>field0</field0>" +
        " <field1>field1</field1>" +
        "</root>";

    GenericData.Record record = Converter.createDatum(schema, xml);
    assertEquals("field", record.get("field"));

    @SuppressWarnings("unchecked")
    java.util.Map<String, String> map = (java.util.Map<String, String>) record.get(Source.WILDCARD);
    assertEquals(2, map.size());
    assertEquals("field0", map.get("field0"));
    assertEquals("field1", map.get("field1"));

    // No wildcard-matched element
    xml = "<root><field>field</field></root>";
    record = Converter.createDatum(schema, xml);
    assertEquals("field", record.get("field"));
    assertEquals(Collections.emptyMap(), record.get(Source.WILDCARD));
}
Example 16
Source File: SimpleAvroJob.java From datafu with Apache License 2.0

public TheMapper() {
    key = new GenericData.Record(KEY_SCHEMA);
    value = new GenericData.Record(VALUE_SCHEMA);
    value.put("count", 1L);
}
Example 17
Source File: FastGenericDeserializerGeneratorTest.java From avro-util with BSD 2-Clause "Simplified" License

@Test(groups = {"deserializationTest"}, dataProvider = "Implementation")
public void shouldReadArrayOfRecords(Implementation implementation) {
    // given
    Schema recordSchema = createRecord("record",
        createPrimitiveUnionFieldSchema("field", Schema.Type.STRING));
    Schema arrayRecordSchema = Schema.createArray(recordSchema);

    GenericData.Record subRecordBuilder = new GenericData.Record(recordSchema);
    subRecordBuilder.put("field", "abc");

    GenericData.Array<GenericData.Record> recordsArray = new GenericData.Array<>(0, arrayRecordSchema);
    recordsArray.add(subRecordBuilder);
    recordsArray.add(subRecordBuilder);

    // when
    GenericData.Array<GenericRecord> array =
        implementation.decode(arrayRecordSchema, arrayRecordSchema, genericDataAsDecoder(recordsArray));

    // then
    Assert.assertEquals(2, array.size());
    Assert.assertEquals(new Utf8("abc"), array.get(0).get("field"));
    Assert.assertEquals(new Utf8("abc"), array.get(1).get("field"));

    // given
    arrayRecordSchema = Schema.createArray(createUnionSchema(recordSchema));

    subRecordBuilder = new GenericData.Record(recordSchema);
    subRecordBuilder.put("field", "abc");

    recordsArray = new GenericData.Array<>(0, arrayRecordSchema);
    recordsArray.add(subRecordBuilder);
    recordsArray.add(subRecordBuilder);

    // when
    array = implementation.decode(arrayRecordSchema, arrayRecordSchema, genericDataAsDecoder(recordsArray));

    // then
    Assert.assertEquals(2, array.size());
    Assert.assertEquals(new Utf8("abc"), array.get(0).get("field"));
    Assert.assertEquals(new Utf8("abc"), array.get(1).get("field"));
}
Example 18
Source File: ItemModelTrain.java From ml-ease with Apache License 2.0

@Override
public void reduce(Utf8 key, Iterable<RegressionPrepareOutput> values,
        AvroCollector<GenericData.Record> collector, Reporter reporter) throws IOException {
    // Prepare the data set
    LibLinearDataset dataset;
    if (_binaryFeature) {
        dataset = new LibLinearBinaryDataset(1.0, _shortFeatureIndex);
    } else {
        dataset = new LibLinearDataset(1.0);
    }
    for (RegressionPrepareOutput value : values) {
        dataset.addInstanceAvro(value);
    }
    dataset.finish();

    // First determine the prior mean for the intercept
    Map<String, Double> priorMeanMap = new HashMap<String, Double>();
    double interceptPriorMean = _interceptDefaultPriorMean;
    if (_interceptPriorMeanMap.containsKey(key.toString())) {
        interceptPriorMean = _interceptPriorMeanMap.get(key.toString());
        reporter.incrCounter("ItemModelTrainV3",
            "Found intercept prior mean in intercept prior mean map", 1);
    }
    priorMeanMap.put(LibLinearDataset.INTERCEPT_NAME, interceptPriorMean);

    // now cross product the lambdas for intercept and default
    for (float interceptLambda : _interceptLambdas)
        for (float defaultLambda : _defaultLambdas) {
            _priorVarMap.put(LibLinearDataset.INTERCEPT_NAME, 1.0 / interceptLambda);
            GenericData.Record output = new GenericData.Record(LinearModelWithVarAvro.SCHEMA$);

            // Run liblinear
            LibLinear liblinear = new LibLinear();
            liblinear.setReporter(reporter, _reportfreq);
            String option = "epsilon=" + String.valueOf(_liblinearEpsilon);
            try {
                liblinear.train(dataset, null, priorMeanMap, _priorVarMap,
                    0, 1.0 / defaultLambda, option, _computeVar);
                LinearModel model = liblinear.getLinearModel();
                output.put("key", String.valueOf(interceptLambda) + ":" + String.valueOf(defaultLambda) + "#" + key);
                output.put("model", model.toAvro(LIBLINEAR_INTERCEPT_KEY));
                if (_computeVar) {
                    LinearModel posteriorVar = new LinearModel(LIBLINEAR_INTERCEPT_KEY, liblinear.getPostVarMap());
                    output.put("posteriorVar", posteriorVar.toAvro(LIBLINEAR_INTERCEPT_KEY));
                } else {
                    output.put("posteriorVar", new LinearModel().toAvro(LIBLINEAR_INTERCEPT_KEY));
                }
            } catch (Exception e) {
                // output everything to debug
                _logger.info("Dataset size=" + dataset.y.length);
                _logger.info("Number of features=" + dataset.nFeatures());
                _logger.info("Model size=" + liblinear.getParamMap().size());
                _logger.info("bias=" + liblinear.bias);
                _logger.info("Model:");
                for (String k : liblinear.getParamMap().keySet()) {
                    _logger.info(k + " " + liblinear.getParamMap().get(k).toString());
                }
                throw new IOException("Model fitting error!", e);
            }
            collector.collect(output);
        }
}
Example 19
Source File: FastGenericDeserializerGeneratorTest.java From avro-util with BSD 2-Clause "Simplified" License

@Test(groups = {"deserializationTest"}, dataProvider = "Implementation")
public void shouldReadSubRecordComplexCollectionsField(Implementation implementation) {
    // given
    Schema subRecordSchema = createRecord("subRecord",
        createPrimitiveUnionFieldSchema("subField", Schema.Type.STRING));
    Schema recordSchema = createRecord(
        createArrayFieldSchema("recordsArrayMap", Schema.createMap(createUnionSchema(subRecordSchema))),
        createMapFieldSchema("recordsMapArray", Schema.createArray(createUnionSchema(subRecordSchema))),
        createUnionField("recordsArrayMapUnion",
            Schema.createArray(Schema.createMap(createUnionSchema(subRecordSchema)))),
        createUnionField("recordsMapArrayUnion",
            Schema.createMap(Schema.createArray(createUnionSchema(subRecordSchema)))));

    GenericData.Record subRecordBuilder = new GenericData.Record(subRecordSchema);
    subRecordBuilder.put("subField", "abc");

    GenericData.Record builder = new GenericData.Record(recordSchema);
    List<Map<String, GenericRecord>> recordsArrayMap = new ArrayList<>();
    Map<String, GenericRecord> recordMap = new HashMap<>();
    recordMap.put("1", subRecordBuilder);
    recordsArrayMap.add(recordMap);
    builder.put("recordsArrayMap", recordsArrayMap);
    builder.put("recordsArrayMapUnion", recordsArrayMap);

    Map<String, List<GenericRecord>> recordsMapArray = new HashMap<>();
    List<GenericRecord> recordList = new ArrayList<>();
    recordList.add(subRecordBuilder);
    recordsMapArray.put("1", recordList);
    builder.put("recordsMapArray", recordsMapArray);
    builder.put("recordsMapArrayUnion", recordsMapArray);

    // when
    GenericRecord record = implementation.decode(recordSchema, recordSchema, genericDataAsDecoder(builder));

    // then
    Assert.assertEquals(new Utf8("abc"),
        ((List<Map<String, GenericRecord>>) record.get("recordsArrayMap")).get(0).get(new Utf8("1")).get("subField"));
    Assert.assertEquals(new Utf8("abc"),
        ((Map<String, List<GenericRecord>>) record.get("recordsMapArray")).get(new Utf8("1")).get(0).get("subField"));
    Assert.assertEquals(new Utf8("abc"),
        ((List<Map<String, GenericRecord>>) record.get("recordsArrayMapUnion")).get(0)
            .get(new Utf8("1"))
            .get("subField"));
    Assert.assertEquals(new Utf8("abc"),
        ((Map<String, List<GenericRecord>>) record.get("recordsMapArrayUnion")).get(new Utf8("1"))
            .get(0)
            .get("subField"));
}
Example 20
Source File: TestMergeContent.java From nifi with Apache License 2.0

@Test
public void testSimpleAvroConcat() throws IOException, InterruptedException {
    final TestRunner runner = TestRunners.newTestRunner(new MergeContent());
    runner.setProperty(MergeContent.MAX_ENTRIES, "3");
    runner.setProperty(MergeContent.MIN_ENTRIES, "3");
    runner.setProperty(MergeContent.MERGE_FORMAT, MergeContent.MERGE_FORMAT_AVRO);

    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/TestMergeContent/user.avsc"));

    final GenericRecord user1 = new GenericData.Record(schema);
    user1.put("name", "Alyssa");
    user1.put("favorite_number", 256);

    final GenericRecord user2 = new GenericData.Record(schema);
    user2.put("name", "Ben");
    user2.put("favorite_number", 7);
    user2.put("favorite_color", "red");

    final GenericRecord user3 = new GenericData.Record(schema);
    user3.put("name", "John");
    user3.put("favorite_number", 5);
    user3.put("favorite_color", "blue");

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = serializeAvroRecord(schema, user1, datumWriter);
    final ByteArrayOutputStream out2 = serializeAvroRecord(schema, user2, datumWriter);
    final ByteArrayOutputStream out3 = serializeAvroRecord(schema, user3, datumWriter);

    runner.enqueue(out1.toByteArray());
    runner.enqueue(out2.toByteArray());
    runner.enqueue(out3.toByteArray());

    runner.run();
    runner.assertQueueEmpty();
    runner.assertTransferCount(MergeContent.REL_MERGED, 1);
    runner.assertTransferCount(MergeContent.REL_FAILURE, 0);
    runner.assertTransferCount(MergeContent.REL_ORIGINAL, 3);

    final MockFlowFile bundle = runner.getFlowFilesForRelationship(MergeContent.REL_MERGED).get(0);
    bundle.assertAttributeEquals(CoreAttributes.MIME_TYPE.key(), "application/avro-binary");

    // create a reader for the merged content
    byte[] data = runner.getContentAsByteArray(bundle);
    final Map<String, GenericRecord> users = getGenericRecordMap(data, schema, "name");
    Assert.assertEquals(3, users.size());
    Assert.assertTrue(users.containsKey("Alyssa"));
    Assert.assertTrue(users.containsKey("Ben"));
    Assert.assertTrue(users.containsKey("John"));
}
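The getGenericRecordMap helper is not shown on this page. A hedged sketch of what such a helper plausibly does, using only standard Avro APIs: stream the merged Avro container from the raw bytes with DataFileStream and index each record by the given field. The real helper in TestMergeContent may differ.

// Sketch only; assumed behavior for a helper like getGenericRecordMap.
private static Map<String, GenericRecord> getGenericRecordMap(byte[] data, Schema schema, String keyField)
        throws IOException {
    Map<String, GenericRecord> map = new HashMap<>();
    try (DataFileStream<GenericRecord> stream =
            new DataFileStream<>(new ByteArrayInputStream(data), new GenericDatumReader<>(schema))) {
        for (GenericRecord record : stream) {
            // Index each merged record by the chosen field, e.g. "name".
            map.put(record.get(keyField).toString(), record);
        }
    }
    return map;
}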