Java Code Examples for org.apache.avro.generic.GenericRecord#get()
The following examples show how to use
org.apache.avro.generic.GenericRecord#get().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: LobAvroImportTestCase.java From aliyun-maxcompute-data-collectors with Apache License 2.0 | 6 votes |
/** Import blob data that is smaller than inline lob limit. Blob data * should be saved as Avro bytes. * @throws IOException * @throws SQLException */ public void testBlobAvroImportInline() throws IOException, SQLException { String [] types = { getBlobType() }; String expectedVal = "This is short BLOB data"; String [] vals = { getBlobInsertStr(expectedVal) }; createTableWithColTypes(types, vals); runImport(getArgv()); Path outputFile = new Path(getTablePath(), "part-m-00000.avro"); DataFileReader<GenericRecord> reader = read(outputFile); GenericRecord record = reader.next(); // Verify that blob data is imported as Avro bytes. ByteBuffer buf = (ByteBuffer) record.get(getColName(0)); String returnVal = new String(buf.array()); assertEquals(getColName(0), expectedVal, returnVal); }
Example 2
Source File: KafkaValueDeserializer.java From kareldb with Apache License 2.0 | 6 votes |
/**
 * Turns an Avro array of records into a map of {@link VersionedValue}s keyed by version.
 *
 * <p>Each record is read by position: index 0 is the version, index 1 the commit,
 * index 2 the deleted flag, and every following index is a row column that is
 * passed through {@code AvroSchema.fromAvroValue} together with its field schema.
 *
 * @param array the records to convert
 * @return a sorted map from version to the value built from that record
 */
private NavigableMap<Long, VersionedValue> toValue(GenericArray<GenericRecord> array) {
    NavigableMap<Long, VersionedValue> result = new TreeMap<>();
    List<Schema.Field> schemaFields = avroSchema.getElementType().getFields();
    int fieldCount = schemaFields.size();

    for (GenericRecord entry : array) {
        Long version = (Long) entry.get(0);
        Long commit = (Long) entry.get(1);
        boolean deleted = (Boolean) entry.get(2);

        // The first three fields are metadata; the rest form the row.
        Comparable[] columns = new Comparable[fieldCount - 3];
        int fieldIndex = 3;
        for (int col = 0; col < columns.length; col++, fieldIndex++) {
            Schema fieldSchema = schemaFields.get(fieldIndex).schema();
            Comparable rawValue = (Comparable) entry.get(fieldIndex);
            columns[col] = AvroSchema.fromAvroValue(fieldSchema, rawValue);
        }

        result.put(version, new VersionedValue(version, commit, deleted, columns));
    }
    return result;
}
Example 3
Source File: Purge.java From Cubert with Apache License 2.0 | 6 votes |
private void loadMembersToPurge(String filename) throws IOException { // TODO: "memberId" column name should be configurable DataFileReader<GenericRecord> dataFileReader = createDataFileReader(filename, true); while (dataFileReader.hasNext()) { GenericRecord record = dataFileReader.next(); Integer memberId = (Integer) record.get("memberId"); if (memberId == null) { throw new NullPointerException("memberId is null"); } membersToPurge.add(((Number) record.get("memberId")).intValue()); } dataFileReader.close(); }
Example 4
Source File: DefaultJoinKeyExtractor.java From incubator-pinot with Apache License 2.0 | 6 votes |
/**
 * Resolves the join key column for a source and returns that column's value
 * from the record as a string.
 *
 * <p>The column name comes from {@code joinKeyMap} when it has an entry for
 * {@code sourceName}, otherwise from {@code defaultJoinKey}. When no column name
 * is available, or the record holds null for it, {@code "INVALID"} is returned.
 *
 * @param sourceName name of the source the record belongs to
 * @param record     record to read the join key from
 * @return the join key value, or {@code "INVALID"}
 */
@Override
public String extractJoinKey(String sourceName, GenericRecord record) {
    final String joinKey;
    if (joinKeyMap != null && joinKeyMap.containsKey(sourceName)) {
        joinKey = joinKeyMap.get(sourceName);
    } else {
        joinKey = defaultJoinKey;
    }

    Object keyValue = (joinKey == null) ? null : record.get(joinKey);
    String ret = (keyValue == null) ? "INVALID" : keyValue.toString();

    LOGGER.info("source:{} JoinKey:{} value:{}", sourceName, joinKey, ret);
    return ret;
}
Example 5
Source File: AvroGenericUtils.java From simplesource with Apache License 2.0 | 6 votes |
/**
 * Rebuilds a {@link CommandResponse} from its generic Avro representation.
 *
 * <p>When the nested result record carries a write sequence the response is a
 * success; otherwise it is a failure built from the reason and the additional
 * reasons stored in the result record.
 *
 * @param record the Avro record to decode
 * @return the decoded command response
 */
static <K> CommandResponse<GenericRecord> fromCommandResponse(final GenericRecord record) {
    final GenericRecord key = (GenericRecord) record.get(AGGREGATE_KEY);
    final Sequence readSeq = Sequence.position((Long) record.get(READ_SEQUENCE));
    final UUID id = UUID.fromString(String.valueOf(record.get(COMMAND_ID)));
    final GenericRecord resultRecord = (GenericRecord) record.get(RESULT);

    final Result<CommandError, Sequence> result;
    if (resultRecord.get(WRITE_SEQUENCE) != null) {
        result = Result.success(Sequence.position((Long) resultRecord.get(WRITE_SEQUENCE)));
    } else {
        final CommandError firstError = toCommandError((GenericRecord) resultRecord.get(REASON));
        final List<GenericRecord> otherReasons =
                (List<GenericRecord>) resultRecord.get(ADDITIONAL_REASONS);
        final List<CommandError> otherErrors = otherReasons
                .stream()
                .map(AggregateUpdateResultAvroHelper::toCommandError)
                .collect(Collectors.toList());
        result = Result.failure(new NonEmptyList<>(firstError, otherErrors));
    }

    return CommandResponse.of(CommandId.of(id), key, readSeq, result);
}
Example 6
Source File: GenericRecordExportMapper.java From aliyun-maxcompute-data-collectors with Apache License 2.0 | 6 votes |
/**
 * Copies the values of an Avro record into {@code recordImpl}, one field per
 * entry of {@code columnTypes}.
 *
 * <p>Each column name is turned into an identifier, matched against the record's
 * schema ignoring case, converted with {@code AvroUtil.fromAvro} and stored
 * under the identifier.
 *
 * @param record the Avro record to convert
 * @return {@code recordImpl} after all columns have been set
 * @throws IOException if a column has no matching field in the Avro schema
 */
protected SqoopRecord toSqoopRecord(GenericRecord record) throws IOException {
    final Schema schema = record.getSchema();

    for (Map.Entry<Writable, Writable> column : columnTypes.entrySet()) {
        final String name = column.getKey().toString();
        final String type = column.getValue().toString();
        final String identifier = ClassWriter.toIdentifier(name);

        final Schema.Field match = getFieldIgnoreCase(schema, identifier);
        if (match == null) {
            throw new IOException("Cannot find field " + identifier
                + " in Avro schema " + schema);
        }

        final Object converted =
                AvroUtil.fromAvro(record.get(match.name()), match.schema(), type);
        recordImpl.setField(identifier, converted);
    }

    return recordImpl;
}
Example 7
Source File: AvroMessageParser.java From secor with Apache License 2.0 | 6 votes |
/**
 * Deserializes the message payload and converts the configured timestamp field
 * to milliseconds.
 *
 * <p>Returns 0 when the record has no value for the timestamp field or when
 * anything in the process throws. Note that the {@code RuntimeException} raised
 * for a null record with {@code m_timestampRequired} set is thrown inside the
 * same {@code try}, so it is caught and logged here as well.
 *
 * @param message the message whose timestamp is wanted
 * @return the timestamp passed through {@code toMillis}, or 0
 */
@Override
public long extractTimestampMillis(final Message message) {
    try {
        GenericRecord decoded =
                schemaRegistry.deserialize(message.getTopic(), message.getPayload());
        if (decoded == null) {
            if (m_timestampRequired) {
                throw new RuntimeException("Missing timestamp field for message: " + message);
            }
        } else {
            Object rawTimestamp = decoded.get(mConfig.getMessageTimestampName());
            if (rawTimestamp != null) {
                // Parsed as a double first, then truncated to a long.
                long truncated = Double.valueOf(rawTimestamp.toString()).longValue();
                return toMillis(truncated);
            }
        }
    } catch (Exception e) {
        LOG.error("Failed to parse record", e);
    }
    return 0;
}
Example 8
Source File: ImpressionClickPartitionPreservingJob.java From datafu with Apache License 2.0 | 6 votes |
/**
 * Counts one record as either a click or an impression, based on the string
 * form of its {@code "type"} field.
 *
 * @param value the record to count
 * @throws RuntimeException if the type is neither {@code "click"} nor {@code "impression"}
 */
@Override
public void accumulate(GenericRecord value) {
    switch (value.get("type").toString()) {
        case "click":
            clicks++;
            break;
        case "impression":
            impressions++;
            break;
        default:
            throw new RuntimeException("Didn't expect: " + value.get("type"));
    }
}
Example 9
Source File: FastGenericSerializerGeneratorTest.java From avro-util with BSD 2-Clause "Simplified" License | 6 votes |
@Test(groups = {"serializationTest"}) public void shouldWriteRightUnionIndex() { // Create two record schemas Schema recordSchema1 = createRecord("record1", createField("record1_field1", Schema.create(Schema.Type.STRING))); Schema recordSchema2 = createRecord("record2", createField("record2_field1", Schema.create(Schema.Type.STRING))); Schema unionSchema = createUnionSchema(recordSchema1, recordSchema2); Schema recordWrapperSchema = createRecord(createField("union_field", unionSchema)); GenericData.Record objectOfRecordSchema2 = new GenericData.Record(recordSchema2); objectOfRecordSchema2.put("record2_field1", "abc"); GenericData.Record wrapperObject = new GenericData.Record(recordWrapperSchema); wrapperObject.put("union_field", objectOfRecordSchema2); GenericRecord record = decodeRecord(recordWrapperSchema, dataAsBinaryDecoder(wrapperObject)); Object unionField = record.get("union_field"); Assert.assertTrue(unionField instanceof GenericData.Record); GenericData.Record unionRecord = (GenericData.Record)unionField; Assert.assertEquals(unionRecord.getSchema().getName(), "record2"); }
Example 10
Source File: ThirdeyeAvroUtils.java From incubator-pinot with Apache License 2.0 | 5 votes |
/**
 * Reads a metric from the record, substituting the metric type's default when
 * the record holds null for it.
 *
 * @param record     record to read from
 * @param metricName name of the metric field
 * @param metricType supplies the value used when the field is null
 * @return the stored value, or {@code metricType.getDefaultNullValue()}
 */
public static Number getMetricFromRecord(GenericRecord record, String metricName, MetricType metricType) {
    Number stored = (Number) record.get(metricName);
    if (stored != null) {
        return stored;
    }
    return metricType.getDefaultNullValue();
}
Example 11
Source File: EnvelopeSchemaConverter.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Get payload field from GenericRecord and convert to byte array.
 *
 * <p>The field's bytes are decoded as a UTF-8 string of hexadecimal digits and
 * that string is parsed into the returned byte array.
 *
 * @param inputRecord      record holding the payload
 * @param payloadFieldName name of the field, whose value is cast to {@link ByteBuffer}
 * @return the bytes described by the hex string
 */
public byte[] getPayload(GenericRecord inputRecord, String payloadFieldName) {
    ByteBuffer bb = (ByteBuffer) inputRecord.get(payloadFieldName);

    // Read through a duplicate so the record's buffer keeps its position, and copy
    // only the readable window: array() would return the entire backing array,
    // ignoring arrayOffset, position and limit.
    ByteBuffer view = bb.duplicate();
    byte[] payloadBytes = new byte[view.remaining()];
    view.get(payloadBytes);

    String hexString = new String(payloadBytes, StandardCharsets.UTF_8);
    return DatatypeConverter.parseHexBinary(hexString);
}
Example 12
Source File: AvroSplitByFieldMessageParser.java From secor with Apache License 2.0 | 5 votes |
/**
 * Converts the configured timestamp field of a record to milliseconds.
 *
 * @param record the record to read; may be null
 * @return the timestamp passed through {@code toMillis}, or 0 when the record has
 *         no value for the field, when the record is null and no timestamp is
 *         required, or when a {@code SerializationException} is caught
 * @throws RuntimeException if the record is null and {@code m_timestampRequired} is set
 */
protected long extractTimestampMillis(GenericRecord record) {
    try {
        if (record != null) {
            Object fieldValue = record.get(mConfig.getMessageTimestampName());
            if (fieldValue != null) {
                return toMillis(Double.valueOf(fieldValue.toString()).longValue());
            }
        } else if (m_timestampRequired) {
            // record is null on this branch: concatenate it instead of calling
            // record.toString(), which would raise a NullPointerException and
            // hide the intended error message.
            throw new RuntimeException("Missing timestamp field for message: " + record);
        }
    } catch (SerializationException e) {
        LOG.error("Failed to parse record", e);
    }
    return 0;
}
Example 13
Source File: AvroLoader.java From incubator-samoa with Apache License 2.0 | 4 votes |
/** * Method to read Sparse Instances from Avro File * * @return Instance */ protected Instance readInstanceSparse(GenericRecord record) { Instance instance = new SparseInstance(1.0, null); int numAttribute = -1; ArrayList<Double> attributeValues = new ArrayList<Double>(); List<Integer> indexValues = new ArrayList<Integer>(); for (Attribute attribute : attributes) { numAttribute++; Object value = record.get(attribute.name); boolean isNumeric = attributes.get(numAttribute).isNumeric(); boolean isNominal = attributes.get(numAttribute).isNominal(); /** If value is empty/null iterate to the next attribute. **/ if (value == null) continue; if (isNumeric) { if (value instanceof Double) { Double v = (double) value; //if (Double.isFinite(v)) if (!Double.isNaN(v) && !Double.isInfinite(v)) this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (double) value); } else if (value instanceof Long) this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (long) value); else if (value instanceof Integer) this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (int) value); else throw new RuntimeException(AVRO_LOADER_INVALID_TYPE_ERROR + " : " + attribute.name); } else if (isNominal) { double valueAttribute; if (!(value instanceof EnumSymbol)) throw new RuntimeException(AVRO_LOADER_INVALID_TYPE_ERROR + " : " + attribute.name); EnumSymbol enumSymbolalue = (EnumSymbol) value; String stringValue = enumSymbolalue.toString(); if (("?".equals(stringValue)) || (stringValue == null)) { valueAttribute = Double.NaN; } else { valueAttribute = this.instanceInformation.attribute(numAttribute).indexOfValue(stringValue); } this.setSparseValue(instance, indexValues, attributeValues, numAttribute, valueAttribute); } } int[] arrayIndexValues = new int[attributeValues.size()]; double[] arrayAttributeValues = new double[attributeValues.size()]; for (int i = 0; i < arrayIndexValues.length; i++) { arrayIndexValues[i] = indexValues.get(i).intValue(); 
arrayAttributeValues[i] = attributeValues.get(i).doubleValue(); } instance.addSparseValues(arrayIndexValues, arrayAttributeValues, this.instanceInformation.numAttributes()); return instance; }
Example 14
Source File: AvroRecordInputFormatTest.java From flink with Apache License 2.0 | 4 votes |
/** * Helper method to test GenericRecord serialisation. * * @param format * the format to test * @param parameters * the configuration to use * @throws IOException * thrown id there is a issue */ @SuppressWarnings("unchecked") private void doTestDeserializationGenericRecord(final AvroInputFormat<GenericRecord> format, final Configuration parameters) throws IOException { try { format.configure(parameters); FileInputSplit[] splits = format.createInputSplits(1); assertEquals(splits.length, 1); format.open(splits[0]); GenericRecord u = format.nextRecord(null); assertNotNull(u); assertEquals("The schemas should be equal", userSchema, u.getSchema()); String name = u.get("name").toString(); assertNotNull("empty record", name); assertEquals("name not equal", TEST_NAME, name); // check arrays List<CharSequence> sl = (List<CharSequence>) u.get("type_array_string"); assertEquals("element 0 not equal", TEST_ARRAY_STRING_1, sl.get(0).toString()); assertEquals("element 1 not equal", TEST_ARRAY_STRING_2, sl.get(1).toString()); List<Boolean> bl = (List<Boolean>) u.get("type_array_boolean"); assertEquals("element 0 not equal", TEST_ARRAY_BOOLEAN_1, bl.get(0)); assertEquals("element 1 not equal", TEST_ARRAY_BOOLEAN_2, bl.get(1)); // check enums GenericData.EnumSymbol enumValue = (GenericData.EnumSymbol) u.get("type_enum"); assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), enumValue.toString()); // check maps Map<CharSequence, Long> lm = (Map<CharSequence, Long>) u.get("type_map"); assertEquals("map value of key 1 not equal", TEST_MAP_VALUE1, lm.get(new Utf8(TEST_MAP_KEY1)).longValue()); assertEquals("map value of key 2 not equal", TEST_MAP_VALUE2, lm.get(new Utf8(TEST_MAP_KEY2)).longValue()); assertFalse("expecting second element", format.reachedEnd()); assertNotNull("expecting second element", format.nextRecord(u)); assertNull(format.nextRecord(u)); assertTrue(format.reachedEnd()); } finally { format.close(); } }
Example 15
Source File: JsonIntermediateToAvroConverterTest.java From incubator-gobblin with Apache License 2.0 | 4 votes |
@Test public void testConverter() throws Exception { initResources("/converter/schema.json"); JsonIntermediateToAvroConverter converter = new JsonIntermediateToAvroConverter(); Schema avroSchema = converter.convertSchema(jsonSchema, state); GenericRecord record = converter.convertRecord(avroSchema, jsonRecord, state).iterator().next(); //testing output values are expected types and values Assert.assertEquals(jsonRecord.get("Id").getAsString(), record.get("Id").toString()); Assert.assertEquals(jsonRecord.get("IsDeleted").getAsBoolean(), record.get("IsDeleted")); if (!(record.get("Salutation") instanceof GenericArray)) { Assert.fail("expected array, found " + record.get("Salutation").getClass().getName()); } if (!(record.get("MapAccount") instanceof Map)) { Assert.fail("expected map, found " + record.get("MapAccount").getClass().getName()); } Assert.assertEquals(jsonRecord.get("Industry").getAsString(), record.get("Industry").toString()); DateTimeFormatter format = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss") .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); Assert.assertEquals(jsonRecord.get("LastModifiedDate").getAsString(), new DateTime(record.get("LastModifiedDate")).toString(format)); Assert.assertEquals(jsonRecord.get("date_type").getAsString(), new DateTime(record.get("date_type")).toString(format)); format = DateTimeFormat.forPattern("HH:mm:ss").withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); Assert.assertEquals(jsonRecord.get("time_type").getAsString(), new DateTime(record.get("time_type")).toString(format)); Assert.assertEquals(jsonRecord.get("bytes_type").getAsString().getBytes(), ((ByteBuffer) record.get("bytes_type")).array()); Assert.assertEquals(jsonRecord.get("int_type").getAsInt(), record.get("int_type")); Assert.assertEquals(jsonRecord.get("long_type").getAsLong(), record.get("long_type")); Assert.assertEquals(jsonRecord.get("float_type").getAsFloat(), record.get("float_type")); 
Assert.assertEquals(jsonRecord.get("double_type").getAsDouble(), record.get("double_type")); //Testing timezone state.setProp(ConfigurationKeys.CONVERTER_AVRO_DATE_TIMEZONE, "EST"); avroSchema = converter.convertSchema(jsonSchema, state); GenericRecord record2 = converter.convertRecord(avroSchema, jsonRecord, state).iterator().next(); Assert.assertNotEquals(record.get("LastModifiedDate"), record2.get("LastModifiedDate")); }
Example 16
Source File: AVROIntermediateDataFormat.java From sqoop-on-spark with Apache License 2.0 | 4 votes |
@SuppressWarnings("unchecked") private String toCSV(GenericRecord record) { Column[] columns = this.schema.getColumnsArray(); StringBuilder csvString = new StringBuilder(); for (int i = 0; i < columns.length; i++) { Object obj = record.get(columns[i].getName()); if (obj == null && !columns[i].isNullable()) { throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0005, columns[i].getName() + " does not support null values"); } if (obj == null) { csvString.append(NULL_VALUE); } else { switch (columns[i].getType()) { case ARRAY: case SET: List<Object> objList = (List<Object>) obj; csvString.append(toCSVList(toObjectArray(objList), columns[i])); break; case MAP: Map<Object, Object> objMap = (Map<Object, Object>) obj; csvString.append(toCSVMap(objMap, columns[i])); break; case ENUM: case TEXT: csvString.append(toCSVString(obj.toString())); break; case BINARY: case UNKNOWN: csvString.append(toCSVByteArray(getBytesFromByteBuffer(obj))); break; case FIXED_POINT: csvString.append(toCSVFixedPoint(obj, columns[i])); break; case FLOATING_POINT: csvString.append(toCSVFloatingPoint(obj, columns[i])); break; case DECIMAL: // stored as string csvString.append(toCSVDecimal(obj)); break; case DATE: // stored as long Long dateInMillis = (Long) obj; csvString.append(toCSVDate(new org.joda.time.LocalDate(dateInMillis))); break; case TIME: // stored as long Long timeInMillis = (Long) obj; csvString.append(toCSVTime(new org.joda.time.LocalTime(timeInMillis), columns[i])); break; case DATE_TIME: // stored as long Long dateTimeInMillis = (Long) obj; csvString.append(toCSVDateTime(new org.joda.time.DateTime(dateTimeInMillis), columns[i])); break; case BIT: csvString.append(toCSVBit(obj)); break; default: throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0001, "Column type from schema was not recognized for " + columns[i].getType()); } } if (i < columns.length - 1) { csvString.append(CSV_SEPARATOR_CHARACTER); } } return csvString.toString(); 
}
Example 17
Source File: TestApacheAvroEventSerializer.java From flume-plugins with MIT License | 4 votes |
@Test public void test() throws FileNotFoundException, IOException { // create the file, write some data OutputStream out = new FileOutputStream(testFile); String builderName = ApacheLogAvroEventSerializer.Builder.class.getName(); Context ctx = new Context(); ctx.put("syncInterval", "4096"); EventSerializer serializer = EventSerializerFactory.getInstance(builderName, ctx, out); serializer.afterCreate(); // must call this when a file is newly created List<Event> events = generateApacheEvents(); for (Event e : events) { serializer.write(e); } serializer.flush(); serializer.beforeClose(); out.flush(); out.close(); // now try to read the file back DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(); DataFileReader<GenericRecord> fileReader = new DataFileReader<GenericRecord>(testFile, reader); GenericRecord record = new GenericData.Record(fileReader.getSchema()); int numEvents = 0; while (fileReader.hasNext()) { fileReader.next(record); String ip = record.get("ip").toString(); String uri = record.get("uri").toString(); Integer statuscode = (Integer) record.get("statuscode"); String original = record.get("original").toString(); String connectionstatus = record.get("connectionstatus").toString(); Assert.assertEquals("Ip should be 80.79.194.3", "80.79.194.3", ip); System.out.println("IP " + ip + " requested: " + uri + " with status code " + statuscode + " and connectionstatus: " + connectionstatus); System.out.println("Original logline: " + original); numEvents++; } fileReader.close(); Assert.assertEquals("Should have found a total of 3 events", 2, numEvents); FileUtils.forceDelete(testFile); }
Example 18
Source File: TestAvroDataGenerator.java From datacollector with Apache License 2.0 | 4 votes |
@Test public void testAvroGeneratorShortType() throws Exception { final String SCHEMA_JSON = "{\n" +"\"type\": \"record\",\n" +"\"name\": \"WithDecimal\",\n" +"\"fields\": [\n" +" {\"name\": \"short\", \"type\": \"int\"}" +"]}"; final Schema SCHEMA = new Schema.Parser().parse(SCHEMA_JSON); Map<String, Field> map = new LinkedHashMap<>(); map.put("short", Field.create(Field.Type.SHORT, (short)1)); Record record = RecordCreator.create(); record.set(Field.create(map)); ByteArrayOutputStream baos = new ByteArrayOutputStream(); DataGenerator gen = new AvroDataOutputStreamGenerator( false, baos, COMPRESSION_CODEC_DEFAULT, SCHEMA, new HashMap<String, Object>(), null, null, 0 ); gen.write(record); gen.close(); //reader schema must be extracted from the data file GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null); DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>( new SeekableByteArrayInput(baos.toByteArray()), reader); Assert.assertTrue(dataFileReader.hasNext()); GenericRecord readRecord = dataFileReader.next(); Object retrievedField = readRecord.get("short"); Assert.assertEquals(1, retrievedField); Assert.assertFalse(dataFileReader.hasNext()); }
Example 19
Source File: AvroGenericUtils.java From simplesource with Apache License 2.0 | 4 votes |
/**
 * Unpacks a record into its nested value record and its sequence.
 *
 * @param record record holding a {@code VALUE} record and a {@code SEQUENCE} long
 * @return the value paired with the sequence built from the stored position
 */
public static ValueWithSequence<GenericRecord> fromGenericRecord(final GenericRecord record) {
    final GenericRecord payload = (GenericRecord) record.get(VALUE);
    final Long position = (Long) record.get(SEQUENCE);
    return new ValueWithSequence<>(payload, Sequence.position(position));
}
Example 20
Source File: GenericPartitioner.java From incubator-pinot with Apache License 2.0 | 4 votes |
/**
 * Picks the partition for a record from the value of its partition column.
 *
 * <p>Only the value's datum is consulted; the key and {@code numPartitions}
 * are not used by this method.
 *
 * @param genericRecordAvroKey   the key (unused)
 * @param genericRecordAvroValue wrapper around the record to partition
 * @param numPartitions          number of partitions (unused)
 * @return the partition returned by {@code _partitionFunction}
 */
@Override
public int getPartition(T genericRecordAvroKey, AvroValue<GenericRecord> genericRecordAvroValue,
        int numPartitions) {
    final Object columnValue = genericRecordAvroValue.datum().get(_partitionColumn);
    return _partitionFunction.getPartition(columnValue);
}