Java Code Examples for org.apache.avro.file.DataFileStream#hasNext()
The following examples show how to use org.apache.avro.file.DataFileStream#hasNext().
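Before the project-specific examples, here is a minimal sketch of the basic idiom, assuming a local file named data.avro (a placeholder): hasNext() reports whether more records remain in the stream, and next() deserializes the next one.

import java.io.FileInputStream;
import java.io.IOException;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class DataFileStreamExample {
  public static void main(String[] args) throws IOException {
    // DataFileStream implements Closeable, so try-with-resources closes it for us
    try (DataFileStream<GenericRecord> stream = new DataFileStream<GenericRecord>(
        new FileInputStream("data.avro"), new GenericDatumReader<GenericRecord>())) {
      while (stream.hasNext()) {  // true while more records remain
        GenericRecord record = stream.next();
        System.out.println(record);
      }
    }
  }
}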
Example 1
Source File: SegmentCreationPhaseMapReduceJob.java From incubator-pinot with Apache License 2.0
private LongColumnPreIndexStatsCollector getTimeColumnStatsCollector(Schema schema, File localAvroFile)
    throws FileNotFoundException, IOException {
  String timeColumnName = schema.getTimeColumnName();
  FieldSpec spec = schema.getTimeFieldSpec();
  LOGGER.info("Spec for " + timeColumnName + " is " + spec);
  LongColumnPreIndexStatsCollector timeColumnStatisticsCollector =
      new LongColumnPreIndexStatsCollector(spec.getName(), new StatsCollectorConfig(schema, null));
  LOGGER.info("StatsCollector :" + timeColumnStatisticsCollector);
  DataFileStream<GenericRecord> dataStream = new DataFileStream<GenericRecord>(
      new FileInputStream(localAvroFile), new GenericDatumReader<GenericRecord>());
  while (dataStream.hasNext()) {
    GenericRecord next = dataStream.next();
    timeColumnStatisticsCollector.collect(next.get(timeColumnName));
  }
  dataStream.close();
  timeColumnStatisticsCollector.seal();
  return timeColumnStatisticsCollector;
}
Example 2
Source File: Examples.java From datafu with Apache License 2.0
private HashMap<Long,Integer> loadOutputCounts(Path path, String timestamp) throws IOException {
  HashMap<Long,Integer> counts = new HashMap<Long,Integer>();
  FileSystem fs = getFileSystem();
  Assert.assertTrue(fs.exists(new Path(path, timestamp)));
  for (FileStatus stat : fs.globStatus(new Path(path, timestamp + "/*.avro"))) {
    _log.info(String.format("found: %s (%d bytes)", stat.getPath(), stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
    try {
      while (dataFileStream.hasNext()) {
        GenericRecord r = dataFileStream.next();
        _log.info("found: " + r.toString());
        Long memberId = (Long)((GenericRecord)r.get("key")).get("member_id");
        Assert.assertNotNull(memberId);
        Integer count = (Integer)((GenericRecord)r.get("value")).get("count");
        Assert.assertNotNull(count);
        Assert.assertFalse(counts.containsKey(memberId));
        counts.put(memberId, count);
      }
    } finally {
      dataFileStream.close();
    }
  }
  return counts;
}
Example 3
Source File: TestHDFSCompressedDataStream.java From mt-flume with Apache License 2.0
@Test
public void testGzipDurabilityWithSerializer() throws Exception {
  Context context = new Context();
  context.put("serializer", "AVRO_EVENT");
  HDFSCompressedDataStream writer = new HDFSCompressedDataStream();
  writer.configure(context);
  writer.open(fileURI, factory.getCodec(new Path(fileURI)),
      SequenceFile.CompressionType.BLOCK);
  String[] bodies = { "yarf!", "yarfing!" };
  writeBodies(writer, bodies);
  int found = 0;
  int expected = bodies.length;
  List<String> expectedBodies = Lists.newArrayList(bodies);
  GZIPInputStream cmpIn = new GZIPInputStream(new FileInputStream(file));
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileStream<GenericRecord> avroStream = new DataFileStream<GenericRecord>(cmpIn, reader);
  GenericRecord record = new GenericData.Record(avroStream.getSchema());
  while (avroStream.hasNext()) {
    avroStream.next(record);
    CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
    String bodyStr = decoder.decode((ByteBuffer) record.get("body")).toString();
    expectedBodies.remove(bodyStr);
    found++;
  }
  avroStream.close();
  cmpIn.close();
  Assert.assertTrue("Found = " + found + ", Expected = " + expected
      + ", Left = " + expectedBodies.size() + " " + expectedBodies,
      expectedBodies.size() == 0);
}
Example 4
Source File: TestHDFSEventSink.java From mt-flume with Apache License 2.0
private void verifyOutputAvroFiles(FileSystem fs, Configuration conf, String dir, String prefix,
    List<String> bodies) throws IOException {
  int found = 0;
  int expected = bodies.size();
  for (String outputFile : getAllFiles(dir)) {
    String name = (new File(outputFile)).getName();
    if (name.startsWith(prefix)) {
      FSDataInputStream input = fs.open(new Path(outputFile));
      DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
      DataFileStream<GenericRecord> avroStream = new DataFileStream<GenericRecord>(input, reader);
      GenericRecord record = new GenericData.Record(avroStream.getSchema());
      while (avroStream.hasNext()) {
        avroStream.next(record);
        ByteBuffer body = (ByteBuffer) record.get("body");
        CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
        String bodyStr = decoder.decode(body).toString();
        LOG.debug("Removing event: {}", bodyStr);
        bodies.remove(bodyStr);
        found++;
      }
      avroStream.close();
      input.close();
    }
  }
  Assert.assertTrue("Found = " + found + ", Expected = " + expected
      + ", Left = " + bodies.size() + " " + bodies, bodies.size() == 0);
}
Example 5
Source File: PartitionPreservingCollapsingIntegrationTests.java From datafu with Apache License 2.0
private HashMap<Long,Long> loadOutputCounts(Path path, String timestamp) throws IOException {
  HashMap<Long,Long> counts = new HashMap<Long,Long>();
  FileSystem fs = getFileSystem();
  Assert.assertTrue(fs.exists(new Path(path, timestamp)));
  for (FileStatus stat : fs.globStatus(new Path(path, timestamp + "/*.avro"))) {
    _log.info(String.format("found: %s (%d bytes)", stat.getPath(), stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
    try {
      while (dataFileStream.hasNext()) {
        GenericRecord r = dataFileStream.next();
        Long memberId = (Long)((GenericRecord)r.get("key")).get("id");
        Long count = (Long)((GenericRecord)r.get("value")).get("count");
        Assert.assertFalse(counts.containsKey(memberId));
        counts.put(memberId, count);
      }
    } finally {
      dataFileStream.close();
    }
  }
  return counts;
}
Example 6
Source File: PartitionPreservingCollapsingIntegrationTests.java From datafu with Apache License 2.0
private HashMap<Long,Long> loadIntermediateCounts(Path path, String timestamp) throws IOException {
  HashMap<Long,Long> counts = new HashMap<Long,Long>();
  FileSystem fs = getFileSystem();
  String nestedPath = getNestedPathFromTimestamp(timestamp);
  Assert.assertTrue(fs.exists(new Path(_intermediatePath, nestedPath)));
  for (FileStatus stat : fs.globStatus(new Path(_intermediatePath, nestedPath + "/*.avro"))) {
    _log.info(String.format("found: %s (%d bytes)", stat.getPath(), stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
    try {
      while (dataFileStream.hasNext()) {
        GenericRecord r = dataFileStream.next();
        Long memberId = (Long)((GenericRecord)r.get("key")).get("id");
        Long count = (Long)((GenericRecord)r.get("value")).get("count");
        Assert.assertFalse(counts.containsKey(memberId));
        counts.put(memberId, count);
      }
    } finally {
      dataFileStream.close();
    }
  }
  return counts;
}
Example 7
Source File: PartitionPreservingJoinTests.java From datafu with Apache License 2.0
private HashMap<Long,ImpressionClick> loadOutputCounts(String timestamp) throws IOException {
  HashMap<Long,ImpressionClick> counts = new HashMap<Long,ImpressionClick>();
  FileSystem fs = getFileSystem();
  String nestedPath = getNestedPathFromTimestamp(timestamp);
  Assert.assertTrue(fs.exists(new Path(_outputPath, nestedPath)));
  for (FileStatus stat : fs.globStatus(new Path(_outputPath, nestedPath + "/*.avro"))) {
    _log.info(String.format("found: %s (%d bytes)", stat.getPath(), stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
    try {
      while (dataFileStream.hasNext()) {
        GenericRecord r = dataFileStream.next();
        Long memberId = (Long)((GenericRecord)r.get("key")).get("id");
        Integer impressions = (Integer)((GenericRecord)r.get("value")).get("impressions");
        Integer clicks = (Integer)((GenericRecord)r.get("value")).get("clicks");
        Assert.assertFalse(counts.containsKey(memberId));
        ImpressionClick data = new ImpressionClick();
        data.clicks = clicks;
        data.impressions = impressions;
        counts.put(memberId, data);
      }
    } finally {
      dataFileStream.close();
    }
  }
  return counts;
}
Example 8
Source File: PartitionCollapsingJoinTest.java From datafu with Apache License 2.0
private HashMap<Long,ImpressionClick> loadOutputCounts(String timestamp) throws IOException {
  HashMap<Long,ImpressionClick> counts = new HashMap<Long,ImpressionClick>();
  FileSystem fs = getFileSystem();
  Assert.assertTrue(fs.exists(new Path(_outputPath, timestamp)));
  for (FileStatus stat : fs.globStatus(new Path(_outputPath, timestamp + "/*.avro"))) {
    _log.info(String.format("found: %s (%d bytes)", stat.getPath(), stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
    try {
      while (dataFileStream.hasNext()) {
        GenericRecord r = dataFileStream.next();
        Long memberId = (Long)((GenericRecord)r.get("key")).get("id");
        Integer impressions = (Integer)((GenericRecord)r.get("value")).get("impressions");
        Integer clicks = (Integer)((GenericRecord)r.get("value")).get("clicks");
        Assert.assertFalse(counts.containsKey(memberId));
        ImpressionClick data = new ImpressionClick();
        data.clicks = clicks;
        data.impressions = impressions;
        counts.put(memberId, data);
      }
    } finally {
      dataFileStream.close();
    }
  }
  return counts;
}
Example 9
Source File: PartitionCollapsingTests.java From datafu with Apache License 2.0
private HashMap<Long,Long> loadOutputCounts(String timestamp) throws IOException {
  HashMap<Long,Long> counts = new HashMap<Long,Long>();
  FileSystem fs = getFileSystem();
  Assert.assertTrue(fs.exists(new Path(_outputPath, timestamp)));
  for (FileStatus stat : fs.globStatus(new Path(_outputPath, timestamp + "/*.avro"))) {
    _log.info(String.format("found: %s (%d bytes)", stat.getPath(), stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
    try {
      while (dataFileStream.hasNext()) {
        GenericRecord r = dataFileStream.next();
        Long memberId = (Long)((GenericRecord)r.get("key")).get("id");
        Long count = (Long)((GenericRecord)r.get("value")).get("count");
        Assert.assertFalse(counts.containsKey(memberId));
        counts.put(memberId, count);
      }
    } finally {
      dataFileStream.close();
    }
  }
  return counts;
}
Example 10
Source File: TestAvroJob.java From datafu with Apache License 2.0
private HashMap<Long,Long> loadOutputCounts(String timestamp) throws IOException {
  HashMap<Long,Long> counts = new HashMap<Long,Long>();
  FileSystem fs = getFileSystem();
  Assert.assertTrue(fs.exists(new Path(_outputPath, timestamp)));
  for (FileStatus stat : fs.globStatus(new Path(_outputPath, timestamp + "/*.avro"))) {
    _log.info(String.format("found: %s (%d bytes)", stat.getPath(), stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
    try {
      while (dataFileStream.hasNext()) {
        GenericRecord r = dataFileStream.next();
        Long memberId = (Long)r.get("id");
        Long count = (Long)r.get("count");
        Assert.assertFalse(counts.containsKey(memberId));
        counts.put(memberId, count);
      }
    } finally {
      dataFileStream.close();
    }
  }
  return counts;
}
Example 11
Source File: PartitionPreservingTests.java From datafu with Apache License 2.0
private HashMap<Long,Long> loadOutputCounts(String timestamp) throws IOException {
  HashMap<Long,Long> counts = new HashMap<Long,Long>();
  FileSystem fs = getFileSystem();
  String nestedPath = getNestedPathFromTimestamp(timestamp);
  Assert.assertTrue(fs.exists(new Path(_outputPath, nestedPath)));
  for (FileStatus stat : fs.globStatus(new Path(_outputPath, nestedPath + "/*.avro"))) {
    _log.info(String.format("found: %s (%d bytes)", stat.getPath(), stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
    try {
      while (dataFileStream.hasNext()) {
        GenericRecord r = dataFileStream.next();
        Long memberId = (Long)((GenericRecord)r.get("key")).get("id");
        Long count = (Long)((GenericRecord)r.get("value")).get("count");
        Assert.assertFalse(counts.containsKey(memberId));
        counts.put(memberId, count);
      }
    } finally {
      dataFileStream.close();
    }
  }
  return counts;
}
Example 12
Source File: AvroRowDecoder.java From presto with Apache License 2.0
@Override
public Optional<Map<DecoderColumnHandle, FieldValueProvider>> decodeRow(byte[] data, Map<String, String> dataMap) {
  GenericRecord avroRecord;
  DataFileStream<GenericRecord> dataFileReader = null;
  try {
    // Assumes producer uses DataFileWriter or data comes in this particular format.
    // TODO: Support other forms for producers
    dataFileReader = new DataFileStream<>(new ByteArrayInputStream(data), avroRecordReader);
    if (!dataFileReader.hasNext()) {
      throw new PrestoException(GENERIC_INTERNAL_ERROR, "No avro record found");
    }
    avroRecord = dataFileReader.next();
    if (dataFileReader.hasNext()) {
      throw new PrestoException(GENERIC_INTERNAL_ERROR, "Unexpected extra record found");
    }
  }
  catch (Exception e) {
    throw new PrestoException(GENERIC_INTERNAL_ERROR, "Decoding Avro record failed.", e);
  }
  finally {
    closeQuietly(dataFileReader);
  }
  return Optional.of(columnDecoders.entrySet().stream()
      .collect(toImmutableMap(
          Map.Entry::getKey,
          entry -> entry.getValue().decodeField(avroRecord))));
}
Example 13
Source File: TestAvroStorage.java From spork with Apache License 2.0
private Set<Object> getExpected(String pathstr) throws IOException {
  Set<Object> ret = new HashSet<Object>();
  FileSystem fs = FileSystem.getLocal(new Configuration());
  /* read in output results and compare */
  Path output = new Path(pathstr);
  assertTrue("Expected output does not exists!", fs.exists(output));
  Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
  assertTrue("Split field dirs not found!", paths != null);
  for (Path path : paths) {
    Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
    assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
    for (Path filePath : files) {
      assertTrue("This shouldn't be a directory", fs.isFile(filePath));
      GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
      DataFileStream<Object> in = new DataFileStream<Object>(fs.open(filePath), reader);
      while (in.hasNext()) {
        Object obj = in.next();
        ret.add(obj);
      }
      in.close();
    }
  }
  return ret;
}
Example 14
Source File: TestAvroStorage.java From spork with Apache License 2.0
private Set<GenericData.Record> getExpected(String pathstr) throws IOException {
  Set<GenericData.Record> ret = new TreeSet<GenericData.Record>(
      new Comparator<GenericData.Record>() {
        @Override
        public int compare(Record o1, Record o2) {
          return o1.toString().compareTo(o2.toString());
        }
      });
  FileSystem fs = FileSystem.getLocal(new Configuration());
  /* read in output results and compare */
  Path output = new Path(pathstr);
  assertTrue("Expected output does not exists!", fs.exists(output));
  Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
  assertTrue("Split field dirs not found!", paths != null);
  for (Path path : paths) {
    Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
    assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
    for (Path filePath : files) {
      assertTrue("This shouldn't be a directory", fs.isFile(filePath));
      GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<GenericData.Record>();
      DataFileStream<GenericData.Record> in = new DataFileStream<GenericData.Record>(fs.open(filePath), reader);
      while (in.hasNext()) {
        GenericData.Record obj = in.next();
        ret.add(obj);
      }
      in.close();
    }
  }
  return ret;
}
Example 15
Source File: TestAvroStorage.java From spork with Apache License 2.0
private void verifyResults(String outPath, String expectedOutpath, String expectedCodec) throws IOException {
  FileSystem fs = FileSystem.getLocal(new Configuration());
  /* read in expected results */
  Set<GenericData.Record> expected = getExpected(expectedOutpath);
  /* read in output results and compare */
  Path output = new Path(outPath);
  assertTrue("Output dir does not exists!",
      fs.exists(output) && fs.getFileStatus(output).isDir());
  Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
  assertTrue("Split field dirs not found!", paths != null);
  for (Path path : paths) {
    Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
    assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
    for (Path filePath : files) {
      assertTrue("This shouldn't be a directory", fs.isFile(filePath));
      GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<GenericData.Record>();
      DataFileStream<GenericData.Record> in = new DataFileStream<GenericData.Record>(
          fs.open(filePath), reader);
      assertEquals("codec", expectedCodec, in.getMetaString("avro.codec"));
      int count = 0;
      while (in.hasNext()) {
        GenericData.Record obj = in.next();
        assertTrue("Avro result object found that's not expected: Found "
            + (obj != null ? obj.getSchema() : "null") + ", " + obj.toString()
            + "\nExpected " + (expected != null ? expected.toString() : "null") + "\n",
            expected.contains(obj));
        count++;
      }
      in.close();
      assertEquals(expected.size(), count);
    }
  }
}
Example 16
Source File: BloomFilterCreator.java From hiped2 with Apache License 2.0
public static BloomFilter readFromAvro(InputStream is) throws IOException {
  DataFileStream<Object> reader = new DataFileStream<Object>(
      is, new GenericDatumReader<Object>());
  reader.hasNext();  // note: the boolean result is ignored; the stream is assumed to hold a record
  BloomFilter filter = new BloomFilter();
  AvroBytesRecord
      .fromGenericRecord((GenericRecord) reader.next(), filter);
  IOUtils.closeQuietly(is);
  IOUtils.closeQuietly(reader);
  return filter;
}
Example 17
Source File: BloomFilterDumper.java From hiped2 with Apache License 2.0
public static BloomFilter readFromAvro(InputStream is) throws IOException {
  DataFileStream<Object> reader = new DataFileStream<Object>(
      is, new GenericDatumReader<Object>());
  reader.hasNext();  // note: the boolean result is ignored; the stream is assumed to hold a record
  BloomFilter filter = new BloomFilter();
  AvroBytesRecord
      .fromGenericRecord((GenericRecord) reader.next(), filter);
  IOUtils.closeQuietly(is);
  IOUtils.closeQuietly(reader);
  return filter;
}
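Both hiped2 examples call hasNext() without checking its result, relying on the file to contain at least one record. A more defensive variant (a sketch using the same helper classes, not code from the original project) would guard the call:

public static BloomFilter readFromAvro(InputStream is) throws IOException {
  DataFileStream<Object> reader = new DataFileStream<Object>(
      is, new GenericDatumReader<Object>());
  try {
    if (!reader.hasNext()) {  // guard against an empty stream
      throw new IOException("no Avro records found in stream");
    }
    BloomFilter filter = new BloomFilter();
    AvroBytesRecord.fromGenericRecord((GenericRecord) reader.next(), filter);
    return filter;
  } finally {
    IOUtils.closeQuietly(reader);
    IOUtils.closeQuietly(is);
  }
}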
Example 18
Source File: RegressionAdmmTrain.java From ml-ease with Apache License 2.0
private void computeU(JobConf conf, String uPath, String uplusxPath, Map<String, LinearModel> z)
    throws IOException {
  AvroHdfsFileWriter<GenericRecord> writer =
      new AvroHdfsFileWriter<GenericRecord>(conf, uPath, LinearModelAvro.SCHEMA$);
  DataFileWriter<GenericRecord> recordwriter = writer.get();
  // read u+x
  for (Path path : Util.findPartFiles(conf, new Path(uplusxPath))) {
    DataFileStream<Object> stream = AvroUtils.getAvroDataStream(conf, path);
    while (stream.hasNext()) {
      GenericData.Record record = (GenericData.Record) stream.next();
      String partitionID = Util.getStringAvro(record, "key", false);
      if (record.get("uplusx") != null) {
        String lambda = Util.getLambda(partitionID);
        LinearModel newu = new LinearModel(LibLinearDataset.INTERCEPT_NAME,
            (List<?>) record.get("uplusx"));
        newu.linearCombine(1.0, -1.0, z.get(lambda));
        GenericData.Record newvaluemap = new GenericData.Record(LinearModelAvro.SCHEMA$);
        List modellist = newu.toAvro(LibLinearDataset.INTERCEPT_NAME);
        newvaluemap.put("key", partitionID);
        newvaluemap.put("model", modellist);
        recordwriter.append(newvaluemap);
      }
    }
  }
  recordwriter.close();
}
Example 19
Source File: AvroFileReader.java From ml-ease with Apache License 2.0
public <T> void build(String filePath, AvroConsumer<T> builder) throws IOException {
  List<Path> paths = getPaths(filePath);
  for (Path path : paths) {
    DataFileStream<Object> stream = null;
    try {
      stream = getAvroDataStream(path);
      while (stream.hasNext()) {
        builder.consume(stream.next());
      }
    } finally {
      if (stream != null) {
        stream.close();
      }
    }
  }
  builder.done();
}
Example 20
Source File: WholeFileTransformerProcessor.java From datacollector with Apache License 2.0
/**
 * Convert Avro record to Parquet
 * @param sourceFileName the source Avro file name
 * @param fileReader the {@link org.apache.avro.file.DataFileStream} Avro file reader
 * @param tempParquetFile the {@link java.nio.file.Path} temporary parquet file path
 */
private void writeParquet(String sourceFileName, DataFileStream<GenericRecord> fileReader, Path tempParquetFile)
    throws StageException {
  long recordCount = 0;
  GenericRecord avroRecord;
  Schema schema = fileReader.getSchema();
  LOG.debug("Start reading input file : {}", sourceFileName);
  try {
    // initialize parquet writer
    Configuration jobConfiguration = new Configuration();
    String compressionCodecName = compressionElEval.eval(variables,
        jobConfig.avroParquetConfig.compressionCodec, String.class);
    jobConfiguration.set(AvroParquetConstants.COMPRESSION_CODEC_NAME, compressionCodecName);
    jobConfiguration.setInt(AvroParquetConstants.ROW_GROUP_SIZE, jobConfig.avroParquetConfig.rowGroupSize);
    jobConfiguration.setInt(AvroParquetConstants.PAGE_SIZE, jobConfig.avroParquetConfig.pageSize);
    jobConfiguration.setInt(AvroParquetConstants.DICTIONARY_PAGE_SIZE,
        jobConfig.avroParquetConfig.dictionaryPageSize);
    jobConfiguration.setInt(AvroParquetConstants.MAX_PADDING_SIZE, jobConfig.avroParquetConfig.maxPaddingSize);
    // Parquet writer
    ParquetWriter.Builder builder = AvroToParquetConverterUtil.initializeWriter(
        new org.apache.hadoop.fs.Path(tempParquetFile.toString()),
        schema,
        jobConfiguration);
    parquetWriter = builder.build();
    while (fileReader.hasNext()) {
      avroRecord = fileReader.next();
      parquetWriter.write(avroRecord);
      recordCount++;
    }
    parquetWriter.close();
  } catch (IOException ex) {
    throw new TransformerStageCheckedException(Errors.CONVERT_08, sourceFileName, recordCount, ex);
  }
  LOG.debug("Finished writing {} records to {}", recordCount, tempParquetFile.getFileName());
}