Java Code Examples for org.apache.flume.channel.MemoryChannel#put()
The following examples show how to use org.apache.flume.channel.MemoryChannel#put().
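All of the examples below use the same idiom: obtain the channel's Transaction, begin() it, put() one or more events, commit(), and close() the transaction in a finally block; the committed events are then drained by a sink's process() call. The following is a minimal, self-contained sketch of that put() pattern (the class name and event body are illustrative, not taken from any of the examples):

import com.google.common.base.Charsets;
import org.apache.flume.Context;
import org.apache.flume.Transaction;
import org.apache.flume.channel.MemoryChannel;
import org.apache.flume.conf.Configurables;
import org.apache.flume.event.EventBuilder;

public class MemoryChannelPutSketch {
  public static void main(String[] args) {
    MemoryChannel channel = new MemoryChannel();
    Configurables.configure(channel, new Context()); // default capacity and transaction capacity
    channel.start();

    Transaction tx = channel.getTransaction();
    tx.begin();
    try {
      // put() stages the event inside the open transaction
      channel.put(EventBuilder.withBody("hello", Charsets.UTF_8));
      // commit() makes the staged event visible to a downstream sink
      tx.commit();
    } catch (RuntimeException e) {
      tx.rollback(); // undo the staged put() on failure
      throw e;
    } finally {
      tx.close();
    }

    channel.stop();
  }
}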
Example 1
Source File: TestHDFSEventSinkOnMiniCluster.java (from mt-flume, Apache License 2.0)
/**
 * This is a very basic test that writes one event to HDFS and reads it back.
 */
@Test
public void simpleHDFSTest() throws EventDeliveryException, IOException {
  cluster = new MiniDFSCluster(new Configuration(), 1, true, null);
  cluster.waitActive();

  String outputDir = "/flume/simpleHDFSTest";
  Path outputDirPath = new Path(outputDir);

  logger.info("Running test with output dir: {}", outputDir);

  FileSystem fs = cluster.getFileSystem();
  // ensure output directory is empty
  if (fs.exists(outputDirPath)) {
    fs.delete(outputDirPath, true);
  }

  String nnURL = getNameNodeURL(cluster);
  logger.info("Namenode address: {}", nnURL);

  Context chanCtx = new Context();
  MemoryChannel channel = new MemoryChannel();
  channel.setName("simpleHDFSTest-mem-chan");
  channel.configure(chanCtx);
  channel.start();

  Context sinkCtx = new Context();
  sinkCtx.put("hdfs.path", nnURL + outputDir);
  sinkCtx.put("hdfs.fileType", HDFSWriterFactory.DataStreamType);
  sinkCtx.put("hdfs.batchSize", Integer.toString(1));

  HDFSEventSink sink = new HDFSEventSink();
  sink.setName("simpleHDFSTest-hdfs-sink");
  sink.configure(sinkCtx);
  sink.setChannel(channel);
  sink.start();

  // create an event
  String EVENT_BODY = "yarg!";
  channel.getTransaction().begin();
  try {
    channel.put(EventBuilder.withBody(EVENT_BODY, Charsets.UTF_8));
    channel.getTransaction().commit();
  } finally {
    channel.getTransaction().close();
  }

  // store event to HDFS
  sink.process();

  // shut down flume
  sink.stop();
  channel.stop();

  // verify that it's in HDFS and that its content is what we say it should be
  FileStatus[] statuses = fs.listStatus(outputDirPath);
  Assert.assertNotNull("No files found written to HDFS", statuses);
  Assert.assertEquals("Only one file expected", 1, statuses.length);

  for (FileStatus status : statuses) {
    Path filePath = status.getPath();
    logger.info("Found file on DFS: {}", filePath);

    FSDataInputStream stream = fs.open(filePath);
    BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
    String line = reader.readLine();
    logger.info("First line in file {}: {}", filePath, line);
    Assert.assertEquals(EVENT_BODY, line);
  }

  if (!KEEP_DATA) {
    fs.delete(outputDirPath, true);
  }

  cluster.shutdown();
  cluster = null;
}
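For context, the sink.process() call above drains the channel with the mirror-image operation, Channel#take(), inside the sink's own transaction. A simplified sketch of that consumer side, reusing the channel variable and imports from the example (the real HDFSEventSink adds batching and the actual HDFS write):

Transaction tx = channel.getTransaction();
tx.begin();
try {
  Event event = channel.take(); // returns null when the channel is empty
  if (event != null) {
    // hand the event body to the destination (HDFS in this test)
  }
  tx.commit();
} catch (RuntimeException e) {
  tx.rollback(); // the event stays in the channel and can be taken again later
  throw e;
} finally {
  tx.close();
}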
Example 2
Source File: TestHDFSEventSinkOnMiniCluster.java (from mt-flume, Apache License 2.0)
/**
 * Writes two events in GZIP-compressed format.
 */
@Test
public void simpleHDFSGZipCompressedTest() throws EventDeliveryException, IOException {
  cluster = new MiniDFSCluster(new Configuration(), 1, true, null);
  cluster.waitActive();

  String outputDir = "/flume/simpleHDFSGZipCompressedTest";
  Path outputDirPath = new Path(outputDir);

  logger.info("Running test with output dir: {}", outputDir);

  FileSystem fs = cluster.getFileSystem();
  // ensure output directory is empty
  if (fs.exists(outputDirPath)) {
    fs.delete(outputDirPath, true);
  }

  String nnURL = getNameNodeURL(cluster);
  logger.info("Namenode address: {}", nnURL);

  Context chanCtx = new Context();
  MemoryChannel channel = new MemoryChannel();
  channel.setName("simpleHDFSTest-mem-chan");
  channel.configure(chanCtx);
  channel.start();

  Context sinkCtx = new Context();
  sinkCtx.put("hdfs.path", nnURL + outputDir);
  sinkCtx.put("hdfs.fileType", HDFSWriterFactory.CompStreamType);
  sinkCtx.put("hdfs.batchSize", Integer.toString(1));
  sinkCtx.put("hdfs.codeC", "gzip");

  HDFSEventSink sink = new HDFSEventSink();
  sink.setName("simpleHDFSTest-hdfs-sink");
  sink.configure(sinkCtx);
  sink.setChannel(channel);
  sink.start();

  // create an event
  String EVENT_BODY_1 = "yarg1";
  String EVENT_BODY_2 = "yarg2";
  channel.getTransaction().begin();
  try {
    channel.put(EventBuilder.withBody(EVENT_BODY_1, Charsets.UTF_8));
    channel.put(EventBuilder.withBody(EVENT_BODY_2, Charsets.UTF_8));
    channel.getTransaction().commit();
  } finally {
    channel.getTransaction().close();
  }

  // store event to HDFS
  sink.process();

  // shut down flume
  sink.stop();
  channel.stop();

  // verify that it's in HDFS and that its content is what we say it should be
  FileStatus[] statuses = fs.listStatus(outputDirPath);
  Assert.assertNotNull("No files found written to HDFS", statuses);
  Assert.assertEquals("Only one file expected", 1, statuses.length);

  for (FileStatus status : statuses) {
    Path filePath = status.getPath();
    logger.info("Found file on DFS: {}", filePath);

    FSDataInputStream stream = fs.open(filePath);
    BufferedReader reader = new BufferedReader(new InputStreamReader(
        new GZIPInputStream(stream)));
    String line = reader.readLine();
    logger.info("First line in file {}: {}", filePath, line);
    Assert.assertEquals(EVENT_BODY_1, line);

    // The rest of this test is commented-out (will fail) for 2 reasons:
    //
    // (1) At the time of this writing, Hadoop has a bug which causes the
    // non-native gzip implementation to create invalid gzip files when
    // finish() and resetState() are called. See HADOOP-8522.
    //
    // (2) Even if HADOOP-8522 is fixed, the JDK GZipInputStream is unable
    // to read multi-member (concatenated) gzip files. See this Sun bug:
    // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4691425
    //
    //line = reader.readLine();
    //logger.info("Second line in file {}: {}", filePath, line);
    //Assert.assertEquals(EVENT_BODY_2, line);
  }

  if (!KEEP_DATA) {
    fs.delete(outputDirPath, true);
  }

  cluster.shutdown();
  cluster = null;
}
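Relative to Example 1, the only functional change is in the sink configuration: hdfs.fileType is set to HDFSWriterFactory.CompStreamType and hdfs.codeC to gzip, so the sink writes the events through a compressed stream. Only the first event is asserted; the in-code comments explain why reading the second one back is expected to fail (HADOOP-8522 and the JDK GZIPInputStream's inability to read multi-member gzip files).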
Example 3
Source File: TestHDFSEventSink.java (from mt-flume, Apache License 2.0)
/**
 * Ensure that when a write throws an IOException we are able to continue
 * to progress in the next process() call.
 * This relies on Transactional rollback semantics for durability and the
 * behavior of the BucketWriter class, which close()s upon IOException.
 */
@Test
public void testCloseReopen() throws InterruptedException,
    LifecycleException, EventDeliveryException, IOException {

  LOG.debug("Starting...");
  final int numBatches = 4;
  final String fileName = "FlumeData";
  final long rollCount = 5;
  final long batchSize = 2;
  String newPath = testPath + "/singleBucket";
  int i = 1, j = 1;

  HDFSBadWriterFactory badWriterFactory = new HDFSBadWriterFactory();
  sink = new HDFSEventSink(badWriterFactory);

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  Context context = new Context();
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.fileType", HDFSBadWriterFactory.BadSequenceFileType);
  Configurables.configure(sink, context);

  MemoryChannel channel = new MemoryChannel();
  Configurables.configure(channel, new Context());

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();

  // push the event batches into channel
  for (i = 1; i < numBatches; i++) {
    channel.getTransaction().begin();
    try {
      for (j = 1; j <= batchSize; j++) {
        Event event = new SimpleEvent();
        eventDate.clear();
        eventDate.set(2011, i, i, i, 0); // year, month, day, hour, minute
        event.getHeaders().put("timestamp",
            String.valueOf(eventDate.getTimeInMillis()));
        event.getHeaders().put("hostname", "Host" + i);
        String body = "Test." + i + "." + j;
        event.setBody(body.getBytes());
        bodies.add(body);
        // inject fault
        event.getHeaders().put("fault-until-reopen", "");
        channel.put(event);
      }
      channel.getTransaction().commit();
    } finally {
      channel.getTransaction().close();
    }
    LOG.info("execute sink to process the events: " + sink.process());
  }
  LOG.info("clear any events pending due to errors: " + sink.process());

  sink.stop();

  verifyOutputSequenceFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
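This example deliberately injects write failures: the sink is constructed with an HDFSBadWriterFactory and each event carries a fault-until-reopen header, which the bad writer uses to simulate an IOException. As the Javadoc notes, the failed sink transaction rolls back, so the put() events remain in the channel, and later process() calls (including the final "clear any events pending due to errors" call) can deliver them after the BucketWriter closes and reopens.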