Java Code Examples for org.apache.flume.Event#setBody()
The following examples show how to use org.apache.flume.Event#setBody(). Each example is drawn from an open-source project; the originating source file and its license are noted above each snippet.
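Before the per-project examples, here is a minimal sketch of the pattern they all share: build an Event, hand setBody() a byte array (encoded from a String with an explicit charset), and attach headers. The class name, header key, and payload below are illustrative only and do not come from any of the listed projects.

import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

import org.apache.flume.Event;
import org.apache.flume.event.SimpleEvent;

public class SetBodyExample {
  public static Event buildEvent(String payload) {
    Event event = new SimpleEvent();
    // setBody() takes raw bytes, so encode the String with an explicit charset
    event.setBody(payload.getBytes(StandardCharsets.UTF_8));
    Map<String, String> headers = new HashMap<String, String>();
    headers.put("timestamp", String.valueOf(System.currentTimeMillis()));
    event.setHeaders(headers);
    return event;
  }
}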
Example 1
Source File: GenerateSearchAnalyticsDataImpl.java From searchanalytics-bigdata with MIT License
public Event getJsonEvent(
    final SearchQueryInstruction searchQueryInstruction)
    throws JsonProcessingException {
  final String searchQueryInstructionAsString = getObjectMapper()
      .writeValueAsString(searchQueryInstruction);
  // String writeValueAsString =
  // mapper.writerWithDefaultPrettyPrinter().writeValueAsString(searchQueryInstruction);
  searchEventsLogger.info(searchQueryInstructionAsString);
  final Event event = new JSONEvent();
  event.setBody(searchQueryInstructionAsString.getBytes());
  final Map<String, String> headers = new HashMap<String, String>();
  headers.put("eventId", searchQueryInstruction.getEventIdSuffix());
  headers.put("timestamp", searchQueryInstruction
      .getCreatedTimeStampInMillis().toString());
  if (searchQueryInstruction.getClickedDocId() != null) {
    if (searchQueryInstruction.getFavourite() != null
        && searchQueryInstruction.getFavourite()) {
      headers.put("State", "FAVOURITE");
    } else {
      headers.put("State", "VIEWED");
    }
  }
  event.setHeaders(headers);
  return event;
}
Example 2
Source File: ActivityJsonToAvroInterceptor.java From big-data-lite with MIT License
/**
 * Adds the Schema details to the event.
 * Also converts the Json body to an encoded Avro record
 */
@Override
public Event intercept(Event event) {
  Map<String, String> headers = event.getHeaders();
  // Add schema spec to header
  if (!headers.containsKey(key))
    headers.put(key, value);
  try {
    // Alter the body. Convert to Avro and encode.
    if (event.getBody().length == 0)
      return null;
    Activity record = getActivityRecord(event.getBody());
    // Encode
    outputStream.reset();
    datumWriter.write(record, encoder);
    encoder.flush();
    // Set the event body
    event.setBody(outputStream.toByteArray());
  } catch (Exception e) {
    logger.info("ERROR with JSON: " + event.getBody().toString());
    return null; // swallow event
  }
  return event;
}
Example 3
Source File: ElasticSearchJsonBodyEventSerializerTest.java From searchanalytics-bigdata with MIT License
@Test
public void testESJsonEventSerializer() throws IOException {
  final Event event = new JSONEvent();
  final String writeValueAsString = "{\"hostedmachinename\":\"172.16.9.582\",\"pageurl\":\"http://blahblah:/1881\",\"customerid\":376,\"sessionid\":\"1eaa6cd1-0a71-4d03-aea4-d038921f5c6a\",\"querystring\":null,\"sortorder\":\"asc\",\"pagenumber\":0,\"totalhits\":39,\"hitsshown\":11,\"timestamp\":1397220014988,\"clickeddocid\":null,\"filters\":[{\"code\":\"specification_resolution\",\"value\":\"1024 x 600\"},{\"code\":\"searchfacettype_product_type_level_2\",\"value\":\"Laptops\"}]}";
  event.setBody(writeValueAsString.getBytes());
  final Map<String, String> headers = new HashMap<String, String>();
  headers.put("eventId", UUID.randomUUID().toString());
  event.setHeaders(headers);
  ((XContentBuilder) esSerializer.getContentBuilder(event)).string();
}
Example 4
Source File: EventBuilder.java From mt-flume with Apache License 2.0
/**
 * Instantiate an Event instance based on the provided body and headers.
 * If <code>headers</code> is <code>null</code>, then it is ignored.
 * @param body
 * @param headers
 * @return
 */
public static Event withBody(byte[] body, Map<String, String> headers) {
  Event event = new SimpleEvent();
  if (body == null) {
    body = new byte[0];
  }
  event.setBody(body);
  if (headers != null) {
    event.setHeaders(new HashMap<String, String>(headers));
  }
  return event;
}
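For reference, a short usage sketch of the factory method above; the header key and payload are illustrative. Note that the method copies the supplied header map, so later changes to the caller's map do not affect the event.

// hedged usage sketch of EventBuilder.withBody()
Map<String, String> headers = new HashMap<String, String>();
headers.put("hostname", "host-1"); // illustrative header
Event event = EventBuilder.withBody(
    "hello flume".getBytes(StandardCharsets.UTF_8), headers);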
Example 5
Source File: TestFormatSpeed.java From mt-flume with Apache License 2.0
@Before
public void setUp() {
  events = new ArrayList<Event>();
  Event event = new SimpleEvent();
  Map<String, String> headers = new HashMap<String, String>();
  headers.put("category", "test");
  event.setHeaders(headers);
  event.setBody("".getBytes());
  for (int i = 0; i < 200000; i++) {
    events.add(event);
  }
}
Example 6
Source File: TestHDFSEventSink.java From mt-flume with Apache License 2.0
@Test
public void testCloseOnIdle() throws IOException, EventDeliveryException,
    InterruptedException {
  String hdfsPath = testPath + "/idleClose";
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(hdfsPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);
  Context context = new Context();
  context.put("hdfs.path", hdfsPath);
  /*
   * All three rolling methods are disabled so the only
   * way a file can roll is through the idle timeout.
   */
  context.put("hdfs.rollCount", "0");
  context.put("hdfs.rollSize", "0");
  context.put("hdfs.rollInterval", "0");
  context.put("hdfs.batchSize", "2");
  context.put("hdfs.idleTimeout", "1");
  Configurables.configure(sink, context);
  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);
  sink.setChannel(channel);
  sink.start();
  Transaction txn = channel.getTransaction();
  txn.begin();
  for (int i = 0; i < 10; i++) {
    Event event = new SimpleEvent();
    event.setBody(("test event " + i).getBytes());
    channel.put(event);
  }
  txn.commit();
  txn.close();
  sink.process();
  sink.process();
  Thread.sleep(1001);
  // previous file should have timed out now
  // this can throw an IOException(from the bucketWriter having idleClosed)
  // this is not an issue as the sink will retry and get a fresh bucketWriter
  // so long as the onIdleClose handler properly removes bucket writers that
  // were closed due to idling
  sink.process();
  sink.process();
  Thread.sleep(500); // shouldn't be enough for a timeout to occur
  sink.process();
  sink.process();
  sink.stop();
  FileStatus[] dirStat = fs.listStatus(dirPath);
  Path[] fList = FileUtil.stat2Paths(dirStat);
  Assert.assertEquals("Incorrect content of the directory " +
      StringUtils.join(fList, ","), 2, fList.length);
  Assert.assertTrue(!fList[0].getName().endsWith(".tmp") &&
      !fList[1].getName().endsWith(".tmp"));
  fs.close();
}
Example 7
Source File: AppendIPInterceptor.java From ehousechina with Apache License 2.0
public Event intercept(Event arg0) {
  String eventBody = new String(arg0.getBody(), Charsets.UTF_8);
  String fmt = "%s - %s";
  arg0.setBody(String.format(fmt, serviceId, eventBody).getBytes());
  return arg0;
}
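A hedged sketch of what this interceptor does to a single event; the interceptor instance, its serviceId value, and the input line are made up for illustration.

// assuming an AppendIPInterceptor instance whose serviceId field is "svc-01"
Event in = new SimpleEvent();
in.setBody("GET /index.html".getBytes(Charsets.UTF_8));
Event out = interceptor.intercept(in);
// out.getBody() now decodes to "svc-01 - GET /index.html"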
Example 8
Source File: TestHDFSEventSink.java From mt-flume with Apache License 2.0
private void slowAppendTestHelper(long appendTimeout) throws InterruptedException,
    IOException, LifecycleException, EventDeliveryException {
  final String fileName = "FlumeData";
  final long rollCount = 5;
  final long batchSize = 2;
  final int numBatches = 2;
  String newPath = testPath + "/singleBucket";
  int totalEvents = 0;
  int i = 1, j = 1;

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  // create HDFS sink with slow writer
  HDFSBadWriterFactory badWriterFactory = new HDFSBadWriterFactory();
  sink = new HDFSEventSink(badWriterFactory);

  Context context = new Context();
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.fileType", HDFSBadWriterFactory.BadSequenceFileType);
  context.put("hdfs.appendTimeout", String.valueOf(appendTimeout));
  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();
  // push the event batches into channel
  for (i = 0; i < numBatches; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      event.getHeaders().put("slow", "1500");
      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      channel.put(event);
      totalEvents++;
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    sink.process();
  }
  sink.stop();

  // loop through all the files generated and check their contents
  FileStatus[] dirStat = fs.listStatus(dirPath);
  Path fList[] = FileUtil.stat2Paths(dirStat);

  // check that the roll happened correctly for the given data
  // Note that we'll end up with two files with only a head
  long expectedFiles = totalEvents / rollCount;
  if (totalEvents % rollCount > 0) expectedFiles++;
  Assert.assertEquals("num files wrong, found: " +
      Lists.newArrayList(fList), expectedFiles, fList.length);
  verifyOutputSequenceFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
Example 9
Source File: TestHDFSEventSink.java From mt-flume with Apache License 2.0
@Test
public void testSlowAppendFailure() throws InterruptedException,
    LifecycleException, EventDeliveryException, IOException {
  LOG.debug("Starting...");
  final String fileName = "FlumeData";
  final long rollCount = 5;
  final long batchSize = 2;
  final int numBatches = 2;
  String newPath = testPath + "/singleBucket";
  int i = 1, j = 1;

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  // create HDFS sink with slow writer
  HDFSBadWriterFactory badWriterFactory = new HDFSBadWriterFactory();
  sink = new HDFSEventSink(badWriterFactory);

  Context context = new Context();
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.fileType", HDFSBadWriterFactory.BadSequenceFileType);
  context.put("hdfs.callTimeout", Long.toString(1000));
  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();

  // push the event batches into channel
  for (i = 0; i < numBatches; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      event.getHeaders().put("slow", "1500");
      event.setBody(("Test." + i + "." + j).getBytes());
      channel.put(event);
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    Status status = sink.process();
    // verify that the append returned backoff due to timeout
    Assert.assertEquals(status, Status.BACKOFF);
  }
  sink.stop();
}
Example 10
Source File: TestHDFSEventSink.java From mt-flume with Apache License 2.0
/**
 * Ensure that when a write throws an IOException we are
 * able to continue to progress in the next process() call.
 * This relies on Transactional rollback semantics for durability and
 * the behavior of the BucketWriter class of close()ing upon IOException.
 */
@Test
public void testCloseReopen() throws InterruptedException,
    LifecycleException, EventDeliveryException, IOException {
  LOG.debug("Starting...");
  final int numBatches = 4;
  final String fileName = "FlumeData";
  final long rollCount = 5;
  final long batchSize = 2;
  String newPath = testPath + "/singleBucket";
  int i = 1, j = 1;

  HDFSBadWriterFactory badWriterFactory = new HDFSBadWriterFactory();
  sink = new HDFSEventSink(badWriterFactory);

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  Context context = new Context();
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.fileType", HDFSBadWriterFactory.BadSequenceFileType);
  Configurables.configure(sink, context);

  MemoryChannel channel = new MemoryChannel();
  Configurables.configure(channel, new Context());

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();
  // push the event batches into channel
  for (i = 1; i < numBatches; i++) {
    channel.getTransaction().begin();
    try {
      for (j = 1; j <= batchSize; j++) {
        Event event = new SimpleEvent();
        eventDate.clear();
        eventDate.set(2011, i, i, i, 0); // yy mm dd
        event.getHeaders().put("timestamp",
            String.valueOf(eventDate.getTimeInMillis()));
        event.getHeaders().put("hostname", "Host" + i);
        String body = "Test." + i + "." + j;
        event.setBody(body.getBytes());
        bodies.add(body);
        // inject fault
        event.getHeaders().put("fault-until-reopen", "");
        channel.put(event);
      }
      channel.getTransaction().commit();
    } finally {
      channel.getTransaction().close();
    }
    LOG.info("execute sink to process the events: " + sink.process());
  }
  LOG.info("clear any events pending due to errors: " + sink.process());
  sink.stop();
  verifyOutputSequenceFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
Example 11
Source File: TestHDFSEventSink.java From mt-flume with Apache License 2.0
@Test
public void testBadSimpleAppend() throws InterruptedException,
    LifecycleException, EventDeliveryException, IOException {
  LOG.debug("Starting...");
  final String fileName = "FlumeData";
  final long rollCount = 5;
  final long batchSize = 2;
  final int numBatches = 4;
  String newPath = testPath + "/singleBucket";
  int totalEvents = 0;
  int i = 1, j = 1;

  HDFSBadWriterFactory badWriterFactory = new HDFSBadWriterFactory();
  sink = new HDFSEventSink(badWriterFactory);

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  Context context = new Context();
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.fileType", HDFSBadWriterFactory.BadSequenceFileType);
  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();
  // push the event batches into channel
  for (i = 1; i < numBatches; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      // inject fault
      if ((totalEvents % 30) == 1) {
        event.getHeaders().put("fault-once", "");
      }
      channel.put(event);
      totalEvents++;
    }
    txn.commit();
    txn.close();

    LOG.info("Process events: " + sink.process());
  }
  LOG.info("Process events to end of transaction max: " + sink.process());
  LOG.info("Process events to injected fault: " + sink.process());
  LOG.info("Process events remaining events: " + sink.process());
  sink.stop();
  verifyOutputSequenceFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
Example 12
Source File: TestHDFSEventSink.java From mt-flume with Apache License 2.0
@Test
public void testAppend() throws InterruptedException, LifecycleException,
    EventDeliveryException, IOException {
  LOG.debug("Starting...");
  final long rollCount = 3;
  final long batchSize = 2;
  final String fileName = "FlumeData";

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(testPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  Context context = new Context();
  context.put("hdfs.path", testPath + "/%Y-%m-%d/%H");
  context.put("hdfs.timeZone", "UTC");
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();

  // push the event batches into channel
  for (int i = 1; i < 4; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (int j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      channel.put(event);
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    sink.process();
  }
  sink.stop();
  verifyOutputSequenceFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
Example 13
Source File: TestHDFSEventSink.java From mt-flume with Apache License 2.0
@Test
public void testSimpleAppendLocalTime() throws InterruptedException,
    LifecycleException, EventDeliveryException, IOException {
  final long currentTime = System.currentTimeMillis();
  Clock clk = new Clock() {
    @Override
    public long currentTimeMillis() {
      return currentTime;
    }
  };

  LOG.debug("Starting...");
  final String fileName = "FlumeData";
  final long rollCount = 5;
  final long batchSize = 2;
  final int numBatches = 4;
  String newPath = testPath + "/singleBucket/%s";
  String expectedPath = testPath + "/singleBucket/" +
      String.valueOf(currentTime / 1000);
  int totalEvents = 0;
  int i = 1, j = 1;

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(expectedPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  Context context = new Context();
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.useLocalTimeStamp", String.valueOf(true));
  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.setBucketClock(clk);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();

  // push the event batches into channel
  for (i = 1; i < numBatches; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      channel.put(event);
      totalEvents++;
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    sink.process();
  }
  sink.stop();

  // loop through all the files generated and check their contents
  FileStatus[] dirStat = fs.listStatus(dirPath);
  Path fList[] = FileUtil.stat2Paths(dirStat);

  // check that the roll happened correctly for the given data
  long expectedFiles = totalEvents / rollCount;
  if (totalEvents % rollCount > 0) expectedFiles++;
  Assert.assertEquals("num files wrong, found: " +
      Lists.newArrayList(fList), expectedFiles, fList.length);
  verifyOutputSequenceFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
  // The clock in bucketpath is static, so restore the real clock
  sink.setBucketClock(new SystemClock());
}
Example 14
Source File: TestHDFSEventSink.java From mt-flume with Apache License 2.0
@Test
public void testSimpleAppend() throws InterruptedException,
    LifecycleException, EventDeliveryException, IOException {
  LOG.debug("Starting...");
  final String fileName = "FlumeData";
  final long rollCount = 5;
  final long batchSize = 2;
  final int numBatches = 4;
  String newPath = testPath + "/singleBucket";
  int totalEvents = 0;
  int i = 1, j = 1;

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  Context context = new Context();
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();

  // push the event batches into channel
  for (i = 1; i < numBatches; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      channel.put(event);
      totalEvents++;
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    sink.process();
  }
  sink.stop();

  // loop through all the files generated and check their contents
  FileStatus[] dirStat = fs.listStatus(dirPath);
  Path fList[] = FileUtil.stat2Paths(dirStat);

  // check that the roll happened correctly for the given data
  long expectedFiles = totalEvents / rollCount;
  if (totalEvents % rollCount > 0) expectedFiles++;
  Assert.assertEquals("num files wrong, found: " +
      Lists.newArrayList(fList), expectedFiles, fList.length);
  verifyOutputSequenceFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
Example 15
Source File: TestHDFSEventSink.java From mt-flume with Apache License 2.0
@Test
public void testAvroAppend() throws InterruptedException,
    LifecycleException, EventDeliveryException, IOException {
  LOG.debug("Starting...");
  final long rollCount = 3;
  final long batchSize = 2;
  final String fileName = "FlumeData";
  String newPath = testPath + "/singleTextBucket";
  int totalEvents = 0;
  int i = 1, j = 1;

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  Context context = new Context();
  // context.put("hdfs.path", testPath + "/%Y-%m-%d/%H");
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.writeFormat", "Text");
  context.put("hdfs.fileType", "DataStream");
  context.put("serializer", "AVRO_EVENT");
  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();

  // push the event batches into channel
  for (i = 1; i < 4; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      channel.put(event);
      totalEvents++;
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    sink.process();
  }
  sink.stop();

  // loop through all the files generated and check their contents
  FileStatus[] dirStat = fs.listStatus(dirPath);
  Path fList[] = FileUtil.stat2Paths(dirStat);

  // check that the roll happened correctly for the given data
  long expectedFiles = totalEvents / rollCount;
  if (totalEvents % rollCount > 0) expectedFiles++;
  Assert.assertEquals("num files wrong, found: " +
      Lists.newArrayList(fList), expectedFiles, fList.length);
  verifyOutputAvroFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
Example 16
Source File: TestHDFSEventSink.java From mt-flume with Apache License 2.0
@Test
public void testTextAppend() throws InterruptedException,
    LifecycleException, EventDeliveryException, IOException {
  LOG.debug("Starting...");
  final long rollCount = 3;
  final long batchSize = 2;
  final String fileName = "FlumeData";
  String newPath = testPath + "/singleTextBucket";
  int totalEvents = 0;
  int i = 1, j = 1;

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  Context context = new Context();
  // context.put("hdfs.path", testPath + "/%Y-%m-%d/%H");
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.writeFormat", "Text");
  context.put("hdfs.fileType", "DataStream");
  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();

  // push the event batches into channel
  for (i = 1; i < 4; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      channel.put(event);
      totalEvents++;
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    sink.process();
  }
  sink.stop();

  // loop through all the files generated and check their contents
  FileStatus[] dirStat = fs.listStatus(dirPath);
  Path fList[] = FileUtil.stat2Paths(dirStat);

  // check that the roll happened correctly for the given data
  long expectedFiles = totalEvents / rollCount;
  if (totalEvents % rollCount > 0) expectedFiles++;
  Assert.assertEquals("num files wrong, found: " +
      Lists.newArrayList(fList), expectedFiles, fList.length);
  verifyOutputTextFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
Example 17
Source File: TestHDFSEventSink.java From mt-flume with Apache License 2.0
public void doTestTextBatchAppend(boolean useRawLocalFileSystem)
    throws Exception {
  LOG.debug("Starting...");
  final long rollCount = 10;
  final long batchSize = 2;
  final String fileName = "FlumeData";
  String newPath = testPath + "/singleTextBucket";
  int totalEvents = 0;
  int i = 1, j = 1;

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  Context context = new Context();
  // context.put("hdfs.path", testPath + "/%Y-%m-%d/%H");
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.rollInterval", "0");
  context.put("hdfs.rollSize", "0");
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.writeFormat", "Text");
  context.put("hdfs.useRawLocalFileSystem",
      Boolean.toString(useRawLocalFileSystem));
  context.put("hdfs.fileType", "DataStream");
  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();

  // push the event batches into channel to roll twice
  for (i = 1; i <= (rollCount * 10) / batchSize; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      channel.put(event);
      totalEvents++;
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    sink.process();
  }
  sink.stop();

  // loop through all the files generated and check their contents
  FileStatus[] dirStat = fs.listStatus(dirPath);
  Path fList[] = FileUtil.stat2Paths(dirStat);

  // check that the roll happened correctly for the given data
  long expectedFiles = totalEvents / rollCount;
  if (totalEvents % rollCount > 0) expectedFiles++;
  Assert.assertEquals("num files wrong, found: " +
      Lists.newArrayList(fList), expectedFiles, fList.length);
  // check the contents of all the files
  verifyOutputTextFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
Example 18
Source File: TestRollingFileSink.java From mt-flume with Apache License 2.0
@Test
public void testAppend() throws InterruptedException, LifecycleException,
    EventDeliveryException, IOException {
  Context context = new Context();
  context.put("sink.directory", tmpDir.getPath());
  context.put("sink.rollInterval", "1");
  context.put("sink.batchSize", "1");
  Configurables.configure(sink, context);

  Channel channel = new PseudoTxnMemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  for (int i = 0; i < 10; i++) {
    Event event = new SimpleEvent();
    event.setBody(("Test event " + i).getBytes());
    channel.put(event);
    sink.process();
    Thread.sleep(500);
  }
  sink.stop();

  for (String file : sink.getDirectory().list()) {
    BufferedReader reader = new BufferedReader(new FileReader(new File(
        sink.getDirectory(), file)));
    String lastLine = null;
    String currentLine = null;
    while ((currentLine = reader.readLine()) != null) {
      lastLine = currentLine;
    }
    logger.debug("Produced file:{} lastLine:{}", file, lastLine);
    reader.close();
  }
}
Example 19
Source File: TestRollingFileSink.java From mt-flume with Apache License 2.0
@Test
public void testAppend2() throws InterruptedException, LifecycleException,
    EventDeliveryException, IOException {
  Context context = new Context();
  context.put("sink.directory", tmpDir.getPath());
  context.put("sink.rollInterval", "0");
  context.put("sink.batchSize", "1");
  Configurables.configure(sink, context);

  Channel channel = new PseudoTxnMemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  for (int i = 0; i < 10; i++) {
    Event event = new SimpleEvent();
    event.setBody(("Test event " + i).getBytes());
    channel.put(event);
    sink.process();
    Thread.sleep(500);
  }
  sink.stop();

  for (String file : sink.getDirectory().list()) {
    BufferedReader reader = new BufferedReader(new FileReader(new File(
        sink.getDirectory(), file)));
    String lastLine = null;
    String currentLine = null;
    while ((currentLine = reader.readLine()) != null) {
      lastLine = currentLine;
      logger.debug("Produced file:{} lastLine:{}", file, lastLine);
    }
    reader.close();
  }
}