org.apache.hadoop.io.SequenceFile.Reader Java Examples

The following examples show how to use org.apache.hadoop.io.SequenceFile.Reader. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CubeStatsReader.java    From kylin with Apache License 2.0 6 votes vote down vote up
public CubeStatsResult(Path path, int precision) throws IOException {
    Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();
    Option seqInput = SequenceFile.Reader.file(path);
    try (Reader reader = new SequenceFile.Reader(hadoopConf, seqInput)) {
        LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), hadoopConf);
        BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), hadoopConf);
        while (reader.next(key, value)) {
            if (key.get() == 0L) {
                percentage = Bytes.toInt(value.getBytes());
            } else if (key.get() == -1) {
                mapperOverlapRatio = Bytes.toDouble(value.getBytes());
            } else if (key.get() == -2) {
                mapperNumber = Bytes.toInt(value.getBytes());
            } else if (key.get() == -3) {
                sourceRecordCount = Bytes.toLong(value.getBytes());
            } else if (key.get() > 0) {
                HLLCounter hll = new HLLCounter(precision);
                ByteArray byteArray = new ByteArray(value.getBytes());
                hll.readRegisters(byteArray.asBuffer());
                counterMap.put(key.get(), hll);
            }
        }
    }
}
 
Example #2
Source File: TestSequenceFileSerialization.java    From hadoop-gpu with Apache License 2.0 6 votes vote down vote up
public void testJavaSerialization() throws Exception {
  Path file = new Path(System.getProperty("test.build.data",".") +
      "/test.seq");
  
  fs.delete(file, true);
  Writer writer = SequenceFile.createWriter(fs, conf, file, Long.class,
      String.class);
  
  writer.append(1L, "one");
  writer.append(2L, "two");
  
  writer.close();
  
  Reader reader = new Reader(fs, file, conf);
  assertEquals(1L, reader.next((Object) null));
  assertEquals("one", reader.getCurrentValue((Object) null));
  assertEquals(2L, reader.next((Object) null));
  assertEquals("two", reader.getCurrentValue((Object) null));
  assertNull(reader.next((Object) null));
  reader.close();
  
}
 
Example #3
Source File: TestSequenceFileSerialization.java    From RDFS with Apache License 2.0 6 votes vote down vote up
public void testJavaSerialization() throws Exception {
  Path file = new Path(System.getProperty("test.build.data",".") +
      "/test.seq");
  
  fs.delete(file, true);
  Writer writer = SequenceFile.createWriter(fs, conf, file, Long.class,
      String.class);
  
  writer.append(1L, "one");
  writer.append(2L, "two");
  
  writer.close();
  
  Reader reader = new Reader(fs, file, conf);
  assertEquals(1L, reader.next((Object) null));
  assertEquals("one", reader.getCurrentValue((Object) null));
  assertEquals(2L, reader.next((Object) null));
  assertEquals("two", reader.getCurrentValue((Object) null));
  assertNull(reader.next((Object) null));
  reader.close();
  
}
 
Example #4
Source File: SequenceFileReaderBuilder.java    From mrgeo with Apache License 2.0 6 votes vote down vote up
public Reader build() throws IOException
{
  when(sequenceFileReader.getKeyClass()).thenReturn(keyValueHelper.getKeyClass());
  when(sequenceFileReader.getValueClass()).thenReturn(keyValueHelper.getValueClass());

  when(sequenceFileReader.next(any(Writable.class), any(Writable.class))).thenAnswer(new Answer<Boolean>()
  {
    @Override
    public Boolean answer(InvocationOnMock invocationOnMock) throws Throwable
    {
      // Get the key and value
      Object[] args = invocationOnMock.getArguments();
      Writable key = (Writable) args[0];
      Writable value = (Writable) args[1];
      return keyValueHelper.next(key, value);
    }
  });

  return sequenceFileReader;
}
 
Example #5
Source File: BlurInputFormatTest.java    From incubator-retired-blur with Apache License 2.0 6 votes vote down vote up
private void walkOutput(Path output, Configuration conf, ResultReader resultReader) throws IOException {
  FileSystem fileSystem = output.getFileSystem(conf);
  FileStatus fileStatus = fileSystem.getFileStatus(output);
  if (fileStatus.isDir()) {
    FileStatus[] listStatus = fileSystem.listStatus(output, new PathFilter() {
      @Override
      public boolean accept(Path path) {
        return !path.getName().startsWith("_");
      }
    });
    for (FileStatus fs : listStatus) {
      walkOutput(fs.getPath(), conf, resultReader);
    }
  } else {
    Reader reader = new SequenceFile.Reader(fileSystem, output, conf);
    Text rowId = new Text();
    TableBlurRecord tableBlurRecord = new TableBlurRecord();
    while (reader.next(rowId, tableBlurRecord)) {
      resultReader.read(rowId, tableBlurRecord);
    }
    reader.close();
  }
}
 
Example #6
Source File: SnapshotIndexDeletionPolicy.java    From incubator-retired-blur with Apache License 2.0 6 votes vote down vote up
private void loadGenerations() throws IOException {
  FileSystem fileSystem = _path.getFileSystem(_configuration);
  FileStatus[] listStatus = fileSystem.listStatus(_path);
  SortedSet<FileStatus> existing = new TreeSet<FileStatus>(Arrays.asList(listStatus));
  if (existing.isEmpty()) {
    return;
  }
  FileStatus last = existing.last();
  Reader reader = new SequenceFile.Reader(fileSystem, last.getPath(), _configuration);
  Text key = new Text();
  LongWritable value = new LongWritable();
  while (reader.next(key, value)) {
    String name = key.toString();
    long gen = value.get();
    _namesToGenerations.put(name, gen);
    Set<String> names = _generationsToNames.get(gen);
    if (names == null) {
      names = Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
      _generationsToNames.put(gen, names);
    }
    names.add(name);
  }
  reader.close();
  existing.remove(last);
  cleanupOldFiles(fileSystem, existing);
}
 
Example #7
Source File: BaseCuboidMapperPerformanceTest.java    From Kylin with Apache License 2.0 6 votes vote down vote up
@Ignore("convenient trial tool for dev")
@Test
public void test() throws IOException, InterruptedException {
    Configuration hconf = new Configuration();
    BaseCuboidMapper mapper = new BaseCuboidMapper();
    Context context = MockupMapContext.create(hconf, metadataUrl, cubeName, null);

    mapper.setup(context);

    Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath));
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf);
    Text value = new Text();

    while (reader.next(key, value)) {
        mapper.map(key, value, context);
    }

    reader.close();
}
 
Example #8
Source File: HiveToBaseCuboidMapperPerformanceTest.java    From kylin with Apache License 2.0 6 votes vote down vote up
@Ignore("convenient trial tool for dev")
@Test
public void test() throws IOException, InterruptedException {
    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    HiveToBaseCuboidMapper mapper = new HiveToBaseCuboidMapper();
    Context context = MockupMapContext.create(hconf, metadataUrl, cubeName, null);

    mapper.doSetup(context);

    Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath));
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf);
    Text value = new Text();

    while (reader.next(key, value)) {
        mapper.map(key, value, context);
    }

    reader.close();
}
 
Example #9
Source File: SequenceFileRead.java    From big-data-lite with MIT License 6 votes vote down vote up
public static void main(String[] args) throws IOException {
String uri = args[0];
       String split = args[1];
Configuration conf = new Configuration();
Path path = new Path(uri);
SequenceFile.Reader reader = null;
try {		
reader = new SequenceFile.Reader(conf, Reader.file(path));
       Text key = new Text();
       OrdImageWritable value = new OrdImageWritable();

       int num = 0;

while (reader.next(key, value)) {
           System.out.println(key.toString() + " " + value.getByteLength());
           ImageIO.write(value.getImage(), "jpg", new File("image" +split+"_" + num++ + ".jpg"));
}
} finally {
	IOUtils.closeStream(reader);
	}		
}
 
Example #10
Source File: TestSequenceFileSerialization.java    From big-c with Apache License 2.0 6 votes vote down vote up
public void testJavaSerialization() throws Exception {
  Path file = new Path(System.getProperty("test.build.data",".") +
      "/testseqser.seq");
  
  fs.delete(file, true);
  Writer writer = SequenceFile.createWriter(fs, conf, file, Long.class,
      String.class);
  
  writer.append(1L, "one");
  writer.append(2L, "two");
  
  writer.close();
  
  Reader reader = new Reader(fs, file, conf);
  assertEquals(1L, reader.next((Object) null));
  assertEquals("one", reader.getCurrentValue((Object) null));
  assertEquals(2L, reader.next((Object) null));
  assertEquals("two", reader.getCurrentValue((Object) null));
  assertNull(reader.next((Object) null));
  reader.close();
  
}
 
Example #11
Source File: DistCpV1.java    From big-c with Apache License 2.0 6 votes vote down vote up
static private void finalize(Configuration conf, JobConf jobconf,
    final Path destPath, String presevedAttributes) throws IOException {
  if (presevedAttributes == null) {
    return;
  }
  EnumSet<FileAttribute> preseved = FileAttribute.parse(presevedAttributes);
  if (!preseved.contains(FileAttribute.USER)
      && !preseved.contains(FileAttribute.GROUP)
      && !preseved.contains(FileAttribute.PERMISSION)) {
    return;
  }

  FileSystem dstfs = destPath.getFileSystem(conf);
  Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
  try (SequenceFile.Reader in =
      new SequenceFile.Reader(jobconf, Reader.file(dstdirlist))) {
    Text dsttext = new Text();
    FilePair pair = new FilePair(); 
    for(; in.next(dsttext, pair); ) {
      Path absdst = new Path(destPath, pair.output);
      updateDestStatus(pair.input, dstfs.getFileStatus(absdst),
          preseved, dstfs);
    }
  }
}
 
Example #12
Source File: TestSequenceFileSerialization.java    From hadoop with Apache License 2.0 6 votes vote down vote up
public void testJavaSerialization() throws Exception {
  Path file = new Path(System.getProperty("test.build.data",".") +
      "/testseqser.seq");
  
  fs.delete(file, true);
  Writer writer = SequenceFile.createWriter(fs, conf, file, Long.class,
      String.class);
  
  writer.append(1L, "one");
  writer.append(2L, "two");
  
  writer.close();
  
  Reader reader = new Reader(fs, file, conf);
  assertEquals(1L, reader.next((Object) null));
  assertEquals("one", reader.getCurrentValue((Object) null));
  assertEquals(2L, reader.next((Object) null));
  assertEquals("two", reader.getCurrentValue((Object) null));
  assertNull(reader.next((Object) null));
  reader.close();
  
}
 
Example #13
Source File: HiveToBaseCuboidMapperPerformanceTest.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
@Ignore("convenient trial tool for dev")
@Test
public void test() throws IOException, InterruptedException {
    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    HiveToBaseCuboidMapper mapper = new HiveToBaseCuboidMapper();
    Context context = MockupMapContext.create(hconf, metadataUrl, cubeName, null);

    mapper.doSetup(context);

    Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath));
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf);
    Text value = new Text();

    while (reader.next(key, value)) {
        mapper.map(key, value, context);
    }

    reader.close();
}
 
Example #14
Source File: DistCpV1.java    From hadoop with Apache License 2.0 6 votes vote down vote up
static private void finalize(Configuration conf, JobConf jobconf,
    final Path destPath, String presevedAttributes) throws IOException {
  if (presevedAttributes == null) {
    return;
  }
  EnumSet<FileAttribute> preseved = FileAttribute.parse(presevedAttributes);
  if (!preseved.contains(FileAttribute.USER)
      && !preseved.contains(FileAttribute.GROUP)
      && !preseved.contains(FileAttribute.PERMISSION)) {
    return;
  }

  FileSystem dstfs = destPath.getFileSystem(conf);
  Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
  try (SequenceFile.Reader in =
      new SequenceFile.Reader(jobconf, Reader.file(dstdirlist))) {
    Text dsttext = new Text();
    FilePair pair = new FilePair(); 
    for(; in.next(dsttext, pair); ) {
      Path absdst = new Path(destPath, pair.output);
      updateDestStatus(pair.input, dstfs.getFileStatus(absdst),
          preseved, dstfs);
    }
  }
}
 
Example #15
Source File: CubeStatsReader.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
public CubeStatsResult(Path path, int precision) throws IOException {
    Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();
    Option seqInput = SequenceFile.Reader.file(path);
    try (Reader reader = new SequenceFile.Reader(hadoopConf, seqInput)) {
        LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), hadoopConf);
        BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), hadoopConf);
        while (reader.next(key, value)) {
            if (key.get() == 0L) {
                percentage = Bytes.toInt(value.getBytes());
            } else if (key.get() == -1) {
                mapperOverlapRatio = Bytes.toDouble(value.getBytes());
            } else if (key.get() == -2) {
                mapperNumber = Bytes.toInt(value.getBytes());
            } else if (key.get() == -3) {
                sourceRecordCount = Bytes.toLong(value.getBytes());
            } else if (key.get() > 0) {
                HLLCounter hll = new HLLCounter(precision);
                ByteArray byteArray = new ByteArray(value.getBytes());
                hll.readRegisters(byteArray.asBuffer());
                counterMap.put(key.get(), hll);
            }
        }
    }
}
 
Example #16
Source File: ValueReader.java    From nifi with Apache License 2.0 5 votes vote down vote up
@Override
public Set<FlowFile> readSequenceFile(final Path file, Configuration configuration, FileSystem fileSystem) throws IOException {

    Set<FlowFile> flowFiles = new HashSet<>();
    final SequenceFile.Reader reader = new SequenceFile.Reader(configuration, Reader.file(fileSystem.makeQualified(file)));
    final String inputfileName = file.getName() + "." + System.nanoTime() + ".";
    int counter = 0;
    LOG.debug("Reading from sequence file {}", new Object[]{file});
    final OutputStreamWritableCallback writer = new OutputStreamWritableCallback(reader);
    Text key = new Text();
    try {
        while (reader.next(key)) {
            String fileName = key.toString();
            // the key may be a file name, and may not
            if (LOOKS_LIKE_FILENAME.matcher(fileName).matches()) {
                if (fileName.contains(File.separator)) {
                    fileName = StringUtils.substringAfterLast(fileName, File.separator);
                }
                fileName = fileName + "." + System.nanoTime();
            } else {
                fileName = inputfileName + ++counter;
            }
            FlowFile flowFile = session.create();
            flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), fileName);
            try {
                flowFile = session.write(flowFile, writer);
                flowFiles.add(flowFile);
            } catch (ProcessException e) {
                LOG.error("Could not write to flowfile {}", new Object[]{flowFile}, e);
                session.remove(flowFile);
            }
            key.clear();
        }
    } finally {
        IOUtils.closeQuietly(reader);
    }

    return flowFiles;
}
 
Example #17
Source File: UpdateClusterJob.java    From recsys-offline with Apache License 2.0 5 votes vote down vote up
public void map(ImmutableBytesWritable row, Result result,
		Context context) throws IOException, InterruptedException {
	String yrstr = Bytes.toString(result.getValue(
			Constants.hbase_column_family.getBytes(),
			Constants.hbase_column_yearrate.getBytes()));
	String rltstr = Bytes.toString(result.getValue(
			Constants.hbase_column_family.getBytes(),
			Constants.hbase_column_repaylimittime.getBytes()));

	List<String> list = HdfsHelper
			.ls(Constants.hdfs_kmeans_point_output_path);
	String clusterid = null;
	for (String file : list) {
		if (file.contains("_")) {
			continue;
		}
		SequenceFile.Reader reader = new SequenceFile.Reader(
				HBaseContext.config, Reader.file(new Path(file)));
		IntWritable clusterId = new IntWritable();
		WeightedPropertyVectorWritable value = new WeightedPropertyVectorWritable();
		while (reader.next(clusterId, value)) {
			String yearrate = String.valueOf(value.getVector().get(0));
			String repaylimittime = String.valueOf(value.getVector()
					.get(1));
			if (yrstr.equals(yearrate) && rltstr.equals(repaylimittime)) {
				clusterid = clusterId.toString();
				break;
			}
		}

		reader.close();
	}

	key.set(row.get());
	value.set(clusterid);
	clusterid = null;
	context.write(key, value);
}
 
Example #18
Source File: ValueReader.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
@Override
public Set<FlowFile> readSequenceFile(final Path file, Configuration configuration, FileSystem fileSystem) throws IOException {

    Set<FlowFile> flowFiles = new HashSet<>();
    final SequenceFile.Reader reader = new SequenceFile.Reader(configuration, Reader.file(fileSystem.makeQualified(file)));
    final String inputfileName = file.getName() + "." + System.nanoTime() + ".";
    int counter = 0;
    LOG.debug("Reading from sequence file {}", new Object[]{file});
    final OutputStreamWritableCallback writer = new OutputStreamWritableCallback(reader);
    Text key = new Text();
    try {
        while (reader.next(key)) {
            String fileName = key.toString();
            // the key may be a file name, and may not
            if (LOOKS_LIKE_FILENAME.matcher(fileName).matches()) {
                if (fileName.contains(File.separator)) {
                    fileName = StringUtils.substringAfterLast(fileName, File.separator);
                }
                fileName = fileName + "." + System.nanoTime();
            } else {
                fileName = inputfileName + ++counter;
            }
            FlowFile flowFile = session.create();
            flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), fileName);
            try {
                flowFile = session.write(flowFile, writer);
                flowFiles.add(flowFile);
            } catch (ProcessException e) {
                LOG.error("Could not write to flowfile {}", new Object[]{flowFile}, e);
                session.remove(flowFile);
            }
            key.clear();
        }
    } finally {
        IOUtils.closeQuietly(reader);
    }

    return flowFiles;
}
 
Example #19
Source File: GeoWaveNNIT.java    From geowave with Apache License 2.0 5 votes vote down vote up
private int readFile() throws IllegalArgumentException, IOException {
  int count = 0;
  final FileSystem fs = FileSystem.get(MapReduceTestUtils.getConfiguration());
  final FileStatus[] fss =
      fs.listStatus(
          new Path(
              TestUtils.TEMP_DIR
                  + File.separator
                  + MapReduceTestEnvironment.HDFS_BASE_DIRECTORY
                  + "/t1/pairs"));
  for (final FileStatus ifs : fss) {
    if (ifs.isFile() && ifs.getPath().toString().matches(".*part-r-0000[0-9]")) {
      try (SequenceFile.Reader reader =
          new SequenceFile.Reader(
              MapReduceTestUtils.getConfiguration(),
              Reader.file(ifs.getPath()))) {

        final Text key = new Text();
        final Text val = new Text();

        while (reader.next(key, val)) {
          count++;
        }
      }
    }
  }
  return count;
}
 
Example #20
Source File: MergeSortRowIdMatcher.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
private MyReader openReader(SegmentKey segmentKey) throws IOException {
  Path file = getCacheFilePath(segmentKey);
  FileSystem fileSystem = _cachePath.getFileSystem(_configuration);
  if (!fileSystem.exists(file)) {
    createCacheFile(file, segmentKey);
  }
  Reader reader = new SequenceFile.Reader(_configuration, SequenceFile.Reader.file(file));
  return new MyReader(reader);
}
 
Example #21
Source File: CopySeq.java    From Kylin with Apache License 2.0 5 votes vote down vote up
public static void copyTo64MB(String src, String dst) throws IOException {
    Configuration hconf = new Configuration();
    Path srcPath = new Path(src);
    Path dstPath = new Path(dst);

    FileSystem fs = FileSystem.get(hconf);
    long srcSize = fs.getFileStatus(srcPath).getLen();
    int copyTimes = (int) (67108864 / srcSize); // 64 MB
    System.out.println("Copy " + copyTimes + " times");

    Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath));
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf);
    Text value = new Text();

    Writer writer = SequenceFile.createWriter(hconf, Writer.file(dstPath), Writer.keyClass(key.getClass()), Writer.valueClass(Text.class), Writer.compression(CompressionType.BLOCK, getLZOCodec(hconf)));

    int count = 0;
    while (reader.next(key, value)) {
        for (int i = 0; i < copyTimes; i++) {
            writer.append(key, value);
            count++;
        }
    }

    System.out.println("Len: " + writer.getLength());
    System.out.println("Rows: " + count);

    reader.close();
    writer.close();
}
 
Example #22
Source File: WritableValueInputFormatTest.java    From kangaroo with Apache License 2.0 5 votes vote down vote up
@Test
public void testGetSplits() throws Exception {
    final WritableValueInputFormat<Text> inputFormat = spy(new WritableValueInputFormat<Text>());

    final Reader reader = mock(Reader.class);
    doReturn(reader).when(inputFormat).getReader(conf, path);
    doReturn(fs).when(inputFormat).getFileSystem(conf);
    when(fs.makeQualified(any(Path.class))).thenReturn(path);
    when(fs.getFileStatus(path)).thenReturn(new FileStatus(100, false, 1, 10, 1, path));

    conf.set(WritableValueInputFormat.INPUT_FILE_LOCATION_CONF, "/tmp/input_file");
    conf.setInt(WritableValueInputFormat.INPUTS_PER_SPLIT_CONF, 2);
    conf.setClass(WritableValueInputFormat.VALUE_TYPE_CONF, Text.class, Writable.class);

    final JobContext jobCtx = mock(JobContext.class);
    when(jobCtx.getConfiguration()).thenReturn(conf);

    // 3 inputs
    when(reader.next(any(NullWritable.class), any(Text.class))).thenReturn(true).thenReturn(true).thenReturn(true)
            .thenReturn(false);

    // getPosition gets called after each for loop invocation, so only need two of these
    when(reader.getPosition()).thenReturn(30l).thenReturn(60l);

    final List<InputSplit> result = inputFormat.getSplits(jobCtx);

    assertEquals(2, result.size());
    final FileSplit fileSplit1 = (FileSplit) result.get(0);
    assertEquals(0, fileSplit1.getStart());
    assertEquals(30, fileSplit1.getLength());
    assertEquals(path, fileSplit1.getPath());

    final FileSplit fileSplit2 = (FileSplit) result.get(1);
    assertEquals(30, fileSplit2.getStart());
    assertEquals(70, fileSplit2.getLength());
    assertEquals(path, fileSplit2.getPath());
}
 
Example #23
Source File: SequenceFileUtil.java    From alchemy with Apache License 2.0 5 votes vote down vote up
public static void readSequenceFile(String path) throws Exception{
	Reader.Option filePath = Reader.file(new Path(path));//指定文件路径
	Reader sqReader = new Reader(configuration,filePath);//构造read而对象
	
	Writable key = (Writable)ReflectionUtils.newInstance(sqReader.getKeyClass(), configuration);
	Writable value = (Writable)ReflectionUtils.newInstance(sqReader.getValueClass(), configuration);
	
	while(sqReader.next(key, value)){
		logger.info("key:"+key+",value:"+value);
	}
}
 
Example #24
Source File: CircusTrainCopyListingTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
@Test
public void typical() throws IOException {
  File input = temp.newFolder("input");
  File inputSub2 = new File(input, "sub1/sub2");
  inputSub2.mkdirs();
  Files.asCharSink(new File(inputSub2, "data"), UTF_8).write("test1");

  File listFile = temp.newFile("listFile");
  Path pathToListFile = new Path(listFile.toURI());

  List<Path> sourceDataLocations = new ArrayList<>();
  sourceDataLocations.add(new Path(inputSub2.toURI()));
  DistCpOptions options = new DistCpOptions(sourceDataLocations, new Path("dummy"));

  CircusTrainCopyListing.setRootPath(conf, new Path(input.toURI()));
  CircusTrainCopyListing copyListing = new CircusTrainCopyListing(conf, null);
  copyListing.doBuildListing(pathToListFile, options);

  try (Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(pathToListFile))) {
    Text key = new Text();
    CopyListingFileStatus value = new CopyListingFileStatus();

    assertTrue(reader.next(key, value));
    assertThat(key.toString(), is("/sub1/sub2"));
    assertThat(value.getPath().toUri().toString(), endsWith("/input/sub1/sub2"));

    assertTrue(reader.next(key, value));
    assertThat(key.toString(), is("/sub1/sub2/data"));
    assertThat(value.getPath().toUri().toString(), endsWith("/input/sub1/sub2/data"));

    assertFalse(reader.next(key, value));

  }
}
 
Example #25
Source File: SeqFileReader.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 5 votes vote down vote up
public static Reader getSeqFileReader(String filename) throws IOException {
  // read from local filesystem
  Configuration conf = new Configuration();
  if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
    conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
  }
  FileSystem fs = FileSystem.get(conf);
  LOG.info("Opening SequenceFile " + filename);
  return new SequenceFile.Reader(fs, new Path(filename), conf);
}
 
Example #26
Source File: SeqFileReader.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 5 votes vote down vote up
public static Object getFirstValue(String filename) throws IOException {
  Reader r = null;
  try {
    // read from local filesystem
    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
      conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);
    r = new SequenceFile.Reader(fs, new Path(filename), conf);
    Object key = ReflectionUtils.newInstance(r.getKeyClass(), conf);
    Object val = ReflectionUtils.newInstance(r.getValueClass(), conf);
    LOG.info("Reading value of type " + r.getValueClassName()
        + " from SequenceFile " + filename);
    r.next(key);
    r.getCurrentValue(val);
    LOG.info("Value as string: " + val.toString());
    return val;
  } finally {
    if (null != r) {
      try {
        r.close();
      } catch (IOException ioe) {
        LOG.warn("IOException during close: " + ioe.toString());
      }
    }
  }
}
 
Example #27
Source File: ProtoMessageReader.java    From tez with Apache License 2.0 5 votes vote down vote up
public ProtoMessageReader(Configuration conf, Path filePath, Parser<T> parser) throws IOException {
  this.filePath = filePath;
  // The writer does not flush the length during hflush. Using length options lets us read
  // past length in the FileStatus but it will throw EOFException during a read instead
  // of returning null.
  this.reader = new Reader(conf, Reader.file(filePath), Reader.length(Long.MAX_VALUE));
  this.writable = new ProtoMessageWritable<>(parser);
}
 
Example #28
Source File: MergeSortRowIdMatcher.java    From incubator-retired-blur with Apache License 2.0 4 votes vote down vote up
public MyReader(Reader reader) {
  _reader = reader;
}
 
Example #29
Source File: ValueReader.java    From nifi with Apache License 2.0 4 votes vote down vote up
private OutputStreamWritableCallback(Reader reader) {
    this.reader = reader;
}
 
Example #30
Source File: KeyValueReader.java    From nifi with Apache License 2.0 4 votes vote down vote up
@Override
public Set<FlowFile> readSequenceFile(Path file, Configuration configuration, FileSystem fileSystem) throws IOException {

    final SequenceFile.Reader reader;

    Set<FlowFile> flowFiles = new HashSet<>();
    reader = new SequenceFile.Reader(configuration, Reader.file(fileSystem.makeQualified(file)));
    final Text key = new Text();
    final KeyValueWriterCallback callback = new KeyValueWriterCallback(reader);
    final String inputfileName = file.getName() + "." + System.nanoTime() + ".";
    int counter = 0;
    LOG.debug("Read from SequenceFile: {} ", new Object[]{file});
    try {
        while (reader.next(key)) {
            String fileName = key.toString();
            // the key may be a file name, and may not
            if (LOOKS_LIKE_FILENAME.matcher(fileName).matches()) {
                if (fileName.contains(File.separator)) {
                    fileName = StringUtils.substringAfterLast(fileName, File.separator);
                }
                fileName = fileName + "." + System.nanoTime();
            } else {
                fileName = inputfileName + ++counter;
            }

            FlowFile flowFile = session.create();
            flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), fileName);
            callback.key = key;
            try {
                flowFile = session.write(flowFile, callback);
                flowFiles.add(flowFile);
            } catch (ProcessException e) {
                LOG.error("Could not write to flowfile {}", new Object[]{flowFile}, e);
                session.remove(flowFile);
            }
            key.clear();
        }
    } finally {
        IOUtils.closeQuietly(reader);
    }

    return flowFiles;
}