org.apache.solr.store.hdfs.HdfsDirectory Java Examples
The following examples show how to use org.apache.solr.store.hdfs.HdfsDirectory, a Lucene Directory implementation that stores Solr index files in HDFS. The source file, originating project, and license are noted above each example.
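For orientation, here is a minimal sketch of opening an HdfsDirectory and writing and reading a file through Lucene's generic Directory API. The HDFS URI and file name are placeholders; the two-argument constructor is the same one used in Examples #5 and #8 below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.solr.store.hdfs.HdfsDirectory;

public class HdfsDirectoryQuickStart {
  public static void main(String[] args) throws Exception {
    // Picks up core-site.xml / hdfs-site.xml from the classpath.
    Configuration conf = new Configuration();
    // Placeholder index location; adjust to your cluster.
    Path indexPath = new Path("hdfs://localhost:8020/solr/example-index");

    try (HdfsDirectory dir = new HdfsDirectory(indexPath, conf)) {
      // Write a small file through the generic Lucene Directory API.
      try (IndexOutput out = dir.createOutput("hello.bin", IOContext.DEFAULT)) {
        out.writeInt(42);
      }
      // Read it back.
      try (IndexInput in = dir.openInput("hello.bin", IOContext.DEFAULT)) {
        System.out.println("stored value: " + in.readInt());
      }
      // List what the directory now contains.
      for (String name : dir.listAll()) {
        System.out.println(name + " -> " + dir.fileLength(name) + " bytes");
      }
    }
  }
}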
Example #1
Source File: HdfsDirectoryFactory.java, from lucene-solr (Apache License 2.0)
@Override
public void move(Directory fromDir, Directory toDir, String fileName, IOContext ioContext) throws IOException {
  Directory baseFromDir = getBaseDir(fromDir);
  Directory baseToDir = getBaseDir(toDir);

  if (baseFromDir instanceof HdfsDirectory && baseToDir instanceof HdfsDirectory) {
    Path dir1 = ((HdfsDirectory) baseFromDir).getHdfsDirPath();
    Path dir2 = ((HdfsDirectory) baseToDir).getHdfsDirPath();
    Path file1 = new Path(dir1, fileName);
    Path file2 = new Path(dir2, fileName);
    FileContext fileContext = FileContext.getFileContext(getConf(dir1));
    fileContext.rename(file1, file2);
    return;
  }

  super.move(fromDir, toDir, fileName, ioContext);
}
Example #2
Source File: HdfsBackupRepository.java, from lucene-solr (Apache License 2.0)
@Override
public void copyFileFrom(Directory sourceDir, String fileName, URI dest) throws IOException {
  try (HdfsDirectory dir = new HdfsDirectory(new Path(dest), NoLockFactory.INSTANCE,
      hdfsConfig, copyBufferSize)) {
    dir.copyFrom(sourceDir, fileName, fileName, DirectoryFactory.IOCONTEXT_NO_CACHE);
  }
}
Example #3
Source File: HdfsBackupRepository.java, from lucene-solr (Apache License 2.0)
@Override
public void copyFileTo(URI sourceRepo, String fileName, Directory dest) throws IOException {
  try (HdfsDirectory dir = new HdfsDirectory(new Path(sourceRepo), NoLockFactory.INSTANCE,
      hdfsConfig, copyBufferSize)) {
    dest.copyFrom(dir, fileName, fileName, DirectoryFactory.IOCONTEXT_NO_CACHE);
  }
}
Example #4
Source File: BlockDirectory.java, from lucene-solr (Apache License 2.0)
private long getFileModified(String name) throws IOException {
  if (in instanceof FSDirectory) {
    File directory = ((FSDirectory) in).getDirectory().toFile();
    File file = new File(directory, name);
    if (!file.exists()) {
      throw new FileNotFoundException("File [" + name + "] not found");
    }
    return file.lastModified();
  } else if (in instanceof HdfsDirectory) {
    return ((HdfsDirectory) in).fileModified(name);
  } else {
    throw new UnsupportedOperationException();
  }
}
Example #5
Source File: CheckHdfsIndexTest.java, from lucene-solr (Apache License 2.0)
@Override
@Before
public void setUp() throws Exception {
  super.setUp();
  Configuration conf = HdfsTestUtil.getClientConfiguration(dfsCluster);
  directory = new HdfsDirectory(path, conf);
}
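Example #5 shows only the setUp() half; the natural counterpart closes the HDFS-backed directory before the test's MiniDFS cluster is torn down elsewhere in the lifecycle. A minimal sketch, assuming the usual JUnit @Before/@After pattern used by these lucene-solr HDFS tests:

@Override
@After
public void tearDown() throws Exception {
  try {
    if (directory != null) {
      // Releases the underlying HDFS streams/FileSystem resources held by HdfsDirectory.
      directory.close();
    }
  } finally {
    super.tearDown();
  }
}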
Example #6
Source File: HdfsBackupRepositoryTest.java, from lucene-solr (Apache License 2.0)
@Test
public void testCopyBufferDefaultSize() throws IOException {
  try (HdfsBackupRepository hdfsBackupRepository = new HdfsBackupRepository()) {
    NamedList<Object> namedList = new SimpleOrderedMap<>();
    namedList.add(HdfsDirectoryFactory.HDFS_HOME, "hdfs://localhost");
    hdfsBackupRepository.init(namedList);
    assertEquals(hdfsBackupRepository.copyBufferSize, HdfsDirectory.DEFAULT_BUFFER_SIZE);
  }
}
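The test above only verifies the default buffer size. A non-default size can be passed through the same init() NamedList; the parameter key used below ("solr.hdfs.buffer.size") is an assumption, so check HdfsBackupRepository for the authoritative constant. The sketch mirrors the test pattern above:

@Test
public void testCopyBufferNonDefaultSize() throws IOException {
  try (HdfsBackupRepository hdfsBackupRepository = new HdfsBackupRepository()) {
    NamedList<Object> namedList = new SimpleOrderedMap<>();
    namedList.add(HdfsDirectoryFactory.HDFS_HOME, "hdfs://localhost");
    // Assumed parameter key; requests a 256 KB copy buffer instead of the default.
    namedList.add("solr.hdfs.buffer.size", 262144);
    hdfsBackupRepository.init(namedList);
    assertEquals(262144, hdfsBackupRepository.copyBufferSize);
  }
}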
Example #7
Source File: HdfsBackupRepository.java, from lucene-solr (Apache License 2.0)
@Override
public IndexInput openInput(URI dirPath, String fileName, IOContext ctx) throws IOException {
  Path p = new Path(new Path(dirPath), fileName);
  return new HdfsIndexInput(fileName, this.fileSystem, p, HdfsDirectory.DEFAULT_BUFFER_SIZE);
}
Example #8
Source File: TreeMergeOutputFormat.java, from examples (Apache License 2.0)
@Override
public void close(TaskAttemptContext context) throws IOException {
  LOG.debug("Task " + context.getTaskAttemptID() + " merging into dstDir: " + workDir + ", srcDirs: " + shards);
  writeShardNumberFile(context);
  heartBeater.needHeartBeat();
  try {
    Directory mergedIndex = new HdfsDirectory(workDir, context.getConfiguration());

    // TODO: shouldn't we pull the Version from the solrconfig.xml?
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, null)
        .setOpenMode(OpenMode.CREATE)
        .setUseCompoundFile(false)
        //.setMergePolicy(mergePolicy) // TODO: grab tuned MergePolicy from solrconfig.xml?
        //.setMergeScheduler(...)      // TODO: grab tuned MergeScheduler from solrconfig.xml?
        ;

    if (LOG.isDebugEnabled()) {
      writerConfig.setInfoStream(System.out);
    }
    // writerConfig.setRAMBufferSizeMB(100); // improve performance
    // writerConfig.setMaxThreadStates(1);

    // disable compound file to improve performance
    // also see http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html
    // also see defaults in SolrIndexConfig
    MergePolicy mergePolicy = writerConfig.getMergePolicy();
    LOG.debug("mergePolicy was: {}", mergePolicy);
    if (mergePolicy instanceof TieredMergePolicy) {
      ((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0);
      // ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnceExplicit(10000);
      // ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnce(10000);
      // ((TieredMergePolicy) mergePolicy).setSegmentsPerTier(10000);
    } else if (mergePolicy instanceof LogMergePolicy) {
      ((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0);
    }
    LOG.info("Using mergePolicy: {}", mergePolicy);

    IndexWriter writer = new IndexWriter(mergedIndex, writerConfig);

    Directory[] indexes = new Directory[shards.size()];
    for (int i = 0; i < shards.size(); i++) {
      indexes[i] = new HdfsDirectory(shards.get(i), context.getConfiguration());
    }

    context.setStatus("Logically merging " + shards.size() + " shards into one shard");
    LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir);
    long start = System.nanoTime();

    writer.addIndexes(indexes);
    // TODO: avoid intermediate copying of files into dst directory; rename the files into the dir instead (cp -> rename)
    // This can improve performance and turns this phase into a true "logical" merge, completing in constant time.
    // See https://issues.apache.org/jira/browse/LUCENE-4746

    if (LOG.isDebugEnabled()) {
      // Counter records elapsed milliseconds (start was taken with nanoTime).
      context.getCounter(SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString())
          .increment((System.nanoTime() - start) / 1000000);
    }
    float secs = (System.nanoTime() - start) / 1.0e9f;
    LOG.info("Logical merge took {} secs", secs);

    int maxSegments = context.getConfiguration().getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE);
    context.setStatus("Optimizing Solr: forcing mtree merge down to " + maxSegments + " segments");
    LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments);
    start = System.nanoTime();
    if (maxSegments < Integer.MAX_VALUE) {
      writer.forceMerge(maxSegments);
      // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data
      // see http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html
    }
    if (LOG.isDebugEnabled()) {
      context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString())
          .increment((System.nanoTime() - start) / 1000000);
    }
    secs = (System.nanoTime() - start) / 1.0e9f;
    LOG.info("Optimizing Solr: done forcing tree merge down to {} segments in {} secs", maxSegments, secs);

    start = System.nanoTime();
    LOG.info("Optimizing Solr: Closing index writer");
    writer.close();
    secs = (System.nanoTime() - start) / 1.0e9f;
    LOG.info("Optimizing Solr: Done closing index writer in {} secs", secs);
    context.setStatus("Done");
  } finally {
    heartBeater.cancelHeartBeat();
    heartBeater.close();
  }
}