Java Code Examples for org.apache.nifi.provenance.serialization.RecordReaders#newRecordReader()

The following examples show how to use org.apache.nifi.provenance.serialization.RecordReaders#newRecordReader(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: IndexConfiguration.java    From localization_nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the event time of the first record in the given provenance log file.
 *
 * @param provenanceLogFile the log file to inspect; may be {@code null}
 * @return the first record's event time; the file's last-modified time when the reader yields no
 *         record; or {@code null} when the argument is {@code null}, the file is missing or
 *         truncated, or it cannot be read
 */
private Long getFirstEntryTime(final File provenanceLogFile) {
    if (provenanceLogFile == null) {
        return null;
    }

    try (final RecordReader reader = RecordReaders.newRecordReader(provenanceLogFile, null, Integer.MAX_VALUE)) {
        final StandardProvenanceEventRecord firstRecord = reader.nextRecord();
        if (firstRecord == null) {
            // Readable file without any record: fall back to the file's modification time.
            return provenanceLogFile.lastModified();
        }
        return firstRecord.getEventTime();
    } catch (final FileNotFoundException | EOFException fnf) {
        return null; // file no longer exists or there's no record in this file
    } catch (final IOException ioe) {
        // Pass the exception as the trailing argument so the message and the stack trace are
        // emitted as one log event instead of a second, empty-message entry.
        logger.warn("Failed to read first entry in file {} due to {}", provenanceLogFile, ioe.toString(), ioe);
        return null;
    }
}
 
Example 2
Source File: PersistentProvenanceRepository.java    From localization_nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Finds the timestamp of the earliest event that can be read from the sorted provenance log files.
 *
 * @return the event time of the first record found, or {@code 0L} when there are no log files
 *         or none of them yields a record
 */
private long determineFirstEventTimestamp() {
    // Walk the log files in sorted order and report the time of the first event that can be read.
    // An empty list skips the loop and falls through to the 0L default.
    for (final File logFile : getSortedLogFiles()) {
        try (final RecordReader reader = RecordReaders.newRecordReader(logFile, null, Integer.MAX_VALUE)) {
            final StandardProvenanceEventRecord event = reader.nextRecord();
            if (event != null) {
                return event.getEventTime();
            }
        } catch (final IOException ioe) {
            // Move on to the next file, but keep the cause so the failure can be diagnosed.
            logger.warn("Failed to obtain timestamp of first event from Provenance Event Log File {}", logFile, ioe);
        }
    }

    return 0L;
}
 
Example 3
Source File: DumpEventFile.java    From localization_nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Prints every provenance event in the file named by the single command-line argument.
 *
 * @param args exactly one element: the path of the event file to dump
 * @throws IOException if the event file cannot be read
 */
public static void main(final String[] args) throws IOException {
    if (args.length != 1) {
        printUsage();
        return;
    }

    final File eventFile = new File(args[0]);
    if (!eventFile.exists()) {
        System.out.println("Cannot find file " + eventFile.getAbsolutePath());
        return;
    }

    try (final RecordReader reader = RecordReaders.newRecordReader(eventFile, Collections.emptyList(), 65535)) {
        int index = 0;
        for (StandardProvenanceEventRecord event = reader.nextRecord(); event != null; event = reader.nextRecord()) {
            // The offset is sampled after the record has been read.
            final long byteOffset = reader.getBytesConsumed();
            System.out.println(stringify(event, index, byteOffset));
            index++;
        }
    }
}
 
Example 4
Source File: TestPersistentProvenanceRepository.java    From localization_nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Reads every regular file in the storage directory as a provenance journal and checks each
 * record's event ID against a counter that runs across all files.
 *
 * @param storageDir directory whose files are read
 * @param exact when true, each event ID must equal the counter; otherwise the counter must not
 *              exceed the event ID
 * @return the number of records read across all files
 * @throws IOException if a journal file cannot be read
 */
private long checkJournalRecords(final File storageDir, final Boolean exact) throws IOException {
    final File[] storagefiles = storageDir.listFiles();
    long counter = 0;
    assertNotNull(storagefiles);
    for (final File file : storagefiles) {
        if (file.isFile()) {
            try (RecordReader reader = RecordReaders.newRecordReader(file, null, 2048)) {
                ProvenanceEventRecord r;
                while ((r = reader.nextRecord()) != null) {
                    if (exact) {
                        assertTrue(counter++ == r.getEventId());
                    } else {
                        assertTrue(counter++ <= r.getEventId());
                    }
                }
            }
        }
    }
    return counter;
}
 
Example 5
Source File: TestSelectiveRecordReaderEventIterator.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Iterating over a single file name ("1.prov") with a factory that delegates to
 * {@code RecordReaders.newRecordReader} must yield no event.
 */
@Test
public void testFileNotFound() throws IOException {
    // Event IDs the iterator is asked to select.
    final List<Long> eventIds = new ArrayList<>();
    eventIds.add(1L);
    eventIds.add(5L);

    // The only file handed to the iterator.
    final File file1 = new File("1.prov");
    final List<File> files = new ArrayList<>();
    files.add(file1);

    final RecordReaderFactory readerFactory = (file, logs, maxChars) -> RecordReaders.newRecordReader(file, logs, maxChars);

    final SelectiveRecordReaderEventIterator itr = new SelectiveRecordReaderEventIterator(files, readerFactory, eventIds, 65536);
    final Optional<ProvenanceEventRecord> firstRecordOption = itr.nextEvent();
    assertFalse(firstRecordOption.isPresent());
}
 
Example 6
Source File: MiNiFiPersistentProvenanceRepositoryTest.java    From nifi-minifi with Apache License 2.0 6 votes vote down vote up
/**
 * Reads every regular file in the storage directory as a provenance journal and checks each
 * record's event ID against a counter that runs across all files.
 *
 * @param storageDir directory whose files are read
 * @param exact when true, each event ID must equal the counter; otherwise the counter must not
 *              exceed the event ID
 * @return the number of records read across all files
 * @throws IOException if a journal file cannot be read
 */
private long checkJournalRecords(final File storageDir, final Boolean exact) throws IOException {
    final File[] storagefiles = storageDir.listFiles();
    long counter = 0;
    assertNotNull(storagefiles);
    for (final File file : storagefiles) {
        if (file.isFile()) {
            try (RecordReader reader = RecordReaders.newRecordReader(file, null, 2048)) {
                ProvenanceEventRecord r;
                while ((r = reader.nextRecord()) != null) {
                    if (exact) {
                        assertTrue(counter++ == r.getEventId());
                    } else {
                        assertTrue(counter++ <= r.getEventId());
                    }
                }
            }
        }
    }
    return counter;
}
 
Example 7
Source File: ITestPersistentProvenanceRepository.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Reads every regular file in the storage directory as a provenance journal and checks each
 * record's event ID against a counter that runs across all files.
 *
 * @param storageDir directory whose files are read
 * @param exact when true, each event ID must equal the counter; otherwise the counter must not
 *              exceed the event ID
 * @return the number of records read across all files
 * @throws IOException if a journal file cannot be read
 */
private long checkJournalRecords(final File storageDir, final Boolean exact) throws IOException {
    final File[] storagefiles = storageDir.listFiles();
    long counter = 0;
    assertNotNull(storagefiles);
    for (final File file : storagefiles) {
        if (file.isFile()) {
            try (RecordReader reader = RecordReaders.newRecordReader(file, null, 2048)) {
                ProvenanceEventRecord r;
                while ((r = reader.nextRecord()) != null) {
                    if (exact) {
                        assertTrue(counter++ == r.getEventId());
                    } else {
                        assertTrue(counter++ <= r.getEventId());
                    }
                }
            }
        }
    }
    return counter;
}
 
Example 8
Source File: DumpEventFile.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Prints every provenance event in the file named by the single command-line argument.
 *
 * @param args exactly one element: the path of the event file to dump
 * @throws IOException if the event file cannot be read
 */
public static void main(final String[] args) throws IOException {
    if (args.length != 1) {
        printUsage();
        return;
    }

    final File eventFile = new File(args[0]);
    if (!eventFile.exists()) {
        System.out.println("Cannot find file " + eventFile.getAbsolutePath());
        return;
    }

    try (final RecordReader reader = RecordReaders.newRecordReader(eventFile, Collections.emptyList(), 65535)) {
        int index = 0;
        for (StandardProvenanceEventRecord event = reader.nextRecord(); event != null; event = reader.nextRecord()) {
            // The offset is sampled after the record has been read.
            final long byteOffset = reader.getBytesConsumed();
            System.out.println(stringify(event, index, byteOffset));
            index++;
        }
    }
}
 
Example 9
Source File: UpdateMinimumEventId.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Records the maximum event ID of the expired journal file as the minimum indexed event ID.
 * A failure to read the file is logged and otherwise ignored.
 *
 * @param expiredFile the journal file being expired
 * @return the same file that was passed in
 */
@Override
public File execute(final File expiredFile) throws IOException {
    try (final RecordReader reader = RecordReaders.newRecordReader(expiredFile, null, Integer.MAX_VALUE)) {
        final long maxEventId = reader.getMaxEventId();
        indexConfig.setMinIdIndexed(maxEventId);

        logger.info("Updated Minimum Event ID for Provenance Event Repository - Minimum Event ID now {}", maxEventId);
    } catch (final IOException ioe) {
        // Best effort: the minimum ID is left untouched, but the cause is kept in the log.
        logger.warn("Failed to obtain max ID present in journal file {}", expiredFile.getAbsolutePath(), ioe);
    }

    return expiredFile;
}
 
Example 10
Source File: UpdateMinimumEventId.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Records the maximum event ID of the expired journal file as the minimum indexed event ID.
 * A failure to read the file is logged and otherwise ignored.
 *
 * @param expiredFile the journal file being expired
 * @return the same file that was passed in
 */
@Override
public File execute(final File expiredFile) throws IOException {
    try (final RecordReader reader = RecordReaders.newRecordReader(expiredFile, null, Integer.MAX_VALUE)) {
        final long maxEventId = reader.getMaxEventId();
        indexConfig.setMinIdIndexed(maxEventId);

        logger.info("Updated Minimum Event ID for Provenance Event Repository - Minimum Event ID now {}", maxEventId);
    } catch (final IOException ioe) {
        // Best effort: the minimum ID is left untouched, but the cause is kept in the log.
        logger.warn("Failed to obtain max ID present in journal file {}", expiredFile.getAbsolutePath(), ioe);
    }

    return expiredFile;
}
 
Example 11
Source File: PersistentProvenanceRepository.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Deletes the oldest index directory when the earliest event still held in the log files is
 * no older than the start of the second-oldest index, and refreshes {@code firstEventTimestamp}.
 *
 * @throws IOException declared by the original signature; read failures on the first log file
 *                     are caught and logged here
 */
private void purgeExpiredIndexes() throws IOException {
    // Now that we have potentially removed expired Provenance Event Log Files, we can look at
    // whether or not we can delete any of the indexes. An index can be deleted if all of the
    // data that is associated with that index has already been deleted. In order to test this,
    // we will get the timestamp of the earliest event and then compare that to the latest timestamp
    // that would be indexed by the earliest index. If the event occurred after the timestamp of
    // the latest index, then we can just delete the entire index all together.

    // find all of the index directories
    final List<File> indexDirs = getAllIndexDirectories();
    if (indexDirs.size() < 2) {
        this.firstEventTimestamp = determineFirstEventTimestamp();
        return;
    }

    // Indexes are named "index-XXX" where the XXX is the timestamp of the earliest event that
    // could be in the index. Once we have finished with one index, we move on to another index,
    // but we don't move on until we are finished with the previous index.
    // Therefore, an efficient way to determine the latest timestamp of one index is to look at the
    // timestamp of the next index (these could potentially overlap for one millisecond). This is
    // efficient because we can determine the earliest timestamp of an index simply by looking at
    // the name of the Index's directory.
    final long latestTimestampOfFirstIndex = getIndexTimestamp(indexDirs.get(1));

    // Get the timestamp of the first event in the first Provenance Event Log File and the ID of the last event
    // in the event file.
    final List<File> logFiles = getSortedLogFiles();
    if (logFiles.isEmpty()) {
        this.firstEventTimestamp = System.currentTimeMillis();
        return;
    }

    final File firstLogFile = logFiles.get(0);
    long earliestEventTime = System.currentTimeMillis();
    long maxEventId = -1L;
    try (final RecordReader reader = RecordReaders.newRecordReader(firstLogFile, null, Integer.MAX_VALUE)) {
        final StandardProvenanceEventRecord event = reader.nextRecord();
        if (event == null) {
            // The first log file holds no record, so there is no earliest event to compare the
            // index against. Leave the index alone rather than dereference null, and take the
            // first-event timestamp from whichever log file does contain an event.
            logger.debug("Provenance Event Log File {} contains no events; skipping index purge", firstLogFile);
            this.firstEventTimestamp = determineFirstEventTimestamp();
            return;
        }
        earliestEventTime = event.getEventTime();
        maxEventId = reader.getMaxEventId();
    } catch (final IOException ioe) {
        // Defaults (current time, -1) stay in effect; keep the cause in the log.
        logger.warn("Unable to determine the maximum ID for Provenance Event Log File {}; values reported for the number of "
                + "events in the Provenance Repository may be inaccurate.", firstLogFile, ioe);
    }

    // check if we can delete the index safely.
    if (latestTimestampOfFirstIndex <= earliestEventTime) {
        // we can safely delete the first index because the latest event in the index is an event
        // that has already been expired from the repository.
        final File indexingDirectory = indexDirs.get(0);
        getIndexManager().removeIndex(indexingDirectory);
        indexConfig.removeIndexDirectory(indexingDirectory);
        deleteDirectory(indexingDirectory);

        if (maxEventId > -1L) {
            indexConfig.setMinIdIndexed(maxEventId + 1L);
        }
    }

    this.firstEventTimestamp = earliestEventTime;
}
 
Example 12
Source File: PersistentProvenanceRepository.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Collects up to {@code maxRecords} authorized events whose ID is at least {@code firstRecordId},
 * reading the files returned by {@code getPathsForId} in order.
 *
 * @param firstRecordId the lowest event ID to include
 * @param maxRecords the maximum number of events to return
 * @param user the user passed to the authorization check for each event
 * @return the matching events, possibly empty
 * @throws IOException declared by the signature; read failures on individual files are caught,
 *                     logged and reported, and reading continues with the next file
 */
@Override
public List<ProvenanceEventRecord> getEvents(final long firstRecordId, final int maxRecords, final NiFiUser user) throws IOException {
    final List<ProvenanceEventRecord> records = new ArrayList<>(maxRecords);

    final List<Path> paths = getPathsForId(firstRecordId);
    if (paths == null || paths.isEmpty()) {
        return records;
    }

    for (final Path path : paths) {
        try (RecordReader reader = RecordReaders.newRecordReader(path.toFile(), getAllLogFiles(), maxAttributeChars)) {
            // if this is the first record, try to find out the block index and jump directly to
            // the block index. This avoids having to read through a lot of data that we don't care about
            // just to get to the first record that we want.
            if (records.isEmpty()) {
                final TocReader tocReader = reader.getTocReader();
                if (tocReader != null) {
                    final Integer blockIndex = tocReader.getBlockIndexForEventId(firstRecordId);
                    if (blockIndex != null) {
                        reader.skipToBlock(blockIndex);
                    }
                }
            }

            StandardProvenanceEventRecord record;
            while (records.size() < maxRecords && (record = reader.nextRecord()) != null) {
                if (record.getEventId() >= firstRecordId && isAuthorized(record, user)) {
                    records.add(record);
                }
            }
        } catch (final EOFException | FileNotFoundException ignored) {
            // assume file aged off (or there's no data in file, in case of EOFException, which indicates that data was cached
            // in operating system and entire O/S crashed and always.sync was not turned on.)
        } catch (final IOException ioe) {
            // Trailing throwable argument: message and stack trace go out as a single log event
            // instead of a second entry with an empty message.
            logger.error("Failed to read Provenance Event File {} due to {}", path.toFile(), ioe.toString(), ioe);
            eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to read Provenance Event File " + path.toFile() + " due to " + ioe.toString());
        }

        if (records.size() >= maxRecords) {
            break;
        }
    }

    if (logger.isDebugEnabled()) {
        logger.debug("Retrieving up to {} records starting at Event ID {}; returning {} events", maxRecords, firstRecordId, records.size());
    }

    return records;
}
 
Example 13
Source File: ITestPersistentProvenanceRepository.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Registers the same event 10000 times from a 10-thread pool, waits for a rollover, then reads
 * every file in the first storage directory and expects consecutive event IDs starting at 0.
 */
@Test
public void testMergeJournals() throws IOException, InterruptedException {
    assumeFalse(isWindowsEnvironment());
    final RepositoryConfiguration config = createConfiguration();
    config.setMaxEventFileLife(3, TimeUnit.SECONDS);
    repo = new PersistentProvenanceRepository(config, DEFAULT_ROLLOVER_MILLIS);
    repo.initialize(getEventReporter(), null, null, IdentifierLookup.EMPTY);

    final Map<String, String> attributes = new HashMap<>();

    final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder();
    builder.setEventTime(System.currentTimeMillis());
    builder.setEventType(ProvenanceEventType.RECEIVE);
    builder.setTransitUri("nifi://unit-test");
    attributes.put("uuid", "12345678-0000-0000-0000-012345678912");
    builder.fromFlowFile(createFlowFile(3L, 3000L, attributes));
    builder.setComponentId("1234");
    builder.setComponentType("dummy processor");

    final ProvenanceEventRecord record = builder.build();

    final ExecutorService exec = Executors.newFixedThreadPool(10);
    try {
        for (int i = 0; i < 10000; i++) {
            exec.submit(new Runnable() {
                @Override
                public void run() {
                    repo.registerEvent(record);
                }
            });
        }
    } finally {
        // Already-submitted tasks still run; this only lets the worker threads exit afterwards
        // so the pool does not outlive the test.
        exec.shutdown();
    }

    repo.waitForRollover();

    final File storageDir = config.getStorageDirectories().values().iterator().next();
    final File[] storageFiles = storageDir.listFiles();
    // listFiles() returns null when the directory cannot be listed; fail with an assertion, not an NPE.
    assertNotNull(storageFiles);

    long counter = 0;
    for (final File file : storageFiles) {
        if (file.isFile()) {

            try (RecordReader reader = RecordReaders.newRecordReader(file, null, 2048)) {
                ProvenanceEventRecord r = null;

                while ((r = reader.nextRecord()) != null) {
                    assertEquals(counter++, r.getEventId());
                }
            }
        }
    }

    assertEquals(10000, counter);
}
 
Example 14
Source File: DeleteIndexAction.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Removes the index documents that belong to an expired provenance file, deletes any index
 * directory left without documents, and advances the minimum indexed event ID.
 *
 * @param expiredFile the provenance file being expired
 * @return the same file that was passed in
 * @throws IOException if an index operation fails
 */
@Override
public File execute(final File expiredFile) throws IOException {
    // NOTE(review): numDeleted is never updated, so the final log line always reports
    // 0 documents removed. Kept as-is because no deletion count is computed in this method.
    final long numDeleted = 0;

    // determine the max event id that we are deleting.
    long maxEventId = -1L;
    try (final RecordReader reader = RecordReaders.newRecordReader(expiredFile, repository.getAllLogFiles(), Integer.MAX_VALUE)) {
        maxEventId = reader.getMaxEventId();
    } catch (final IOException ioe) {
        // Best effort: maxEventId stays at -1 and the minimum indexed ID is not advanced below.
        logger.warn("Failed to obtain max ID present in journal file {}", expiredFile.getAbsolutePath(), ioe);
    }

    // remove the records from the index
    final List<File> indexDirs = indexConfiguration.getIndexDirectories(expiredFile);
    for (final File indexingDirectory : indexDirs) {
        final Term term = new Term(FieldNames.STORAGE_FILENAME, LuceneUtil.substringBefore(expiredFile.getName(), "."));

        boolean deleteDir = false;
        final EventIndexWriter writer = indexManager.borrowIndexWriter(indexingDirectory);
        try {
            final IndexWriter indexWriter = writer.getIndexWriter();
            indexWriter.deleteDocuments(term);
            indexWriter.commit();
            final int docsLeft = indexWriter.getDocStats().numDocs;
            deleteDir = docsLeft <= 0;
            logger.debug("After expiring {}, there are {} docs left for index {}", expiredFile, docsLeft, indexingDirectory);
        } finally {
            // Always hand the writer back, even when the delete or commit fails.
            indexManager.returnIndexWriter(writer);
        }

        // we've confirmed that all documents have been removed. Delete the index directory.
        if (deleteDir) {
            indexManager.removeIndex(indexingDirectory);
            indexConfiguration.removeIndexDirectory(indexingDirectory);

            deleteDirectory(indexingDirectory);
            logger.info("Removed empty index directory {}", indexingDirectory);
        }
    }

    // Update the minimum index to 1 more than the max Event ID in this file.
    if (maxEventId > -1L) {
        indexConfiguration.setMinIdIndexed(maxEventId + 1L);
    }

    logger.info("Deleted Indices for Expired Provenance File {} from {} index files; {} documents removed", expiredFile, indexDirs.size(), numDeleted);
    return expiredFile;
}
 
Example 15
Source File: MiNiFiPersistentProvenanceRepository.java    From nifi-minifi with Apache License 2.0 4 votes vote down vote up
/**
 * Deletes the oldest index directory when the earliest event still held in the log files is
 * no older than the start of the second-oldest index, and refreshes {@code firstEventTimestamp}.
 *
 * @throws IOException declared by the original signature; read failures on the first log file
 *                     are caught and logged here
 */
private void purgeExpiredIndexes() throws IOException {
    // Now that we have potentially removed expired Provenance Event Log Files, we can look at
    // whether or not we can delete any of the indexes. An index can be deleted if all of the
    // data that is associated with that index has already been deleted. In order to test this,
    // we will get the timestamp of the earliest event and then compare that to the latest timestamp
    // that would be indexed by the earliest index. If the event occurred after the timestamp of
    // the latest index, then we can just delete the entire index all together.

    // find all of the index directories
    final List<File> indexDirs = getAllIndexDirectories();
    if (indexDirs.size() < 2) {
        this.firstEventTimestamp = determineFirstEventTimestamp();
        return;
    }

    // Indexes are named "index-XXX" where the XXX is the timestamp of the earliest event that
    // could be in the index. Once we have finished with one index, we move on to another index,
    // but we don't move on until we are finished with the previous index.
    // Therefore, an efficient way to determine the latest timestamp of one index is to look at the
    // timestamp of the next index (these could potentially overlap for one millisecond). This is
    // efficient because we can determine the earliest timestamp of an index simply by looking at
    // the name of the Index's directory.
    final long latestTimestampOfFirstIndex = getIndexTimestamp(indexDirs.get(1));

    // Get the timestamp of the first event in the first Provenance Event Log File and the ID of the last event
    // in the event file.
    final List<File> logFiles = getSortedLogFiles();
    if (logFiles.isEmpty()) {
        this.firstEventTimestamp = System.currentTimeMillis();
        return;
    }

    final File firstLogFile = logFiles.get(0);
    long earliestEventTime = System.currentTimeMillis();
    long maxEventId = -1L;
    try (final RecordReader reader = RecordReaders.newRecordReader(firstLogFile, null, Integer.MAX_VALUE)) {
        final StandardProvenanceEventRecord event = reader.nextRecord();
        if (event == null) {
            // The first log file holds no record, so there is no earliest event to compare the
            // index against. Leave the index alone rather than dereference null, and take the
            // first-event timestamp from whichever log file does contain an event.
            logger.debug("Provenance Event Log File {} contains no events; skipping index purge", firstLogFile);
            this.firstEventTimestamp = determineFirstEventTimestamp();
            return;
        }
        earliestEventTime = event.getEventTime();
        maxEventId = reader.getMaxEventId();
    } catch (final IOException ioe) {
        // Defaults (current time, -1) stay in effect; keep the cause in the log.
        logger.warn("Unable to determine the maximum ID for Provenance Event Log File {}; values reported for the number of "
                + "events in the Provenance Repository may be inaccurate.", firstLogFile, ioe);
    }

    // check if we can delete the index safely.
    if (latestTimestampOfFirstIndex <= earliestEventTime) {
        // we can safely delete the first index because the latest event in the index is an event
        // that has already been expired from the repository.
        final File indexingDirectory = indexDirs.get(0);
        getIndexManager().removeIndex(indexingDirectory);
        indexConfig.removeIndexDirectory(indexingDirectory);
        deleteDirectory(indexingDirectory);

        if (maxEventId > -1L) {
            indexConfig.setMinIdIndexed(maxEventId + 1L);
        }
    }

    this.firstEventTimestamp = earliestEventTime;
}
 
Example 16
Source File: DocsReader.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Resolves the given index documents to provenance events, grouped by the storage file they
 * refer to, keeping only events the authorizer accepts.
 *
 * @param docs index documents to resolve
 * @param authorizer decides whether each event may be returned
 * @param allProvenanceLogFiles candidate log files used to locate each storage file
 * @param retrievalCount shared counter; incremented once per document considered
 * @param maxResults limit compared against {@code retrievalCount}
 * @param maxAttributeChars passed through to the record reader
 * @return the matching events in encounter order; empty when the limit is already reached
 * @throws IOException declared by the signature; per-file failures are caught and logged
 */
public Set<ProvenanceEventRecord> read(final List<Document> docs, final EventAuthorizer authorizer, final Collection<Path> allProvenanceLogFiles,
        final AtomicInteger retrievalCount, final int maxResults, final int maxAttributeChars) throws IOException {

    if (retrievalCount.get() >= maxResults) {
        return Collections.emptySet();
    }

    final long start = System.nanoTime();
    final Set<ProvenanceEventRecord> matchingRecords = new LinkedHashSet<>();
    final Map<String, List<Document>> byStorageNameDocGroups = LuceneUtil.groupDocsByStorageFileName(docs);

    int eventsReadThisFile = 0;
    int logFileCount = 0;

    for (final Map.Entry<String, List<Document>> docGroup : byStorageNameDocGroups.entrySet()) {
        final String storageFileName = docGroup.getKey();
        final File provenanceEventFile = LuceneUtil.getProvenanceLogFile(storageFileName, allProvenanceLogFiles);
        if (provenanceEventFile == null) {
            logger.warn("Could not find Provenance Log File with "
                + "basename {} in the Provenance Repository; assuming "
                + "file has expired and continuing without it", storageFileName);
            continue;
        }

        try (final RecordReader reader = RecordReaders.newRecordReader(provenanceEventFile, allProvenanceLogFiles, maxAttributeChars)) {
            // Count each log file that was opened, so the timing line below reports a real number.
            logFileCount++;

            final Iterator<Document> docIter = docGroup.getValue().iterator();
            while (docIter.hasNext() && retrievalCount.getAndIncrement() < maxResults) {
                final ProvenanceEventRecord event = getRecord(docIter.next(), reader);
                if (event != null && authorizer.isAuthorized(event)) {
                    matchingRecords.add(event);
                    eventsReadThisFile++;
                }
            }
        } catch (final Exception e) {
            // A bad or missing file must not abort the whole query; log it and continue.
            logger.warn("Failed to read Provenance Events. The event file '"
                + provenanceEventFile.getAbsolutePath() + "' may be missing or corrupt.", e);
        }
    }

    logger.debug("Read {} records from previous file", eventsReadThisFile);
    final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
    logger.debug("Took {} ms to read {} events from {} prov log files", millis, matchingRecords.size(),
            logFileCount);

    return matchingRecords;
}
 
Example 17
Source File: DocsReader.java    From localization_nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Resolves the given index documents to provenance events, grouped by the storage file they
 * refer to, keeping only events the authorizer accepts.
 *
 * @param docs index documents to resolve
 * @param authorizer decides whether each event may be returned
 * @param allProvenanceLogFiles candidate log files used to locate each storage file
 * @param retrievalCount shared counter; incremented once per document considered
 * @param maxResults limit compared against {@code retrievalCount}
 * @param maxAttributeChars passed through to the record reader
 * @return the matching events in encounter order; empty when the limit is already reached
 * @throws IOException declared by the signature; per-file failures are caught and logged
 */
public Set<ProvenanceEventRecord> read(final List<Document> docs, final EventAuthorizer authorizer, final Collection<Path> allProvenanceLogFiles,
        final AtomicInteger retrievalCount, final int maxResults, final int maxAttributeChars) throws IOException {

    if (retrievalCount.get() >= maxResults) {
        return Collections.emptySet();
    }

    final long start = System.nanoTime();
    final Set<ProvenanceEventRecord> matchingRecords = new LinkedHashSet<>();
    final Map<String, List<Document>> byStorageNameDocGroups = LuceneUtil.groupDocsByStorageFileName(docs);

    int eventsReadThisFile = 0;
    int logFileCount = 0;

    for (final Map.Entry<String, List<Document>> docGroup : byStorageNameDocGroups.entrySet()) {
        final String storageFileName = docGroup.getKey();
        final File provenanceEventFile = LuceneUtil.getProvenanceLogFile(storageFileName, allProvenanceLogFiles);
        if (provenanceEventFile == null) {
            logger.warn("Could not find Provenance Log File with "
                + "basename {} in the Provenance Repository; assuming "
                + "file has expired and continuing without it", storageFileName);
            continue;
        }

        try (final RecordReader reader = RecordReaders.newRecordReader(provenanceEventFile, allProvenanceLogFiles, maxAttributeChars)) {
            // Count each log file that was opened, so the timing line below reports a real number.
            logFileCount++;

            final Iterator<Document> docIter = docGroup.getValue().iterator();
            while (docIter.hasNext() && retrievalCount.getAndIncrement() < maxResults) {
                final ProvenanceEventRecord event = getRecord(docIter.next(), reader);
                if (event != null && authorizer.isAuthorized(event)) {
                    matchingRecords.add(event);
                    eventsReadThisFile++;
                }
            }
        } catch (final Exception e) {
            // A bad or missing file must not abort the whole query; log it and continue.
            logger.warn("Failed to read Provenance Events. The event file '"
                + provenanceEventFile.getAbsolutePath() + "' may be missing or corrupt.", e);
        }
    }

    logger.debug("Read {} records from previous file", eventsReadThisFile);
    final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
    logger.debug("Took {} ms to read {} events from {} prov log files", millis, matchingRecords.size(),
            logFileCount);

    return matchingRecords;
}
 
Example 18
Source File: DeleteIndexAction.java    From localization_nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Removes the index documents that belong to an expired provenance file, deletes any index
 * directory left without documents, and advances the minimum indexed event ID.
 *
 * @param expiredFile the provenance file being expired
 * @return the same file that was passed in
 * @throws IOException if an index operation fails
 */
@Override
public File execute(final File expiredFile) throws IOException {
    // NOTE(review): numDeleted is never updated, so the final log line always reports
    // 0 documents removed. Kept as-is because no deletion count is computed in this method.
    final long numDeleted = 0;

    // determine the max event id that we are deleting.
    long maxEventId = -1L;
    try (final RecordReader reader = RecordReaders.newRecordReader(expiredFile, repository.getAllLogFiles(), Integer.MAX_VALUE)) {
        maxEventId = reader.getMaxEventId();
    } catch (final IOException ioe) {
        // Best effort: maxEventId stays at -1 and the minimum indexed ID is not advanced below.
        logger.warn("Failed to obtain max ID present in journal file {}", expiredFile.getAbsolutePath(), ioe);
    }

    // remove the records from the index
    final List<File> indexDirs = indexConfiguration.getIndexDirectories(expiredFile);
    for (final File indexingDirectory : indexDirs) {
        final Term term = new Term(FieldNames.STORAGE_FILENAME, LuceneUtil.substringBefore(expiredFile.getName(), "."));

        boolean deleteDir = false;
        final EventIndexWriter writer = indexManager.borrowIndexWriter(indexingDirectory);
        try {
            final IndexWriter indexWriter = writer.getIndexWriter();
            indexWriter.deleteDocuments(term);
            indexWriter.commit();
            final int docsLeft = indexWriter.numDocs();
            deleteDir = docsLeft <= 0;
            logger.debug("After expiring {}, there are {} docs left for index {}", expiredFile, docsLeft, indexingDirectory);
        } finally {
            // Always hand the writer back, even when the delete or commit fails.
            indexManager.returnIndexWriter(writer);
        }

        // we've confirmed that all documents have been removed. Delete the index directory.
        if (deleteDir) {
            indexManager.removeIndex(indexingDirectory);
            indexConfiguration.removeIndexDirectory(indexingDirectory);

            deleteDirectory(indexingDirectory);
            logger.info("Removed empty index directory {}", indexingDirectory);
        }
    }

    // Update the minimum index to 1 more than the max Event ID in this file.
    if (maxEventId > -1L) {
        indexConfiguration.setMinIdIndexed(maxEventId + 1L);
    }

    logger.info("Deleted Indices for Expired Provenance File {} from {} index files; {} documents removed", expiredFile, indexDirs.size(), numDeleted);
    return expiredFile;
}
 
Example 19
Source File: PersistentProvenanceRepository.java    From localization_nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Deletes the oldest index directory when the earliest event still held in the log files is
 * no older than the start of the second-oldest index, and refreshes {@code firstEventTimestamp}.
 *
 * @throws IOException declared by the original signature; read failures on the first log file
 *                     are caught and logged here
 */
private void purgeExpiredIndexes() throws IOException {
    // Now that we have potentially removed expired Provenance Event Log Files, we can look at
    // whether or not we can delete any of the indexes. An index can be deleted if all of the
    // data that is associated with that index has already been deleted. In order to test this,
    // we will get the timestamp of the earliest event and then compare that to the latest timestamp
    // that would be indexed by the earliest index. If the event occurred after the timestamp of
    // the latest index, then we can just delete the entire index all together.

    // find all of the index directories
    final List<File> indexDirs = getAllIndexDirectories();
    if (indexDirs.size() < 2) {
        this.firstEventTimestamp = determineFirstEventTimestamp();
        return;
    }

    // Indexes are named "index-XXX" where the XXX is the timestamp of the earliest event that
    // could be in the index. Once we have finished with one index, we move on to another index,
    // but we don't move on until we are finished with the previous index.
    // Therefore, an efficient way to determine the latest timestamp of one index is to look at the
    // timestamp of the next index (these could potentially overlap for one millisecond). This is
    // efficient because we can determine the earliest timestamp of an index simply by looking at
    // the name of the Index's directory.
    final long latestTimestampOfFirstIndex = getIndexTimestamp(indexDirs.get(1));

    // Get the timestamp of the first event in the first Provenance Event Log File and the ID of the last event
    // in the event file.
    final List<File> logFiles = getSortedLogFiles();
    if (logFiles.isEmpty()) {
        this.firstEventTimestamp = System.currentTimeMillis();
        return;
    }

    final File firstLogFile = logFiles.get(0);
    long earliestEventTime = System.currentTimeMillis();
    long maxEventId = -1L;
    try (final RecordReader reader = RecordReaders.newRecordReader(firstLogFile, null, Integer.MAX_VALUE)) {
        final StandardProvenanceEventRecord event = reader.nextRecord();
        if (event == null) {
            // The first log file holds no record, so there is no earliest event to compare the
            // index against. Leave the index alone rather than dereference null, and take the
            // first-event timestamp from whichever log file does contain an event.
            logger.debug("Provenance Event Log File {} contains no events; skipping index purge", firstLogFile);
            this.firstEventTimestamp = determineFirstEventTimestamp();
            return;
        }
        earliestEventTime = event.getEventTime();
        maxEventId = reader.getMaxEventId();
    } catch (final IOException ioe) {
        // Defaults (current time, -1) stay in effect; keep the cause in the log.
        logger.warn("Unable to determine the maximum ID for Provenance Event Log File {}; values reported for the number of "
                + "events in the Provenance Repository may be inaccurate.", firstLogFile, ioe);
    }

    // check if we can delete the index safely.
    if (latestTimestampOfFirstIndex <= earliestEventTime) {
        // we can safely delete the first index because the latest event in the index is an event
        // that has already been expired from the repository.
        final File indexingDirectory = indexDirs.get(0);
        getIndexManager().removeIndex(indexingDirectory);
        indexConfig.removeIndexDirectory(indexingDirectory);
        deleteDirectory(indexingDirectory);

        if (maxEventId > -1L) {
            indexConfig.setMinIdIndexed(maxEventId + 1L);
        }
    }

    this.firstEventTimestamp = earliestEventTime;
}
 
Example 20
Source File: PersistentProvenanceRepository.java    From localization_nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Collects up to {@code maxRecords} authorized events whose ID is at least {@code firstRecordId},
 * reading the files returned by {@code getPathsForId} in order.
 *
 * @param firstRecordId the lowest event ID to include
 * @param maxRecords the maximum number of events to return
 * @param user the user passed to the authorization check for each event
 * @return the matching events, possibly empty
 * @throws IOException declared by the signature; read failures on individual files are caught,
 *                     logged and reported, and reading continues with the next file
 */
@Override
public List<ProvenanceEventRecord> getEvents(final long firstRecordId, final int maxRecords, final NiFiUser user) throws IOException {
    final List<ProvenanceEventRecord> records = new ArrayList<>(maxRecords);

    final List<Path> paths = getPathsForId(firstRecordId);
    if (paths == null || paths.isEmpty()) {
        return records;
    }

    for (final Path path : paths) {
        try (RecordReader reader = RecordReaders.newRecordReader(path.toFile(), getAllLogFiles(), maxAttributeChars)) {
            // if this is the first record, try to find out the block index and jump directly to
            // the block index. This avoids having to read through a lot of data that we don't care about
            // just to get to the first record that we want.
            if (records.isEmpty()) {
                final TocReader tocReader = reader.getTocReader();
                if (tocReader != null) {
                    final Integer blockIndex = tocReader.getBlockIndexForEventId(firstRecordId);
                    if (blockIndex != null) {
                        reader.skipToBlock(blockIndex);
                    }
                }
            }

            StandardProvenanceEventRecord record;
            while (records.size() < maxRecords && (record = reader.nextRecord()) != null) {
                if (record.getEventId() >= firstRecordId && isAuthorized(record, user)) {
                    records.add(record);
                }
            }
        } catch (final EOFException | FileNotFoundException ignored) {
            // assume file aged off (or there's no data in file, in case of EOFException, which indicates that data was cached
            // in operating system and entire O/S crashed and always.sync was not turned on.)
        } catch (final IOException ioe) {
            // Trailing throwable argument: message and stack trace go out as a single log event
            // instead of a second entry with an empty message.
            logger.error("Failed to read Provenance Event File {} due to {}", path.toFile(), ioe.toString(), ioe);
            eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to read Provenance Event File " + path.toFile() + " due to " + ioe.toString());
        }

        if (records.size() >= maxRecords) {
            break;
        }
    }

    if (logger.isDebugEnabled()) {
        logger.debug("Retrieving up to {} records starting at Event ID {}; returning {} events", maxRecords, firstRecordId, records.size());
    }

    return records;
}