Java Code Examples for com.amazonaws.services.s3.AmazonS3#listNextBatchOfObjects()
The following examples show how to use com.amazonaws.services.s3.AmazonS3#listNextBatchOfObjects().
Each example notes the original project and source file it was taken from.
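All of the examples follow the same pagination idiom: listObjects() returns the first page of results as an ObjectListing, and while isTruncated() reports that more results remain, listNextBatchOfObjects() fetches the next page using the previous listing's marker. Here is a minimal, self-contained sketch of that loop; the bucket name is a placeholder, and the client is assumed to pick up credentials and region from the default provider chain:

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3ObjectSummary;

public class ListAllObjects {
    public static void main(String[] args) {
        // hypothetical bucket name; credentials/region come from the default chain
        String bucketName = "example-bucket";
        AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();

        ObjectListing listing = s3.listObjects(bucketName);
        while (true) {
            for (S3ObjectSummary summary : listing.getObjectSummaries()) {
                System.out.println(summary.getKey());
            }
            if (!listing.isTruncated()) {
                break; // last page reached
            }
            // fetch the next page using the marker carried by the previous listing
            listing = s3.listNextBatchOfObjects(listing);
        }
    }
}

The same pattern works when the initial listing comes from an overload such as listObjects(bucketName, prefix) or from a ListObjectsRequest, as several of the examples below demonstrate.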
Example 1
Source File: InfectedFileCache.java From aws-s3-virusscan with Apache License 2.0
public List<InfectedFile> getFiles() {
    final List<InfectedFile> files = new ArrayList<>();
    if (Config.has(Config.Key.INFECTED_FILES_BUCKET_NAME)) {
        final AmazonS3 s3local = AmazonS3ClientBuilder.standard()
                .withCredentials(this.credentialsProvider)
                .withRegion(Config.get(Config.Key.INFECTED_FILES_BUCKET_REGION))
                .build();
        ObjectListing objectListing = s3local.listObjects(Config.get(Config.Key.INFECTED_FILES_BUCKET_NAME));
        while (true) {
            // download the content of every object in the current page
            objectListing.getObjectSummaries().forEach((summary) -> {
                final S3Object object = s3local.getObject(summary.getBucketName(), summary.getKey());
                final byte[] content;
                try {
                    content = IOUtils.toByteArray(object.getObjectContent());
                } catch (final IOException e) {
                    throw new RuntimeException(e);
                }
                files.add(new InfectedFile(summary.getKey(), content, object.getObjectMetadata().getContentType()));
            });
            // keep fetching pages until the listing is no longer truncated
            if (objectListing.isTruncated()) {
                objectListing = s3local.listNextBatchOfObjects(objectListing);
            } else {
                break;
            }
        }
    }
    return files;
}
Example 2
Source File: S3CheckpointSpiSelfTest.java From ignite with Apache License 2.0
/**
 * @throws Exception If error.
 */
@Override protected void afterSpiStopped() throws Exception {
    AWSCredentials cred = new BasicAWSCredentials(IgniteS3TestSuite.getAccessKey(),
        IgniteS3TestSuite.getSecretKey());

    AmazonS3 s3 = new AmazonS3Client(cred);

    String bucketName = S3CheckpointSpi.BUCKET_NAME_PREFIX + "unit-test-bucket";

    try {
        ObjectListing list = s3.listObjects(bucketName);

        while (true) {
            for (S3ObjectSummary sum : list.getObjectSummaries())
                s3.deleteObject(bucketName, sum.getKey());

            if (list.isTruncated())
                list = s3.listNextBatchOfObjects(list);
            else
                break;
        }
    }
    catch (AmazonClientException e) {
        throw new IgniteSpiException("Failed to read checkpoint bucket: " + bucketName, e);
    }
}
Example 3
Source File: PrimitiveS3OperationHandler.java From CloverETL-Engine with GNU Lesser General Public License v2.1
private void deleteObjects(AmazonS3 service, ObjectListing listing) throws MultiObjectDeleteException, IOException {
    while (true) {
        if (Thread.currentThread().isInterrupted()) {
            throw new IOException(FileOperationMessages.getString("IOperationHandler.interrupted")); //$NON-NLS-1$
        }
        List<S3ObjectSummary> objectSummaries = listing.getObjectSummaries();
        if (!objectSummaries.isEmpty()) {
            List<KeyVersion> keys = new ArrayList<KeyVersion>(objectSummaries.size());
            for (S3ObjectSummary object : objectSummaries) {
                keys.add(new KeyVersion(object.getKey()));
            }
            DeleteObjectsRequest request = new DeleteObjectsRequest(listing.getBucketName()).withKeys(keys).withQuiet(true);
            service.deleteObjects(request); // quiet
        }
        // check truncation before fetching the next batch, so the final
        // (non-truncated) page is also deleted rather than silently skipped
        if (!listing.isTruncated()) {
            break;
        }
        listing = service.listNextBatchOfObjects(listing);
    }
}
Example 4
Source File: AmazonS3FileSystemTestHelper.java From iaf with Apache License 2.0
public void cleanUpBucketAndShutDown(AmazonS3 s3Client) {
    if (s3Client.doesBucketExistV2(bucketName)) {
        ObjectListing objectListing = s3Client.listObjects(bucketName);
        while (true) {
            Iterator<S3ObjectSummary> objIter = objectListing.getObjectSummaries().iterator();
            while (objIter.hasNext()) {
                s3Client.deleteObject(bucketName, objIter.next().getKey());
            }

            // If the bucket contains many objects, the listObjects() call
            // might not return all of the objects in the first listing. Check to
            // see whether the listing was truncated. If so, retrieve the next page of objects
            // and delete them.
            if (objectListing.isTruncated()) {
                objectListing = s3Client.listNextBatchOfObjects(objectListing);
            } else {
                break;
            }
        }
        s3Client.deleteBucket(bucketName);
    }
    if (s3Client != null) {
        s3Client.shutdown();
    }
}
Example 5
Source File: ImportS3.java From h2o-2 with Apache License 2.0
@Override protected Response serve() {
    String bucket = _bucket.value();
    Log.info("ImportS3 processing (" + bucket + ")");
    JsonObject json = new JsonObject();
    JsonArray succ = new JsonArray();
    JsonArray fail = new JsonArray();
    AmazonS3 s3 = PersistS3.getClient();
    // process the first page of results, then keep paging while truncated
    ObjectListing currentList = s3.listObjects(bucket);
    processListing(currentList, succ, fail);
    while (currentList.isTruncated()) {
        currentList = s3.listNextBatchOfObjects(currentList);
        processListing(currentList, succ, fail);
    }
    json.add(NUM_SUCCEEDED, new JsonPrimitive(succ.size()));
    json.add(SUCCEEDED, succ);
    json.add(NUM_FAILED, new JsonPrimitive(fail.size()));
    json.add(FAILED, fail);
    DKV.write_barrier();
    Response r = Response.done(json);
    r.setBuilder(SUCCEEDED + "." + KEY, new KeyCellBuilder());
    return r;
}
Example 6
Source File: S3UploadAllCallable.java From jobcacher-plugin with MIT License
private Map<String, S3ObjectSummary> lookupExistingCacheEntries(AmazonS3 s3) {
    Map<String, S3ObjectSummary> summaries = new HashMap<>();

    ObjectListing listing = s3.listObjects(bucketName, pathPrefix);
    do {
        for (S3ObjectSummary summary : listing.getObjectSummaries()) {
            summaries.put(summary.getKey(), summary);
        }
        // advance to the next page, or stop when the listing is complete
        listing = listing.isTruncated() ? s3.listNextBatchOfObjects(listing) : null;
    } while (listing != null);

    return summaries;
}
Example 7
Source File: S3InputFormatUtils.java From kangaroo with Apache License 2.0
/**
 * Efficiently gets the Hadoop {@link org.apache.hadoop.fs.FileStatus} for all S3 files under the provided
 * {@code dirs}
 *
 * @param s3Client
 *            s3 client
 * @param blockSize
 *            the block size
 * @param dirs
 *            the dirs to search through
 * @return the {@link org.apache.hadoop.fs.FileStatus} version of all S3 files under {@code dirs}
 */
static List<FileStatus> getFileStatuses(final AmazonS3 s3Client, final long blockSize, final Path... dirs) {
    final List<FileStatus> result = Lists.newArrayList();
    for (final Path dir : dirs) {
        // get bucket and prefix from path
        final String bucket = S3HadoopUtils.getBucketFromPath(dir.toString());
        final String prefix = S3HadoopUtils.getKeyFromPath(dir.toString());
        // list request
        final ListObjectsRequest req = new ListObjectsRequest().withMaxKeys(Integer.MAX_VALUE)
                .withBucketName(bucket).withPrefix(prefix);
        // recursively page through all objects under the path
        for (ObjectListing listing = s3Client.listObjects(req); listing.getObjectSummaries().size() > 0;
                listing = s3Client.listNextBatchOfObjects(listing)) {
            for (final S3ObjectSummary summary : listing.getObjectSummaries()) {
                final Path path = new Path(String.format("s3n://%s/%s", summary.getBucketName(), summary.getKey()));
                if (S3_PATH_FILTER.accept(path)) {
                    result.add(new FileStatus(summary.getSize(), false, 1, blockSize,
                            summary.getLastModified().getTime(), path));
                }
            }
            // don't need to check the next listing if this one is not truncated
            if (!listing.isTruncated()) {
                break;
            }
        }
    }
    return result;
}
Example 8
Source File: AWSCommon.java From camel-kafka-connector with Apache License 2.0
/**
 * Delete an S3 bucket using the provided client. Coming from AWS documentation:
 * https://docs.aws.amazon.com/AmazonS3/latest/dev/delete-or-empty-bucket.html#delete-bucket-sdk-java
 * @param s3Client the AmazonS3 client instance used to delete the bucket
 * @param bucketName a String containing the bucket name
 */
public static void deleteBucket(AmazonS3 s3Client, String bucketName) {
    // Delete all objects from the bucket. This is sufficient
    // for non versioned buckets. For versioned buckets, when you attempt to delete objects, Amazon S3 inserts
    // delete markers for all objects, but doesn't delete the object versions.
    // To delete objects from versioned buckets, delete all of the object versions before deleting
    // the bucket (see below for an example).
    ObjectListing objectListing = s3Client.listObjects(bucketName);
    while (true) {
        Iterator<S3ObjectSummary> objIter = objectListing.getObjectSummaries().iterator();
        while (objIter.hasNext()) {
            s3Client.deleteObject(bucketName, objIter.next().getKey());
        }

        // If the bucket contains many objects, the listObjects() call
        // might not return all of the objects in the first listing. Check to
        // see whether the listing was truncated. If so, retrieve the next page of objects
        // and delete them.
        if (objectListing.isTruncated()) {
            objectListing = s3Client.listNextBatchOfObjects(objectListing);
        } else {
            break;
        }
    }

    // Delete all object versions (required for versioned buckets).
    VersionListing versionList = s3Client.listVersions(new ListVersionsRequest().withBucketName(bucketName));
    while (true) {
        Iterator<S3VersionSummary> versionIter = versionList.getVersionSummaries().iterator();
        while (versionIter.hasNext()) {
            S3VersionSummary vs = versionIter.next();
            s3Client.deleteVersion(bucketName, vs.getKey(), vs.getVersionId());
        }

        if (versionList.isTruncated()) {
            versionList = s3Client.listNextBatchOfVersions(versionList);
        } else {
            break;
        }
    }

    // After all objects and object versions are deleted, delete the bucket.
    s3Client.deleteBucket(bucketName);
}
Example 9
Source File: CloudFormationClient.java From herd-mdl with Apache License 2.0
/**
 * Delete the stack {@link #stackName}
 */
public void deleteStack() throws Exception {
    CFTStackInfo cftStackInfo = getStackInfo();
    String rootStackId = cftStackInfo.stackId();
    // Use the stack id to track the delete operation
    LOGGER.info("rootStackId = " + rootStackId);

    // Go through the stack and pick up resources that we want
    // to finalize before deleting the stack.
    List<String> s3BucketIds = new ArrayList<>();
    DescribeStacksResult describeStacksResult = amazonCloudFormation.describeStacks();
    for (Stack currentStack : describeStacksResult.getStacks()) {
        if (rootStackId.equals(currentStack.getRootId()) || rootStackId.equals(currentStack.getStackId())) {
            LOGGER.info("stackId = " + currentStack.getStackId());
            DescribeStackResourcesRequest describeStackResourcesRequest = new DescribeStackResourcesRequest();
            describeStackResourcesRequest.setStackName(currentStack.getStackName());
            List<StackResource> stackResources = amazonCloudFormation
                    .describeStackResources(describeStackResourcesRequest).getStackResources();
            for (StackResource stackResource : stackResources) {
                if (!stackResource.getResourceStatus().equals(ResourceStatus.DELETE_COMPLETE.toString())) {
                    if (stackResource.getResourceType().equals("AWS::S3::Bucket")) {
                        s3BucketIds.add(stackResource.getPhysicalResourceId());
                    }
                }
            }
        }
    }

    // Now empty S3 buckets, clean up will be done when the stack is deleted
    AmazonS3 amazonS3 = AmazonS3ClientBuilder.standard().withRegion(Regions.getCurrentRegion().getName())
            .withCredentials(new InstanceProfileCredentialsProvider(true)).build();
    for (String s3BucketPhysicalId : s3BucketIds) {
        String s3BucketName = s3BucketPhysicalId;
        if (!amazonS3.doesBucketExistV2(s3BucketName)) {
            continue; // skip buckets that no longer exist instead of aborting the whole loop
        }
        LOGGER.info("Emptying S3 bucket, " + s3BucketName);
        ObjectListing objectListing = amazonS3.listObjects(s3BucketName);
        while (true) {
            for (Iterator<?> iterator = objectListing.getObjectSummaries().iterator(); iterator.hasNext(); ) {
                S3ObjectSummary summary = (S3ObjectSummary) iterator.next();
                amazonS3.deleteObject(s3BucketName, summary.getKey());
            }
            if (objectListing.isTruncated()) {
                objectListing = amazonS3.listNextBatchOfObjects(objectListing);
            } else {
                break;
            }
        }
    }

    // Proceed with the regular stack deletion operation
    DeleteStackRequest deleteRequest = new DeleteStackRequest();
    deleteRequest.setStackName(stackName);
    amazonCloudFormation.deleteStack(deleteRequest);
    LOGGER.info("Stack deletion initiated");

    CFTStackStatus cftStackStatus = waitForCompletionAndGetStackStatus(amazonCloudFormation, rootStackId);
    LOGGER.info("Stack deletion completed, the stack " + stackName + " completed with " + cftStackStatus);

    // Throw exception if failed
    if (!cftStackStatus.getStackStatus().equals(StackStatus.DELETE_COMPLETE.toString())) {
        throw new Exception("deleteStack operation failed for stack " + stackName + " - " + cftStackStatus);
    }
}
Example 10
Source File: S3StorageService.java From kayenta with Apache License 2.0
@Override
public List<Map<String, Object>> listObjectKeys(
    String accountName, ObjectType objectType, List<String> applications, boolean skipIndex) {
    AwsNamedAccountCredentials credentials = accountCredentialsRepository.getRequiredOne(accountName);

    if (!skipIndex && objectType == ObjectType.CANARY_CONFIG) {
        Set<Map<String, Object>> canaryConfigSet =
            canaryConfigIndex.getCanaryConfigSummarySet(credentials, applications);
        return Lists.newArrayList(canaryConfigSet);
    } else {
        AmazonS3 amazonS3 = credentials.getAmazonS3();
        String bucket = credentials.getBucket();
        String group = objectType.getGroup();
        String prefix = buildTypedFolder(credentials, group);

        ensureBucketExists(accountName);

        int skipToOffset = prefix.length() + 1; // + Trailing slash
        List<Map<String, Object>> result = new ArrayList<>();

        log.debug("Listing {}", group);

        ObjectListing bucketListing =
            amazonS3.listObjects(new ListObjectsRequest(bucket, prefix, null, null, 10000));
        List<S3ObjectSummary> summaries = bucketListing.getObjectSummaries();

        while (bucketListing.isTruncated()) {
            bucketListing = amazonS3.listNextBatchOfObjects(bucketListing);
            summaries.addAll(bucketListing.getObjectSummaries());
        }

        if (summaries != null) {
            for (S3ObjectSummary summary : summaries) {
                String itemName = summary.getKey();
                int indexOfLastSlash = itemName.lastIndexOf("/");
                Map<String, Object> objectMetadataMap = new HashMap<>();
                long updatedTimestamp = summary.getLastModified().getTime();

                objectMetadataMap.put("id", itemName.substring(skipToOffset, indexOfLastSlash));
                objectMetadataMap.put("updatedTimestamp", updatedTimestamp);
                objectMetadataMap.put("updatedTimestampIso", Instant.ofEpochMilli(updatedTimestamp).toString());

                if (objectType == ObjectType.CANARY_CONFIG) {
                    String name = itemName.substring(indexOfLastSlash + 1);
                    if (name.endsWith(".json")) {
                        name = name.substring(0, name.length() - 5);
                    }
                    objectMetadataMap.put("name", name);
                }
                result.add(objectMetadataMap);
            }
        }
        return result;
    }
}
Example 11
Source File: DeleteBucket.java From aws-doc-sdk-examples with Apache License 2.0
public static void main(String[] args) {
    final String USAGE = "\n" +
            "To run this example, supply the name of an S3 bucket\n" +
            "\n" +
            "Ex: DeleteBucket <bucketname>\n";

    if (args.length < 1) {
        System.out.println(USAGE);
        System.exit(1);
    }

    String bucket_name = args[0];

    System.out.println("Deleting S3 bucket: " + bucket_name);
    final AmazonS3 s3 = AmazonS3ClientBuilder.standard().withRegion(Regions.DEFAULT_REGION).build();

    try {
        System.out.println(" - removing objects from bucket");
        ObjectListing object_listing = s3.listObjects(bucket_name);
        while (true) {
            for (Iterator<?> iterator = object_listing.getObjectSummaries().iterator(); iterator.hasNext(); ) {
                S3ObjectSummary summary = (S3ObjectSummary) iterator.next();
                s3.deleteObject(bucket_name, summary.getKey());
            }

            // more object_listing to retrieve?
            if (object_listing.isTruncated()) {
                object_listing = s3.listNextBatchOfObjects(object_listing);
            } else {
                break;
            }
        }

        System.out.println(" - removing versions from bucket");
        VersionListing version_listing = s3.listVersions(new ListVersionsRequest().withBucketName(bucket_name));
        while (true) {
            for (Iterator<?> iterator = version_listing.getVersionSummaries().iterator(); iterator.hasNext(); ) {
                S3VersionSummary vs = (S3VersionSummary) iterator.next();
                s3.deleteVersion(bucket_name, vs.getKey(), vs.getVersionId());
            }

            if (version_listing.isTruncated()) {
                version_listing = s3.listNextBatchOfVersions(version_listing);
            } else {
                break;
            }
        }

        System.out.println(" OK, bucket ready to delete!");
        s3.deleteBucket(bucket_name);
    } catch (AmazonServiceException e) {
        System.err.println(e.getErrorMessage());
        System.exit(1);
    }
    System.out.println("Done!");
}
Example 12
Source File: S3ChangeLogStore.java From athenz with Apache License 2.0
/**
 * list the objects in the zts bucket. If the mod time is specified as 0
 * then we want to list all objects otherwise, we only list objects
 * that are newer than the specified timestamp
 * @param s3 AWS S3 client object
 * @param domains collection to be updated to include domain names
 * @param modTime only include domains newer than this timestamp
 */
void listObjects(AmazonS3 s3, Collection<String> domains, long modTime) {

    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("listObjects: Retrieving domains from {} with mod time > {}", s3BucketName, modTime);
    }

    ObjectListing objectListing = s3.listObjects(new ListObjectsRequest()
            .withBucketName(s3BucketName));

    String objectName;
    while (objectListing != null) {

        // process each entry in our result set and add the domain
        // name to our return list

        final List<S3ObjectSummary> objectSummaries = objectListing.getObjectSummaries();
        boolean listTruncated = objectListing.isTruncated();

        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("listObjects: retrieved {} objects, more objects available - {}",
                    objectSummaries.size(), listTruncated);
        }

        for (S3ObjectSummary objectSummary : objectSummaries) {

            // if mod time is specified then make sure we automatically skip
            // any domains older than the specified value

            if (modTime > 0 && objectSummary.getLastModified().getTime() <= modTime) {
                continue;
            }

            // for now skip any folders/objects that start with '.'

            objectName = objectSummary.getKey();
            if (objectName.charAt(0) == '.') {
                continue;
            }

            domains.add(objectName);
        }

        // check if the object listing is truncated or not (break out in this case)
        // technically we can skip this call and just call listNextBatchOfResults
        // since that returns null if the object listing is not truncated but
        // this direct check here makes the logic easier to follow

        if (!listTruncated) {
            break;
        }

        objectListing = s3.listNextBatchOfObjects(objectListing);
    }
}
Example 13
Source File: AmazonS3Util.java From datacollector with Apache License 2.0
/**
 * Lists objects from AmazonS3 in lexicographical order
 *
 * @param s3Client
 * @param s3ConfigBean
 * @param pathMatcher glob patterns to match file name against
 * @param s3Offset current offset which provides the key name of the previous object
 * @param fetchSize number of objects to fetch in one go
 * @return
 * @throws AmazonClientException
 */
static List<S3ObjectSummary> listObjectsLexicographically(
    AmazonS3 s3Client,
    S3ConfigBean s3ConfigBean,
    AntPathMatcher pathMatcher,
    S3Offset s3Offset,
    int fetchSize
) {
    // Incrementally scan objects after the marker (s3Offset).
    List<S3ObjectSummary> list = new ArrayList<>(fetchSize);

    ListObjectsRequest listObjectsRequest = new ListObjectsRequest();
    listObjectsRequest.setBucketName(s3ConfigBean.s3Config.bucket);
    listObjectsRequest.setPrefix(s3ConfigBean.s3Config.commonPrefix);
    listObjectsRequest.setMaxKeys(BATCH_SIZE);

    if (s3Offset.getKey() != null) {
        if (!s3Offset.getKey().isEmpty() && parseOffset(s3Offset) != -1) {
            S3ObjectSummary currentObjectSummary = getObjectSummary(s3Client, s3ConfigBean.s3Config.bucket, s3Offset.getKey());
            list.add(currentObjectSummary);
        }
        listObjectsRequest.setMarker(s3Offset.getKey());
    }

    ObjectListing objectListing = s3Client.listObjects(listObjectsRequest);
    while (true) {
        for (S3ObjectSummary s : objectListing.getObjectSummaries()) {
            String fullPrefix = s.getKey();
            String remainingPrefix = fullPrefix.substring(s3ConfigBean.s3Config.commonPrefix.length(), fullPrefix.length());
            if (!remainingPrefix.isEmpty()) {
                if (pathMatcher.match(s3ConfigBean.s3FileConfig.prefixPattern, remainingPrefix)) {
                    list.add(s);
                }
                // We've got enough objects.
                if (list.size() == fetchSize) {
                    return list;
                }
            }
        }
        // Listing is complete. No more objects to be listed.
        if (!objectListing.isTruncated()) {
            break;
        }
        objectListing = s3Client.listNextBatchOfObjects(objectListing);
    }
    return list;
}