com.amazonaws.services.s3.iterable.S3Objects Java Examples

The following examples show how to use com.amazonaws.services.s3.iterable.S3Objects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: S3CacheFileInfoImpl.java    From nifi-minifi with Apache License 2.0 6 votes vote down vote up
/**
 * Streams the cached configurations found under {@code prefix} in {@code bucket},
 * ordered from the highest version number to the lowest.
 *
 * <p>Objects whose key yields no version from {@code getVersionIfMatch} are skipped.
 *
 * @return a stream of S3-backed configurations, highest version first
 * @throws IOException declared by the overridden method's signature
 */
@Override
public Stream<WriteableConfiguration> getCachedConfigurations() throws IOException {
  Iterable<S3ObjectSummary> listing = S3Objects.withPrefix(s3, bucket, prefix);

  return StreamSupport.stream(listing.spliterator(), false)
      .map(summary -> {
        // Pair each object with the version parsed from its key; null marks "no match".
        Integer version = getVersionIfMatch(summary.getKey());
        if (version != null) {
          return new Pair<>(version, summary);
        }
        return null;
      })
      .filter(Objects::nonNull)
      // The explicitly typed lambda parameter keeps inference intact across reversed().
      .sorted(Comparator.comparing((Pair<Integer, S3ObjectSummary> versioned) -> versioned.getFirst())
          .reversed())
      .map(versioned ->
          new S3WritableConfiguration(s3, versioned.getSecond(), Integer.toString(versioned.getFirst())));
}
 
Example #2
Source File: TestAmazonS3TargetForWholeFile.java    From datacollector with Apache License 2.0 6 votes vote down vote up
/**
 * Builds one whole-file record for every object currently listed in the source bucket.
 *
 * <p>Each record carries a {@code "bucket"} header attribute set to the target bucket name
 * and a root field built from an {@code S3FileRef} plus the object's metadata.
 *
 * @return the records, in the order the objects were listed
 */
private static List<Record> createRecordsForWholeFileFromS3() throws Exception {
  List<Record> wholeFileRecords = new ArrayList<>();
  for (S3ObjectSummary summary : S3Objects.inBucket(s3client, SOURCE_BUCKET_NAME)) {
    // Metadata is fetched first, before the record itself is created.
    Map<String, Object> objectMetadata =
        getS3Metadata(s3client.getObject(SOURCE_BUCKET_NAME, summary.getKey()));

    Record wholeFileRecord = RecordCreator.create();
    wholeFileRecord.getHeader().setAttribute("bucket", TARGET_BUCKET_NAME);

    S3FileRef.Builder fileRefBuilder = new S3FileRef.Builder()
        .s3Client(s3client)
        .s3ObjectSummary(summary)
        .useSSE(false)
        .verifyChecksum(false)
        .bufferSize(1024);
    wholeFileRecord.set(
        FileRefUtil.getWholeFileRecordRootField(fileRefBuilder.build(), objectMetadata)
    );

    wholeFileRecords.add(wholeFileRecord);
  }
  return wholeFileRecords;
}
 
Example #3
Source File: S3BlobStore.java    From nexus-blobstore-s3 with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Streams blob ids for direct-path blobs stored under the given prefix.
 *
 * <p>Lists every object below {@code DIRECT_PATH_PREFIX/prefix}, keeps only keys that end
 * with {@code BLOB_ATTRIBUTE_SUFFIX}, and converts each such key to a blob id.
 *
 * @param prefix path segment appended to the direct-path prefix
 * @return a sequential stream of blob ids derived from the matching attribute keys
 */
@Override
public Stream<BlobId> getDirectPathBlobIdStream(final String prefix) {
  final String directPath = format("%s/%s", DIRECT_PATH_PREFIX, prefix);
  final Stream<S3ObjectSummary> listedObjects =
      stream(S3Objects.withPrefix(s3, getConfiguredBucket(), directPath).spliterator(), false);
  final Stream<String> attributeKeys = listedObjects
      .map(S3ObjectSummary::getKey)
      .filter(objectKey -> objectKey.endsWith(BLOB_ATTRIBUTE_SUFFIX));
  return attributeKeys.map(this::attributePathToDirectPathBlobId);
}
 
Example #4
Source File: TaxiEventReader.java    From flink-stream-processing-refarch with Apache License 2.0 5 votes vote down vote up
public TaxiEventReader(AmazonS3 s3, String bucketName, String prefix) {
  this.s3 = s3;
  this.s3Objects = S3Objects.withPrefix(s3, bucketName, prefix).iterator();

  //initialize next and hasNext fields
  next();
}
 
Example #5
Source File: S3BlobStore.java    From nexus-public with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Streams blob ids for direct-path blobs stored under the given prefix.
 *
 * <p>The listing prefix is the bucket prefix followed by {@code DIRECT_PATH_PREFIX/prefix}.
 * Only keys ending with {@code BLOB_ATTRIBUTE_SUFFIX} are converted to blob ids.
 *
 * @param prefix path segment appended to the direct-path prefix
 * @return a sequential stream of blob ids derived from the matching attribute keys
 */
@Override
public Stream<BlobId> getDirectPathBlobIdStream(final String prefix) {
  final String directPath = getBucketPrefix() + format("%s/%s", DIRECT_PATH_PREFIX, prefix);
  final Stream<S3ObjectSummary> listedObjects =
      stream(S3Objects.withPrefix(s3, getConfiguredBucket(), directPath).spliterator(), false);
  final Stream<String> attributeKeys = listedObjects
      .map(S3ObjectSummary::getKey)
      .filter(objectKey -> objectKey.endsWith(BLOB_ATTRIBUTE_SUFFIX));
  return attributeKeys.map(this::attributePathToDirectPathBlobId);
}
 
Example #6
Source File: TestUtil.java    From datacollector with Apache License 2.0 5 votes vote down vote up
/**
 * Ensures a fresh, empty bucket named {@code bucketName} exists.
 *
 * <p>If the bucket already exists, every object in it is deleted and the bucket itself is
 * removed before it is created again.
 *
 * @param s3client client used for all bucket and object operations
 * @param bucketName name of the bucket to (re)create
 */
public static void createBucket(AmazonS3 s3client, String bucketName) {
  if (s3client.doesBucketExist(bucketName)) {
    // Empty the bucket object by object before deleting the bucket itself.
    S3Objects.inBucket(s3client, bucketName)
        .forEach(existing -> s3client.deleteObject(bucketName, existing.getKey()));
    s3client.deleteBucket(bucketName);
  }
  Assert.assertFalse(s3client.doesBucketExist(bucketName));
  // The CreateBucketRequest is built from the bucket name alone; no region is specified here.
  s3client.createBucket(new CreateBucketRequest(bucketName));
}
 
Example #7
Source File: TestAmazonS3TargetForWholeFile.java    From datacollector with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies every object in the target bucket against its origin and counts them.
 *
 * <p>For a {@code LOCAL} source the object is compared with the file of the same name under
 * {@code testDir}; for an {@code S3} source it is compared with the same-named object in the
 * source bucket. Each object is removed from the target bucket after it has been verified.
 *
 * @return the number of objects found (and verified) in the target bucket
 */
private int verifyAndReturnNoOfObjects() throws Exception {
  int verifiedCount = 0;
  for (S3ObjectSummary targetSummary : S3Objects.inBucket(s3client, TARGET_BUCKET_NAME)) {
    final String targetKey = targetSummary.getKey();
    // When a file name prefix is in use, drop the leading 4 characters (the "sdc-" prefix)
    // to recover the original name.
    final String originalName = withFileNamePrefix ? targetKey.substring(4) : targetKey;

    switch (source) {
      case LOCAL:
        verifyStreamCorrectness(
            new FileInputStream(testDir.getAbsolutePath() + "/" + originalName),
            s3client.getObject(TARGET_BUCKET_NAME, targetKey).getObjectContent()
        );
        break;
      case S3:
        verifyStreamCorrectness(
            s3client.getObject(SOURCE_BUCKET_NAME, originalName).getObjectContent(),
            s3client.getObject(TARGET_BUCKET_NAME, targetKey).getObjectContent()
        );
        break;
    }

    deleteObjectsAfterVerificationInTarget(targetKey);
    verifiedCount++;
  }
  return verifiedCount;
}
 
Example #8
Source File: S3BlobStore.java    From nexus-blobstore-s3 with Eclipse Public License 1.0 4 votes vote down vote up
/**
 * Streams the blob ids for all objects listed under {@code CONTENT_PREFIX} in the
 * configured bucket.
 *
 * @return the stream produced by {@code blobIdStream} for that listing
 */
@Override
public Stream<BlobId> getBlobIdStream() {
  final String bucketName = getConfiguredBucket();
  final Iterable<S3ObjectSummary> contentObjects = S3Objects.withPrefix(s3, bucketName, CONTENT_PREFIX);
  return blobIdStream(contentObjects);
}
 
Example #9
Source File: S3BlobStore.java    From nexus-public with Eclipse Public License 1.0 4 votes vote down vote up
/**
 * Streams the blob ids for all objects listed under the content prefix in the
 * configured bucket.
 *
 * @return the stream produced by {@code blobIdStream} for that listing
 */
@Override
public Stream<BlobId> getBlobIdStream() {
  final String bucketName = getConfiguredBucket();
  final String contentPrefix = getContentPrefix();
  final Iterable<S3ObjectSummary> contentObjects = S3Objects.withPrefix(s3, bucketName, contentPrefix);
  return blobIdStream(contentObjects);
}
 
Example #10
Source File: AmazonS3Util.java    From datacollector with Apache License 2.0 4 votes vote down vote up
/**
 * Lists objects from Amazon S3 in chronological order (ties on last-modified time are
 * broken by key, lexicographically), returning at most {@code fetchSize} of the oldest
 * objects that match the configured pattern and are accepted by {@code isEligible}
 * for the given offset.
 *
 * @param s3Client client used to list the objects
 * @param s3ConfigBean supplies the bucket, the common prefix and the prefix pattern
 * @param pathMatcher matcher applied to the part of each key after the common prefix
 * @param s3Offset current offset, passed to {@code isEligible} for each candidate
 * @param fetchSize maximum number of objects to return
 * @return the selected objects, oldest first
 * @throws AmazonClientException if thrown by the underlying S3 listing
 */
static List<S3ObjectSummary> listObjectsChronologically(
    AmazonS3 s3Client,
    S3ConfigBean s3ConfigBean,
    AntPathMatcher pathMatcher,
    S3Offset s3Offset,
    int fetchSize
) {

  // Approach:
  // - scan every object under the common prefix, keeping those that match the pattern and are eligible
  // - keep the candidates sorted and trim the newest whenever more than fetchSize are held
  TreeSet<S3ObjectSummary> oldestFirst = new TreeSet<>((left, right) -> {
    int byModifiedTime = left.getLastModified().compareTo(right.getLastModified());
    // Same modified time: fall back to the key to order the two objects.
    return byModifiedTime != 0 ? byModifiedTime : left.getKey().compareTo(right.getKey());
  });

  String commonPrefix = s3ConfigBean.s3Config.commonPrefix;
  S3Objects listing = S3Objects.withPrefix(s3Client, s3ConfigBean.s3Config.bucket, commonPrefix);

  // SDC-9413: since the listing is in lexical order, we should get all list of files in one api call
  for (S3ObjectSummary candidate : listing) {
    String relativeKey = candidate.getKey().substring(commonPrefix.length());
    if (relativeKey.isEmpty()) {
      // The remainder can be empty: if the user manually creates a prefix "myFolder/mySubFolder"
      // in bucket "myBucket" and uploads "myObject", the first objects returned are
      //   myFolder/mySubFolder
      //   myFolder/mySubFolder/myObject
      // The first entry is an empty placeholder with no data (it made preview return nothing),
      // so it is ignored.
      continue;
    }

    if (pathMatcher.match(s3ConfigBean.s3FileConfig.prefixPattern, relativeKey)
        && isEligible(candidate, s3Offset)) {
      oldestFirst.add(candidate);
    }
    // Drop the newest entry once the set grows past the requested size.
    if (oldestFirst.size() > fetchSize) {
      oldestFirst.pollLast();
    }
  }

  return new ArrayList<>(oldestFirst);
}
 
Example #11
Source File: AwsSdkSample.java    From aws-sdk-java-archetype with Apache License 2.0 4 votes vote down vote up
/**
 * Sample entry point: prints a welcome banner, initializes the clients via {@code init()},
 * then reports the availability zones and EC2 instances visible to the account and the
 * object count and total size of the first S3 bucket returned by the listing.
 *
 * <p>Service-side and client-side AWS errors are caught and reported on standard output.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) throws Exception {
    System.out.println("===========================================");
    System.out.println("Welcome to the AWS Java SDK!");
    System.out.println("===========================================");

    init();

    try {
        /*
         * Amazon EC2: list the availability zones of the region, then count
         * the instances reported by DescribeInstances.
         */
        List<AvailabilityZone> zones = ec2.describeAvailabilityZones().getAvailabilityZones();
        System.out.println("You have access to " + zones.size() + " availability zones:");
        for (AvailabilityZone az : zones) {
            System.out.println(" - " + az.getZoneName() + " (" + az.getRegionName() + ")");
        }

        // A set is used so that an instance reported more than once is counted once.
        Set<Instance> distinctInstances = new HashSet<Instance>();
        for (Reservation reservation : ec2.describeInstances().getReservations()) {
            distinctInstances.addAll(reservation.getInstances());
        }

        System.out.println("You have " + distinctInstances.size() + " Amazon EC2 instance(s) running.");


        /*
         * Amazon S3: list all buckets in the account, then walk the object
         * summaries of one bucket to compute its object count and total
         * size. Only summaries are read here; object content is never
         * downloaded.
         *
         * Besides the low-level S3 client, the SDK also offers the
         * high-level TransferManager API for asynchronous uploads and
         * downloads:
         *   http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/s3/transfer/TransferManager.html
         */
        List<Bucket> allBuckets = s3.listBuckets();
        System.out.println("You have " + allBuckets.size() + " Amazon S3 bucket(s).");

        if (!allBuckets.isEmpty()) {
            String firstBucketName = allBuckets.get(0).getName();

            long byteCount = 0;
            long objectCount = 0;
            /*
             * S3Objects (like S3Versions) iterates over a bucket's contents
             * and hides response pagination from the caller.
             */
            for (S3ObjectSummary summary : S3Objects.inBucket(s3, firstBucketName)) {
                byteCount += summary.getSize();
                objectCount++;
            }

            System.out.println("The bucket '" + firstBucketName + "' contains "+ objectCount + " objects "
                    + "with a total size of " + byteCount + " bytes.");
        }
    } catch (AmazonServiceException serviceError) {
        /*
         * The request reached AWS, but the service rejected it as invalid
         * or failed while executing it.
         */
        System.out.println("Error Message:    " + serviceError.getMessage());
        System.out.println("HTTP Status Code: " + serviceError.getStatusCode());
        System.out.println("AWS Error Code:   " + serviceError.getErrorCode());
        System.out.println("Error Type:       " + serviceError.getErrorType());
        System.out.println("Request ID:       " + serviceError.getRequestId());
    } catch (AmazonClientException clientError) {
        /*
         * The failure happened inside the client on the local host, either
         * while sending the request or while interpreting the response
         * (for example, when no network connection is available).
         */
        System.out.println("Error Message: " + clientError.getMessage());
    }
}