com.amazonaws.services.s3.model.SelectObjectContentRequest Java Examples

The following examples show how to use com.amazonaws.services.s3.model.SelectObjectContentRequest. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: S3QueryUtil.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an S3 Select request that evaluates {@code query} as SQL against the
 * object {@code key} in {@code bucket}. The input is declared as CSV with
 * compression type {@code NONE}; the output is requested as CSV.
 */
private static SelectObjectContentRequest generateBaseCSVRequest(String bucket, String key, String query) {
	// Default CSV settings on the input side, explicitly marked as not compressed.
	InputSerialization csvIn = new InputSerialization()
		.withCsv(new CSVInput())
		.withCompressionType(CompressionType.NONE);

	// Default CSV settings on the output side.
	OutputSerialization csvOut = new OutputSerialization().withCsv(new CSVOutput());

	return new SelectObjectContentRequest()
		.withBucketName(bucket)
		.withKey(key)
		.withExpression(query)
		.withExpressionType(ExpressionType.SQL)
		.withInputSerialization(csvIn)
		.withOutputSerialization(csvOut);
}
 
Example #2
Source File: S3QueryUtil.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the base S3 Select request: SQL expression {@code query} over the
 * object identified by {@code bucket} and {@code key}, with CSV input
 * (compression type {@code NONE}) and CSV output.
 */
private static SelectObjectContentRequest generateBaseCSVRequest(String bucket, String key, String query) {
	// How the stored object should be read.
	final InputSerialization source = new InputSerialization();
	source.setCsv(new CSVInput());
	source.setCompressionType(CompressionType.NONE);

	// How the selected records should be written back.
	final OutputSerialization sink = new OutputSerialization();
	sink.setCsv(new CSVOutput());

	final SelectObjectContentRequest selectRequest = new SelectObjectContentRequest();
	selectRequest.setBucketName(bucket);
	selectRequest.setKey(key);
	selectRequest.setExpression(query);
	selectRequest.setExpressionType(ExpressionType.SQL);
	selectRequest.setInputSerialization(source);
	selectRequest.setOutputSerialization(sink);
	return selectRequest;
}
 
Example #3
Source File: PrestoS3SelectClient.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Issues the given S3 Select request and returns the stream of record bytes.
 * The request and its result are stored on this instance, and
 * {@code requestComplete} is set to {@code true} once the End event is visited.
 */
public InputStream getRecordsContent(SelectObjectContentRequest selectObjectRequest)
{
    this.selectObjectRequest = requireNonNull(selectObjectRequest, "selectObjectRequest is null");
    this.selectObjectContentResult = s3Client.selectObjectContent(selectObjectRequest);

    // Records when the End event has been seen on the event stream.
    SelectObjectContentEventVisitor endEventListener = new SelectObjectContentEventVisitor()
    {
        @Override
        public void visit(EndEvent endEvent)
        {
            requestComplete = true;
        }
    };

    InputStream records = selectObjectContentResult.getPayload().getRecordsInputStream(endEventListener);
    return records;
}
 
Example #4
Source File: S3SelectCsvRecordReader.java    From presto with Apache License 2.0 5 votes vote down vote up
@Override
public SelectObjectContentRequest buildSelectObjectRequest(Properties schema, String query, Path path)
{
    // Target object and SQL expression.
    URI location = path.toUri();
    SelectObjectContentRequest request = new SelectObjectContentRequest()
            .withBucketName(PrestoS3FileSystem.extractBucketName(location))
            .withKey(PrestoS3FileSystem.keyFromPath(path))
            .withExpression(query)
            .withExpressionType(ExpressionType.SQL);

    // CSV dialect taken from the table schema; quote and escape may be null.
    String delimiter = getFieldDelimiter(schema);
    String quote = schema.getProperty(QUOTE_CHAR, null);
    String escape = schema.getProperty(ESCAPE_CHAR, null);

    // Input side: same dialect plus the comment marker and the file's compression type.
    CSVInput csvInput = new CSVInput()
            .withRecordDelimiter(lineDelimiter)
            .withFieldDelimiter(delimiter)
            .withComments(COMMENTS_CHAR_STR)
            .withQuoteCharacter(quote)
            .withQuoteEscapeCharacter(escape);
    request.setInputSerialization(new InputSerialization()
            .withCompressionType(getCompressionType(path))
            .withCsv(csvInput));

    // Output side: the input dialect is mirrored, without a comment marker.
    CSVOutput csvOutput = new CSVOutput()
            .withRecordDelimiter(lineDelimiter)
            .withFieldDelimiter(delimiter)
            .withQuoteCharacter(quote)
            .withQuoteEscapeCharacter(escape);
    request.setOutputSerialization(new OutputSerialization().withCsv(csvOutput));

    return request;
}
 
Example #5
Source File: S3QueryUtil.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Run SQL query over non-compressed CSV file saved in s3 object.
 *
 * <p>The selected records are read fully into memory, decoded as UTF-8 and trimmed.
 *
 * @param s3client client used to issue the S3 Select request
 * @param bucket bucket holding the object
 * @param s3file key of the object to query
 * @param query SQL expression to evaluate
 * @return the trimmed query output
 * @throws RuntimeException if the request or the copy fails, or if the End event
 *     was not received (the result may then be incomplete)
 */
static String queryFile(
		AmazonS3 s3client, String bucket, String s3file, @SuppressWarnings("SameParameterValue") String query) {
	SelectObjectContentRequest request = generateBaseCSVRequest(bucket, s3file, query);
	final AtomicBoolean isResultComplete = new AtomicBoolean(false);
	String res;
	try (SelectObjectContentResult result = s3client.selectObjectContent(request);
		SelectObjectContentEventStream payload = result.getPayload();
		ByteArrayOutputStream out = new ByteArrayOutputStream()) {
		InputStream resultInputStream = payload.getRecordsInputStream(
			new SelectObjectContentEventVisitor() {
				@Override
				public void visit(SelectObjectContentEvent.EndEvent event) {
					isResultComplete.set(true);
				}
			}
		);
		copy(resultInputStream, out);
		// S3 Select emits UTF-8; decode explicitly instead of relying on the
		// JVM's platform default charset, which would garble non-ASCII data.
		res = new String(out.toByteArray(), java.nio.charset.StandardCharsets.UTF_8).trim();
	} catch (Throwable e) {
		System.out.println("SQL query failure");
		throw new RuntimeException("SQL query failure", e);
	}
	/*
	 * The End Event indicates all matching records have been transmitted.
	 * If the End Event is not received, the results may be incomplete.
	 */
	if (!isResultComplete.get()) {
		throw new RuntimeException("S3 Select request was incomplete as End Event was not received.");
	}
	return res;
}
 
Example #6
Source File: S3QueryUtil.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Run SQL query over non-compressed CSV file saved in s3 object.
 *
 * <p>The whole result is buffered in memory and returned as a trimmed string,
 * decoded as UTF-8.
 *
 * @param s3client client used to issue the S3 Select request
 * @param bucket bucket holding the object
 * @param s3file key of the object to query
 * @param query SQL expression to evaluate
 * @return the trimmed query output
 * @throws RuntimeException if anything fails while querying or copying, or if the
 *     End event never arrived (in which case the output may be truncated)
 */
static String queryFile(
		AmazonS3 s3client, String bucket, String s3file, @SuppressWarnings("SameParameterValue") String query) {
	SelectObjectContentRequest request = generateBaseCSVRequest(bucket, s3file, query);
	final AtomicBoolean isResultComplete = new AtomicBoolean(false);
	String res;
	try (SelectObjectContentResult result = s3client.selectObjectContent(request);
		SelectObjectContentEventStream payload = result.getPayload();
		ByteArrayOutputStream out = new ByteArrayOutputStream()) {
		InputStream resultInputStream = payload.getRecordsInputStream(
			new SelectObjectContentEventVisitor() {
				@Override
				public void visit(SelectObjectContentEvent.EndEvent event) {
					isResultComplete.set(true);
				}
			}
		);
		copy(resultInputStream, out);
		// Decode with an explicit charset: S3 Select output is UTF-8, whereas
		// ByteArrayOutputStream.toString() would use the platform default.
		res = new String(out.toByteArray(), java.nio.charset.StandardCharsets.UTF_8).trim();
	} catch (Throwable e) {
		System.out.println("SQL query failure");
		throw new RuntimeException("SQL query failure", e);
	}
	/*
	 * The End Event indicates all matching records have been transmitted.
	 * If the End Event is not received, the results may be incomplete.
	 */
	if (!isResultComplete.get()) {
		throw new RuntimeException("S3 Select request was incomplete as End Event was not received.");
	}
	return res;
}
 
Example #7
Source File: S3SelectAccessor.java    From pxf with Apache License 2.0 5 votes vote down vote up
/**
 * Generates the {@link SelectObjectContentRequest} object from
 * the request context.
 *
 * <p>The bucket is taken from the host part of {@code name} and the key from its
 * path with a leading {@code "/"} removed. If building the select query fails
 * with a {@link SQLException}, the failure is logged (including its cause) and
 * the request is still returned, with a {@code null} expression.
 *
 * @param context the request context
 * @return a {@link SelectObjectContentRequest}
 */
SelectObjectContentRequest generateBaseCSVRequest(RequestContext context) {

    InputSerialization inputSerialization = getInputSerialization(context);

    // For CSV input, columns are identified by position unless the
    // FILE_HEADER_INFO option is set to FILE_HEADER_INFO_USE (case-insensitive).
    String fileHeaderInfo = context.getOption(FILE_HEADER_INFO);
    boolean usePositionToIdentifyColumn = inputSerialization.getCsv() != null &&
            (StringUtils.isBlank(fileHeaderInfo) ||
                    !StringUtils.equalsIgnoreCase(FILE_HEADER_INFO_USE, fileHeaderInfo));
    String query = null;
    try {
        S3SelectQueryBuilder queryBuilder = new S3SelectQueryBuilder(context, usePositionToIdentifyColumn);
        query = queryBuilder.buildSelectQuery();
    } catch (SQLException e) {
        // Pass the exception as the trailing argument so the cause and stack
        // trace are logged instead of being dropped.
        LOG.error("Unable to build select query for filter string {}", context.getFilterString(), e);
    }

    LOG.trace("Select query: {}", query);

    SelectObjectContentRequest request = new SelectObjectContentRequest();
    request.setBucketName(name.getHost());
    request.setKey(StringUtils.removeStart(name.getPath(), "/"));
    request.setExpression(query);
    request.setExpressionType(ExpressionType.SQL);

    LOG.debug("With bucket name '{}'", request.getBucketName());
    LOG.debug("With key '{}'", request.getKey());
    LOG.debug("With expression query '{}'", query);

    request.setInputSerialization(inputSerialization);

    OutputSerialization outputSerialization = getOutputSerialization(context);
    request.setOutputSerialization(outputSerialization);

    return request;
}
 
Example #8
Source File: S3SelectAccessorTest.java    From pxf with Apache License 2.0 5 votes vote down vote up
/** A data source with bucket and path yields that bucket and the path without a leading slash as key. */
@Test
public void testCorrectlyParsesDataSource() {
    final String expectedBucket = "my-bucket";
    final String expectedKey = "my/s3/path/";

    RequestContext requestContext = getDefaultRequestContext();
    requestContext.setConfig("default");
    requestContext.setUser("test-user");
    requestContext.setDataSource("s3a://my-bucket/my/s3/path/");

    S3SelectAccessor s3SelectAccessor = new S3SelectAccessor();
    s3SelectAccessor.initialize(requestContext);
    SelectObjectContentRequest selectRequest = s3SelectAccessor.generateBaseCSVRequest(requestContext);

    assertEquals(expectedBucket, selectRequest.getBucketName());
    assertEquals(expectedKey, selectRequest.getKey());
}
 
Example #9
Source File: S3SelectAccessorTest.java    From pxf with Apache License 2.0 5 votes vote down vote up
/** A data source consisting of only a bucket yields that bucket and an empty key. */
@Test
public void testCorrectlyParsesDataSourceWithNoKey() {
    final String expectedBucket = "my-bucket";
    final String expectedKey = "";

    RequestContext requestContext = getDefaultRequestContext();
    requestContext.setConfig("default");
    requestContext.setUser("test-user");
    requestContext.setDataSource("s3a://my-bucket");

    S3SelectAccessor s3SelectAccessor = new S3SelectAccessor();
    s3SelectAccessor.initialize(requestContext);
    SelectObjectContentRequest selectRequest = s3SelectAccessor.generateBaseCSVRequest(requestContext);

    assertEquals(expectedBucket, selectRequest.getBucketName());
    assertEquals(expectedKey, selectRequest.getKey());
}
 
Example #10
Source File: S3SelectLineRecordReader.java    From presto with Apache License 2.0 votes vote down vote up
/**
 * Returns the {@link SelectObjectContentRequest} for the given inputs; implemented by subclasses.
 *
 * <p>NOTE(review): presumably {@code path} locates the S3 object to query and
 * {@code schema} carries the table's serialization properties — confirm against
 * the concrete implementations.
 *
 * @param schema properties passed through to the implementation
 * @param query expression passed through to the implementation
 * @param path path passed through to the implementation
 * @return the request produced by the implementation
 */
public abstract SelectObjectContentRequest buildSelectObjectRequest(Properties schema, String query, Path path);