Python boto.connect_s3() Examples
The following are 29 code examples of boto.connect_s3(). You can go to the original project or source file by following the links above each example.
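Before diving into the examples, here is a minimal sketch of the boto 2 S3 calls they all build on. The bucket and key names are placeholders for illustration, not values taken from any of the projects below.

import boto

# With no arguments, boto 2 resolves credentials from the environment
# (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY), a ~/.boto config file,
# or an instance IAM role.
conn = boto.connect_s3()

# 'example-bucket' and 'notes/hello.txt' are placeholder names.
bucket = conn.get_bucket('example-bucket')
key = bucket.new_key('notes/hello.txt')
key.set_contents_from_string('hello from boto 2')
print(key.get_contents_as_string())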
Example #1
Source File: test_s3StorageDriver.py From fileflow with Apache License 2.0 | 6 votes |
def setUp(self):
    """
    Set up a mock S3 connection, bucket, and key, using moto.
    """
    self.bucket_name = 's3storagesdrivertest'
    conn = boto.connect_s3()
    # We need to create the bucket since this is all in Moto's 'virtual' AWS account
    conn.create_bucket(self.bucket_name)
    self.bucket = conn.get_bucket(self.bucket_name)

    key = self.bucket.new_key('the_dag/the_task/1983-09-05')
    data = 'this is a test.'
    key.set_metadata('Content-Type', 'text/plain')
    key.set_contents_from_string(data)
    key.set_acl('private')

    self.driver = S3StorageDriver('', '', self.bucket_name)
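The setUp above works because moto patches boto's S3 layer to run against an in-memory backend, so the bucket and key never touch a real AWS account. As a point of reference, a minimal standalone sketch of that pattern (assuming a moto version that still supports boto 2, and a hypothetical test function name) might look like this:

import boto
from moto import mock_s3

@mock_s3
def test_s3_roundtrip():
    # Everything below is served by moto's in-memory 'virtual' AWS account;
    # no real credentials or buckets are involved.
    conn = boto.connect_s3()
    bucket = conn.create_bucket('example-bucket')  # placeholder bucket name
    key = bucket.new_key('example.txt')
    key.set_contents_from_string('hello')
    data = bucket.get_key('example.txt').get_contents_as_string()
    assert data in ('hello', b'hello')  # str on Python 2, bytes on Python 3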
Example #2
Source File: test_deeds.py From cccatalog with MIT License | 6 votes |
def get_records(id_, iterator):
    conn = boto.connect_s3(anon=True, host='s3.amazonaws.com')
    bucket = conn.get_bucket('commoncrawl')
    for uri in iterator:
        key_ = Key(bucket, uri)
        _file = warc.WARCFile(fileobj=GzipStreamFile(key_))
        for record in _file:
            if record['Content-Type'] == 'application/json':
                record = json.loads(record.payload.read())
                try:
                    def cc_filter(x):
                        return "creativecommons.org" in x['url']

                    cc_links = filter(cc_filter, list(record['Envelope']['Payload-Metadata']['HTTP-Response-Metadata']['HTML-Metadata']['Links']))
                    if len(cc_links) > 0:
                        yield record
                except KeyError:
                    pass
Example #3
Source File: storage_client.py From Hunch with Apache License 2.0 | 6 votes |
def __get_from_s3(self, key):
    try:
        conn = boto.connect_s3(aws_access_key_id=self.__storage_client_config["access_key"],
                               aws_secret_access_key=self.__storage_client_config["secret_key"],
                               host=self.__storage_client_config["endpoint"],
                               port=80,
                               is_secure=False,
                               calling_format=boto.s3.connection.OrdinaryCallingFormat())
        bucket = conn.get_bucket(self.__storage_client_config["bucket"])
        store_key = bucket.get_key(key)
        result = store_key.get_contents_as_string()
        conn.close()
        return result
    except AWSConnectionError as e:
        raise AWSConnectionError("Unable to connect to AWS")
    except Exception as e:
        raise Exception("Exception occured" + str(e))
Example #4
Source File: archive_mysql_binlogs.py From mysql_utils with GNU General Public License v2.0 | 6 votes |
def already_uploaded(instance, binlog, logged_uploads):
    """ Check to see if a binlog has already been uploaded

    Args:
        instance - a hostAddr object
        binlog - the full path to the binlog file
        logged_uploads - a set of all uploaded binlogs for this instance

    Returns True if already uploaded, False otherwise.
    """
    if os.path.basename(binlog) in logged_uploads:
        log.debug('Binlog already logged as uploaded')
        return True

    # we should hit this code rarely, only when uploads have not been logged
    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(environment_specific.BACKUP_BUCKET_UPLOAD_MAP[host_utils.get_iam_role()],
                                  validate=False)
    if bucket.get_key(s3_binlog_path(instance, os.path.basename((binlog)))):
        log.debug("Binlog already uploaded but not logged {b}".format(b=binlog))
        log_binlog_upload(instance, binlog)
        return True

    return False
Example #5
Source File: mysql_backup_csv.py From mysql_utils with GNU General Public License v2.0 | 6 votes |
def already_backed_up(self, table_tuple):
    """ Check to see if a particular partition has already been uploaded to s3

    Args:
        table_tuple - (table, partition name, part number)

    Returns:
        bool - True if the partition has already been backed up,
               False otherwise
    """
    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(self.upload_bucket, validate=False)
    (_, data_path, _) = backup.get_csv_backup_paths(self.instance,
                                                    *table_tuple[0].split('.'),
                                                    date=self.datestamp,
                                                    partition_number=table_tuple[2])
    if not bucket.get_key(data_path):
        return False
    return True
Example #6
Source File: mysql_backup_csv.py From mysql_utils with GNU General Public License v2.0 | 6 votes |
def upload_schema(self, db, table, tmp_dir_db):
    """ Upload the schema of a table to s3

    Args:
        db - the db to be backed up
        table - the table to be backed up
        tmp_dir_db - temporary storage used for all tables in the db
    """
    (schema_path, _, _) = backup.get_csv_backup_paths(
        self.instance, db, table, self.datestamp)
    create_stm = mysql_lib.show_create_table(self.instance, db, table)
    log.debug('{proc_id}: Uploading schema to {schema_path}'
              ''.format(schema_path=schema_path,
                        proc_id=multiprocessing.current_process().name))
    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(self.upload_bucket, validate=False)
    key = bucket.new_key(schema_path)
    key.set_contents_from_string(create_stm)
Example #7
Source File: mysql_backup_csv.py From mysql_utils with GNU General Public License v2.0 | 6 votes |
def upload_pitr_data(self, db, tbl, pitr_data):
    """ Upload a file of PITR data to s3 for each table

    Args:
        db - the db that was backed up.
        tbl - the table that was backed up.
        pitr_data - a dict of various data that might be helpful for running a PITR
    """
    zk = host_utils.MysqlZookeeper()
    replica_set = zk.get_replica_set_from_instance(self.instance)
    s3_path = PATH_PITR_DATA.format(replica_set=replica_set,
                                    date=self.datestamp,
                                    db_name=db,
                                    table=tbl)
    log.debug('{proc_id}: {db}.{tbl} Uploading pitr data to {s3_path}'
              ''.format(s3_path=s3_path,
                        proc_id=multiprocessing.current_process().name,
                        db=db,
                        tbl=tbl))
    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(self.upload_bucket, validate=False)
    key = bucket.new_key(s3_path)
    key.set_contents_from_string(json.dumps(pitr_data))
Example #8
Source File: rnaseq_unc_tcga_versions.py From toil-scripts with Apache License 2.0 | 6 votes |
def upload_output_to_s3(job, job_vars):
    """
    If s3_dir is specified in arguments, file will be uploaded to S3 using boto.
    WARNING: ~/.boto credentials are necessary for this to succeed!

    job_vars: tuple     Tuple of dictionaries: input_args and ids
    """
    import boto
    from boto.s3.key import Key

    input_args, ids = job_vars
    work_dir = job.fileStore.getLocalTempDir()
    uuid = input_args['uuid']
    # Parse s3_dir
    s3_dir = input_args['s3_dir']
    bucket_name = s3_dir.split('/')[0]
    bucket_dir = '/'.join(s3_dir.split('/')[1:])
    # I/O
    uuid_tar = return_input_paths(job, work_dir, ids, 'uuid.tar.gz')
    # Upload to S3 via boto
    conn = boto.connect_s3()
    bucket = conn.get_bucket(bucket_name)
    k = Key(bucket)
    k.key = os.path.join(bucket_dir, uuid + '.tar.gz')
    k.set_contents_from_filename(uuid_tar)
Example #9
Source File: downloader_aws.py From dmipy with MIT License | 6 votes |
def __init__(self, your_aws_public_key, your_aws_secret_key):
    self.public_key = your_aws_public_key
    self.secret_key = your_aws_secret_key

    s3 = boto.connect_s3(
        aws_access_key_id=self.public_key,
        aws_secret_access_key=self.secret_key,
    )
    for key in s3.get_all_buckets():
        if key.name == 'hcp-openaccess':
            self.s3_bucket = key
            break

    self.hcp_directory = os.path.join(DATA_PATH, 'hcp')
    if not os.path.exists(self.hcp_directory):
        os.makedirs(self.hcp_directory)
Example #10
Source File: test_aws.py From flocker with Apache License 2.0 | 6 votes |
def test_upload_content_type(self):
    """
    A content type can be set for an uploaded file.
    """
    filename = random_name(self)
    tmpdir = FilePath(self.mktemp())
    tmpdir.makedirs()
    tmpfile = tmpdir.child(filename)
    tmpfile.setContent('foo')

    s3 = boto.connect_s3()
    bucket = s3.get_bucket(bucket_name)
    self.addCleanup(bucket.delete_key, filename)

    sync_perform(
        dispatcher=ComposedDispatcher([boto_dispatcher, base_dispatcher]),
        effect=Effect(UploadToS3(
            source_path=tmpdir,
            target_bucket=bucket_name,
            target_key=filename,
            file=tmpfile,
            content_type='application/json',
        ))
    )

    key = bucket.get_key(filename)
    self.assertEqual('application/json', key.content_type)
Example #11
Source File: aws.py From flocker with Apache License 2.0 | 6 votes |
def perform_copy_s3_keys(dispatcher, intent):
    """
    See :class:`CopyS3Keys`.
    """
    s3 = boto.connect_s3()
    source_bucket = s3.get_bucket(intent.source_bucket)
    for key in intent.keys:
        source_key = source_bucket.get_key(intent.source_prefix + key)

        # We are explicit about Content-Type here, since the upload tool
        # isn't smart enough to set the right Content-Type.
        destination_metadata = source_key.metadata
        for extention, content_type in EXTENSION_MIME_TYPES.items():
            if key.endswith(extention):
                destination_metadata['Content-Type'] = content_type
                break

        source_key.copy(
            dst_bucket=intent.destination_bucket,
            dst_key=intent.destination_prefix + key,
            metadata=destination_metadata,
        )
Example #12
Source File: PyObjectWalker_test.py From ufora with Apache License 2.0 | 6 votes |
def test_PyObjectWalker_boto_connection_2(self):
    import boto

    bucketName = 'ufora-test-data'

    conn = boto.connect_s3()
    bucket = conn.get_bucket(bucketName)
    key = bucket.get_key("trip_data_1.csv")
    res = key.md5

    walker = PyObjectWalker(
        self.mappings,
        BinaryObjectRegistry()
    )

    walker.walkPyObject(boto)
    walker.walkPyObject(conn)
    walker.walkPyObject(bucket)
    walker.walkPyObject(key)
    walker.walkPyObject(res)
Example #13
Source File: ActualS3Interface.py From ufora with Apache License 2.0 | 6 votes |
def connectS3(self):
    if not boto.config.has_section('Boto'):
        boto.config.add_section('Boto')

    # override the default super-long timeout in boto.
    # boto automatically retries timed out requests so it's best to keep a
    # short timeout because S3 can sometimes (about 1 in 10 requests) stall
    # for a long time.
    boto.config.set('Boto', 'http_socket_timeout', '5')
    boto.config.set('Boto', 'metadata_service_num_attempts', '10')

    az = os.getenv('AWS_AVAILABILITY_ZONE')

    boto_args = {}
    if self.credentials_ != ('', ''):
        boto_args = {
            'aws_access_key_id': self.credentials_[0],
            'aws_secret_access_key': self.credentials_[1]
        }

    if az:
        return boto.s3.connect_to_region(az[:-1], **boto_args)
    else:
        return boto.connect_s3(**boto_args)
Example #14
Source File: zipnumclusterjob.py From webarchive-indexing with MIT License | 6 votes |
def _do_upload(self):
    self.gzip_temp.flush()

    #TODO: move to generalized put() function
    if self.output_dir.startswith('s3://') or self.output_dir.startswith('s3a://'):
        import boto
        conn = boto.connect_s3()

        parts = urlparse.urlsplit(self.output_dir)

        bucket = conn.lookup(parts.netloc)

        cdxkey = bucket.new_key(parts.path + '/' + self.part_name)
        cdxkey.set_contents_from_file(self.gzip_temp, rewind=True)
    else:
        path = os.path.join(self.output_dir, self.part_name)

        self.gzip_temp.seek(0)

        with open(path, 'w+b') as target:
            shutil.copyfileobj(self.gzip_temp, target)

    self.gzip_temp.close()
Example #15
Source File: indexwarcsjob.py From webarchive-indexing with MIT License | 6 votes |
def mapper_init(self):
    # Note: this assumes that credentials are set via
    # AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY env variables
    self.conn = boto.connect_s3()

    self.warc_bucket = self.conn.lookup(self.options.warc_bucket)
    assert(self.warc_bucket)

    self.cdx_bucket = self.conn.lookup(self.options.cdx_bucket)
    assert(self.cdx_bucket)

    self.index_options = {
        'surt_ordered': True,
        'sort': True,
        'cdxj': True,
        #'minimal': True
    }
Example #16
Source File: test.py From learn_python3_spider with MIT License | 6 votes |
def get_s3_content_and_delete(bucket, path, with_key=False):
    """ Get content from s3 key, and delete key afterwards.
    """
    if is_botocore():
        import botocore.session
        session = botocore.session.get_session()
        client = session.create_client('s3')
        key = client.get_object(Bucket=bucket, Key=path)
        content = key['Body'].read()
        client.delete_object(Bucket=bucket, Key=path)
    else:
        import boto
        # assuming boto=2.2.2
        bucket = boto.connect_s3().get_bucket(bucket, validate=False)
        key = bucket.get_key(path)
        content = key.get_contents_as_string()
        bucket.delete_key(path)
    return (content, key) if with_key else content
Example #17
Source File: test_s3.py From drf-to-s3 with MIT License | 6 votes |
def setUp(self):
    import boto
    from boto.exception import NoAuthHandlerFound
    from boto.s3.key import Key

    keys = ['AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY']
    try:
        for k in keys:
            os.environ[k]
        self.bucket_name = os.environ.get('AWS_TEST_BUCKET', 'drf-to-s3-test')
    except KeyError:
        self.skipTest('To test s3, set %s in .env' % ' and '.join(keys))

    conn = boto.connect_s3()
    bucket = conn.get_bucket(self.bucket_name)
    k = Key(bucket)
    k.key = "%s%s.txt" % (str(uuid.uuid4()), self.prefix)
    k.set_contents_from_string('This is a test of S3')

    self.existing_key = k.key
    self.existing_key_etag = k.etag
    self.bucket = bucket

    self.nonexisting_key = "%s%s.txt" % (str(uuid.uuid4()), self.prefix)
    self.new_key = None
Example #18
Source File: test_exportactionlogsworker.py From quay with Apache License 2.0 | 6 votes |
def storage_engine(request):
    if request.param == "test":
        yield test_storage
    else:
        with mock_s3():
            # Create a test bucket and put some test content.
            boto.connect_s3().create_bucket(_TEST_BUCKET)

            engine = DistributedStorage(
                {
                    "foo": S3Storage(
                        _TEST_CONTEXT, "some/path", _TEST_BUCKET, _TEST_USER, _TEST_PASSWORD
                    )
                },
                ["foo"],
            )
            yield engine
Example #19
Source File: test_cloudfront.py From quay with Apache License 2.0 | 6 votes |
def test_direct_download_no_ip(test_aws_ip, aws_ip_range_data, ipranges_populated, app):
    ipresolver = IPResolver(app)
    context = StorageContext("nyc", None, config_provider, ipresolver)

    # Create a test bucket and put some test content.
    boto.connect_s3().create_bucket(_TEST_BUCKET)

    engine = CloudFrontedS3Storage(
        context,
        "cloudfrontdomain",
        "keyid",
        "test/data/test.pem",
        "some/path",
        _TEST_BUCKET,
        _TEST_USER,
        _TEST_PASSWORD,
    )
    engine.put_content(_TEST_PATH, _TEST_CONTENT)
    assert engine.exists(_TEST_PATH)

    assert "s3.amazonaws.com" in engine.get_direct_download_url(_TEST_PATH)
Example #20
Source File: copybot.py From aws-extender with MIT License | 6 votes |
def __init__(self):
    super(CopyBot, self).__init__()
    self.wdir = boto.config.get('Pyami', 'working_dir')
    self.log_file = '%s.log' % self.instance_id
    self.log_path = os.path.join(self.wdir, self.log_file)
    boto.set_file_logger(self.name, self.log_path)
    self.src_name = boto.config.get(self.name, 'src_bucket')
    self.dst_name = boto.config.get(self.name, 'dst_bucket')
    self.replace = boto.config.getbool(self.name, 'replace_dst', True)
    s3 = boto.connect_s3()
    self.src = s3.lookup(self.src_name)
    if not self.src:
        boto.log.error('Source bucket does not exist: %s' % self.src_name)
    dest_access_key = boto.config.get(self.name, 'dest_aws_access_key_id', None)
    if dest_access_key:
        dest_secret_key = boto.config.get(self.name, 'dest_aws_secret_access_key', None)
        s3 = boto.connect(dest_access_key, dest_secret_key)
    self.dst = s3.lookup(self.dst_name)
    if not self.dst:
        self.dst = s3.create_bucket(self.dst_name)
Example #21
Source File: bigmessage.py From aws-extender with MIT License | 6 votes |
def encode(self, value):
    """
    :type value: file-like object
    :param value: A file-like object containing the content of the message.
        The actual content will be stored in S3 and a link to the S3 object
        will be stored in the message body.
    """
    bucket_name, key_name = self._get_bucket_key(self.s3_url)
    if bucket_name and key_name:
        return self.s3_url
    key_name = uuid.uuid4()
    s3_conn = boto.connect_s3()
    s3_bucket = s3_conn.get_bucket(bucket_name)
    key = s3_bucket.new_key(key_name)
    key.set_contents_from_file(value)
    self.s3_url = 's3://%s/%s' % (bucket_name, key_name)
    return self.s3_url
Example #22
Source File: test_bypass_azure_s3.py From exporters with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_bypass(self):
    # given:
    opts = create_s3_azure_file_bypass_simple_opts()

    # when:
    with moto.mock_s3(), mock.patch('azure.storage.file.FileService') as azure:
        s3_conn = boto.connect_s3()
        bucket = s3_conn.create_bucket(opts['reader']['options']['bucket'])
        keys = ['some_prefix/{}'.format(k) for k in ['some', 'keys', 'here']]
        create_s3_keys(bucket, keys)

        exporter = BasicExporter(opts)
        exporter.export()

    # then:
    self.assertEquals(exporter.writer.get_metadata('items_count'), 0,
                      "No items should be read")
    self.assertEquals(exporter.reader.get_metadata('read_items'), 0,
                      "No items should get written")
    azure_puts = [
        call for call in azure.mock_calls
        if call[0] == '().copy_file'
    ]
    self.assertEquals(len(azure_puts), len(keys),
                      "all keys should be put into Azure files")
Example #23
Source File: test_bypass_azure_s3.py From exporters with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_bypass(self):
    # given:
    opts = create_s3_azure_blob_bypass_simple_opts()

    # when:
    with moto.mock_s3(), mock.patch('azure.storage.blob.BlockBlobService') as azure:
        s3_conn = boto.connect_s3()
        bucket = s3_conn.create_bucket(opts['reader']['options']['bucket'])
        keys = ['some_prefix/{}'.format(k) for k in ['this', 'test', 'has', 'keys']]
        create_s3_keys(bucket, keys)

        exporter = BasicExporter(opts)
        exporter.export()

    # then:
    self.assertEquals(exporter.writer.get_metadata('items_count'), 0,
                      "No items should be read")
    self.assertEquals(exporter.reader.get_metadata('read_items'), 0,
                      "No items should get written")
    azure_puts = [
        call for call in azure.mock_calls
        if call[0] == '().copy_blob'
    ]
    self.assertEquals(len(azure_puts), len(keys),
                      "all keys should be put into Azure blobs")
Example #24
Source File: test_readers_s3.py From exporters with BSD 3-Clause "New" or "Revised" License | 6 votes |
def setUp(self):
    self.mock_s3 = moto.mock_s3()
    self.mock_s3.start()
    self.s3_conn = boto.connect_s3()
    self.s3_conn.create_bucket('last_bucket')
    bucket = self.s3_conn.get_bucket('last_bucket')
    key = bucket.new_key('test_list/LAST')
    self.pointers = ['pointer1', 'pointer2', 'pointer3', '']
    key.set_contents_from_string('\r\n'.join(self.pointers))
    key.close()

    for key_name in POINTER_KEYS:
        key = bucket.new_key(key_name)
        out = StringIO.StringIO()
        with gzip.GzipFile(fileobj=out, mode='w') as f:
            f.write(json.dumps({'name': key_name}))
        key.set_contents_from_string(out.getvalue())
        key.close()

    self.options_prefix_pointer = {
        'bucket': 'last_bucket',
        'aws_access_key_id': 'KEY',
        'aws_secret_access_key': 'SECRET',
        'prefix_pointer': 'test_list/LAST'
    }
Example #25
Source File: test_writers_s3.py From exporters with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_write_pointer(self):
    # given:
    conn = boto.connect_s3()
    conn.create_bucket('pointer_fake_bucket')

    options = self.get_writer_config()
    options['options']['save_pointer'] = 'pointer/LAST'
    options['options']['bucket'] = 'pointer_fake_bucket'

    items_to_write = self.get_batch()

    # when:
    try:
        writer = S3Writer(options, meta())
        writer.write_batch(items_to_write)
        writer.flush()
    finally:
        writer.close()

    # then:
    bucket = self.s3_conn.get_bucket('pointer_fake_bucket')
    saved_keys = [k for k in bucket.list('pointer/')]
    self.assertEquals(1, len(saved_keys))
    key = saved_keys[0]
    self.assertEqual('tests/', key.get_contents_as_string())
Example #26
Source File: test_bypass_s3.py From exporters with BSD 3-Clause "New" or "Revised" License | 6 votes |
def setUp(self):
    self.mock_s3 = moto.mock_s3()
    self.mock_s3.start()
    self.s3_conn = boto.connect_s3()
    self.s3_conn.create_bucket('source_bucket')
    self.source_bucket = self.s3_conn.get_bucket('source_bucket')
    self.data = [
        {'name': 'Roberto', 'birthday': '12/05/1987'},
        {'name': 'Claudia', 'birthday': '21/12/1985'},
    ]
    with closing(self.source_bucket.new_key('some_prefix/test_key')) as key:
        with TmpFile() as tmp_filename:
            with open(tmp_filename, 'w') as f:
                f.write(json.dumps(self.data))
            with open(tmp_filename) as f:
                self.key_md5 = compute_md5(f)
        key.metadata = {'total': 2, 'md5': self.key_md5}
        key.set_contents_from_string(json.dumps(self.data))
    self.tmp_bypass_resume_file = 'tests/data/tmp_s3_bypass_resume_persistence.pickle'
    shutil.copyfile('tests/data/s3_bypass_resume_persistence.pickle',
                    self.tmp_bypass_resume_file)
Example #27
Source File: s3_storage_driver.py From fileflow with Apache License 2.0 | 6 votes |
def __init__(self, access_key_id, secret_access_key, bucket_name):
    """
    Set up the credentials and bucket name.

    :param str access_key_id: AWS credentials.
    :param str secret_access_key: AWS credentials.
    :param str bucket_name: The S3 bucket to use.
    """
    super(S3StorageDriver, self).__init__()

    self.bucket_name = bucket_name

    self.s3 = boto.connect_s3(
        aws_access_key_id=access_key_id,
        aws_secret_access_key=secret_access_key
    )
    self.bucket = self.s3.get_bucket(self.bucket_name)
Example #28
Source File: rnaseq_unc_pipeline.py From toil-scripts with Apache License 2.0 | 6 votes |
def upload_output_to_s3(job, job_vars):
    """
    If s3_dir is specified in arguments, file will be uploaded to S3 using boto.
    WARNING: ~/.boto credentials are necessary for this to succeed!

    job_vars: tuple     Tuple of dictionaries: input_args and ids
    """
    import boto
    from boto.s3.key import Key

    input_args, ids = job_vars
    work_dir = job.fileStore.getLocalTempDir()
    uuid = input_args['uuid']
    # Parse s3_dir
    s3_dir = input_args['s3_dir']
    bucket_name = s3_dir.split('/')[0]
    bucket_dir = '/'.join(s3_dir.split('/')[1:])
    # I/O
    uuid_tar = return_input_paths(job, work_dir, ids, 'uuid.tar.gz')
    # Upload to S3 via boto
    conn = boto.connect_s3()
    bucket = conn.get_bucket(bucket_name)
    k = Key(bucket)
    k.key = os.path.join(bucket_dir, uuid + '.tar.gz')
    k.set_contents_from_filename(uuid_tar)
Example #29
Source File: test_cloud_storage.py From quay with Apache License 2.0 | 5 votes |
def storage_engine():
    with mock_s3():
        # Create a test bucket and put some test content.
        boto.connect_s3().create_bucket(_TEST_BUCKET)

        engine = S3Storage(_TEST_CONTEXT, "some/path", _TEST_BUCKET, _TEST_USER, _TEST_PASSWORD)
        engine.put_content(_TEST_PATH, _TEST_CONTENT)
        yield engine