Zipping s3

`ZippingS3`

Source code in s3_compress/zipping_s3.py

class ZippingS3:
    """
    Download the objects under an S3 prefix into memory, zip them and upload the archive back to the bucket.
    """

    # Default client/resource, created when the class body runs with boto3's
    # own credential resolution; `credentials()` overrides them per instance.
    __s3_client__ = boto3.client('s3')

    __s3_resource__ = boto3.resource('s3')

    # Console used for the progress and status messages printed by the methods.
    console = Console()

    def credentials(
        self,
        ACCESS_KEY: str = None,
        SECRET_KEY: str = None,
        SESSION_TOKEN: str = None,
        url: str = None,
    ) -> None:

        """
        Rebuild the S3 client and resource of this instance with explicit credentials.

        Parameters:
            ACCESS_KEY: AWS_ACCESS_KEY_ID - The access key for your AWS account.
            SECRET_KEY: AWS_SECRET_ACCESS_KEY - The secret key for your AWS account.
            SESSION_TOKEN: AWS_SESSION_TOKEN - The session key for your AWS account. This is only needed when you are using temporary credentials. The AWS_SECURITY_TOKEN environment variable can also be used, but is only supported for backwards compatibility purposes. AWS_SESSION_TOKEN is supported by multiple AWS SDKs besides python.
            url: Endpoint URL forwarded to boto3 as `endpoint_url`; leave it as None to use the default endpoint.
        Returns:
            Return None; the new client and resource are stored on the instance and used by the other methods.
        Examples:
            >>> credentials('ACCESS_KEY', 'SECRET_KEY', 'SESSION_TOKEN')
        """

        # Both objects must be configured identically, so build the keyword
        # arguments once and reuse them.
        connection_kwargs = {
            'aws_access_key_id': ACCESS_KEY,
            'aws_secret_access_key': SECRET_KEY,
            'aws_session_token': SESSION_TOKEN,
            'endpoint_url': url,
        }
        self.__s3_client__ = boto3.client('s3', **connection_kwargs)
        self.__s3_resource__ = boto3.resource('s3', **connection_kwargs)

    def s3_download_in_memory(
        self, bucket_name: str, prefix: str
    ) -> list[tuple[str, io.BytesIO]]:

        """
        Download every object whose key starts with `prefix` into in-memory buffers.

        Parameters:
            bucket_name: The name of the bucket.
            prefix: The prefix is used to find the path/file matches.
        Returns:
            A list of tuples, one per object found. Each tuple holds the file name (the object key with the directory part of `prefix` removed from its start) and an `io.BytesIO` positioned at the beginning of the downloaded content. Nothing is written to disk.
        Examples:
            >>> s3_download_in_memory('bucket_name', 'prefix')
            [
                ('1.jpeg', <_io.BytesIO object at 0x7fb7ec9825c0>),
                ('2.jpeg', <_io.BytesIO object at 0x7fb7ef08d9e0>),
                ('3.jpeg', <_io.BytesIO object at 0x7fb7ec9bff60>),
                ('4.jpeg', <_io.BytesIO object at 0x7fb7ed38fec0>),
                ('5.jpeg', <_io.BytesIO object at 0x7fb7ec983790>),
            ]
        """

        self.console.print('[green]\nStart Download :rocket:')
        # Directory part of the prefix: the text before its last '/'
        # (empty when the prefix contains no '/').
        base_dir = prefix.rpartition('/')[0]
        with Progress() as progress:
            files = []
            bucket = self.__s3_resource__.Bucket(bucket_name)
            # Materialise the listing once: counting the collection and then
            # iterating it again would list the bucket twice.
            objects = list(bucket.objects.filter(Prefix=prefix))
            task = progress.add_task(
                '[green]Downloading...', total=len(objects)
            )

            for obj in objects:
                name = obj.key
                # Strip only the leading directory. A replace-all would also
                # delete later occurrences of the same text inside the key.
                if base_dir and name.startswith(base_dir):
                    name = name[len(base_dir):]
                if name.startswith('/'):
                    name = name[1:]

                byte_io = io.BytesIO()
                self.__s3_resource__.Object(
                    bucket_name, obj.key
                ).download_fileobj(byte_io)
                # Rewind instead of copying the bytes into a second buffer.
                byte_io.seek(0)
                files.append((name, byte_io))
                progress.update(task, advance=1)
        self.console.print('[green]Finish Download :ok_hand:\n')
        return files

    def zipping_in_s3(
        self,
        bucket_name: str,
        prefix: str,
        zip_name: str,
        files: list = None,
        extra_args: dict = None,
    ) -> None:

        """
        Zip files in memory and upload the archive to `bucket_name` next to the given prefix.

        Parameters:
            bucket_name: The name of the bucket.
            prefix: The prefix is used to find the path/file matches. The archive is uploaded under the directory part of the prefix (the text up to its last '/'), or at the bucket root when the prefix has no '/'.
            zip_name: Name of the generated archive, without the `.zip` extension (it is appended automatically).
            files: A list of tuples, where each tuple contains a string (name inside the archive) and an io.BytesIO object (content). When this parameter is given and not empty, s3_download_in_memory() is not executed, so local content can be zipped and sent to S3 without downloading anything.
            extra_args: Optional dict forwarded to boto3's `upload_fileobj` as `ExtraArgs` (for example metadata or storage settings).
        Returns:
            Return None
        Raises:
            FileNotFoundError: If no `files` were given and nothing was found under `prefix`.
            TypeError: If `files` is not an iterable of `(str, io.BytesIO)` tuples.
        Examples:
            >>> zipping_in_s3('bucket_name', 'prefix', 'zip_name')
        """
        if not files:
            files = self.s3_download_in_memory(bucket_name, prefix)
            if not files:
                raise FileNotFoundError(
                    'File or directory is requested but doesn’t exist'
                )

        type_error_message = 'Object has inappropriate type, accepted format (list[tuple[str, io.BytesIO()]])'
        try:
            # Materialise so a one-shot iterable is not exhausted by validation.
            files = list(files)
        except TypeError:
            raise TypeError(type_error_message) from None
        for entry in files:
            if not (
                isinstance(entry, tuple)
                and len(entry) == 2
                and isinstance(entry[0], str)
                and isinstance(entry[1], io.BytesIO)
            ):
                raise TypeError(type_error_message)

        # Keep the directory part of the prefix, including its trailing '/';
        # both pieces are empty when the prefix has no '/'.
        head, separator, _ = prefix.rpartition('/')
        target_dir = head + separator

        self.console.print('[green]Start zip :package:')
        with self.console.status('Initial status ') as status:

            status.update(
                '[green]zipping...',
                spinner='bouncingBall',
                spinner_style='green',
            )
            zip_buffer = io.BytesIO()
            # NOTE(review): allowZip64=False is kept from the original code, so
            # archives that need ZIP64 raise zipfile.LargeZipFile - confirm
            # whether that limit is intended.
            with zipfile.ZipFile(
                zip_buffer,
                'w',
                compression=zipfile.ZIP_DEFLATED,
                allowZip64=False,
            ) as zip_file:
                for file_name, data in files:
                    zip_file.writestr(file_name, data.getvalue())
            zip_buffer.seek(0)

            status.update(
                '[green]uploading...',
                spinner='bouncingBall',
                spinner_style='green',
            )
            self.__s3_client__.upload_fileobj(
                zip_buffer,
                bucket_name,
                f'{target_dir}{zip_name}.zip',
                ExtraArgs=extra_args,
            )
        self.console.print('[green]Finish zip :ok_hand:\n')

        self.console.print('[green]All Rigth :tada::tada::tada:')

`credentials(ACCESS_KEY=None, SECRET_KEY=None, SESSION_TOKEN=None, url=None)`

Parameters:

Name	Type	Description	Default
`ACCESS_KEY`	`str`	AWS_ACCESS_KEY_ID - The access key for your AWS account.	`None`
`SECRET_KEY`	`str`	AWS_SECRET_ACCESS_KEY - The secret key for your AWS account.	`None`
`SESSION_TOKEN`	`str`	AWS_SESSION_TOKEN - The session key for your AWS account. This is only needed when you are using temporary credentials. The AWS_SECURITY_TOKEN environment variable can also be used, but is only supported for backwards compatibility purposes. AWS_SESSION_TOKEN is supported by multiple AWS SDKs besides python.	`None`
`url`	`str`	Endpoint URL forwarded to boto3 as `endpoint_url`; leave it as `None` to use the default endpoint.	`None`

Returns:

Type	Description
`None`	Returns None; the S3 client and resource built with these credentials are stored on the instance and used by the other methods.

Examples:

>>> credentials('ACCESS_KEY', 'SECRET_KEY', 'SESSION_TOKEN')

Source code in s3_compress/zipping_s3.py

def credentials(
    self,
    ACCESS_KEY: str = None,
    SECRET_KEY: str = None,
    SESSION_TOKEN: str = None,
    url: str = None,
) -> None:

    """
    Rebuild the S3 client and resource of this instance with explicit credentials.

    Parameters:
        ACCESS_KEY: AWS_ACCESS_KEY_ID - The access key for your AWS account.
        SECRET_KEY: AWS_SECRET_ACCESS_KEY - The secret key for your AWS account.
        SESSION_TOKEN: AWS_SESSION_TOKEN - The session key for your AWS account. This is only needed when you are using temporary credentials. The AWS_SECURITY_TOKEN environment variable can also be used, but is only supported for backwards compatibility purposes. AWS_SESSION_TOKEN is supported by multiple AWS SDKs besides python.
        url: Endpoint URL forwarded to boto3 as `endpoint_url`; leave it as None to use the default endpoint.
    Returns:
        Return None; the new client and resource are stored on the instance and used by the other methods.
    Examples:
        >>> credentials('ACCESS_KEY', 'SECRET_KEY', 'SESSION_TOKEN')
    """

    # Both objects must be configured identically, so build the keyword
    # arguments once and reuse them.
    connection_kwargs = {
        'aws_access_key_id': ACCESS_KEY,
        'aws_secret_access_key': SECRET_KEY,
        'aws_session_token': SESSION_TOKEN,
        'endpoint_url': url,
    }
    self.__s3_client__ = boto3.client('s3', **connection_kwargs)
    self.__s3_resource__ = boto3.resource('s3', **connection_kwargs)

`s3_download_in_memory(bucket_name, prefix)`

Parameters:

Name	Type	Description	Default
`bucket_name`	`str`	The name of the bucket.	required
`prefix`	`str`	The prefix is used to find the path/file matches.	required

Returns:

Type	Description
`list[tuple[str, io.BytesIO()]]`	A list of tuples, one per object found. Each tuple contains a `string` (the file name) and an `io.BytesIO()` object (the file's binary content). The data is kept in memory at runtime, so no temporary physical files are created and binary data can be manipulated without taking up disk space.

Examples:

>>> s3_download_in_memory('bucket_name', 'prefix')
[
    ('1.jpeg', <_io.BytesIO object at 0x7fb7ec9825c0>),
    ('2.jpeg', <_io.BytesIO object at 0x7fb7ef08d9e0>),
    ('3.jpeg', <_io.BytesIO object at 0x7fb7ec9bff60>),
    ('4.jpeg', <_io.BytesIO object at 0x7fb7ed38fec0>),
    ('5.jpeg', <_io.BytesIO object at 0x7fb7ec983790>),
]

Source code in s3_compress/zipping_s3.py

def s3_download_in_memory(
    self, bucket_name: str, prefix: str
) -> list[tuple[str, io.BytesIO]]:

    """
    Download every object whose key starts with `prefix` into in-memory buffers.

    Parameters:
        bucket_name: The name of the bucket.
        prefix: The prefix is used to find the path/file matches.
    Returns:
        A list of tuples, one per object found. Each tuple holds the file name (the object key with the directory part of `prefix` removed from its start) and an `io.BytesIO` positioned at the beginning of the downloaded content. Nothing is written to disk.
    Examples:
        >>> s3_download_in_memory('bucket_name', 'prefix')
        [
            ('1.jpeg', <_io.BytesIO object at 0x7fb7ec9825c0>),
            ('2.jpeg', <_io.BytesIO object at 0x7fb7ef08d9e0>),
            ('3.jpeg', <_io.BytesIO object at 0x7fb7ec9bff60>),
            ('4.jpeg', <_io.BytesIO object at 0x7fb7ed38fec0>),
            ('5.jpeg', <_io.BytesIO object at 0x7fb7ec983790>),
        ]
    """

    self.console.print('[green]\nStart Download :rocket:')
    # Directory part of the prefix: the text before its last '/'
    # (empty when the prefix contains no '/').
    base_dir = prefix.rpartition('/')[0]
    with Progress() as progress:
        files = []
        bucket = self.__s3_resource__.Bucket(bucket_name)
        # Materialise the listing once: counting the collection and then
        # iterating it again would list the bucket twice.
        objects = list(bucket.objects.filter(Prefix=prefix))
        task = progress.add_task(
            '[green]Downloading...', total=len(objects)
        )

        for obj in objects:
            name = obj.key
            # Strip only the leading directory. A replace-all would also
            # delete later occurrences of the same text inside the key.
            if base_dir and name.startswith(base_dir):
                name = name[len(base_dir):]
            if name.startswith('/'):
                name = name[1:]

            byte_io = io.BytesIO()
            self.__s3_resource__.Object(
                bucket_name, obj.key
            ).download_fileobj(byte_io)
            # Rewind instead of copying the bytes into a second buffer.
            byte_io.seek(0)
            files.append((name, byte_io))
            progress.update(task, advance=1)
    self.console.print('[green]Finish Download :ok_hand:\n')
    return files

`zipping_in_s3(bucket_name, prefix, zip_name, files=None, extra_args=None)`

Parameters:

Name	Type	Description	Default
`bucket_name`	`str`	The name of the bucket.	required
`prefix`	`str`	The prefix is used to find the path/file matches.	required
`zip_name`	`str`	zip_name is the name given to the compressed file generated from the compression of one or more files in zip format.	required
`files`	`list`	It is a list of tuples, where each tuple contains a string and an io.BytesIO() object. When this parameter is used, the s3_download_in_memory() method is not executed, which means that the file is not downloaded from AWS S3. This way, it is possible to send a ZIP file directly from the local machine to S3 without the need to download the file from the cloud.	`None`
`extra_args`	`dict`	The extra_args parameter is an optional parameter used in the Boto3 library to send additional arguments for the upload or download operation of files in AWS S3. It allows specifying additional options such as metadata or storage settings that can be passed to the S3 service during the file transfer.	`None`

Returns:

Type	Description
`None`	Return None

Examples:

>>> zipping_in_s3('bucket_name', 'prefix', 'zip_name')

Source code in s3_compress/zipping_s3.py

def zipping_in_s3(
    self,
    bucket_name: str,
    prefix: str,
    zip_name: str,
    files: list = None,
    extra_args: dict = None,
) -> None:

    """
    Zip files in memory and upload the archive to `bucket_name` next to the given prefix.

    Parameters:
        bucket_name: The name of the bucket.
        prefix: The prefix is used to find the path/file matches. The archive is uploaded under the directory part of the prefix (the text up to its last '/'), or at the bucket root when the prefix has no '/'.
        zip_name: Name of the generated archive, without the `.zip` extension (it is appended automatically).
        files: A list of tuples, where each tuple contains a string (name inside the archive) and an io.BytesIO object (content). When this parameter is given and not empty, s3_download_in_memory() is not executed, so local content can be zipped and sent to S3 without downloading anything.
        extra_args: Optional dict forwarded to boto3's `upload_fileobj` as `ExtraArgs` (for example metadata or storage settings).
    Returns:
        Return None
    Raises:
        FileNotFoundError: If no `files` were given and nothing was found under `prefix`.
        TypeError: If `files` is not an iterable of `(str, io.BytesIO)` tuples.
    Examples:
        >>> zipping_in_s3('bucket_name', 'prefix', 'zip_name')
    """
    if not files:
        files = self.s3_download_in_memory(bucket_name, prefix)
        if not files:
            raise FileNotFoundError(
                'File or directory is requested but doesn’t exist'
            )

    type_error_message = 'Object has inappropriate type, accepted format (list[tuple[str, io.BytesIO()]])'
    try:
        # Materialise so a one-shot iterable is not exhausted by validation.
        files = list(files)
    except TypeError:
        raise TypeError(type_error_message) from None
    for entry in files:
        if not (
            isinstance(entry, tuple)
            and len(entry) == 2
            and isinstance(entry[0], str)
            and isinstance(entry[1], io.BytesIO)
        ):
            raise TypeError(type_error_message)

    # Keep the directory part of the prefix, including its trailing '/';
    # both pieces are empty when the prefix has no '/'.
    head, separator, _ = prefix.rpartition('/')
    target_dir = head + separator

    self.console.print('[green]Start zip :package:')
    with self.console.status('Initial status ') as status:

        status.update(
            '[green]zipping...',
            spinner='bouncingBall',
            spinner_style='green',
        )
        zip_buffer = io.BytesIO()
        # NOTE(review): allowZip64=False is kept from the original code, so
        # archives that need ZIP64 raise zipfile.LargeZipFile - confirm
        # whether that limit is intended.
        with zipfile.ZipFile(
            zip_buffer,
            'w',
            compression=zipfile.ZIP_DEFLATED,
            allowZip64=False,
        ) as zip_file:
            for file_name, data in files:
                zip_file.writestr(file_name, data.getvalue())
        zip_buffer.seek(0)

        status.update(
            '[green]uploading...',
            spinner='bouncingBall',
            spinner_style='green',
        )
        self.__s3_client__.upload_fileobj(
            zip_buffer,
            bucket_name,
            f'{target_dir}{zip_name}.zip',
            ExtraArgs=extra_args,
        )
    self.console.print('[green]Finish zip :ok_hand:\n')

    self.console.print('[green]All Rigth :tada::tada::tada:')