Source code for bases.encoding.base

"""
    Abstract base encodings.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any, Mapping, Optional, TypeVar, Union
from typing_extensions import Final
from typing_validation import validate

from bases.alphabet import Alphabet
from bases.alphabet import make as alphabet_make
from .errors import NonAlphabeticCharError

# Static-typing alias used in signatures that accept binary input
# (e.g. ``BaseEncoding.encode`` and ``BaseEncoding.canonical_bytes``).
BytesLike = Union[bytes, bytearray, memoryview]
""" Type alias for bytes-like objects. """

# Runtime counterpart of ``BytesLike``: a plain tuple of the same three types,
# in the form expected as the second argument of ``isinstance``.
byteslike: Final = (bytes, bytearray, memoryview)
""" Tuple of bytes-like objects types (for use with :obj:`isinstance` checks). """

# Bound type variable used to annotate ``self`` and the return type of methods
# that build a new encoding via ``type(self)(...)``, so that static checkers
# see the concrete subclass rather than ``BaseEncoding``.
BaseEncodingSubclass = TypeVar("BaseEncodingSubclass", bound="BaseEncoding")
""" Type variable for subclasses of :class:`BaseEncoding`. """


[docs]
class BaseEncoding(ABC):
    """
        Abstract superclass for base encodings.
        Instances can always be constructed from an alphabet (with optional change of case sensitivity)
        and a number of additional options specified by subclasses.

        Concrete subclasses must implement :meth:`_encode`, :meth:`_decode` and :meth:`options`.
        Methods returning a new encoding (e.g. :meth:`with_alphabet`, :meth:`with_options`) do so by calling
        ``type(self)(alphabet, **options)``, so subclass constructors must accept an alphabet as their
        first positional argument, and ``case_sensitive`` plus every key of :meth:`options` as keyword arguments.

        :param alphabet: the alphabet to use for the encoding
        :type alphabet: :obj:`str`, :obj:`range` or :class:`~bases.alphabet.abstract.Alphabet`
        :param case_sensitive: optional case sensitivity; if :obj:`None`, the one from the alphabet is used
                               when ``alphabet`` is an :class:`~bases.alphabet.abstract.Alphabet` instance,
                               while alphabets given as :obj:`str` or :obj:`range` are made case sensitive
        :type case_sensitive: :obj:`bool` or :obj:`None`, *optional*
    """

    _alphabet: Alphabet
    # NOTE(review): the two attributes below are declared but never assigned anywhere in this class;
    # presumably they are populated by subclasses -- confirm before relying on them.
    _alphabet_revdir: Mapping[str, int]
    _case_sensitive: bool

    def __init__(self, alphabet: Union[str, range, Alphabet], *,
                 case_sensitive: Optional[bool] = None):
        validate(alphabet, Union[str, range, Alphabet])
        validate(case_sensitive, Optional[bool])
        if isinstance(alphabet, Alphabet):
            # Ready-made alphabet: only touch it if a case sensitivity was explicitly requested.
            if case_sensitive is not None:
                alphabet = alphabet.with_case_sensitivity(case_sensitive)
            self._alphabet = alphabet
        else:
            # Alphabet given as str/range: build it, defaulting to case sensitive.
            if case_sensitive is None:
                case_sensitive = True
            self._alphabet = alphabet_make(alphabet, case_sensitive=case_sensitive)

    @property
    def alphabet(self) -> Alphabet:
        """
            The encoding alphabet.

            Example usage:

            >>> encoding.base32.alphabet
            StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                           case_sensitive=False)

        """
        return self._alphabet

    @property
    def base(self) -> int:
        """
            The base for this encoding (the length of the alphabet).

            Example usage:

            >>> encoding.base32.base
            32

        """
        return len(self.alphabet)

    @property
    def case_sensitive(self) -> bool:
        """
            Determines whether the decoder is case sensitive
            (this is the case sensitivity of the underlying alphabet).

            Example usage:

            >>> encoding.base32.case_sensitive
            False

        """
        return self.alphabet.case_sensitive

    @property
    def zero_char(self) -> str:
        """
            The zero digit for this encoding (first character in the alphabet).

            Example usage:

            >>> encoding.base32.alphabet
            StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                           case_sensitive=False)
            >>> encoding.base32.zero_char
            'A'

        """
        return self.alphabet[0]

    def with_alphabet(self: BaseEncodingSubclass, alphabet: Union[str, range, Alphabet], *,
                      case_sensitive: Optional[bool] = None) -> BaseEncodingSubclass:
        """
            Returns a new encoding with the same kind and options as this one,
            but a different alphabet and/or case sensitivity.

            :param alphabet: the alphabet to use for the encoding
            :type alphabet: :obj:`str`, :obj:`range` or :class:`~bases.alphabet.abstract.Alphabet`
            :param case_sensitive: optional case sensitivity (if :obj:`None`, the one from the alphabet is used)
            :type case_sensitive: :obj:`bool` or :obj:`None`, *optional*

            :rtype: :obj:`BaseEncodingSubclass`
        """
        validate(alphabet, Union[str, range, Alphabet])
        validate(case_sensitive, Optional[bool])
        # Copy the options into a fresh dict, so the mapping returned by options() is never mutated.
        options = {**self.options()}
        options["case_sensitive"] = case_sensitive
        return type(self)(alphabet, **options)

    def with_case_sensitivity(self: BaseEncodingSubclass, case_sensitive: bool) -> BaseEncodingSubclass:
        """
            Returns a new encoding with the same characters as this one but with specified case sensitivity.

            Example usage:

            >>> encoding.base32
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                               case_sensitive=False),
                pad_char='=', padding='include')
            >>> encoding.base32.with_case_sensitivity(True)
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'),
                pad_char='=', padding='include')

            :param case_sensitive: case sensitivity for the new encoding
            :type case_sensitive: :obj:`bool`
            :rtype: :obj:`BaseEncodingSubclass`
        """
        validate(case_sensitive, bool)
        return self.with_alphabet(self.alphabet.with_case_sensitivity(case_sensitive))

    def upper(self: BaseEncodingSubclass) -> BaseEncodingSubclass:
        """
            Returns a new encoding with all cased characters turned to uppercase.

            Example usage:

            >>> encoding.base32z
            FixcharBaseEncoding(
                StringAlphabet('ybndrfg8ejkmcpqxot1uwisza345h769',
                               case_sensitive=False))
            >>> encoding.base32z.upper()
            FixcharBaseEncoding(
                StringAlphabet('YBNDRFG8EJKMCPQXOT1UWISZA345H769',
                               case_sensitive=False))

            :rtype: :obj:`BaseEncodingSubclass`

        """
        return self.with_alphabet(self.alphabet.upper())

    def lower(self: BaseEncodingSubclass) -> BaseEncodingSubclass:
        """
            Returns a new encoding with all cased characters turned to lowercase.

            Example usage:

            >>> encoding.base32
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                               case_sensitive=False),
                pad_char='=', padding='include')
            >>> encoding.base32.lower()
            FixcharBaseEncoding(
                StringAlphabet('abcdefghijklmnopqrstuvwxyz234567',
                               case_sensitive=False),
                pad_char='=', padding='include')

            :rtype: :obj:`BaseEncodingSubclass`

        """
        return self.with_alphabet(self.alphabet.lower())

    def with_options(self: BaseEncodingSubclass, **options: Any) -> BaseEncodingSubclass:
        r"""
            Returns a new encoding with the same kind, alphabet and case sensitivity as this one,
            but different options.


            :param options: options to set for the new encoding
            :type options: :obj:`~typing.Dict`\ [:obj:`str`, :obj:`~typing.Any`]

            :raises KeyError: if any of the given option names is not one of the keys of :meth:`options`

            :rtype: :obj:`BaseEncodingSubclass`
        """
        new_options = {**self.options()}
        # Reject unknown names up front, so the error mentions the option rather than the constructor.
        for name in options:
            if name not in new_options:
                raise KeyError(f"Unknown option {repr(name)} for {type(self).__name__}")
        new_options.update(options)
        return type(self)(self.alphabet, **new_options)

    def encode(self, b: BytesLike) -> str:
        """
            Encodes a bytestring into a string.

            Example usage:

            >>> b = bytes([70, 98, 190, 187, 66, 224, 178])
            >>> encoding.base32.encode(b)
            'IZRL5O2C4CZA===='
            >>> s = 'IZRL5O2C4CZA===='
            >>> list(encoding.base32.decode(s))
            [70, 98, 190, 187, 66, 224, 178]

            :param b: the bytestring
            :type b: :obj:`BytesLike`

            :raises ~bases.encoding.errors.EncodingError: if the bytestring is invalid
        """
        b = self._validate_bytes(b)
        return self._encode(b)

    def decode(self, s: str) -> bytes:
        """
            Decodes a string into a bytestring.

            Example usage:

            >>> s = 'IZRL5O2C4CZA===='
            >>> list(encoding.base32.decode(s))
            [70, 98, 190, 187, 66, 224, 178]

            :param s: the string
            :type s: :obj:`str`

            :raises ~bases.encoding.errors.DecodingError: if the string is invalid

        """
        s = self._validate_string(s)
        return self._decode(s)

    def canonical_bytes(self, b: BytesLike) -> bytes:
        """
            Returns a canonical version of the bytestring ``b``:
            this is the bytestring obtained by first encoding ``b``
            and then decoding it.

            (This method is overridden by subclasses with more efficient implementations.)

            :param b: the bytestring
            :type b: :obj:`BytesLike`
        """
        return self.decode(self.encode(b))

    def canonical_string(self, s: str) -> str:
        """
            Returns a canonical version of the string ``s``:
            this is the string obtained by first decoding ``s``
            and then encoding it.

            (This method is overridden by subclasses with more efficient implementations.)

            :param s: the string
            :type s: :obj:`str`
        """
        return self.encode(self.decode(s))

    def _validate_bytes(self, b: BytesLike) -> memoryview:
        """
            Checks that ``b`` is a bytes-like object and returns it wrapped in a :obj:`memoryview`,
            which is the form passed on to :meth:`_encode`.
        """
        validate(b, BytesLike)
        return memoryview(b)

    def _validate_string(self, s: str) -> str:
        """
            Checks that ``s`` is a string made only of characters belonging to the encoding alphabet
            and returns it unchanged.

            :raises ~bases.encoding.errors.NonAlphabeticCharError: at the first character not in the alphabet
        """
        validate(s, str)
        alphabet = self.alphabet
        for c in s:
            if c not in alphabet:
                raise NonAlphabeticCharError(c, alphabet)
        return s

    @abstractmethod
    def _encode(self, b: memoryview) -> str:
        """
            Encoding logic, to be implemented by subclasses.
            Called by :meth:`encode` with the output of :meth:`_validate_bytes`.
        """
        ...

    @abstractmethod
    def _decode(self, s: str) -> bytes:
        """
            Decoding logic, to be implemented by subclasses.
            Called by :meth:`decode` with the output of :meth:`_validate_string`.
        """
        ...

    @abstractmethod
    def options(self, skip_defaults: bool = False) -> Mapping[str, Any]:
        """
            The options used to construct this particular encoding.

            Example usage:

            >>> encoding.base32.options()
            {'char_nbits': 'auto', 'pad_char': '=', 'padding': 'include'}
            >>> encoding.base32.options(skip_defaults=True)
            {'pad_char': '=', 'padding': 'include'}

            :param skip_defaults: if set to :obj:`True`, only options with non-default values are included in the mapping
            :type skip_defaults: :obj:`bool`, *optional*

        """

    def __eq__(self, other: Any) -> bool:
        """
            Two encodings are equal if they have exactly the same type, equal alphabets and equal options.
        """
        if not isinstance(other, BaseEncoding):
            return NotImplemented
        if type(self) is not type(other):
            return NotImplemented
        # The alphabet must take part in the comparison: it is part of the hash, and encodings
        # that differ only in their alphabet encode the same bytes differently.
        return self.alphabet == other.alphabet and self.options() == other.options()

    def __hash__(self) -> int:
        """
            Hash computed from the same data compared by :meth:`__eq__`: type, alphabet and options.
        """
        return hash((type(self), self.alphabet, tuple(self.options().items())))

    def __repr__(self) -> str:
        """
            Constructor-like representation: the alphabet followed by the options with non-default values.
        """
        type_name = type(self).__name__
        alphabet_str = repr(self.alphabet)
        options = self.options(skip_defaults=True)
        if not options:
            return f"{type_name}({alphabet_str})"
        options_str = ", ".join(f"{name}={value!r}" for name, value in options.items())
        return f"{type_name}({alphabet_str}, {options_str})"



[docs]
def lstrip_memview(b: memoryview, byte: int = 0) -> memoryview:
    r"""
        Strips leading bytes from a memoryview without copying: the result is the slice of ``b``
        starting at the first position whose value differs from ``byte`` (zero by default).

        Example usage:

        >>> b = bytes([0, 0, 1, 0, 2, 0, 3, 0, 0])
        >>> b
        b'\x00\x00\x01\x00\x02\x00\x03\x00\x00'
        >>> m = memoryview(b)
        >>> m
        <memory at 0x0000024A3AB9EB80>
        >>> bytes(m)
        b'\x00\x00\x01\x00\x02\x00\x03\x00\x00'
        >>> ms = lstrip_memview(m)
        >>> ms
        <memory at 0x0000024A3AB9EC40>
        >>> bytes(ms)
        b'\x01\x00\x02\x00\x03\x00\x00'

        :param b: the memoryview from which to strip leading bytes
        :type b: :obj:`memoryview`
        :param byte: the byte value to strip from the start of ``b`` (zero if not specified)
        :type byte: :obj:`int`, *optional*

        :raises ValueError: if ``byte not in range(256)``

    """
    validate(b, memoryview)
    if byte == 0:
        # Default value: nothing further to check about ``byte``.
        return _lstrip_memview(b, byte)
    validate(byte, int)
    if not 0 <= byte < 256:
        raise ValueError(f"Byte values must be in range(256), found {byte}.")
    return _lstrip_memview(b, byte)


def _lstrip_memview(b: memoryview, byte: int = 0) -> memoryview:
    # Unchecked worker behind lstrip_memview: locate the first position whose value differs
    # from `byte` (or the end of the view if there is none) and slice from there.
    size = len(b)
    start = next((pos for pos in range(size) if b[pos] != byte), size)
    return b[start:]