# Source code for bases.encoding.base

"""
    Abstract base encodings.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any, Mapping, Optional, TypeVar, Union
from typing_extensions import Final
from typing_validation import validate

from bases.alphabet import Alphabet
from bases.alphabet import make as alphabet_make
from .errors import NonAlphabeticCharError

# Static type alias: the three built-in bytes-like types that encoders in this module accept.
BytesLike = Union[bytes, bytearray, memoryview]
""" Type alias for bytes-like objects. """

# Runtime counterpart of ``BytesLike``: a plain tuple of the same three types, usable as the
# second argument of ``isinstance``. ``Final`` marks the name as not to be reassigned.
byteslike: Final = (bytes, bytearray, memoryview)
""" Tuple of bytes-like objects types (for use with :obj:`isinstance` checks). """

# The bound is written as a string because ``BaseEncoding`` is only defined further down in this
# module. Methods annotate ``self`` with this variable so that they are typed as returning the
# same subclass they are called on.
BaseEncodingSubclass = TypeVar("BaseEncodingSubclass", bound="BaseEncoding")
""" Type variable for subclasses of :class:`BaseEncoding`. """

class BaseEncoding(ABC):
    """
        Abstract superclass for base encodings.
        Instances can always be constructed from an alphabet (with optional change of case sensitivity)
        and a number of additional options specified by subclasses.

        :param alphabet: the alphabet to use for the encoding
        :type alphabet: :obj:`str`, :obj:`range` or :class:`~bases.alphabet.abstract.Alphabet`
        :param case_sensitive: optional case sensitivity (if :obj:`None`, the one from the alphabet is used)
        :type case_sensitive: :obj:`bool` or :obj:`None`, *optional*
    """

    _alphabet: Alphabet
    _alphabet_revdir: Mapping[str, int]
    _case_sensitive: bool

    def __init__(self, alphabet: Union[str, range, Alphabet], *,
                 case_sensitive: Optional[bool] = None):
        validate(alphabet, Union[str, range, Alphabet])
        validate(case_sensitive, Optional[bool])
        if isinstance(alphabet, Alphabet):
            # An alphabet object already carries a case sensitivity: only override it on request.
            if case_sensitive is not None:
                alphabet = alphabet.with_case_sensitivity(case_sensitive)
            self._alphabet = alphabet
        else:
            # Strings and ranges carry no case sensitivity of their own: default to case sensitive.
            if case_sensitive is None:
                case_sensitive = True
            self._alphabet = alphabet_make(alphabet, case_sensitive=case_sensitive)

    @property
    def alphabet(self) -> Alphabet:
        """
            The encoding alphabet.

            Example usage:

            >>> encoding.base32.alphabet
            StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                           case_sensitive=False)
        """
        return self._alphabet

    @property
    def base(self) -> int:
        """
            The base for this encoding (the length of the alphabet).

            Example usage:

            >>> encoding.base32.base
            32
        """
        return len(self.alphabet)

    @property
    def case_sensitive(self) -> bool:
        """
            Determines whether the decoder is case sensitive.

            Example usage:

            >>> encoding.base32.case_sensitive
            False
        """
        return self.alphabet.case_sensitive

    @property
    def zero_char(self) -> str:
        """
            The zero digit for this encoding (first character in the alphabet).

            Example usage:

            >>> encoding.base32.alphabet
            StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                           case_sensitive=False)
            >>> encoding.base32.zero_char
            'A'
        """
        return self.alphabet[0]

    def with_alphabet(self: BaseEncodingSubclass,
                      alphabet: Union[str, range, Alphabet], *,
                      case_sensitive: Optional[bool] = None) -> BaseEncodingSubclass:
        """
            Returns a new encoding with the same kind and options as this one,
            but a different alphabet and/or case sensitivity.

            :param alphabet: the alphabet to use for the encoding
            :type alphabet: :obj:`str`, :obj:`range` or :class:`~bases.alphabet.abstract.Alphabet`
            :param case_sensitive: optional case sensitivity (if :obj:`None`, the one from the alphabet is used)
            :type case_sensitive: :obj:`bool` or :obj:`None`, *optional*

            :rtype: :obj:`BaseEncodingSubclass`
        """
        validate(alphabet, Union[str, range, Alphabet])
        validate(case_sensitive, Optional[bool])
        # Copy the options so that the mapping returned by ``options()`` is never mutated.
        options = {**self.options()}
        options["case_sensitive"] = case_sensitive
        return type(self)(alphabet, **options)

    def with_case_sensitivity(self: BaseEncodingSubclass, case_sensitive: bool) -> BaseEncodingSubclass:
        """
            Returns a new encoding with the same characters as this one but with specified case sensitivity.

            Example usage:

            >>> encoding.base32
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                               case_sensitive=False),
                pad_char='=', padding='include')
            >>> encoding.base32.with_case_sensitivity(True)
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'),
                pad_char='=', padding='include')

            :param case_sensitive: case sensitivity for the new encoding
            :type case_sensitive: :obj:`bool`

            :rtype: :obj:`BaseEncodingSubclass`
        """
        validate(case_sensitive, bool)
        return self.with_alphabet(self.alphabet.with_case_sensitivity(case_sensitive))

    def upper(self: BaseEncodingSubclass) -> BaseEncodingSubclass:
        """
            Returns a new encoding with all cased characters turned to uppercase.

            Example usage:

            >>> encoding.base32z
            FixcharBaseEncoding(
                StringAlphabet('ybndrfg8ejkmcpqxot1uwisza345h769',
                               case_sensitive=False))
            >>> encoding.base32z.upper()
            FixcharBaseEncoding(
                StringAlphabet('YBNDRFG8EJKMCPQXOT1UWISZA345H769',
                               case_sensitive=False))

            :rtype: :obj:`BaseEncodingSubclass`
        """
        return self.with_alphabet(self.alphabet.upper())

    def lower(self: BaseEncodingSubclass) -> BaseEncodingSubclass:
        """
            Returns a new encoding with all cased characters turned to lowercase.

            Example usage:

            >>> encoding.base32
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                               case_sensitive=False),
                pad_char='=', padding='include')
            >>> encoding.base32.lower()
            FixcharBaseEncoding(
                StringAlphabet('abcdefghijklmnopqrstuvwxyz234567',
                               case_sensitive=False),
                pad_char='=', padding='include')

            :rtype: :obj:`BaseEncodingSubclass`
        """
        return self.with_alphabet(self.alphabet.lower())

    def with_options(self: BaseEncodingSubclass, **options: Any) -> BaseEncodingSubclass:
        r"""
            Returns a new encoding with the same kind, alphabet and case sensitivity as this one,
            but different options.

            :param options: options to set for the new encoding
            :type options: :obj:`~typing.Dict`\ [:obj:`str`, :obj:`~typing.Any`]

            :raises KeyError: if an option name is not one of the names returned by :meth:`options`

            :rtype: :obj:`BaseEncodingSubclass`
        """
        new_options = {**self.options()}
        # Reject unknown names before applying anything, so typos are not silently forwarded.
        for name in options:
            if name not in new_options:
                raise KeyError(f"Unknown option {repr(name)} for {type(self).__name__}")
        new_options.update(options)
        return type(self)(self.alphabet, **new_options)

    def encode(self, b: BytesLike) -> str:
        """
            Encodes a bytestring into a string.

            Example usage:

            >>> b = bytes([70, 98, 190, 187, 66, 224, 178])
            >>> encoding.base32.encode(b)
            'IZRL5O2C4CZA===='
            >>> s = 'IZRL5O2C4CZA===='
            >>> list(encoding.base32.decode(s))
            [70, 98, 190, 187, 66, 224, 178]

            :param b: the bytestring
            :type b: :obj:`BytesLike`

            :raises ~bases.encoding.errors.EncodingError: if the bytestring is invalid

            :rtype: :obj:`str`
        """
        b = self._validate_bytes(b)
        return self._encode(b)

    def decode(self, s: str) -> bytes:
        """
            Decodes a string into a bytestring.

            Example usage:

            >>> s = 'IZRL5O2C4CZA===='
            >>> list(encoding.base32.decode(s))
            [70, 98, 190, 187, 66, 224, 178]

            :param s: the string
            :type s: :obj:`str`

            :raises ~bases.encoding.errors.DecodingError: if the string is invalid

            :rtype: :obj:`bytes`
        """
        s = self._validate_string(s)
        return self._decode(s)

    def canonical_bytes(self, b: BytesLike) -> bytes:
        """
            Returns a canonical version of the bytestring ``b``:
            this is the bytestring obtained by first encoding ``b`` and then decoding it.

            (This method is overridden by subclasses with more efficient implementations.)

            :param b: the bytestring
            :type b: :obj:`BytesLike`

            :rtype: :obj:`bytes`
        """
        return self.decode(self.encode(b))

    def canonical_string(self, s: str) -> str:
        """
            Returns a canonical version of the string ``s``:
            this is the string obtained by first decoding ``s`` and then encoding it.

            (This method is overridden by subclasses with more efficient implementations.)

            :param s: the string
            :type s: :obj:`str`

            :rtype: :obj:`str`
        """
        return self.encode(self.decode(s))

    def _validate_bytes(self, b: BytesLike) -> memoryview:
        # Type-checks the input and wraps it in a memoryview for the subclass encoder.
        validate(b, BytesLike)
        return memoryview(b)

    def _validate_string(self, s: str) -> str:
        # Type-checks the input and rejects the first character found outside the alphabet.
        validate(s, str)
        alphabet = self.alphabet
        for c in s:
            if c not in alphabet:
                raise NonAlphabeticCharError(c, alphabet)
        return s

    @abstractmethod
    def _encode(self, b: memoryview) -> str:
        ...

    @abstractmethod
    def _decode(self, s: str) -> bytes:
        ...

    @abstractmethod
    def options(self, skip_defaults: bool = False) -> Mapping[str, Any]:
        """
            The options used to construct this particular encoding.

            Example usage:

            >>> encoding.base32.options()
            {'char_nbits': 'auto', 'pad_char': '=', 'padding': 'include'}
            >>> encoding.base32.options(skip_defaults=True)
            {'pad_char': '=', 'padding': 'include'}

            :param skip_defaults: if set to :obj:`True`, only options with non-default values are included in the mapping
            :type skip_defaults: :obj:`bool`, *optional*
        """

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, BaseEncoding):
            return NotImplemented
        if type(self) is not type(other):
            return NotImplemented
        # The alphabet is part of the identity of an encoding (it is hashed by ``__hash__``),
        # so it has to take part in equality as well: comparing the options alone would make
        # encodings over different alphabets equal while hashing differently.
        return self.alphabet == other.alphabet and self.options() == other.options()

    def __hash__(self) -> int:
        # Mapping equality ignores item order, so the hash must ignore it too.
        return hash((type(self), self.alphabet, frozenset(self.options().items())))

    def __repr__(self) -> str:
        type_name = type(self).__name__
        alphabet_str = repr(self.alphabet)
        # Only non-default options are shown, to keep the representation short.
        options = self.options(skip_defaults=True)
        if not options:
            return f"{type_name}({alphabet_str})"
        options_str = ", ".join(f"{name}={repr(value)}" for name, value in options.items())
        return f"{type_name}({alphabet_str}, {options_str})"
def lstrip_memview(b: memoryview, byte: int = 0) -> memoryview:
    r"""
        Returns a new memoryview obtained by slicing away all leading occurrences
        of a given byte value (zero by default) from the given memoryview ``b``.

        Example usage:

        >>> b = bytes([0, 0, 1, 0, 2, 0, 3, 0, 0])
        >>> b
        b'\x00\x00\x01\x00\x02\x00\x03\x00\x00'
        >>> m = memoryview(b)
        >>> m
        <memory at 0x0000024A3AB9EB80>
        >>> bytes(m)
        b'\x00\x00\x01\x00\x02\x00\x03\x00\x00'
        >>> ms = lstrip_memview(m)
        >>> ms
        <memory at 0x0000024A3AB9EC40>
        >>> bytes(ms)
        b'\x01\x00\x02\x00\x03\x00\x00'

        :param b: the memoryview from which to strip leading bytes
        :type b: :obj:`memoryview`
        :param byte: optionally, a leading byte value to strip instead of zero
        :type byte: :obj:`int`, *optional*

        :raises ValueError: if ``byte not in range(256)``

        :rtype: :obj:`memoryview`
    """
    validate(b, memoryview)
    # The default value needs no checking: only a custom byte value is validated.
    custom_byte = byte != 0
    if custom_byte:
        validate(byte, int)
        if byte not in range(256):
            raise ValueError(f"Byte values must be in range(256), found {byte}.")
    return _lstrip_memview(b, byte)
def _lstrip_memview(b: memoryview, byte: int = 0) -> memoryview: idx = 0 l = len(b) while idx < l and b[idx] == byte: idx += 1 return b[idx:]