Source code for bases.encoding.zeropad

"""
    Zero-padded base encodings.

    Similar to :mod:`~bases.encoding.simple` base encodings, but additionally:

    - preserves leading zeros
    - optionally enforces a fixed block size for encoded strings and decoded bytestrings

    Constructor options:

    - ``block_nbytes: int`` number of bytes in a block for decoded bytestrings (default: 1)
    - ``block_nchars: int`` number of chars in a block for encoded strings (default: 1)

    The static method :meth:`ZeropadBaseEncoding.max_block_nchars` (resp. :meth:`ZeropadBaseEncoding.max_block_nbytes`) gives the
    maximum block size in chars (resp. bytes) that can be used for a given block size in bytes (resp. chars).
    This is to ensure that encoding/decoding can always be performed unambiguously.

    Encoding of a bytestring ``b``:

    1. count the number Z of leading zero byte blocks in ``b`` and strip them (default: count zero bytes and strip them)
    2. encode ``b`` as a :mod:`~bases.encoding.simple` base encoding would
    3. prepend the minimum number of zero chars necessary to make the encoded string length an integral multiple of ``block_nchars``
    4. prepend Z zero char blocks to the encoded string

    Decoding of a string ``s``:

    1. count the number Z of leading zero char blocks in ``s`` and strip them (default: count zero chars and strip them)
    2. decode ``s`` as a :mod:`~bases.encoding.simple` base encoding would
    3. prepend the minimum number of zero bytes necessary to make the decoded bytestring length an integral multiple of ``block_nbytes``
    4. prepend Z zero byte blocks to the encoded string
"""

from __future__ import annotations

import math
from typing import Any, Dict, Mapping, Optional, Union
from typing_validation import validate

from bases.alphabet import Alphabet
from .base import BaseEncoding, _lstrip_memview
from .simple import SimpleBaseEncoding

[docs] class ZeropadBaseEncoding(BaseEncoding): """ Zero-added base encodings. :param alphabet: the alphabet to use for the encoding :type alphabet: :obj:`str`, :obj:`range` or :class:`~bases.alphabet.abstract.Alphabet` :param case_sensitive: optional case sensitivity (if :obj:`None`, the one from the alphabet is used) :type case_sensitive: :obj:`bool` or :obj:`None`, *optional* :param block_nbytes: number of bytes in a block for decoded bytestrings (default: 1) :type block_nbytes: :obj:`int`, *optional* :param block_nchars: number of chars in a block for encoded strings (default: 1) :type block_nchars: :obj:`int`, *optional* """ _simple_encoding: SimpleBaseEncoding _block_nbytes: int _block_nchars: int def __init__(self, alphabet: Union[str, range, Alphabet], *, case_sensitive: Optional[bool] = None, block_nbytes: int = 1, block_nchars: int = 1): validate(block_nbytes, int) validate(block_nchars, int) super().__init__(alphabet, case_sensitive=case_sensitive) self._simple_encoding = SimpleBaseEncoding(self.alphabet) self._block_nbytes = block_nbytes self._block_nchars = block_nchars self.__validate_init() def __validate_init(self) -> None: base = self.base block_nbytes = self.block_nbytes block_nchars = self.block_nchars max_block_nbytes = ZeropadBaseEncoding.max_block_nbytes(base, block_nchars) max_block_nchars = ZeropadBaseEncoding.max_block_nchars(base, block_nbytes) if block_nchars > max_block_nchars: raise ValueError(f"Number of characters allowed per zero-padding block is too large: " f"the maximum for base = {base} and block_nbytes = {block_nbytes} is " f"block_nchars = {max_block_nchars}") if block_nbytes > max_block_nbytes: raise ValueError(f"Number of bytes allowed per zero-padding block is too large: " f"the maximum for base = {base} and block_nchars {block_nchars} is " f"block_nbytes = {max_block_nbytes}")
[docs] @staticmethod def max_block_nchars(base: int, block_nbytes: int) -> int: """ Returns the maximum integer value for ``block_chars`` such that: .. code-block:: python 256**block_nbytes > base**(block_nchars-1) :param block_nbytes: number of bytes in a block for decoded bytestrings :type block_nbytes: :obj:`int` """ validate(base, int) validate(block_nbytes, int) if base <= 1: raise ValueError("Base must be >= 2.") if block_nbytes <= 0: raise ValueError("Number of bytes per zero-padding block must be positive.") _max_nc = block_nbytes/math.log(base, 256)+1 _max_nc_floor = int(math.floor(_max_nc)) return _max_nc_floor if _max_nc > _max_nc_floor else _max_nc_floor-1
[docs] @staticmethod def max_block_nbytes(base: int, block_nchars: int) -> int: """ Returns the maximum integer value for ``block_nbytes`` such that: .. code-block:: python base**block_nchars > 256**(block_nbytes-1) :param block_nchars: number of chars in a block for encoded strings :type block_nchars: :obj:`int` """ validate(base, int) validate(block_nchars, int) if base <= 1: raise ValueError("Base must be >= 2.") if block_nchars <= 0: raise ValueError("Number of chars per zero-padding block must be positive.") _max_nb = block_nchars/math.log(256, base)+1 _max_nb_floor = int(math.floor(_max_nb)) return _max_nb_floor if _max_nb > _max_nb_floor else _max_nb_floor-1
@property def block_nbytes(self) -> int: """ Number of bytes in a block. """ return self._block_nbytes @property def block_nchars(self) -> int: """ Number of characters in a block. """ return self._block_nchars def _num_canonical_bytes(self, b: memoryview) -> int: self._validate_bytes(b) block_nbytes = self.block_nbytes extra_bytes = len(b)%block_nbytes if extra_bytes == 0: return len(b) # return b return (block_nbytes-extra_bytes)+len(b) # return bytes(chain(repeat(0, block_nbytes-extra_bytes), iter(b))) # return b"\x00"*(block_nbytes-extra_bytes)+b def _canonical_string(self, s: str) -> str: self._validate_string(s) block_nchars = self.block_nchars extra_chars = len(s)%block_nchars if extra_chars == 0: return s return self.zero_char*(block_nchars-extra_chars)+s def _encode(self, b: memoryview) -> str: canonical_nbytes = self._num_canonical_bytes(b) block_nbytes = self.block_nbytes block_nchars = self.block_nchars zero_char = self.zero_char # strip leading zero bytes # b_stripped = b.lstrip(b"\x00") b_stripped = _lstrip_memview(b) # compute simple base encoding s = self._simple_encoding.encode(b_stripped) # pad simple base encoding to integral multiple of block char size extra_chars = len(s)%block_nchars if extra_chars != 0: s = zero_char*(block_nchars-extra_chars)+s # count leading zero blocks num_zero_blocks = (canonical_nbytes-len(b_stripped))//block_nbytes # return zero-padded base encoding s = zero_char*num_zero_blocks*block_nchars+s return s def _decode(self, s: str) -> bytes: s = self._canonical_string(s) block_nbytes = self.block_nbytes block_nchars = self.block_nchars # strip leading zero chars s_stripped = s.lstrip(self.zero_char) # compute simple base decoding b = self._simple_encoding.decode(s_stripped) # pad simple base decoding to integral multiple of block byte size extra_bytes = len(b)%block_nbytes if extra_bytes != 0: b = b"\x00"*(block_nbytes-extra_bytes)+b # compute leading zero blocks num_zero_blocks = (len(s)-len(s_stripped))//block_nchars # return zero-padded base decoding b = b"\x00"*num_zero_blocks*block_nbytes+b return b
[docs] def options(self, skip_defaults: bool = False) -> Mapping[str, Any]: validate(skip_defaults, bool) options: Dict[str, Any] = {} if not skip_defaults or self.block_nbytes != 1: options["block_nbytes"] = self.block_nbytes if not skip_defaults or self.block_nchars != 1: options["block_nchars"] = self.block_nchars return options