From 44d11bdc282d21361a9520a68267c0ab2119e01e Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Sun, 28 Mar 2021 14:37:51 +0300 Subject: [PATCH 01/21] Refactor the bit length set into a package --- pydsdl/_bit_length_set/__init__.py | 5 +++++ pydsdl/{ => _bit_length_set}/_bit_length_set.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 pydsdl/_bit_length_set/__init__.py rename pydsdl/{ => _bit_length_set}/_bit_length_set.py (99%) diff --git a/pydsdl/_bit_length_set/__init__.py b/pydsdl/_bit_length_set/__init__.py new file mode 100644 index 0000000..21bbc4c --- /dev/null +++ b/pydsdl/_bit_length_set/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) 2021 UAVCAN Consortium +# This software is distributed under the terms of the MIT License. +# Author: Pavel Kirienko + +from ._bit_length_set import BitLengthSet as BitLengthSet diff --git a/pydsdl/_bit_length_set.py b/pydsdl/_bit_length_set/_bit_length_set.py similarity index 99% rename from pydsdl/_bit_length_set.py rename to pydsdl/_bit_length_set/_bit_length_set.py index 23809fd..d1a23d5 100644 --- a/pydsdl/_bit_length_set.py +++ b/pydsdl/_bit_length_set/_bit_length_set.py @@ -70,7 +70,7 @@ def is_aligned_at_byte(self) -> bool: >>> BitLengthSet(33).is_aligned_at_byte() False """ - from ._serializable import SerializableType + from .._serializable import SerializableType return self.is_aligned_at(SerializableType.BITS_PER_BYTE) From 8fd3af16e618d8048245ec44b63926ac70f99b4b Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Sat, 3 Apr 2021 01:41:36 +0300 Subject: [PATCH 02/21] Intermediate result; seems like a dead-end --- pydsdl/_bit_length_set/_bit_length_set.py | 170 ++-------------------- pydsdl/_bit_length_set/_symbolic.py | 131 +++++++++++++++++ 2 files changed, 145 insertions(+), 156 deletions(-) create mode 100644 pydsdl/_bit_length_set/_symbolic.py diff --git a/pydsdl/_bit_length_set/_bit_length_set.py b/pydsdl/_bit_length_set/_bit_length_set.py index d1a23d5..1e0f65c 100644 --- a/pydsdl/_bit_length_set/_bit_length_set.py +++ b/pydsdl/_bit_length_set/_bit_length_set.py @@ -12,32 +12,17 @@ class BitLengthSet: It is used for representing bit offsets of fields and bit lengths of serialized representations. Instances are comparable between each other, with plain integers, and with native sets of integers. - The methods do not mutate the instance they are invoked on; instead, the result is returned as a new instance, - excepting the in-place ``__ixx__()`` operator overloads. - - This class performs very intensive computations that largely define the data type processing time - so it has been carefully optimized for speed. For details, see https://github.com/UAVCAN/pydsdl/issues/49. + The methods do not mutate the instance they are invoked on; instead, the result is returned as a new instance. """ - def __init__(self, values: typing.Optional[typing.Union[typing.Iterable[int], int]] = None): + def __init__(self, values: typing.Union[typing.Iterable[int], int] = 0): """ Accepts any iterable that yields integers (like another bit length set) or a single integer, in which case it will result in the set containing only the one specified integer. The source container is always deep-copied. - - >>> BitLengthSet() - BitLengthSet() - >>> len(BitLengthSet()) == 0 - True - >>> BitLengthSet(1) - BitLengthSet({1}) - >>> BitLengthSet({1, 2, 3}) - BitLengthSet({1, 2, 3}) """ if isinstance(values, set): self._value = values # Do not convert if already a set - elif values is None: - self._value = set() elif isinstance(values, int): self._value = {values} else: @@ -46,7 +31,7 @@ def __init__(self, values: typing.Optional[typing.Union[typing.Iterable[int], in def is_aligned_at(self, bit_length: int) -> bool: """ Checks whether all of the contained offset values match the specified alignment goal. - An empty bit length set is considered to have infinite alignment. + A zero-length bit length set is considered to have infinite alignment. >>> BitLengthSet(64).is_aligned_at(32) True @@ -54,12 +39,10 @@ def is_aligned_at(self, bit_length: int) -> bool: False >>> BitLengthSet(48).is_aligned_at(16) True - >>> BitLengthSet().is_aligned_at(123456) + >>> BitLengthSet().is_aligned_at(1234567) True """ - if self: - return set(map(lambda x: x % bit_length, self._value)) == {0} - return True # An empty set is always aligned. + return set(map(lambda x: x % bit_length, self._value)) == {0} def is_aligned_at_byte(self) -> bool: """ @@ -80,18 +63,6 @@ def pad_to_alignment(self, bit_length: int) -> "BitLengthSet": After this transformation is applied, elements may become up to ``bit_length-1`` bits larger. The argument shall be a positive integer, otherwise it's a :class:`ValueError`. - >>> BitLengthSet({0, 1, 2, 3, 4, 5, 6, 7, 8}).pad_to_alignment(1) # Alignment to 1 is a no-op. - BitLengthSet({0, 1, 2, 3, 4, 5, 6, 7, 8}) - >>> BitLengthSet({0, 1, 2, 3, 4, 5, 6, 7, 8}).pad_to_alignment(2) - BitLengthSet({0, 2, 4, 6, 8}) - >>> BitLengthSet({0, 1, 5, 7}).pad_to_alignment(2) - BitLengthSet({0, 2, 6, 8}) - >>> BitLengthSet({0, 1, 2, 3, 4, 5, 6, 7, 8}).pad_to_alignment(3) - BitLengthSet({0, 3, 6, 9}) - >>> BitLengthSet({0, 1, 2, 3, 4, 5, 6, 7, 8}).pad_to_alignment(8) - BitLengthSet({0, 8}) - >>> BitLengthSet({0, 9}).pad_to_alignment(8) - BitLengthSet({0, 16}) >>> from random import randint >>> alignment = randint(1, 64) >>> BitLengthSet(randint(1, 1000) for _ in range(100)).pad_to_alignment(alignment).is_aligned_at(alignment) @@ -111,18 +82,6 @@ def elementwise_sum_k_multicombinations(self, k: int) -> "BitLengthSet": """ This is a special case of :meth:`elementwise_sum_cartesian_product`. The original object is not modified. - - One can replace this method with the aforementioned general case method and the behavior would not change; - however, we need this special case method for performance reasons. When dealing with arrays (either fixed- or - variable-length), usage of this method instead of the generic one yields significantly better performance, - since the computational complexity of k-selections is much lower than that of the Cartesian product. - - >>> BitLengthSet(1).elementwise_sum_k_multicombinations(1) - BitLengthSet({1}) - >>> BitLengthSet({1, 2, 3}).elementwise_sum_k_multicombinations(1) - BitLengthSet({1, 2, 3}) - >>> BitLengthSet({1, 2, 3}).elementwise_sum_k_multicombinations(2) - BitLengthSet({2, 3, 4, 5, 6}) """ k_multicombination = itertools.combinations_with_replacement(self, k) elementwise_sums = map(sum, k_multicombination) @@ -158,15 +117,6 @@ def elementwise_sum_cartesian_product( variable-length array with the capacity of N elements can be modeled as a tagged union containing N fixed arrays of length from 1 to N, plus one empty field (representing the case of an empty variable-length array). - - >>> BitLengthSet.elementwise_sum_cartesian_product([1, 2, 10]) - BitLengthSet({13}) - >>> BitLengthSet.elementwise_sum_cartesian_product([{1, 2}, {4, 5}]) - BitLengthSet({5, 6, 7}) - >>> BitLengthSet.elementwise_sum_cartesian_product([{1, 2, 3}, {4, 5, 6}]) - BitLengthSet({5, 6, 7, 8, 9}) - >>> BitLengthSet.elementwise_sum_cartesian_product([{1, 2, 3}, {4, 5, 6}, {7, 8, 9}]) - BitLengthSet({12, 13, 14, 15, 16, 17, 18}) """ cartesian_product = itertools.product(*list(map(BitLengthSet, sets))) elementwise_sums = map(sum, cartesian_product) @@ -182,23 +132,11 @@ def __len__(self) -> int: def __eq__(self, other: typing.Any) -> bool: """ Whether the current set equals the other. - The other may be a bit length set, an integer, or a native ``typing.Set[int]``. """ - if isinstance(other, _OPERAND_TYPES): - return self._value == BitLengthSet(other)._value + if isinstance(other, BitLengthSet): + return self._value == other._value return NotImplemented - def __bool__(self) -> bool: - """ - Evaluates to True unless empty. - - >>> assert not BitLengthSet() - >>> assert not BitLengthSet({}) - >>> assert BitLengthSet(0) - >>> assert BitLengthSet({1, 2, 3}) - """ - return bool(self._value) - def __add__(self, other: typing.Any) -> "BitLengthSet": """ This operation models the addition of a new object to a serialized representation; @@ -209,122 +147,42 @@ def __add__(self, other: typing.Any) -> "BitLengthSet": with the current set will be computed, and the result will be returned as a new set (self is not modified). One can easily see that if the argument is a set of one value (or a scalar), this method will result in the addition of said scalar to every element of the original set. - - SPECIAL CASE: if the current set is empty at the time of invocation, it will be assumed to be equal ``{0}``. - - The other may be a bit length set, an integer, or a native ``typing.Set[int]``. - - >>> BitLengthSet() + BitLengthSet() - BitLengthSet() - >>> BitLengthSet(4) + BitLengthSet(3) - BitLengthSet({7}) - >>> BitLengthSet({4, 91}) + 3 - BitLengthSet({7, 94}) - >>> BitLengthSet({4, 91}) + {5, 7} - BitLengthSet({9, 11, 96, 98}) """ - if isinstance(other, _OPERAND_TYPES): - return BitLengthSet.elementwise_sum_cartesian_product([self or BitLengthSet(0), BitLengthSet(other)]) + if isinstance(other, BitLengthSet): + return BitLengthSet.elementwise_sum_cartesian_product([self, other]) return NotImplemented def __radd__(self, other: typing.Any) -> "BitLengthSet": """ See :meth:`__add__`. - - >>> {1, 2, 3} + BitLengthSet({4, 5, 6}) - BitLengthSet({5, 6, 7, 8, 9}) - >>> 1 + BitLengthSet({2, 5, 7}) - BitLengthSet({3, 6, 8}) - """ - if isinstance(other, _OPERAND_TYPES): - return BitLengthSet(other) + self - return NotImplemented - - def __iadd__(self, other: typing.Any) -> "BitLengthSet": - """ - See :meth:`__add__`. - - >>> a = BitLengthSet({1, 2, 3}) - >>> a += {4, 5, 6} - >>> a - BitLengthSet({5, 6, 7, 8, 9}) """ - if isinstance(other, _OPERAND_TYPES): - self._value = (self + other)._value - return self + if isinstance(other, BitLengthSet): + return other + self return NotImplemented def __or__(self, other: typing.Any) -> "BitLengthSet": """ Creates and returns a new set that is a union of this set with another bit length set. - - >>> a = BitLengthSet() - >>> a = a | BitLengthSet({1, 2, 3}) - >>> a - BitLengthSet({1, 2, 3}) - >>> a = a | {3, 4, 5} - >>> a - BitLengthSet({1, 2, 3, 4, 5}) - >>> a | 6 - BitLengthSet({1, 2, 3, 4, 5, 6}) """ - if isinstance(other, _OPERAND_TYPES): - if not isinstance(other, BitLengthSet): # Speed optimization - other = BitLengthSet(other) + if isinstance(other, BitLengthSet): return BitLengthSet(self._value | other._value) return NotImplemented def __ror__(self, other: typing.Any) -> "BitLengthSet": """ See :meth:`__or__`. - - >>> {1, 2, 3} | BitLengthSet({4, 5, 6}) - BitLengthSet({1, 2, 3, 4, 5, 6}) - >>> 1 | BitLengthSet({2, 5, 7}) - BitLengthSet({1, 2, 5, 7}) """ - if isinstance(other, _OPERAND_TYPES): - return BitLengthSet(other) | self - return NotImplemented - - def __ior__(self, other: typing.Any) -> "BitLengthSet": - """ - See :meth:`__or__`. - - >>> a = BitLengthSet({4, 5, 6}) - >>> a |= {1, 2, 3} - >>> a - BitLengthSet({1, 2, 3, 4, 5, 6}) - """ - if isinstance(other, _OPERAND_TYPES): - self._value = (self | other)._value - return self + if isinstance(other, BitLengthSet): + return other | self return NotImplemented def __str__(self) -> str: - """ - Always yields a sorted representation for the ease of human consumption. - - >>> str(BitLengthSet()) - '{}' - >>> str(BitLengthSet({918, 16, 7, 42})) - '{7, 16, 42, 918}' - """ return "{" + ", ".join(map(str, sorted(self._value))) + "}" def __repr__(self) -> str: - """ - >>> BitLengthSet() - BitLengthSet() - >>> BitLengthSet({918, 16, 7, 42}) - BitLengthSet({7, 16, 42, 918}) - """ return type(self).__name__ + "(" + str(self or "") + ")" -_OPERAND_TYPES = BitLengthSet, set, int - - def _unittest_bit_length_set() -> None: from pytest import raises diff --git a/pydsdl/_bit_length_set/_symbolic.py b/pydsdl/_bit_length_set/_symbolic.py new file mode 100644 index 0000000..3bf2aac --- /dev/null +++ b/pydsdl/_bit_length_set/_symbolic.py @@ -0,0 +1,131 @@ +# Copyright (c) 2021 UAVCAN Consortium +# This software is distributed under the terms of the MIT License. +# Author: Pavel Kirienko + +import abc +import typing +import itertools + + +class Operator(abc.ABC): + @abc.abstractmethod + def is_aligned_at(self, bit_length: int) -> bool: + """ + Whether all of the contained offset values match the specified alignment goal. + """ + raise NotImplementedError + + @property + @abc.abstractmethod + def min(self) -> int: + raise NotImplementedError + + @property + @abc.abstractmethod + def max(self) -> int: + raise NotImplementedError + + @abc.abstractmethod + def expand(self) -> typing.Iterable[int]: + """ + Transform the symbolic form into numerical form. + For complex expressions this may be incomputable due to combinatorial explosion or memory limits. + """ + raise NotImplementedError + + +class NullaryOperator(Operator): + """ + A nullary operator represents a constant value, which is a leaf of the operator tree. + """ + + def __init__(self, values: typing.Iterable[int]) -> None: + if isinstance(values, frozenset): + self._value = values # type: typing.FrozenSet[int] + else: + self._value = frozenset(values) + self._value = self._value or frozenset({0}) + + def is_aligned_at(self, bit_length: int) -> bool: + return set(map(lambda x: x % bit_length, self._value)) == {0} + + @property + def min(self) -> int: + return min(self._value) + + @property + def max(self) -> int: + return max(self._value) + + def expand(self) -> typing.Iterable[int]: + return self._value + + +class PaddingOperator(Operator): + """ + Adds up to ``alignment - 1`` padding bits to each entry of the child to ensure that the values are aligned. + """ + + def __init__(self, child: Operator, alignment: int) -> None: + if alignment < 1: + raise ValueError("Invalid alignment: %r bits" % alignment) + self._child = child + self._padding = int(alignment) + + def is_aligned_at(self, bit_length: int) -> bool: + if self._padding % bit_length == 0: + return True + return self._child.is_aligned_at(bit_length) + + @property + def min(self) -> int: + r = self._padding + return ((self._child.min + r - 1) // r) * r + + @property + def max(self) -> int: + r = self._padding + return ((self._child.max + r - 1) // r) * r + + def expand(self) -> typing.Iterable[int]: + r = self._padding + for x in self._child.expand(): + yield ((x + r - 1) // r) * r + + +class ConcatenationOperator(Operator): + """ + Given a set of children, transforms them into a single bit length set expression where each item is the + elementwise sum of the cartesian product of the children's bit length sets. + """ + + def __init__(self, children: typing.Iterable[Operator]) -> None: + self._children = list(children) + if not self._children: + raise ValueError("This operator is not defined on zero operands") + + def is_aligned_at(self, bit_length: int) -> bool: + # Trivial case: if all children are aligned, the result is also aligned. + if all(x.is_aligned_at(bit_length) for x in self._children): + return True + # If all children are fixed-length, their sizes can be added to check alignment in constant time. + mn, mx = self.min, self.max + if mn == mx and mn % bit_length == 0: + return True + # Analytical solution is not possible, use brute-force check. + for x in self.expand(): + if x % bit_length != 0: + return False + return True + + @property + def min(self) -> int: + return sum(x.min for x in self._children) + + @property + def max(self) -> int: + return sum(x.max for x in self._children) + + def expand(self) -> typing.Iterable[int]: + for el in itertools.product(*(x.expand() for x in self._children)): + yield sum(el) From 894c32516b4a15dc07e7e864e77fb24837f68bdb Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Sun, 4 Apr 2021 13:13:17 +0300 Subject: [PATCH 03/21] Define three operators so far: Nullary, Padding, Concatenation --- .idea/dictionaries/pavel.xml | 2 + pydsdl/_bit_length_set/_bit_length_set.py | 1 - pydsdl/_bit_length_set/_symbolic.py | 63 ++++++------- pydsdl/_bit_length_set/_symbolic_test.py | 107 ++++++++++++++++++++++ 4 files changed, 137 insertions(+), 36 deletions(-) create mode 100644 pydsdl/_bit_length_set/_symbolic_test.py diff --git a/.idea/dictionaries/pavel.xml b/.idea/dictionaries/pavel.xml index d53834d..9126bc6 100644 --- a/.idea/dictionaries/pavel.xml +++ b/.idea/dictionaries/pavel.xml @@ -74,6 +74,7 @@ icmp iface inaddr + incomputable intersphinx intravehicular iscsi @@ -112,6 +113,7 @@ norecursedirs nosignatures noxfile + nullary octothorp onboard overgeneral diff --git a/pydsdl/_bit_length_set/_bit_length_set.py b/pydsdl/_bit_length_set/_bit_length_set.py index 1e0f65c..1262d74 100644 --- a/pydsdl/_bit_length_set/_bit_length_set.py +++ b/pydsdl/_bit_length_set/_bit_length_set.py @@ -186,7 +186,6 @@ def __repr__(self) -> str: def _unittest_bit_length_set() -> None: from pytest import raises - assert not BitLengthSet() assert BitLengthSet() == BitLengthSet() assert not (BitLengthSet() != BitLengthSet()) # pylint: disable=unneeded-not assert BitLengthSet(123) == BitLengthSet([123]) diff --git a/pydsdl/_bit_length_set/_symbolic.py b/pydsdl/_bit_length_set/_symbolic.py index 3bf2aac..7bccf89 100644 --- a/pydsdl/_bit_length_set/_symbolic.py +++ b/pydsdl/_bit_length_set/_symbolic.py @@ -3,16 +3,14 @@ # Author: Pavel Kirienko import abc +import math import typing import itertools class Operator(abc.ABC): @abc.abstractmethod - def is_aligned_at(self, bit_length: int) -> bool: - """ - Whether all of the contained offset values match the specified alignment goal. - """ + def modulo(self, bit_length: int) -> typing.Iterable[int]: raise NotImplementedError @property @@ -29,6 +27,7 @@ def max(self) -> int: def expand(self) -> typing.Iterable[int]: """ Transform the symbolic form into numerical form. + This is useful for cross-checking derived solutions and for DSDL expression evaluation. For complex expressions this may be incomputable due to combinatorial explosion or memory limits. """ raise NotImplementedError @@ -40,14 +39,10 @@ class NullaryOperator(Operator): """ def __init__(self, values: typing.Iterable[int]) -> None: - if isinstance(values, frozenset): - self._value = values # type: typing.FrozenSet[int] - else: - self._value = frozenset(values) - self._value = self._value or frozenset({0}) + self._value = frozenset(values) or frozenset({0}) - def is_aligned_at(self, bit_length: int) -> bool: - return set(map(lambda x: x % bit_length, self._value)) == {0} + def modulo(self, bit_length: int) -> typing.Iterable[int]: + return set(map(lambda x: x % bit_length, self._value)) @property def min(self) -> int: @@ -67,30 +62,33 @@ class PaddingOperator(Operator): """ def __init__(self, child: Operator, alignment: int) -> None: - if alignment < 1: + if alignment < 1: # pragma: no cover raise ValueError("Invalid alignment: %r bits" % alignment) self._child = child self._padding = int(alignment) - def is_aligned_at(self, bit_length: int) -> bool: - if self._padding % bit_length == 0: - return True - return self._child.is_aligned_at(bit_length) + def modulo(self, bit_length: int) -> typing.Iterable[int]: + r = self._padding + mx = self.max + lcm = math.lcm(r, bit_length) + for x in self._child.modulo(lcm): + assert x <= mx and x < lcm + yield self._pad(x) % bit_length @property def min(self) -> int: - r = self._padding - return ((self._child.min + r - 1) // r) * r + return self._pad(self._child.min) @property def max(self) -> int: - r = self._padding - return ((self._child.max + r - 1) // r) * r + return self._pad(self._child.max) def expand(self) -> typing.Iterable[int]: + return map(self._pad, self._child.expand()) + + def _pad(self, x: int) -> int: r = self._padding - for x in self._child.expand(): - yield ((x + r - 1) // r) * r + return ((x + r - 1) // r) * r class ConcatenationOperator(Operator): @@ -104,19 +102,14 @@ def __init__(self, children: typing.Iterable[Operator]) -> None: if not self._children: raise ValueError("This operator is not defined on zero operands") - def is_aligned_at(self, bit_length: int) -> bool: - # Trivial case: if all children are aligned, the result is also aligned. - if all(x.is_aligned_at(bit_length) for x in self._children): - return True - # If all children are fixed-length, their sizes can be added to check alignment in constant time. - mn, mx = self.min, self.max - if mn == mx and mn % bit_length == 0: - return True - # Analytical solution is not possible, use brute-force check. - for x in self.expand(): - if x % bit_length != 0: - return False - return True + def modulo(self, bit_length: int) -> typing.Iterable[int]: + # Take the modulus from each child and find all combinations. + # The computational complexity is tightly bounded because the cardinality of the modulus set is less than + # the bit length operand. + mods = [set(ch.modulo(bit_length)) for ch in self._children] + prod = itertools.product(*mods) + sums = set(map(sum, prod)) + return set(typing.cast(int, x) % bit_length for x in sums) @property def min(self) -> int: diff --git a/pydsdl/_bit_length_set/_symbolic_test.py b/pydsdl/_bit_length_set/_symbolic_test.py new file mode 100644 index 0000000..588c59c --- /dev/null +++ b/pydsdl/_bit_length_set/_symbolic_test.py @@ -0,0 +1,107 @@ +# Copyright (c) 2021 UAVCAN Consortium +# This software is distributed under the terms of the MIT License. +# Author: Pavel Kirienko + +import random +from ._symbolic import NullaryOperator, PaddingOperator, ConcatenationOperator + + +def _unittest_nullary() -> None: + op = NullaryOperator([]) + assert set(op.expand()) == {0} + assert set(op.modulo(12345)) == {0} + assert op.min == op.max == 0 + + op = NullaryOperator([1, 2, 3, 4, 5, 6, 7, 8]) + assert set(op.expand()) == {1, 2, 3, 4, 5, 6, 7, 8} + assert set(op.modulo(4)) == {0, 1, 2, 3} + assert (op.min, op.max) == (1, 8) + + +def _unittest_padding() -> None: + op = PaddingOperator( + NullaryOperator([1, 2, 3, 4, 5, 6, 7, 8, 9]), + 4, + ) + assert op.min == 4 + assert op.max == 12 + assert set(op.expand()) == {4, 8, 12} + assert set(op.modulo(2)) == {0} + assert set(op.modulo(4)) == {0} + assert set(op.modulo(8)) == {0, 4} + assert set(op.modulo(16)) == {4, 8, 12} + + assert set(x % 6 for x in op.expand()) == {0, 2, 4} # Reference + assert set(op.modulo(6)) == {0, 2, 4} + + assert set(x % 7 for x in op.expand()) == {1, 4, 5} # Reference + assert set(op.modulo(7)) == {1, 4, 5} + + for _ in range(10_000): + child = NullaryOperator(random.randint(0, 1024) for _ in range(random.randint(0, 100))) + alignment = random.randint(1, 64) + op = PaddingOperator(child, alignment) + bl = random.randint(1, 64) + assert set(op.modulo(bl)) == {x % bl for x in op.expand()} + + +def _unittest_concatenation() -> None: + op = ConcatenationOperator( + [ + NullaryOperator([1]), + NullaryOperator([2]), + NullaryOperator([10]), + ] + ) + assert op.min == op.max == 13 + assert set(op.expand()) == {13} + assert set(op.modulo(1)) == {0} + assert set(op.modulo(2)) == {1} + assert set(op.modulo(13)) == {0} + assert set(op.modulo(8)) == {5} + + op = ConcatenationOperator( + [ + NullaryOperator([1, 2, 10]), + ] + ) + assert op.min == 1 + assert op.max == 10 + assert set(op.expand()) == {1, 2, 10} + assert set(op.modulo(1)) == {0} + assert set(op.modulo(2)) == {0, 1} + assert set(op.modulo(8)) == {1, 2} + + op = ConcatenationOperator( + [ + NullaryOperator([1, 2]), + NullaryOperator([4, 5]), + ] + ) + assert op.min == 5 + assert op.max == 7 + assert set(op.expand()) == {5, 6, 7} + assert set(op.modulo(5)) == {0, 1, 2} + assert set(op.modulo(8)) == {5, 6, 7} + + op = ConcatenationOperator( + [ + NullaryOperator([1, 2, 3]), + NullaryOperator([4, 5, 6]), + NullaryOperator([7, 8, 9]), + ] + ) + assert op.min == 12 + assert op.max == 18 + assert set(op.expand()) == {12, 13, 14, 15, 16, 17, 18} + assert set(op.modulo(8)) == {0, 1, 2, 4, 5, 6, 7} # 3 is missing + + for _ in range(100): + op = ConcatenationOperator( + [ + NullaryOperator(random.randint(0, 1024) for _ in range(random.randint(0, 10))) + for _ in range(random.randint(1, 10)) + ] + ) + bl = random.randint(1, 64) + assert set(op.modulo(bl)) == {x % bl for x in op.expand()} From 59fc04e79aef60585b1a1dabdec75688fa7d52eb Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Sun, 4 Apr 2021 17:57:17 +0300 Subject: [PATCH 04/21] Implement all operators --- pydsdl/_bit_length_set/_symbolic.py | 148 ++++++++++++++++-- pydsdl/_bit_length_set/_symbolic_test.py | 183 ++++++++++++++++++++++- 2 files changed, 311 insertions(+), 20 deletions(-) diff --git a/pydsdl/_bit_length_set/_symbolic.py b/pydsdl/_bit_length_set/_symbolic.py index 7bccf89..dcb55fc 100644 --- a/pydsdl/_bit_length_set/_symbolic.py +++ b/pydsdl/_bit_length_set/_symbolic.py @@ -10,7 +10,10 @@ class Operator(abc.ABC): @abc.abstractmethod - def modulo(self, bit_length: int) -> typing.Iterable[int]: + def modulo(self, divisor: int) -> typing.Iterable[int]: + """ + May return duplicates. + """ raise NotImplementedError @property @@ -29,9 +32,14 @@ def expand(self) -> typing.Iterable[int]: Transform the symbolic form into numerical form. This is useful for cross-checking derived solutions and for DSDL expression evaluation. For complex expressions this may be incomputable due to combinatorial explosion or memory limits. + May return duplicates. """ raise NotImplementedError + @abc.abstractmethod + def __repr__(self) -> str: + raise NotImplementedError + class NullaryOperator(Operator): """ @@ -41,8 +49,8 @@ class NullaryOperator(Operator): def __init__(self, values: typing.Iterable[int]) -> None: self._value = frozenset(values) or frozenset({0}) - def modulo(self, bit_length: int) -> typing.Iterable[int]: - return set(map(lambda x: x % bit_length, self._value)) + def modulo(self, divisor: int) -> typing.Iterable[int]: + return map(lambda x: x % divisor, self._value) @property def min(self) -> int: @@ -55,6 +63,9 @@ def max(self) -> int: def expand(self) -> typing.Iterable[int]: return self._value + def __repr__(self) -> str: + return "{%s}" % ",".join(str(x) for x in sorted(self._value)) + class PaddingOperator(Operator): """ @@ -67,13 +78,13 @@ def __init__(self, child: Operator, alignment: int) -> None: self._child = child self._padding = int(alignment) - def modulo(self, bit_length: int) -> typing.Iterable[int]: + def modulo(self, divisor: int) -> typing.Iterable[int]: r = self._padding mx = self.max - lcm = math.lcm(r, bit_length) - for x in self._child.modulo(lcm): + lcm = math.lcm(r, divisor) + for x in set(self._child.modulo(lcm)): assert x <= mx and x < lcm - yield self._pad(x) % bit_length + yield self._pad(x) % divisor @property def min(self) -> int: @@ -84,12 +95,15 @@ def max(self) -> int: return self._pad(self._child.max) def expand(self) -> typing.Iterable[int]: - return map(self._pad, self._child.expand()) + return set(map(self._pad, self._child.expand())) def _pad(self, x: int) -> int: r = self._padding return ((x + r - 1) // r) * r + def __repr__(self) -> str: + return "pad(%d,%r)" % (self._padding, self._child) + class ConcatenationOperator(Operator): """ @@ -102,14 +116,14 @@ def __init__(self, children: typing.Iterable[Operator]) -> None: if not self._children: raise ValueError("This operator is not defined on zero operands") - def modulo(self, bit_length: int) -> typing.Iterable[int]: + def modulo(self, divisor: int) -> typing.Iterable[int]: # Take the modulus from each child and find all combinations. # The computational complexity is tightly bounded because the cardinality of the modulus set is less than # the bit length operand. - mods = [set(ch.modulo(bit_length)) for ch in self._children] + mods = [set(ch.modulo(divisor)) for ch in self._children] prod = itertools.product(*mods) sums = set(map(sum, prod)) - return set(typing.cast(int, x) % bit_length for x in sums) + return {typing.cast(int, x) % divisor for x in sums} @property def min(self) -> int: @@ -120,5 +134,113 @@ def max(self) -> int: return sum(x.max for x in self._children) def expand(self) -> typing.Iterable[int]: - for el in itertools.product(*(x.expand() for x in self._children)): - yield sum(el) + return {sum(el) for el in itertools.product(*(x.expand() for x in self._children))} + + def __repr__(self) -> str: + return "concat(%s)" % ",".join(map(repr, self._children)) + + +class RepetitionOperator(Operator): + """ + Concatenates ``k`` copies of the child. + This is equivalent to :class:`ConcatenationOperator` where the child is replicated ``k`` times. + """ + + def __init__(self, child: Operator, k: int) -> None: + self._k = int(k) + self._child = child + + def modulo(self, divisor: int) -> typing.Iterable[int]: + return { + (sum(el) % divisor) for el in itertools.combinations_with_replacement(self._child.modulo(divisor), self._k) + } + + @property + def min(self) -> int: + return self._child.min * self._k + + @property + def max(self) -> int: + return self._child.max * self._k + + def expand(self) -> typing.Iterable[int]: + return {sum(el) for el in itertools.combinations_with_replacement(self._child.expand(), self._k)} + + def __repr__(self) -> str: + return "repeat(%d,%r)" % (self._k, self._child) + + +class RangeRepetitionOperator(Operator): + """ + Concatenates ``k in [0, k_max]`` copies of the child. + In other words, this is like ``k+1`` instances of :class:`RepetitionOperator`. + """ + + def __init__(self, child: Operator, k_max: int) -> None: + self._k_max = int(k_max) + self._child = child + + def modulo(self, divisor: int) -> typing.Iterable[int]: + single = set(self._child.modulo(divisor)) + # Values of k > divisor will yield repeated entries so we can apply a reduction. + equivalent_k_max = min(self._k_max, divisor) + for k in range(equivalent_k_max + 1): + for el in itertools.combinations_with_replacement(single, k): + yield sum(el) % divisor + + @property + def min(self) -> int: + return 0 + + @property + def max(self) -> int: + return self._child.max * self._k_max + + def expand(self) -> typing.Iterable[int]: + ch = set(self._child.expand()) + for k in range(self._k_max + 1): + for el in itertools.combinations_with_replacement(ch, k): + yield sum(el) + + def __repr__(self) -> str: + return "repeat(<=%d,%r)" % (self._k_max, self._child) + + +class UnionOperator(Operator): + def __init__(self, children: typing.Iterable[Operator]) -> None: + self._children = list(children) + if not self._children: + raise ValueError("This operator is not defined on zero operands") + + def modulo(self, divisor: int) -> typing.Iterable[int]: + for x in self._children: + yield from x.modulo(divisor) + + @property + def min(self) -> int: + return min(x.min for x in self._children) + + @property + def max(self) -> int: + return max(x.max for x in self._children) + + def expand(self) -> typing.Iterable[int]: + for x in self._children: + yield from x.expand() + + def __repr__(self) -> str: + return "(%s)" % "|".join(map(repr, self._children)) + + +def validate_numerically(op: Operator) -> None: + """ + Validates the correctness of symbolic derivations by comparing the results against reference values + obtained numerically. + The computational complexity may be prohibitively high for some inputs due to combinatorial explosion. + In case of a divergence the function triggers an assertion failure. + """ + s = set(op.expand()) + assert min(s) == op.min + assert max(s) == op.max + for div in range(1, 65): + assert set(op.modulo(div)) == {x % div for x in s} diff --git a/pydsdl/_bit_length_set/_symbolic_test.py b/pydsdl/_bit_length_set/_symbolic_test.py index 588c59c..dbba737 100644 --- a/pydsdl/_bit_length_set/_symbolic_test.py +++ b/pydsdl/_bit_length_set/_symbolic_test.py @@ -2,8 +2,10 @@ # This software is distributed under the terms of the MIT License. # Author: Pavel Kirienko +import typing import random -from ._symbolic import NullaryOperator, PaddingOperator, ConcatenationOperator +import itertools +from ._symbolic import NullaryOperator, validate_numerically def _unittest_nullary() -> None: @@ -11,14 +13,18 @@ def _unittest_nullary() -> None: assert set(op.expand()) == {0} assert set(op.modulo(12345)) == {0} assert op.min == op.max == 0 + validate_numerically(op) op = NullaryOperator([1, 2, 3, 4, 5, 6, 7, 8]) assert set(op.expand()) == {1, 2, 3, 4, 5, 6, 7, 8} assert set(op.modulo(4)) == {0, 1, 2, 3} assert (op.min, op.max) == (1, 8) + validate_numerically(op) def _unittest_padding() -> None: + from ._symbolic import PaddingOperator + op = PaddingOperator( NullaryOperator([1, 2, 3, 4, 5, 6, 7, 8, 9]), 4, @@ -30,6 +36,7 @@ def _unittest_padding() -> None: assert set(op.modulo(4)) == {0} assert set(op.modulo(8)) == {0, 4} assert set(op.modulo(16)) == {4, 8, 12} + validate_numerically(op) assert set(x % 6 for x in op.expand()) == {0, 2, 4} # Reference assert set(op.modulo(6)) == {0, 2, 4} @@ -37,15 +44,18 @@ def _unittest_padding() -> None: assert set(x % 7 for x in op.expand()) == {1, 4, 5} # Reference assert set(op.modulo(7)) == {1, 4, 5} - for _ in range(10_000): + for _ in range(100): child = NullaryOperator(random.randint(0, 1024) for _ in range(random.randint(0, 100))) alignment = random.randint(1, 64) op = PaddingOperator(child, alignment) - bl = random.randint(1, 64) - assert set(op.modulo(bl)) == {x % bl for x in op.expand()} + div = random.randint(1, 64) + assert set(op.modulo(div)) == {x % div for x in op.expand()} + validate_numerically(op) def _unittest_concatenation() -> None: + from ._symbolic import ConcatenationOperator + op = ConcatenationOperator( [ NullaryOperator([1]), @@ -59,6 +69,7 @@ def _unittest_concatenation() -> None: assert set(op.modulo(2)) == {1} assert set(op.modulo(13)) == {0} assert set(op.modulo(8)) == {5} + validate_numerically(op) op = ConcatenationOperator( [ @@ -71,6 +82,7 @@ def _unittest_concatenation() -> None: assert set(op.modulo(1)) == {0} assert set(op.modulo(2)) == {0, 1} assert set(op.modulo(8)) == {1, 2} + validate_numerically(op) op = ConcatenationOperator( [ @@ -83,6 +95,7 @@ def _unittest_concatenation() -> None: assert set(op.expand()) == {5, 6, 7} assert set(op.modulo(5)) == {0, 1, 2} assert set(op.modulo(8)) == {5, 6, 7} + validate_numerically(op) op = ConcatenationOperator( [ @@ -95,13 +108,169 @@ def _unittest_concatenation() -> None: assert op.max == 18 assert set(op.expand()) == {12, 13, 14, 15, 16, 17, 18} assert set(op.modulo(8)) == {0, 1, 2, 4, 5, 6, 7} # 3 is missing + validate_numerically(op) - for _ in range(100): + for _ in range(10): op = ConcatenationOperator( [ NullaryOperator(random.randint(0, 1024) for _ in range(random.randint(0, 10))) for _ in range(random.randint(1, 10)) ] ) - bl = random.randint(1, 64) - assert set(op.modulo(bl)) == {x % bl for x in op.expand()} + div = random.randint(1, 64) + assert set(op.modulo(div)) == {x % div for x in op.expand()} + validate_numerically(op) + + +def _unittest_repetition() -> None: + from ._symbolic import RepetitionOperator + + op = RepetitionOperator( + NullaryOperator([7, 11, 17]), + 3, + ) + assert op.min == 7 * 3 + assert op.max == 17 * 3 + assert set(op.expand()) == set(map(sum, itertools.combinations_with_replacement([7, 11, 17], 3))) + assert set(op.expand()) == {21, 25, 29, 31, 33, 35, 39, 41, 45, 51} + assert set(op.modulo(7)) == {0, 1, 2, 3, 4, 5, 6} + assert set(op.modulo(8)) == {1, 3, 5, 7} + validate_numerically(op) + + for _ in range(10): + child = NullaryOperator(random.randint(0, 100) for _ in range(random.randint(0, 10))) + k = random.randint(0, 10) + ref = set(map(sum, itertools.combinations_with_replacement(child.expand(), k))) + op = RepetitionOperator(child, k) + assert set(op.expand()) == ref + + assert op.min == min(child.expand()) * k + assert op.max == max(child.expand()) * k + + div = random.randint(1, 64) + assert set(op.modulo(div)) == {typing.cast(int, x) % div for x in ref} + + validate_numerically(op) + + +def _unittest_range_repetition() -> None: + from ._symbolic import RangeRepetitionOperator + + op = RangeRepetitionOperator( + NullaryOperator([7, 11, 17]), + 3, + ) + assert op.min == 0 # Always 0 + assert op.max == 17 * 3 + assert set(op.expand()) == ( + {0} + | set(map(sum, itertools.combinations_with_replacement([7, 11, 17], 1))) + | set(map(sum, itertools.combinations_with_replacement([7, 11, 17], 2))) + | set(map(sum, itertools.combinations_with_replacement([7, 11, 17], 3))) + ) + assert set(op.expand()) == {0, 7, 11, 14, 17, 18, 21, 22, 24, 25, 28, 29, 31, 33, 34, 35, 39, 41, 45, 51} + assert set(op.modulo(7)) == {0, 1, 2, 3, 4, 5, 6} + validate_numerically(op) + + op = RangeRepetitionOperator( + NullaryOperator([7, 11]), + 2, + ) + assert op.min == 0 # Always 0 + assert op.max == 22 + assert set(op.expand()) == {0, 7, 14, 11, 18, 22} + assert set(op.modulo(7)) == {0, 1, 4} + assert set(op.modulo(8)) == {0, 2, 3, 6, 7} + validate_numerically(op) + + for _ in range(10): + child = NullaryOperator(random.randint(0, 100) for _ in range(random.randint(0, 10))) + k_max = random.randint(0, 10) + ref = set( + itertools.chain( + *(map(sum, itertools.combinations_with_replacement(child.expand(), k)) for k in range(k_max + 1)) + ) + ) + op = RangeRepetitionOperator(child, k_max) + assert set(op.expand()) == ref + + assert op.min == 0 + assert op.max == max(child.expand()) * k_max + + div = random.randint(1, 64) + assert set(op.modulo(div)) == {typing.cast(int, x) % div for x in ref} + + validate_numerically(op) + + +def _unittest_union() -> None: + from ._symbolic import UnionOperator + + op = UnionOperator( + [ + NullaryOperator([1, 2, 3]), + NullaryOperator([4, 5, 6]), + NullaryOperator([7, 8, 9]), + ] + ) + assert op.min == 1 + assert op.max == 9 + assert set(op.expand()) == {1, 2, 3, 4, 5, 6, 7, 8, 9} + assert set(op.modulo(8)) == {x % 8 for x in op.expand()} + validate_numerically(op) + + op = UnionOperator( + [ + NullaryOperator([13, 17, 21, 29]), + NullaryOperator([8, 16]), + ] + ) + assert op.min == 8 + assert op.max == 29 + assert set(op.expand()) == {13, 17, 21, 29, 8, 16} + assert set(op.modulo(7)) == {x % 7 for x in op.expand()} + assert set(op.modulo(8)) == {x % 8 for x in op.expand()} + validate_numerically(op) + + for _ in range(10): + op = UnionOperator( + [ + NullaryOperator(random.randint(0, 1024) for _ in range(random.randint(0, 10))) + for _ in range(random.randint(1, 10)) + ] + ) + validate_numerically(op) + + +def _unittest_repr() -> None: + from ._symbolic import ( + PaddingOperator, + ConcatenationOperator, + RepetitionOperator, + RangeRepetitionOperator, + UnionOperator, + ) + + op = UnionOperator( + [ + PaddingOperator(NullaryOperator([1, 2, 3, 4, 5, 6, 7, 8]), 4), + ConcatenationOperator( + [ + NullaryOperator([8, 16]), + NullaryOperator([96, 112, 120]), + RangeRepetitionOperator(NullaryOperator([64]), 8), + ] + ), + RepetitionOperator( + UnionOperator( + [ + NullaryOperator([32]), + NullaryOperator([40]), + ] + ), + 2, + ), + ] + ) + validate_numerically(op) + assert repr(op) == "(pad(4,{1,2,3,4,5,6,7,8})|concat({8,16},{96,112,120},repeat(<=8,{64}))|repeat(2,({32}|{40})))" From 9e23e6f7b864bf0f96a386273b6a81ac87b1ef0f Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Mon, 5 Apr 2021 23:08:04 +0300 Subject: [PATCH 05/21] Update the bit length set class to use the new analytical solver --- pydsdl/_bit_length_set/_bit_length_set.py | 337 +++++++++++++++------- pydsdl/_bit_length_set/_symbolic.py | 53 +++- pydsdl/_bit_length_set/_symbolic_test.py | 37 +-- 3 files changed, 298 insertions(+), 129 deletions(-) diff --git a/pydsdl/_bit_length_set/_bit_length_set.py b/pydsdl/_bit_length_set/_bit_length_set.py index 1262d74..bb094d9 100644 --- a/pydsdl/_bit_length_set/_bit_length_set.py +++ b/pydsdl/_bit_length_set/_bit_length_set.py @@ -3,7 +3,9 @@ # Author: Pavel Kirienko import typing +import warnings import itertools +from ._symbolic import Operator, NullaryOperator, MemoizationOperator class BitLengthSet: @@ -11,27 +13,32 @@ class BitLengthSet: This type represents the Bit Length Set as defined in the Specification. It is used for representing bit offsets of fields and bit lengths of serialized representations. - Instances are comparable between each other, with plain integers, and with native sets of integers. - The methods do not mutate the instance they are invoked on; instead, the result is returned as a new instance. + Most of the methods are evaluated analytically in nearly constant time rather than numerically. + This is critical for complex layouts where numerical methods break due to combinatorial explosion and/or memory + limits (see this discussed in https://github.com/UAVCAN/pydsdl/issues/23). + There are several methods that trigger numerical expansion of the solution; + due to the aforementioned combinatorial difficulties, they may be effectively incomputable in reasonable time, + so production systems should not rely on them. """ - def __init__(self, values: typing.Union[typing.Iterable[int], int] = 0): + def __init__(self, value: typing.Union[typing.Iterable[int], int, Operator, "BitLengthSet"] = 0): """ - Accepts any iterable that yields integers (like another bit length set) or a single integer, - in which case it will result in the set containing only the one specified integer. - The source container is always deep-copied. + Accepts any iterable that yields integers (like another bit length set) or a single integer. """ - if isinstance(values, set): - self._value = values # Do not convert if already a set - elif isinstance(values, int): - self._value = {values} + if isinstance(value, BitLengthSet): + self._op = value._op # type: Operator + elif isinstance(value, Operator): + self._op = MemoizationOperator(value) + elif isinstance(value, int): + self._op = NullaryOperator([value]) else: - self._value = set(map(int, values)) + self._op = NullaryOperator(value) + + # ======================================== QUERY METHODS ======================================== def is_aligned_at(self, bit_length: int) -> bool: """ - Checks whether all of the contained offset values match the specified alignment goal. - A zero-length bit length set is considered to have infinite alignment. + Shorthand for ``set(self % bit_length) == {0}``. >>> BitLengthSet(64).is_aligned_at(32) True @@ -42,7 +49,7 @@ def is_aligned_at(self, bit_length: int) -> bool: >>> BitLengthSet().is_aligned_at(1234567) True """ - return set(map(lambda x: x % bit_length, self._value)) == {0} + return set(self % bit_length) == {0} def is_aligned_at_byte(self) -> bool: """ @@ -57,9 +64,52 @@ def is_aligned_at_byte(self) -> bool: return self.is_aligned_at(SerializableType.BITS_PER_BYTE) + @property + def min(self) -> int: + """ + The smallest element in the set derived analytically. + + >>> BitLengthSet.concatenate([{1, 2, 3}, {4, 5, 6}, {7, 8, 9}]).min + 12 + """ + return self._op.min + + @property + def max(self) -> int: + """ + The largest element in the set derived analytically. + + >>> BitLengthSet.concatenate([{1, 2, 3}, {4, 5, 6}, {7, 8, 9}]).pad_to_alignment(8).max + 24 + """ + return self._op.max + + @property + def fixed_length(self) -> bool: + """ + Shorthand for ``self.min == self.max``. + """ + return self.min == self.max + + def __mod__(self, divisor: typing.Any) -> typing.Iterable[int]: + """ + Elementwise modulus derived analytically. + + >>> sorted(BitLengthSet([]) % 12345) # Empty set, always zero. + [0] + >>> sorted(BitLengthSet([8, 12, 16]) % 8) + [0, 4] + """ + if isinstance(divisor, int): + # The type is reported as iterable[int], not sure yet if we should specialize it further. Time will tell. + return BitLengthSet(self._op.modulo(divisor)) + return NotImplemented + + # ======================================== COMPOSITION METHODS ======================================== + def pad_to_alignment(self, bit_length: int) -> "BitLengthSet": """ - Pad each element in the set such that the set becomes aligned at the specified alignment goal. + Transform the bit length set expression such that the set becomes aligned at the specified alignment goal. After this transformation is applied, elements may become up to ``bit_length-1`` bits larger. The argument shall be a positive integer, otherwise it's a :class:`ValueError`. @@ -68,119 +118,196 @@ def pad_to_alignment(self, bit_length: int) -> "BitLengthSet": >>> BitLengthSet(randint(1, 1000) for _ in range(100)).pad_to_alignment(alignment).is_aligned_at(alignment) True """ - r = int(bit_length) - if r < 1: - raise ValueError("Invalid alignment: %r bits" % r) - assert r >= 1 - out = BitLengthSet(((x + r - 1) // r) * r for x in self) - assert not out or 0 <= min(out) - min(self) < r - assert not out or 0 <= max(out) - max(self) < r - assert len(out) <= len(self) - return out + from ._symbolic import PaddingOperator - def elementwise_sum_k_multicombinations(self, k: int) -> "BitLengthSet": + return BitLengthSet(PaddingOperator(self._op, bit_length)) + + def repeat(self, k: int) -> "BitLengthSet": """ - This is a special case of :meth:`elementwise_sum_cartesian_product`. - The original object is not modified. + Construct a new bit length set expression that repeats the current one the specified number of times. + This reflects the arrangement of fixed-length DSDL array elements. + + >>> sorted(BitLengthSet(1).repeat(1)) + [1] + >>> sorted(BitLengthSet({1, 2, 3}).repeat(1)) + [1, 2, 3] + >>> sorted(BitLengthSet({1, 2, 3}).repeat(2)) + [2, 3, 4, 5, 6] """ - k_multicombination = itertools.combinations_with_replacement(self, k) - elementwise_sums = map(sum, k_multicombination) - return BitLengthSet(elementwise_sums) # type: ignore + from ._symbolic import RepetitionOperator + + return BitLengthSet(RepetitionOperator(self._op, k)) + + def repeat_range(self, k: int) -> "BitLengthSet": + """ + >>> sorted(BitLengthSet({1, 2, 3}).repeat_range(2)) + [0, 1, 2, 3, 4, 5, 6] + """ + from ._symbolic import RangeRepetitionOperator + + return BitLengthSet(RangeRepetitionOperator(self._op, k)) @staticmethod - def elementwise_sum_cartesian_product( - sets: typing.Iterable[typing.Union[typing.Iterable[int], int]] - ) -> "BitLengthSet": + def concatenate(sets: typing.Iterable[typing.Union["BitLengthSet", typing.Iterable[int], int]]) -> "BitLengthSet": """ - This operation is fundamental for bit length and bit offset (which are, generally, the same thing) computation. + Construct a new bit length set expression that concatenates multiple bit length sets one after another. + This reflects the data fields arrangement in a DSDL structure type. + + >>> sorted(BitLengthSet.concatenate([1, 2, 10])) + [13] + >>> sorted(BitLengthSet.concatenate([{1, 2}, {4, 5}])) + [5, 6, 7] + >>> sorted(BitLengthSet.concatenate([{1, 2, 3}, {4, 5, 6}])) + [5, 6, 7, 8, 9] + >>> sorted(BitLengthSet.concatenate([{1, 2, 3}, {4, 5, 6}, {7, 8, 9}])) + [12, 13, 14, 15, 16, 17, 18] + """ + from ._symbolic import ConcatenationOperator + + op = ConcatenationOperator(BitLengthSet(s)._op for s in sets) + return BitLengthSet(op) - The basic background is explained in the specification. The idea is that the bit offset of a given entity - in a data type definition of the structure category (or, in other words, the bit length set of serialized - representations of the preceding entities, which is the same thing, assuming that the data type is of the - structure category) is a function of bit length sets of each preceding entity. Various combinations of - bit lengths of the preceding entities are possible, which can be expressed through the Cartesian product over - the bit length sets of the preceding entities. Since in a type of the structure category entities are arranged - as an ordered sequence of a fixed length (meaning that entities can't be added or removed), the resulting - bit length (offset) is computed by elementwise summation of each element of the Cartesian product. + @staticmethod + def unite(sets: typing.Iterable[typing.Union["BitLengthSet", typing.Iterable[int], int]]) -> "BitLengthSet": + """ + Construct a new bit length set expression that is a union of multiple bit length sets. + This reflects the data fields arrangement in a DSDL discriminated union. - This method is not applicable for the tagged union type category, since a tagged union holds exactly one - value at any moment; therefore, the bit length set of a tagged union is simply a union of bit length sets - of each entity that can be contained in the union, plus the length of the implicit union tag field. + >>> sorted(BitLengthSet.unite([1, 2, 10])) + [1, 2, 10] + >>> sorted(BitLengthSet.unite([{1, 2}, {2, 3}])) + [1, 2, 3] + """ + from ._symbolic import UnionOperator - From the standpoint of bit length combination analysis, fixed-length arrays are a special case of structures, - because they also contain a fixed ordered sequence of fields, where all fields are of the same type. - The method defined for structures applies to fixed-length arrays, but one should be aware that it may be - computationally suboptimal, since the fact that all array elements are of the same type allows us to replace - the computationally expensive Cartesian product with k-multicombinations (k-selections). + op = UnionOperator(BitLengthSet(s)._op for s in sets) + return BitLengthSet(op) - In the context of bit length analysis, variable-length arrays do not require any special treatment, since a - variable-length array with the capacity of N elements can be modeled as a tagged union containing - N fixed arrays of length from 1 to N, plus one empty field (representing the case of an empty variable-length - array). + def __add__(self, other: typing.Union["BitLengthSet", typing.Iterable[int], int]) -> "BitLengthSet": """ - cartesian_product = itertools.product(*list(map(BitLengthSet, sets))) - elementwise_sums = map(sum, cartesian_product) - return BitLengthSet(elementwise_sums) # type: ignore + A shorthand for ``concatenate([self, other])``. + One can easily see that if the argument is a set of one value (or a scalar), + this method will result in the addition of said scalar to every element of the original set. - def __iter__(self) -> typing.Iterator[int]: - return iter(self._value) + >>> sorted(BitLengthSet() + BitLengthSet()) + [0] + >>> sorted(BitLengthSet(4) + BitLengthSet(3)) + [7] + >>> sorted(BitLengthSet({4, 91}) + 3) + [7, 94] + >>> sorted(BitLengthSet({4, 91}) + {5, 7}) + [9, 11, 96, 98] + """ + return BitLengthSet.concatenate([self, other]) - def __len__(self) -> int: - """Cardinality.""" - return len(self._value) + def __radd__(self, other: typing.Union["BitLengthSet", typing.Iterable[int], int]) -> "BitLengthSet": + """ + See :meth:`__add__`. - def __eq__(self, other: typing.Any) -> bool: + >>> sorted({1, 2, 3} + BitLengthSet({4, 5, 6})) + [5, 6, 7, 8, 9] + >>> sorted(1 + BitLengthSet({2, 5, 7})) + [3, 6, 8] """ - Whether the current set equals the other. + return BitLengthSet.concatenate([other, self]) + + def __or__(self, other: typing.Union["BitLengthSet", typing.Iterable[int], int]) -> "BitLengthSet": """ - if isinstance(other, BitLengthSet): - return self._value == other._value - return NotImplemented + A shorthand for ``unite([self, other])``. + + >>> a = BitLengthSet() + >>> a = a | BitLengthSet({1, 2, 3}) + >>> sorted(a) + [0, 1, 2, 3] + >>> a = a | {3, 4, 5} + >>> sorted(a) + [0, 1, 2, 3, 4, 5] + >>> sorted(a | 6) + [0, 1, 2, 3, 4, 5, 6] + """ + return BitLengthSet.unite([self, other]) - def __add__(self, other: typing.Any) -> "BitLengthSet": + def __ror__(self, other: typing.Union["BitLengthSet", typing.Iterable[int], int]) -> "BitLengthSet": """ - This operation models the addition of a new object to a serialized representation; - i.e., it is an alias for ``elementwise_sum_cartesian_product([self, other])``. - The result is stored into a new instance which is returned. + See :meth:`__or__`. - If the argument is a bit length set, an elementwise sum set of the Cartesian product of the argument set - with the current set will be computed, and the result will be returned as a new set (self is not modified). - One can easily see that if the argument is a set of one value (or a scalar), - this method will result in the addition of said scalar to every element of the original set. + >>> sorted({1, 2, 3} | BitLengthSet({4, 5, 6})) + [1, 2, 3, 4, 5, 6] + >>> sorted(1 | BitLengthSet({2, 5, 7})) + [1, 2, 5, 7] """ - if isinstance(other, BitLengthSet): - return BitLengthSet.elementwise_sum_cartesian_product([self, other]) - return NotImplemented + return BitLengthSet.unite([other, self]) - def __radd__(self, other: typing.Any) -> "BitLengthSet": + # ======================================== SLOW NUMERICAL METHODS ======================================== + + def __iter__(self) -> typing.Iterator[int]: """ - See :meth:`__add__`. + .. attention:: + This method triggers slow numerical expansion. + + You might be tempted to use ``min(foo)`` or ``max(foo)`` for detecting length bounds. + This may be effectively incomputable for data types with complex layout. + Instead, use :attr:`min` and :attr:`max`. """ - if isinstance(other, BitLengthSet): - return other + self - return NotImplemented + return iter(self._op.expand()) - def __or__(self, other: typing.Any) -> "BitLengthSet": + def __len__(self) -> int: """ - Creates and returns a new set that is a union of this set with another bit length set. + .. attention:: + This method triggers slow numerical expansion. + + You might be tempted to use something like ``len(foo) == 1`` for detecting fixed-length sets. + This may be effectively incomputable for data types with complex layout. + Instead, use :attr:`fixed_length`. + + >>> len(BitLengthSet(0)) + 1 + >>> len(BitLengthSet([1, 2, 3])) + 3 """ - if isinstance(other, BitLengthSet): - return BitLengthSet(self._value | other._value) - return NotImplemented + return len(self._op.expand()) - def __ror__(self, other: typing.Any) -> "BitLengthSet": + def __eq__(self, other: typing.Any) -> bool: """ - See :meth:`__or__`. + .. attention:: + This method triggers slow numerical expansion. + + >>> BitLengthSet([1, 2, 3]) == {1, 2, 3} + True + >>> BitLengthSet([123]) == 123 + True """ - if isinstance(other, BitLengthSet): - return other | self - return NotImplemented + try: + return set(self) == set(BitLengthSet(other)) + except TypeError: + return NotImplemented + + # ======================================== AUXILIARY METHODS ======================================== def __str__(self) -> str: - return "{" + ", ".join(map(str, sorted(self._value))) + "}" + return str(self._op) def __repr__(self) -> str: - return type(self).__name__ + "(" + str(self or "") + ")" + return "%s(%s)" % (type(self).__name__, self) + + # ======================================== DEPRECATED METHODS ======================================== + + def elementwise_sum_k_multicombinations(self, k: int) -> "BitLengthSet": + """ + :meta private: + """ + warnings.warn(DeprecationWarning("Use repeat() instead")) + return self.repeat(k) + + @staticmethod + def elementwise_sum_cartesian_product( + sets: typing.Iterable[typing.Union[typing.Iterable[int], int]] + ) -> "BitLengthSet": + """ + :meta private: + """ + warnings.warn(DeprecationWarning("Use concatenate() instead")) + return BitLengthSet.concatenate(sets) def _unittest_bit_length_set() -> None: @@ -193,9 +320,9 @@ def _unittest_bit_length_set() -> None: assert BitLengthSet(123) == 123 assert BitLengthSet(123) != 124 assert not (BitLengthSet(123) == "123") # pylint: disable=unneeded-not - assert str(BitLengthSet()) == "{}" + assert str(BitLengthSet()) == "{0}" assert str(BitLengthSet(123)) == "{123}" - assert str(BitLengthSet((123, 0, 456, 12))) == "{0, 12, 123, 456}" # Always sorted! + assert str(BitLengthSet((123, 0, 456, 12))) == "{0,12,123,456}" # Always sorted! assert BitLengthSet().is_aligned_at(1) assert BitLengthSet().is_aligned_at(1024) assert BitLengthSet(8).is_aligned_at_byte() @@ -214,24 +341,16 @@ def _unittest_bit_length_set() -> None: assert {1, 2, 3} + BitLengthSet([4, 5, 6]) == {5, 6, 7, 8, 9} with raises(TypeError): - assert BitLengthSet([4, 5, 6]) + "1" - - with raises(TypeError): - assert "1" + BitLengthSet([4, 5, 6]) - - with raises(TypeError): - s = BitLengthSet([4, 5, 6]) - s += "1" + assert BitLengthSet([4, 5, 6]) + "a" with raises(TypeError): - assert "1" | BitLengthSet([4, 5, 6]) + assert "a" + BitLengthSet([4, 5, 6]) with raises(TypeError): - assert BitLengthSet([4, 5, 6]) | "1" + assert "a" | BitLengthSet([4, 5, 6]) with raises(TypeError): - s = BitLengthSet([4, 5, 6]) - s |= "1" + assert BitLengthSet([4, 5, 6]) | "a" with raises(ValueError): BitLengthSet([4, 5, 6]).pad_to_alignment(0) diff --git a/pydsdl/_bit_length_set/_symbolic.py b/pydsdl/_bit_length_set/_symbolic.py index dcb55fc..ed849b6 100644 --- a/pydsdl/_bit_length_set/_symbolic.py +++ b/pydsdl/_bit_length_set/_symbolic.py @@ -9,6 +9,10 @@ class Operator(abc.ABC): + """ + Operators are immutable. This allows for aggressive caching and reference-sharing. + """ + @abc.abstractmethod def modulo(self, divisor: int) -> typing.Iterable[int]: """ @@ -47,7 +51,10 @@ class NullaryOperator(Operator): """ def __init__(self, values: typing.Iterable[int]) -> None: - self._value = frozenset(values) or frozenset({0}) + self._value = set(values) or {0} + for x in self._value: + if not isinstance(x, int): + raise TypeError("Invalid element for nullary set operator: %r" % x) def modulo(self, divisor: int) -> typing.Iterable[int]: return map(lambda x: x % divisor, self._value) @@ -60,7 +67,7 @@ def min(self) -> int: def max(self) -> int: return max(self._value) - def expand(self) -> typing.Iterable[int]: + def expand(self) -> typing.Set[int]: return self._value def __repr__(self) -> str: @@ -73,7 +80,7 @@ class PaddingOperator(Operator): """ def __init__(self, child: Operator, alignment: int) -> None: - if alignment < 1: # pragma: no cover + if alignment < 1: raise ValueError("Invalid alignment: %r bits" % alignment) self._child = child self._padding = int(alignment) @@ -232,6 +239,46 @@ def __repr__(self) -> str: return "(%s)" % "|".join(map(repr, self._children)) +class MemoizationOperator(Operator): + """ + This is a no-op transparent lazy cache on top of the child operator. + """ + + def __init__(self, child: Operator) -> None: + self._child = child + self._min = None # type: typing.Optional[int] + self._max = None # type: typing.Optional[int] + self._modula = {} # type: typing.Dict[int, typing.Set[int]] + self._expansion = None # type: typing.Optional[typing.Set[int]] + + def modulo(self, divisor: int) -> typing.Set[int]: + try: + return self._modula[divisor] + except LookupError: + self._modula[divisor] = set(self._child.modulo(divisor)) + return self._modula[divisor] + + @property + def min(self) -> int: + if self._min is None: + self._min = self._child.min + return self._min + + @property + def max(self) -> int: + if self._max is None: + self._max = self._child.max + return self._max + + def expand(self) -> typing.Set[int]: + if self._expansion is None: + self._expansion = set(self._child.expand()) + return self._expansion + + def __repr__(self) -> str: + return repr(self._child) # Not sure if we should indicate our presence considering that we're a no-op + + def validate_numerically(op: Operator) -> None: """ Validates the correctness of symbolic derivations by comparing the results against reference values diff --git a/pydsdl/_bit_length_set/_symbolic_test.py b/pydsdl/_bit_length_set/_symbolic_test.py index dbba737..eb221e7 100644 --- a/pydsdl/_bit_length_set/_symbolic_test.py +++ b/pydsdl/_bit_length_set/_symbolic_test.py @@ -249,28 +249,31 @@ def _unittest_repr() -> None: RepetitionOperator, RangeRepetitionOperator, UnionOperator, + MemoizationOperator, ) - op = UnionOperator( - [ - PaddingOperator(NullaryOperator([1, 2, 3, 4, 5, 6, 7, 8]), 4), - ConcatenationOperator( - [ - NullaryOperator([8, 16]), - NullaryOperator([96, 112, 120]), - RangeRepetitionOperator(NullaryOperator([64]), 8), - ] - ), - RepetitionOperator( - UnionOperator( + op = MemoizationOperator( + UnionOperator( + [ + PaddingOperator(NullaryOperator([1, 2, 3, 4, 5, 6, 7, 8]), 4), + ConcatenationOperator( [ - NullaryOperator([32]), - NullaryOperator([40]), + NullaryOperator([8, 16]), + NullaryOperator([96, 112, 120]), + RangeRepetitionOperator(NullaryOperator([64]), 8), ] ), - 2, - ), - ] + RepetitionOperator( + UnionOperator( + [ + NullaryOperator([32]), + NullaryOperator([40]), + ] + ), + 2, + ), + ] + ) ) validate_numerically(op) assert repr(op) == "(pad(4,{1,2,3,4,5,6,7,8})|concat({8,16},{96,112,120},repeat(<=8,{64}))|repeat(2,({32}|{40})))" From 3bd97367bd6133e4aac2d9c9e166312efab412e4 Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Tue, 6 Apr 2021 00:17:40 +0300 Subject: [PATCH 06/21] Update the bit length set usage --- pydsdl/_bit_length_set/_bit_length_set.py | 34 ++++--- pydsdl/_bit_length_set/_symbolic.py | 4 +- pydsdl/_bit_length_set/_symbolic_test.py | 22 ++--- pydsdl/_data_type_builder.py | 2 + pydsdl/_serializable/_array.py | 47 +++------ pydsdl/_serializable/_composite.py | 115 +++++++++++----------- pydsdl/_serializable/_serializable.py | 2 +- 7 files changed, 106 insertions(+), 120 deletions(-) diff --git a/pydsdl/_bit_length_set/_bit_length_set.py b/pydsdl/_bit_length_set/_bit_length_set.py index bb094d9..b3637b4 100644 --- a/pydsdl/_bit_length_set/_bit_length_set.py +++ b/pydsdl/_bit_length_set/_bit_length_set.py @@ -4,7 +4,6 @@ import typing import warnings -import itertools from ._symbolic import Operator, NullaryOperator, MemoizationOperator @@ -19,9 +18,11 @@ class BitLengthSet: There are several methods that trigger numerical expansion of the solution; due to the aforementioned combinatorial difficulties, they may be effectively incomputable in reasonable time, so production systems should not rely on them. + + Instances are guaranteed to be immutable. """ - def __init__(self, value: typing.Union[typing.Iterable[int], int, Operator, "BitLengthSet"] = 0): + def __init__(self, value: typing.Union[typing.Iterable[int], int, Operator, "BitLengthSet"]): """ Accepts any iterable that yields integers (like another bit length set) or a single integer. """ @@ -46,7 +47,7 @@ def is_aligned_at(self, bit_length: int) -> bool: False >>> BitLengthSet(48).is_aligned_at(16) True - >>> BitLengthSet().is_aligned_at(1234567) + >>> BitLengthSet(0).is_aligned_at(1234567) True """ return set(self % bit_length) == {0} @@ -95,7 +96,7 @@ def __mod__(self, divisor: typing.Any) -> typing.Iterable[int]: """ Elementwise modulus derived analytically. - >>> sorted(BitLengthSet([]) % 12345) # Empty set, always zero. + >>> sorted(BitLengthSet([0]) % 12345) [0] >>> sorted(BitLengthSet([8, 12, 16]) % 8) [0, 4] @@ -126,7 +127,10 @@ def repeat(self, k: int) -> "BitLengthSet": """ Construct a new bit length set expression that repeats the current one the specified number of times. This reflects the arrangement of fixed-length DSDL array elements. + This is a special case of :meth:`concatenate`. + >>> sorted(BitLengthSet(1).repeat(0)) + [0] >>> sorted(BitLengthSet(1).repeat(1)) [1] >>> sorted(BitLengthSet({1, 2, 3}).repeat(1)) @@ -138,14 +142,16 @@ def repeat(self, k: int) -> "BitLengthSet": return BitLengthSet(RepetitionOperator(self._op, k)) - def repeat_range(self, k: int) -> "BitLengthSet": + def repeat_range(self, k_max: int) -> "BitLengthSet": """ + This is like :meth:`repeat` but ``k`` spans the range ``[0, k_max]``. + >>> sorted(BitLengthSet({1, 2, 3}).repeat_range(2)) [0, 1, 2, 3, 4, 5, 6] """ from ._symbolic import RangeRepetitionOperator - return BitLengthSet(RangeRepetitionOperator(self._op, k)) + return BitLengthSet(RangeRepetitionOperator(self._op, k_max)) @staticmethod def concatenate(sets: typing.Iterable[typing.Union["BitLengthSet", typing.Iterable[int], int]]) -> "BitLengthSet": @@ -189,7 +195,7 @@ def __add__(self, other: typing.Union["BitLengthSet", typing.Iterable[int], int] One can easily see that if the argument is a set of one value (or a scalar), this method will result in the addition of said scalar to every element of the original set. - >>> sorted(BitLengthSet() + BitLengthSet()) + >>> sorted(BitLengthSet(0) + BitLengthSet(0)) [0] >>> sorted(BitLengthSet(4) + BitLengthSet(3)) [7] @@ -215,7 +221,7 @@ def __or__(self, other: typing.Union["BitLengthSet", typing.Iterable[int], int]) """ A shorthand for ``unite([self, other])``. - >>> a = BitLengthSet() + >>> a = BitLengthSet(0) >>> a = a | BitLengthSet({1, 2, 3}) >>> sorted(a) [0, 1, 2, 3] @@ -313,18 +319,18 @@ def elementwise_sum_cartesian_product( def _unittest_bit_length_set() -> None: from pytest import raises - assert BitLengthSet() == BitLengthSet() - assert not (BitLengthSet() != BitLengthSet()) # pylint: disable=unneeded-not + assert BitLengthSet(0) == BitLengthSet(0) + assert not (BitLengthSet(0) != BitLengthSet(0)) # pylint: disable=unneeded-not assert BitLengthSet(123) == BitLengthSet([123]) assert BitLengthSet(123) != BitLengthSet(124) assert BitLengthSet(123) == 123 assert BitLengthSet(123) != 124 assert not (BitLengthSet(123) == "123") # pylint: disable=unneeded-not - assert str(BitLengthSet()) == "{0}" + assert str(BitLengthSet(0)) == "{0}" assert str(BitLengthSet(123)) == "{123}" assert str(BitLengthSet((123, 0, 456, 12))) == "{0,12,123,456}" # Always sorted! - assert BitLengthSet().is_aligned_at(1) - assert BitLengthSet().is_aligned_at(1024) + assert BitLengthSet(0).is_aligned_at(1) + assert BitLengthSet(0).is_aligned_at(1024) assert BitLengthSet(8).is_aligned_at_byte() assert not BitLengthSet(8).is_aligned_at(16) @@ -334,7 +340,7 @@ def _unittest_bit_length_set() -> None: s = s + {0, 4, 8} assert s == {8, 16, 12, 20, 24} - assert BitLengthSet() + BitLengthSet() == BitLengthSet() + assert BitLengthSet(0) + BitLengthSet(0) == BitLengthSet(0) assert BitLengthSet(4) + BitLengthSet(3) == {7} assert BitLengthSet({4, 91}) + 3 == {7, 94} assert BitLengthSet(7) + {12, 15} == {19, 22} diff --git a/pydsdl/_bit_length_set/_symbolic.py b/pydsdl/_bit_length_set/_symbolic.py index ed849b6..133e154 100644 --- a/pydsdl/_bit_length_set/_symbolic.py +++ b/pydsdl/_bit_length_set/_symbolic.py @@ -51,7 +51,9 @@ class NullaryOperator(Operator): """ def __init__(self, values: typing.Iterable[int]) -> None: - self._value = set(values) or {0} + self._value = set(values) + if not self._value: + raise ValueError("A bit length set cannot be empty. Did you mean to pass {0}?") for x in self._value: if not isinstance(x, int): raise TypeError("Invalid element for nullary set operator: %r" % x) diff --git a/pydsdl/_bit_length_set/_symbolic_test.py b/pydsdl/_bit_length_set/_symbolic_test.py index eb221e7..ae17494 100644 --- a/pydsdl/_bit_length_set/_symbolic_test.py +++ b/pydsdl/_bit_length_set/_symbolic_test.py @@ -9,7 +9,7 @@ def _unittest_nullary() -> None: - op = NullaryOperator([]) + op = NullaryOperator([0]) assert set(op.expand()) == {0} assert set(op.modulo(12345)) == {0} assert op.min == op.max == 0 @@ -44,8 +44,8 @@ def _unittest_padding() -> None: assert set(x % 7 for x in op.expand()) == {1, 4, 5} # Reference assert set(op.modulo(7)) == {1, 4, 5} - for _ in range(100): - child = NullaryOperator(random.randint(0, 1024) for _ in range(random.randint(0, 100))) + for _ in range(1): + child = NullaryOperator(random.randint(0, 1024) for _ in range(random.randint(1, 100))) alignment = random.randint(1, 64) op = PaddingOperator(child, alignment) div = random.randint(1, 64) @@ -110,10 +110,10 @@ def _unittest_concatenation() -> None: assert set(op.modulo(8)) == {0, 1, 2, 4, 5, 6, 7} # 3 is missing validate_numerically(op) - for _ in range(10): + for _ in range(1): op = ConcatenationOperator( [ - NullaryOperator(random.randint(0, 1024) for _ in range(random.randint(0, 10))) + NullaryOperator(random.randint(0, 1024) for _ in range(random.randint(1, 10))) for _ in range(random.randint(1, 10)) ] ) @@ -137,8 +137,8 @@ def _unittest_repetition() -> None: assert set(op.modulo(8)) == {1, 3, 5, 7} validate_numerically(op) - for _ in range(10): - child = NullaryOperator(random.randint(0, 100) for _ in range(random.randint(0, 10))) + for _ in range(1): + child = NullaryOperator(random.randint(0, 100) for _ in range(random.randint(1, 10))) k = random.randint(0, 10) ref = set(map(sum, itertools.combinations_with_replacement(child.expand(), k))) op = RepetitionOperator(child, k) @@ -183,8 +183,8 @@ def _unittest_range_repetition() -> None: assert set(op.modulo(8)) == {0, 2, 3, 6, 7} validate_numerically(op) - for _ in range(10): - child = NullaryOperator(random.randint(0, 100) for _ in range(random.randint(0, 10))) + for _ in range(1): + child = NullaryOperator(random.randint(0, 100) for _ in range(random.randint(1, 10))) k_max = random.randint(0, 10) ref = set( itertools.chain( @@ -232,10 +232,10 @@ def _unittest_union() -> None: assert set(op.modulo(8)) == {x % 8 for x in op.expand()} validate_numerically(op) - for _ in range(10): + for _ in range(1): op = UnionOperator( [ - NullaryOperator(random.randint(0, 1024) for _ in range(random.randint(0, 10))) + NullaryOperator(random.randint(0, 1024) for _ in range(random.randint(1, 10))) for _ in range(random.randint(1, 10)) ] ) diff --git a/pydsdl/_data_type_builder.py b/pydsdl/_data_type_builder.py index 99ff460..c6f81ac 100644 --- a/pydsdl/_data_type_builder.py +++ b/pydsdl/_data_type_builder.py @@ -181,6 +181,8 @@ def resolve_top_level_identifier(self, name: str) -> _expression.Any: if name == "_offset_": bls = self._structs[-1].offset assert len(bls) > 0 and all(map(lambda x: isinstance(x, int), bls)) + # FIXME: THIS OPERATION TRIGGERS NUMERICAL EXPANSION OF THE BIT LENGTH SET. + # TODO: INTEGRATE THE SET EXPRESSION WITH THE BIT LENGTH SET SOLVER TO IMPROVE PERFORMANCE. return _expression.Set(map(_expression.Rational, bls)) raise UndefinedIdentifierError("Undefined identifier: %r" % name) diff --git a/pydsdl/_serializable/_array.py b/pydsdl/_serializable/_array.py index 108390d..4a54853 100644 --- a/pydsdl/_serializable/_array.py +++ b/pydsdl/_serializable/_array.py @@ -59,16 +59,12 @@ def __str__(self) -> str: # pragma: no cover class FixedLengthArrayType(ArrayType): def __init__(self, element_type: SerializableType, capacity: int): super().__init__(element_type, capacity) + self._bls = self.element_type.bit_length_set.repeat(self.capacity) + assert self._bls.is_aligned_at(self.alignment_requirement) @property def bit_length_set(self) -> BitLengthSet: - # This can be further generalized as a Cartesian product of the element type's bit length set taken N times, - # where N is the capacity of the array. However, we avoid such generalization because it leads to a mild - # combinatorial explosion even with small arrays, resorting to this special case instead. The difference in - # performance measured on the standard data type set was about tenfold. - return self.element_type.bit_length_set.elementwise_sum_k_multicombinations(self.capacity).pad_to_alignment( - self.alignment_requirement - ) + return self._bls def enumerate_elements_with_offsets( self, base_offset: typing.Optional[BitLengthSet] = None @@ -85,18 +81,10 @@ def enumerate_elements_with_offsets( of the array element (zero-based) and its offset as a bit length set. """ base_offset = BitLengthSet(base_offset or 0).pad_to_alignment(self.alignment_requirement) - _self_test_base_offset = BitLengthSet(0) for index in range(self.capacity): - assert base_offset.is_aligned_at( - self.element_type.alignment_requirement - ), "The bit length set of the element type computed incorrectly: length % alignment = 0 does not hold." - yield index, BitLengthSet(base_offset) # We yield a copy of the offset to prevent mutation - base_offset += self.element_type.bit_length_set - - # This is only for ensuring that the logic is functioning as intended. - # Combinatorial transformations are easy to mess up, so we have to employ defensive programming. - assert self.element_type.bit_length_set.elementwise_sum_k_multicombinations(index) == _self_test_base_offset - _self_test_base_offset += self.element_type.bit_length_set + offset = base_offset + self.element_type.bit_length_set.repeat(index) + assert offset.is_aligned_at(self.element_type.alignment_requirement) + yield index, offset def __str__(self) -> str: return "%s[%d]" % (self.element_type, self.capacity) @@ -151,16 +139,13 @@ def __init__(self, element_type: SerializableType, capacity: int): self._length_field_type = UnsignedIntegerType(length_field_length, PrimitiveType.CastMode.TRUNCATED) + self._bls = self.length_field_type.bit_length + self.element_type.bit_length_set.repeat_range(self.capacity) + assert self._bls.is_aligned_at(self.alignment_requirement) + @property def bit_length_set(self) -> BitLengthSet: - # Can't use @cached_property because it is unavailable before Python 3.8 and it breaks Sphinx and MyPy. - # Caching is important because bit length set derivation is a very expensive operation. - att = "_8467150963" - if not hasattr(self, att): - setattr(self, att, self._compute_bit_length_set()) - out = getattr(self, att) - assert isinstance(out, BitLengthSet) - return out + assert self._bls.is_aligned_at(self.alignment_requirement) + return self._bls @property def string_like(self) -> bool: @@ -177,16 +162,6 @@ def length_field_type(self) -> UnsignedIntegerType: assert self._length_field_type.bit_length % self.element_type.alignment_requirement == 0 return self._length_field_type - def _compute_bit_length_set(self) -> BitLengthSet: - # Please refer to the corresponding implementation for the fixed-length array. - # The idea here is that we treat the variable-length array as a combination of fixed-length arrays of - # different sizes, from zero elements up to the maximum number of elements. - output = BitLengthSet() - for capacity in range(self.capacity + 1): - output |= self.element_type.bit_length_set.elementwise_sum_k_multicombinations(capacity) - output += self.length_field_type.bit_length - return output.pad_to_alignment(self.alignment_requirement) - def __str__(self) -> str: return "%s[<=%d]" % (self.element_type, self.capacity) diff --git a/pydsdl/_serializable/_composite.py b/pydsdl/_serializable/_composite.py index 0a428da..84121eb 100644 --- a/pydsdl/_serializable/_composite.py +++ b/pydsdl/_serializable/_composite.py @@ -180,9 +180,9 @@ def extent(self) -> int: The amount of memory, in bits, that needs to be allocated in order to store a serialized representation of this type or any of its minor versions under the same major version. This value is always at least as large as the sum of maximum bit lengths of all fields padded to one byte. - If the type is sealed, its extent equals ``max(bit_length_set)``. + If the type is sealed, its extent equals ``bit_length_set.max``. """ - return max(self.bit_length_set or {0}) + return self.bit_length_set.max @property def bit_length_set(self) -> BitLengthSet: @@ -291,9 +291,6 @@ def iterate_fields_with_offsets( The base offset will be implicitly padded out to :attr:`alignment_requirement`. :return: A generator of ``(Field, BitLengthSet)``. - Each instance of :class:`pydsdl.BitLengthSet` yielded by the generator is a dedicated copy, - meaning that the consumer can mutate the returned instances arbitrarily without affecting future values. - It is guaranteed that each yielded instance is non-empty. """ raise NotImplementedError @@ -381,16 +378,13 @@ def __init__( # pylint: disable=too-many-arguments self._compute_tag_bit_length([x.data_type for x in self.fields]), PrimitiveType.CastMode.TRUNCATED ) + self._bls = self.aggregate_bit_length_sets( + [f.data_type for f in self.fields], + ).pad_to_alignment(self.alignment_requirement) + @property def bit_length_set(self) -> BitLengthSet: - # Can't use @cached_property because it is unavailable before Python 3.8 and it breaks Sphinx and MyPy. - att = "_8579621435" - if not hasattr(self, att): - agr = self.aggregate_bit_length_sets - setattr(self, att, agr([f.data_type for f in self.fields]).pad_to_alignment(self.alignment_requirement)) - out = getattr(self, att) - assert isinstance(out, BitLengthSet) - return out + return self._bls @property def number_of_variants(self) -> int: @@ -408,11 +402,13 @@ def iterate_fields_with_offsets( self, base_offset: typing.Optional[BitLengthSet] = None ) -> typing.Iterator[typing.Tuple[Field, BitLengthSet]]: """See the base class.""" - base_offset = BitLengthSet(base_offset or {0}).pad_to_alignment(self.alignment_requirement) - base_offset += self.tag_field_type.bit_length + offset = ( + BitLengthSet(base_offset or {0}).pad_to_alignment(self.alignment_requirement) + + self.tag_field_type.bit_length + ) for f in self.fields: # Same offset for every field, because it's a tagged union, not a struct - assert base_offset.is_aligned_at(f.data_type.alignment_requirement) - yield f, BitLengthSet(base_offset) # We yield a copy of the offset to prevent mutation + assert offset.is_aligned_at(f.data_type.alignment_requirement) + yield f, offset @staticmethod def aggregate_bit_length_sets(field_types: typing.Sequence[SerializableType]) -> BitLengthSet: @@ -436,11 +432,7 @@ def aggregate_bit_length_sets(field_types: typing.Sequence[SerializableType]) -> return BitLengthSet(ms[0]) tbl = UnionType._compute_tag_bit_length(field_types) - out = BitLengthSet() - for s in ms: - out |= s + tbl - assert len(out) > 0, "Empty sets forbidden" - return out + return tbl + BitLengthSet.unite(ms) @staticmethod def _compute_tag_bit_length(field_types: typing.Sequence[SerializableType]) -> int: @@ -459,38 +451,55 @@ class StructureType(CompositeType): A message type that is NOT marked ``@union``. """ + def __init__( # pylint: disable=too-many-arguments + self, + name: str, + version: Version, + attributes: typing.Iterable[Attribute], + deprecated: bool, + fixed_port_id: typing.Optional[int], + source_file_path: str, + has_parent_service: bool, + doc: str = "", + ): + super().__init__( + name=name, + version=version, + attributes=attributes, + deprecated=deprecated, + fixed_port_id=fixed_port_id, + source_file_path=source_file_path, + has_parent_service=has_parent_service, + doc=doc, + ) + self._bls = self.aggregate_bit_length_sets( + [f.data_type for f in self.fields], + ).pad_to_alignment(self.alignment_requirement) + def iterate_fields_with_offsets( self, base_offset: typing.Optional[BitLengthSet] = None ) -> typing.Iterator[typing.Tuple[Field, BitLengthSet]]: """See the base class.""" - base_offset = BitLengthSet(base_offset or 0).pad_to_alignment(self.alignment_requirement) + offset = BitLengthSet(base_offset or 0).pad_to_alignment(self.alignment_requirement) for f in self.fields: - base_offset = base_offset.pad_to_alignment(f.data_type.alignment_requirement) - yield f, BitLengthSet(base_offset) # We yield a copy of the offset to prevent mutation - base_offset += f.data_type.bit_length_set + offset = offset.pad_to_alignment(f.data_type.alignment_requirement) + yield f, offset + offset = offset + f.data_type.bit_length_set @property def bit_length_set(self) -> BitLengthSet: - # Can't use @cached_property because it is unavailable before Python 3.8 and it breaks Sphinx and MyPy. - att = "_7953874601" - if not hasattr(self, att): - agr = self.aggregate_bit_length_sets - setattr(self, att, agr([f.data_type for f in self.fields]).pad_to_alignment(self.alignment_requirement)) - out = getattr(self, att) - assert isinstance(out, BitLengthSet) - return out + return self._bls @staticmethod def aggregate_bit_length_sets(field_types: typing.Sequence[SerializableType]) -> BitLengthSet: """ Computes the bit length set for a structure type given the type of each of its fields. - The final padding is not applied. + The final padding is not applied (but inter-field padding obviously is). """ - bls = BitLengthSet() - for t in field_types: - bls = bls.pad_to_alignment(t.alignment_requirement) - bls += t.bit_length_set - return bls or BitLengthSet(0) # Empty bit length sets are forbidden + bls = field_types[0].bit_length_set if len(field_types) > 0 else BitLengthSet(0) + for t in field_types[1:]: + bls = bls.pad_to_alignment(t.alignment_requirement) + t.bit_length_set + return bls class DelimitedType(CompositeType): @@ -544,15 +553,16 @@ def __init__(self, inner: CompositeType, extent: int): delimiter_header_bit_length, UnsignedIntegerType.CastMode.TRUNCATED ) + self._bls = self.delimiter_header_type.bit_length + BitLengthSet(self.alignment_requirement).repeat_range( + self._extent // self.alignment_requirement + ) + assert self.extent % self.BITS_PER_BYTE == 0 assert self.extent % self.alignment_requirement == 0 assert self.extent >= self.inner_type.extent - assert len(self.bit_length_set) > 0 assert self.bit_length_set.is_aligned_at_byte() assert self.bit_length_set.is_aligned_at(self.alignment_requirement) - assert ( - not self.bit_length_set or self.extent >= max(self.bit_length_set) - self.delimiter_header_type.bit_length - ) + assert self.extent >= (self.bit_length_set.max - self.delimiter_header_type.bit_length) assert self.has_parent_service == inner.has_parent_service @property @@ -585,17 +595,7 @@ def bit_length_set(self) -> BitLengthSet: For example, a type that contains a single field of type ``uint32[2]`` would have the bit length set of ``{h, h+8, h+16, ..., h+56, h+64}`` where ``h`` is the length of the delimiter header. """ - # Can't use @cached_property because it is unavailable before Python 3.8 and it breaks Sphinx and MyPy. - att = "_3476583631" - if not hasattr(self, att): - x = ( - BitLengthSet(range(self.extent + 1)).pad_to_alignment(self.alignment_requirement) - + self.delimiter_header_type.bit_length - ) - setattr(self, att, x) - out = getattr(self, att) - assert isinstance(out, BitLengthSet) - return out + return self._bls @property def delimiter_header_type(self) -> UnsignedIntegerType: @@ -864,6 +864,7 @@ def try_union_fields(field_types: typing.List[SerializableType]) -> UnionType: assert u.bit_length_set == {24} assert u.extent == 24 assert DelimitedType(u, 40).extent == 40 + assert set(DelimitedType(u, 40).bit_length_set) == {32, 40, 48, 56, 64, 72} assert DelimitedType(u, 40).bit_length_set == {32, 40, 48, 56, 64, 72} assert DelimitedType(u, 24).extent == 24 assert DelimitedType(u, 24).bit_length_set == {32, 40, 48, 56} @@ -1191,9 +1192,9 @@ def validate_iterator( ) # Ensuring the equivalency between bit length and aligned bit offset - b_offset = BitLengthSet() + b_offset = BitLengthSet(0) for f in b.fields: - b_offset += f.data_type.bit_length_set + b_offset = b_offset + f.data_type.bit_length_set print("b_offset:", b_offset) assert b_offset.pad_to_alignment(8) == b.bit_length_set assert not b_offset.is_aligned_at_byte() diff --git a/pydsdl/_serializable/_serializable.py b/pydsdl/_serializable/_serializable.py index 01bc491..eed1595 100644 --- a/pydsdl/_serializable/_serializable.py +++ b/pydsdl/_serializable/_serializable.py @@ -75,7 +75,7 @@ def __hash__(self) -> int: try: bls = self.bit_length_set except TypeError: # If the type is non-serializable. - bls = BitLengthSet() + bls = BitLengthSet(0) return hash(str(self) + str(bls)) def __eq__(self, other: object) -> bool: From 8bb423690daa76bd761117165fa45adb6741354d Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Tue, 6 Apr 2021 00:52:27 +0300 Subject: [PATCH 07/21] Log slow operations to help with migrating Nunavut to the new solver API --- pydsdl/__init__.py | 4 ++-- pydsdl/_bit_length_set/_symbolic.py | 25 +++++++++++++++++++++++++ pydsdl/_serializable/_composite.py | 2 +- setup.cfg | 2 ++ 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/pydsdl/__init__.py b/pydsdl/__init__.py index 7714a31..d8d09ad 100644 --- a/pydsdl/__init__.py +++ b/pydsdl/__init__.py @@ -7,8 +7,8 @@ import os as _os import sys as _sys -__version__ = "1.11.0" -__version_info__ = tuple(map(int, __version__.split("."))) +__version__ = "1.12.0.dev0" +__version_info__ = tuple(map(int, __version__.split(".")[:3])) __license__ = "MIT" __author__ = "UAVCAN Consortium" __copyright__ = "Copyright (c) 2018 UAVCAN Consortium" diff --git a/pydsdl/_bit_length_set/_symbolic.py b/pydsdl/_bit_length_set/_symbolic.py index 133e154..f8cb04c 100644 --- a/pydsdl/_bit_length_set/_symbolic.py +++ b/pydsdl/_bit_length_set/_symbolic.py @@ -5,6 +5,7 @@ import abc import math import typing +import logging import itertools @@ -244,6 +245,7 @@ def __repr__(self) -> str: class MemoizationOperator(Operator): """ This is a no-op transparent lazy cache on top of the child operator. + It also logs a stack trace if the child takes too long to expand to help with bottleneck optimization. """ def __init__(self, child: Operator) -> None: @@ -274,7 +276,27 @@ def max(self) -> int: def expand(self) -> typing.Set[int]: if self._expansion is None: + from time import monotonic + + # Track the time and log occurrences that take a long time to help find bottlenecks in user code + # that accidentally relies on numerical expansion. This is mainly intended to help us transition + # Nunavut to the new solver API instead of numerical methods. It may be removed later. + started_at = monotonic() self._expansion = set(self._child.expand()) + elapsed = monotonic() - started_at + if elapsed > 2.0: + _logger.info( + "Numerical expansion took %.1f seconds; the result contains %d items:\n%s", + elapsed, + len(self._expansion), + self._child, + ) + + # Since we did an expansion anyway, the set must be compact, + # so we use this opportunity to validate the correctness of the solver. + # This may be removed easily since it has no visible effects. + validate_numerically(self) + return self._expansion def __repr__(self) -> str: @@ -293,3 +315,6 @@ def validate_numerically(op: Operator) -> None: assert max(s) == op.max for div in range(1, 65): assert set(op.modulo(div)) == {x % div for x in s} + + +_logger = logging.getLogger(__name__) diff --git a/pydsdl/_serializable/_composite.py b/pydsdl/_serializable/_composite.py index 84121eb..1ae2f39 100644 --- a/pydsdl/_serializable/_composite.py +++ b/pydsdl/_serializable/_composite.py @@ -1155,7 +1155,7 @@ def validate_iterator( ), ( "x", - { # The lone "+2" is for the variable-length array's implicit length field + { # The lone "+8" is for the variable-length array's implicit length field # First length option of z a_bls_padded[0] + 8 + a_bls_padded[0] * 0, # suka a_bls_padded[0] + 8 + a_bls_padded[1] * 0, diff --git a/setup.cfg b/setup.cfg index d29f547..357699b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -40,6 +40,8 @@ norecursedirs = third_party python_files = *.py python_classes = _UnitTest python_functions = _unittest_ +log_file = pytest.log +log_level = DEBUG log_cli = true log_cli_level = WARNING addopts = --doctest-modules -v -p no:unraisableexception From fab989ac7c0b136a3bd6461902fb28bb0de29a58 Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Tue, 6 Apr 2021 00:58:09 +0300 Subject: [PATCH 08/21] Support Python before 3.9 --- pydsdl/_bit_length_set/_symbolic.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pydsdl/_bit_length_set/_symbolic.py b/pydsdl/_bit_length_set/_symbolic.py index f8cb04c..d932f0d 100644 --- a/pydsdl/_bit_length_set/_symbolic.py +++ b/pydsdl/_bit_length_set/_symbolic.py @@ -91,7 +91,7 @@ def __init__(self, child: Operator, alignment: int) -> None: def modulo(self, divisor: int) -> typing.Iterable[int]: r = self._padding mx = self.max - lcm = math.lcm(r, divisor) + lcm = least_common_multiple(r, divisor) for x in set(self._child.modulo(lcm)): assert x <= mx and x < lcm yield self._pad(x) % divisor @@ -303,6 +303,13 @@ def __repr__(self) -> str: return repr(self._child) # Not sure if we should indicate our presence considering that we're a no-op +def least_common_multiple(a: int, b: int) -> int: + """ + This replicates :func:`math.lcm` to support Python <3.9. + """ + return abs(a * b) // math.gcd(a, b) + + def validate_numerically(op: Operator) -> None: """ Validates the correctness of symbolic derivations by comparing the results against reference values From f6aca9378f3d6e040f1bb315dda60ae2f86a0559 Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Tue, 6 Apr 2021 01:02:04 +0300 Subject: [PATCH 09/21] MyPy fixes --- pydsdl/_bit_length_set/_bit_length_set.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pydsdl/_bit_length_set/_bit_length_set.py b/pydsdl/_bit_length_set/_bit_length_set.py index b3637b4..ee31910 100644 --- a/pydsdl/_bit_length_set/_bit_length_set.py +++ b/pydsdl/_bit_length_set/_bit_length_set.py @@ -271,7 +271,9 @@ def __len__(self) -> int: >>> len(BitLengthSet([1, 2, 3])) 3 """ - return len(self._op.expand()) + exp = self._op.expand() + assert isinstance(exp, set) # We know that memoization and nullary operators return sets. + return len(exp) def __eq__(self, other: typing.Any) -> bool: """ @@ -347,16 +349,16 @@ def _unittest_bit_length_set() -> None: assert {1, 2, 3} + BitLengthSet([4, 5, 6]) == {5, 6, 7, 8, 9} with raises(TypeError): - assert BitLengthSet([4, 5, 6]) + "a" + assert BitLengthSet([4, 5, 6]) + "a" # type: ignore with raises(TypeError): - assert "a" + BitLengthSet([4, 5, 6]) + assert "a" + BitLengthSet([4, 5, 6]) # type: ignore with raises(TypeError): - assert "a" | BitLengthSet([4, 5, 6]) + assert "a" | BitLengthSet([4, 5, 6]) # type: ignore with raises(TypeError): - assert BitLengthSet([4, 5, 6]) | "a" + assert BitLengthSet([4, 5, 6]) | "a" # type: ignore with raises(ValueError): BitLengthSet([4, 5, 6]).pad_to_alignment(0) From 6ec9ef26e9d631f7b940db66d309ff2207c8b241 Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Tue, 6 Apr 2021 01:05:37 +0300 Subject: [PATCH 10/21] PyLint fixes --- pydsdl/_bit_length_set/_bit_length_set.py | 4 ++-- pydsdl/_parser.py | 2 +- setup.cfg | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pydsdl/_bit_length_set/_bit_length_set.py b/pydsdl/_bit_length_set/_bit_length_set.py index ee31910..2b306d3 100644 --- a/pydsdl/_bit_length_set/_bit_length_set.py +++ b/pydsdl/_bit_length_set/_bit_length_set.py @@ -170,7 +170,7 @@ def concatenate(sets: typing.Iterable[typing.Union["BitLengthSet", typing.Iterab """ from ._symbolic import ConcatenationOperator - op = ConcatenationOperator(BitLengthSet(s)._op for s in sets) + op = ConcatenationOperator(BitLengthSet(s)._op for s in sets) # pylint: disable=protected-access return BitLengthSet(op) @staticmethod @@ -186,7 +186,7 @@ def unite(sets: typing.Iterable[typing.Union["BitLengthSet", typing.Iterable[int """ from ._symbolic import UnionOperator - op = UnionOperator(BitLengthSet(s)._op for s in sets) + op = UnionOperator(BitLengthSet(s)._op for s in sets) # pylint: disable=protected-access return BitLengthSet(op) def __add__(self, other: typing.Union["BitLengthSet", typing.Iterable[int], int]) -> "BitLengthSet": diff --git a/pydsdl/_parser.py b/pydsdl/_parser.py index 89f1409..91432b7 100644 --- a/pydsdl/_parser.py +++ b/pydsdl/_parser.py @@ -118,7 +118,7 @@ def _make_binary_operator_handler(operator: _expression.BinaryOperator[_expressi # noinspection PyMethodMayBeStatic -class _ParseTreeProcessor(parsimonious.NodeVisitor): # pylint: disable=too-many-public-methods +class _ParseTreeProcessor(parsimonious.NodeVisitor): """ This class processes the parse tree, evaluates the expressions and emits a high-level representation of the processed description. Essentially it does most of the ground work related to supporting the DSDL diff --git a/setup.cfg b/setup.cfg index 357699b..bef7b4a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -115,7 +115,8 @@ disable= arguments-differ, too-many-statements, useless-super-delegation, - too-many-instance-attributes + too-many-instance-attributes, + too-many-public-methods [pylint.REPORTS] output-format=colorized From 50095e5d66e0e928b56c0c879e65d05cf3fafc55 Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Tue, 6 Apr 2021 01:12:47 +0300 Subject: [PATCH 11/21] Bump up test coverage --- pydsdl/_bit_length_set/_bit_length_set.py | 17 ++++++++++------- pydsdl/_bit_length_set/_symbolic_test.py | 13 +++++++++++++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/pydsdl/_bit_length_set/_bit_length_set.py b/pydsdl/_bit_length_set/_bit_length_set.py index 2b306d3..745734d 100644 --- a/pydsdl/_bit_length_set/_bit_length_set.py +++ b/pydsdl/_bit_length_set/_bit_length_set.py @@ -89,10 +89,15 @@ def max(self) -> int: def fixed_length(self) -> bool: """ Shorthand for ``self.min == self.max``. + + >>> BitLengthSet(8).repeat(1).fixed_length + True + >>> BitLengthSet(8).repeat_range(1).fixed_length + False """ return self.min == self.max - def __mod__(self, divisor: typing.Any) -> typing.Iterable[int]: + def __mod__(self, divisor: int) -> typing.Iterable[int]: """ Elementwise modulus derived analytically. @@ -101,10 +106,8 @@ def __mod__(self, divisor: typing.Any) -> typing.Iterable[int]: >>> sorted(BitLengthSet([8, 12, 16]) % 8) [0, 4] """ - if isinstance(divisor, int): - # The type is reported as iterable[int], not sure yet if we should specialize it further. Time will tell. - return BitLengthSet(self._op.modulo(divisor)) - return NotImplemented + # The type is reported as iterable[int], not sure yet if we should specialize it further. Time will tell. + return BitLengthSet(self._op.modulo(int(divisor))) # ======================================== COMPOSITION METHODS ======================================== @@ -300,7 +303,7 @@ def __repr__(self) -> str: # ======================================== DEPRECATED METHODS ======================================== - def elementwise_sum_k_multicombinations(self, k: int) -> "BitLengthSet": + def elementwise_sum_k_multicombinations(self, k: int) -> "BitLengthSet": # pragma: no cover """ :meta private: """ @@ -310,7 +313,7 @@ def elementwise_sum_k_multicombinations(self, k: int) -> "BitLengthSet": @staticmethod def elementwise_sum_cartesian_product( sets: typing.Iterable[typing.Union[typing.Iterable[int], int]] - ) -> "BitLengthSet": + ) -> "BitLengthSet": # pragma: no cover """ :meta private: """ diff --git a/pydsdl/_bit_length_set/_symbolic_test.py b/pydsdl/_bit_length_set/_symbolic_test.py index ae17494..a69ee7f 100644 --- a/pydsdl/_bit_length_set/_symbolic_test.py +++ b/pydsdl/_bit_length_set/_symbolic_test.py @@ -9,6 +9,8 @@ def _unittest_nullary() -> None: + import pytest + op = NullaryOperator([0]) assert set(op.expand()) == {0} assert set(op.modulo(12345)) == {0} @@ -21,6 +23,9 @@ def _unittest_nullary() -> None: assert (op.min, op.max) == (1, 8) validate_numerically(op) + with pytest.raises(ValueError): + NullaryOperator([]) + def _unittest_padding() -> None: from ._symbolic import PaddingOperator @@ -54,6 +59,7 @@ def _unittest_padding() -> None: def _unittest_concatenation() -> None: + import pytest from ._symbolic import ConcatenationOperator op = ConcatenationOperator( @@ -121,6 +127,9 @@ def _unittest_concatenation() -> None: assert set(op.modulo(div)) == {x % div for x in op.expand()} validate_numerically(op) + with pytest.raises(ValueError): + ConcatenationOperator([]) + def _unittest_repetition() -> None: from ._symbolic import RepetitionOperator @@ -204,6 +213,7 @@ def _unittest_range_repetition() -> None: def _unittest_union() -> None: + import pytest from ._symbolic import UnionOperator op = UnionOperator( @@ -241,6 +251,9 @@ def _unittest_union() -> None: ) validate_numerically(op) + with pytest.raises(ValueError): + UnionOperator([]) + def _unittest_repr() -> None: from ._symbolic import ( From 0ad83b1e26573f3d45af3fa4af0e7d2b02338f91 Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Tue, 6 Apr 2021 04:08:22 +0300 Subject: [PATCH 12/21] Read env var PYDSDL_POISON_SLOW_EXPANSION to help performance optimization --- pydsdl/_bit_length_set/_symbolic.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pydsdl/_bit_length_set/_symbolic.py b/pydsdl/_bit_length_set/_symbolic.py index d932f0d..ceb018e 100644 --- a/pydsdl/_bit_length_set/_symbolic.py +++ b/pydsdl/_bit_length_set/_symbolic.py @@ -2,6 +2,7 @@ # This software is distributed under the terms of the MIT License. # Author: Pavel Kirienko +import os import abc import math import typing @@ -291,6 +292,7 @@ def expand(self) -> typing.Set[int]: len(self._expansion), self._child, ) + assert elapsed < _POISON_SLOW_EXPANSION # Since we did an expansion anyway, the set must be compact, # so we use this opportunity to validate the correctness of the solver. @@ -324,4 +326,10 @@ def validate_numerically(op: Operator) -> None: assert set(op.modulo(div)) == {x % div for x in s} +_POISON_SLOW_EXPANSION = float(os.environ.get("PYDSDL_POISON_SLOW_EXPANSION", "999999999")) +""" +This is intended for developers only so it is not mentioned in the public documentation. +The purpose is to trigger an assertion failure if a numerical expansion takes more than this many seconds. +""" + _logger = logging.getLogger(__name__) From a62ffb0a5067fd554f25ed42adca01f1ac1fbe8c Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Tue, 6 Apr 2021 05:12:04 +0300 Subject: [PATCH 13/21] Remove accidental reliance on numerical expansion through __len__ (discovered thanks to PYDSDL_POISON_SLOW_EXPANSION) --- pydsdl/__init__.py | 2 +- pydsdl/_serializable/_array.py | 4 ++-- pydsdl/_serializable/_composite.py | 21 +++++++++------------ 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/pydsdl/__init__.py b/pydsdl/__init__.py index d8d09ad..4cf6da7 100644 --- a/pydsdl/__init__.py +++ b/pydsdl/__init__.py @@ -7,7 +7,7 @@ import os as _os import sys as _sys -__version__ = "1.12.0.dev0" +__version__ = "1.12.0" __version_info__ = tuple(map(int, __version__.split(".")[:3])) __license__ = "MIT" __author__ = "UAVCAN Consortium" diff --git a/pydsdl/_serializable/_array.py b/pydsdl/_serializable/_array.py index 4a54853..903af23 100644 --- a/pydsdl/_serializable/_array.py +++ b/pydsdl/_serializable/_array.py @@ -67,7 +67,7 @@ def bit_length_set(self) -> BitLengthSet: return self._bls def enumerate_elements_with_offsets( - self, base_offset: typing.Optional[BitLengthSet] = None + self, base_offset: BitLengthSet = BitLengthSet(0) ) -> typing.Iterator[typing.Tuple[int, BitLengthSet]]: """ This is a convenience method for code generation. @@ -80,7 +80,7 @@ def enumerate_elements_with_offsets( :returns: For an N-element array, an iterator over N elements, where each element is a tuple of the index of the array element (zero-based) and its offset as a bit length set. """ - base_offset = BitLengthSet(base_offset or 0).pad_to_alignment(self.alignment_requirement) + base_offset = base_offset.pad_to_alignment(self.alignment_requirement) for index in range(self.capacity): offset = base_offset + self.element_type.bit_length_set.repeat(index) assert offset.is_aligned_at(self.element_type.alignment_requirement) diff --git a/pydsdl/_serializable/_composite.py b/pydsdl/_serializable/_composite.py index 1ae2f39..e5d93c1 100644 --- a/pydsdl/_serializable/_composite.py +++ b/pydsdl/_serializable/_composite.py @@ -261,7 +261,7 @@ def has_parent_service(self) -> bool: @abc.abstractmethod def iterate_fields_with_offsets( - self, base_offset: typing.Optional[BitLengthSet] = None + self, base_offset: BitLengthSet = BitLengthSet(0) ) -> typing.Iterator[typing.Tuple[Field, BitLengthSet]]: """ Iterates over every field (not attribute -- constants are excluded) of the data type, @@ -399,13 +399,10 @@ def tag_field_type(self) -> UnsignedIntegerType: return self._tag_field_type def iterate_fields_with_offsets( - self, base_offset: typing.Optional[BitLengthSet] = None + self, base_offset: BitLengthSet = BitLengthSet(0) ) -> typing.Iterator[typing.Tuple[Field, BitLengthSet]]: """See the base class.""" - offset = ( - BitLengthSet(base_offset or {0}).pad_to_alignment(self.alignment_requirement) - + self.tag_field_type.bit_length - ) + offset = base_offset.pad_to_alignment(self.alignment_requirement) + self.tag_field_type.bit_length for f in self.fields: # Same offset for every field, because it's a tagged union, not a struct assert offset.is_aligned_at(f.data_type.alignment_requirement) yield f, offset @@ -477,10 +474,10 @@ def __init__( # pylint: disable=too-many-arguments ).pad_to_alignment(self.alignment_requirement) def iterate_fields_with_offsets( - self, base_offset: typing.Optional[BitLengthSet] = None + self, base_offset: BitLengthSet = BitLengthSet(0) ) -> typing.Iterator[typing.Tuple[Field, BitLengthSet]]: """See the base class.""" - offset = BitLengthSet(base_offset or 0).pad_to_alignment(self.alignment_requirement) + offset = base_offset.pad_to_alignment(self.alignment_requirement) for f in self.fields: offset = offset.pad_to_alignment(f.data_type.alignment_requirement) yield f, offset @@ -606,12 +603,12 @@ def delimiter_header_type(self) -> UnsignedIntegerType: return self._delimiter_header_type def iterate_fields_with_offsets( - self, base_offset: typing.Optional[BitLengthSet] = None + self, base_offset: BitLengthSet = BitLengthSet(0) ) -> typing.Iterator[typing.Tuple[Field, BitLengthSet]]: """ Delegates the call to the inner type, but with the base offset increased by the size of the delimiter header. """ - base_offset = (base_offset or BitLengthSet(0)) + self.delimiter_header_type.bit_length_set + base_offset = base_offset + self.delimiter_header_type.bit_length_set return self.inner_type.iterate_fields_with_offsets(base_offset) def __repr__(self) -> str: @@ -677,7 +674,7 @@ def response_type(self) -> CompositeType: return self._response_type def iterate_fields_with_offsets( - self, base_offset: typing.Optional[BitLengthSet] = None + self, base_offset: BitLengthSet = BitLengthSet(0) ) -> typing.Iterator[typing.Tuple[Field, BitLengthSet]]: """Always raises a :class:`TypeError`.""" raise TypeError("Service types do not have serializable fields. Use either request or response.") @@ -981,7 +978,7 @@ def make_type(meta: typing.Type[CompositeType], attributes: typing.Iterable[Attr def validate_iterator( t: CompositeType, reference: typing.Iterable[typing.Tuple[str, typing.Set[int]]], - base_offset: typing.Optional[BitLengthSet] = None, + base_offset: BitLengthSet = BitLengthSet(0), ) -> None: for (name, ref_set), (field, real_set) in itertools.zip_longest( reference, t.iterate_fields_with_offsets(base_offset) From 01d44bbfcdb8ce2096153a9292f0cd1cd1219431 Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Tue, 6 Apr 2021 05:27:37 +0300 Subject: [PATCH 14/21] Override BitLengthSet.__bool__ --- pydsdl/_bit_length_set/_bit_length_set.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pydsdl/_bit_length_set/_bit_length_set.py b/pydsdl/_bit_length_set/_bit_length_set.py index 745734d..51aaa2d 100644 --- a/pydsdl/_bit_length_set/_bit_length_set.py +++ b/pydsdl/_bit_length_set/_bit_length_set.py @@ -295,6 +295,15 @@ def __eq__(self, other: typing.Any) -> bool: # ======================================== AUXILIARY METHODS ======================================== + def __bool__(self) -> bool: + """ + This method is overridden to avoid accidental invocation of :meth:`__len__` in boolean expressions + because it triggers numerical expansion. + + :return: Always True. + """ + return True # pragma: no cover + def __str__(self) -> str: return str(self._op) From be14126793695351b46794276c603f4e3730e16a Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Wed, 7 Apr 2021 15:14:25 +0300 Subject: [PATCH 15/21] Refactor the symbolic API slightly and apply modulo reduction in the repetition operator --- pydsdl/_bit_length_set/_bit_length_set.py | 4 +- pydsdl/_bit_length_set/_symbolic.py | 87 +++++++++++++++-------- 2 files changed, 57 insertions(+), 34 deletions(-) diff --git a/pydsdl/_bit_length_set/_bit_length_set.py b/pydsdl/_bit_length_set/_bit_length_set.py index 51aaa2d..3d30850 100644 --- a/pydsdl/_bit_length_set/_bit_length_set.py +++ b/pydsdl/_bit_length_set/_bit_length_set.py @@ -274,9 +274,7 @@ def __len__(self) -> int: >>> len(BitLengthSet([1, 2, 3])) 3 """ - exp = self._op.expand() - assert isinstance(exp, set) # We know that memoization and nullary operators return sets. - return len(exp) + return len(self._op.expand()) def __eq__(self, other: typing.Any) -> bool: """ diff --git a/pydsdl/_bit_length_set/_symbolic.py b/pydsdl/_bit_length_set/_symbolic.py index ceb018e..bbf9f2a 100644 --- a/pydsdl/_bit_length_set/_symbolic.py +++ b/pydsdl/_bit_length_set/_symbolic.py @@ -16,7 +16,7 @@ class Operator(abc.ABC): """ @abc.abstractmethod - def modulo(self, divisor: int) -> typing.Iterable[int]: + def modulo(self, divisor: int) -> typing.Set[int]: """ May return duplicates. """ @@ -33,7 +33,7 @@ def max(self) -> int: raise NotImplementedError @abc.abstractmethod - def expand(self) -> typing.Iterable[int]: + def expand(self) -> typing.Set[int]: """ Transform the symbolic form into numerical form. This is useful for cross-checking derived solutions and for DSDL expression evaluation. @@ -60,8 +60,8 @@ def __init__(self, values: typing.Iterable[int]) -> None: if not isinstance(x, int): raise TypeError("Invalid element for nullary set operator: %r" % x) - def modulo(self, divisor: int) -> typing.Iterable[int]: - return map(lambda x: x % divisor, self._value) + def modulo(self, divisor: int) -> typing.Set[int]: + return set(map(lambda x: x % divisor, self._value)) @property def min(self) -> int: @@ -72,7 +72,7 @@ def max(self) -> int: return max(self._value) def expand(self) -> typing.Set[int]: - return self._value + return set(self._value) def __repr__(self) -> str: return "{%s}" % ",".join(str(x) for x in sorted(self._value)) @@ -89,13 +89,15 @@ def __init__(self, child: Operator, alignment: int) -> None: self._child = child self._padding = int(alignment) - def modulo(self, divisor: int) -> typing.Iterable[int]: + def modulo(self, divisor: int) -> typing.Set[int]: r = self._padding mx = self.max lcm = least_common_multiple(r, divisor) - for x in set(self._child.modulo(lcm)): + out = set() # type: typing.Set[int] + for x in self._child.modulo(lcm): assert x <= mx and x < lcm - yield self._pad(x) % divisor + out.add(self._pad(x) % divisor) + return out @property def min(self) -> int: @@ -105,7 +107,7 @@ def min(self) -> int: def max(self) -> int: return self._pad(self._child.max) - def expand(self) -> typing.Iterable[int]: + def expand(self) -> typing.Set[int]: return set(map(self._pad, self._child.expand())) def _pad(self, x: int) -> int: @@ -127,11 +129,11 @@ def __init__(self, children: typing.Iterable[Operator]) -> None: if not self._children: raise ValueError("This operator is not defined on zero operands") - def modulo(self, divisor: int) -> typing.Iterable[int]: + def modulo(self, divisor: int) -> typing.Set[int]: # Take the modulus from each child and find all combinations. # The computational complexity is tightly bounded because the cardinality of the modulus set is less than # the bit length operand. - mods = [set(ch.modulo(divisor)) for ch in self._children] + mods = [ch.modulo(divisor) for ch in self._children] prod = itertools.product(*mods) sums = set(map(sum, prod)) return {typing.cast(int, x) % divisor for x in sums} @@ -144,7 +146,7 @@ def min(self) -> int: def max(self) -> int: return sum(x.max for x in self._children) - def expand(self) -> typing.Iterable[int]: + def expand(self) -> typing.Set[int]: return {sum(el) for el in itertools.product(*(x.expand() for x in self._children))} def __repr__(self) -> str: @@ -161,9 +163,20 @@ def __init__(self, child: Operator, k: int) -> None: self._k = int(k) self._child = child - def modulo(self, divisor: int) -> typing.Iterable[int]: + def modulo(self, divisor: int) -> typing.Set[int]: + # Values of k > divisor will yield repeated entries so we can apply a reduction, + # but the equivalent k modulo divisor shall be the same as k modulo divisor. + # This is because the solution doesn't converge at k -> inf: + # {sum(x) % 2 for x in combinations_with_replacement({1, 3}, 2)} == {0} + # {sum(x) % 2 for x in combinations_with_replacement({1, 3}, 3)} == {1} + # {sum(x) % 2 for x in combinations_with_replacement({1, 3}, 4)} == {0} + # {sum(x) % 2 for x in combinations_with_replacement({1, 3}, 5)} == {1} + # etc. + equivalent_k = min(self._k, divisor + self._k % divisor) + assert (self._k % divisor) == (equivalent_k % divisor), (divisor, self._k) return { - (sum(el) % divisor) for el in itertools.combinations_with_replacement(self._child.modulo(divisor), self._k) + (sum(el) % divisor) + for el in itertools.combinations_with_replacement(self._child.modulo(divisor), equivalent_k) } @property @@ -174,7 +187,7 @@ def min(self) -> int: def max(self) -> int: return self._child.max * self._k - def expand(self) -> typing.Iterable[int]: + def expand(self) -> typing.Set[int]: return {sum(el) for el in itertools.combinations_with_replacement(self._child.expand(), self._k)} def __repr__(self) -> str: @@ -191,13 +204,18 @@ def __init__(self, child: Operator, k_max: int) -> None: self._k_max = int(k_max) self._child = child - def modulo(self, divisor: int) -> typing.Iterable[int]: - single = set(self._child.modulo(divisor)) - # Values of k > divisor will yield repeated entries so we can apply a reduction. - equivalent_k_max = min(self._k_max, divisor) + def modulo(self, divisor: int) -> typing.Set[int]: + single = self._child.modulo(divisor) + assert isinstance(single, set) + # Values of k > divisor will yield repeated entries so we can apply a reduction (see non-range case above). + # This holds only if the argument does not contain repeated entries which is guaranteed by `set`. + equivalent_k_max = min(self._k_max, divisor + self._k_max % divisor) + assert (self._k_max % divisor) == (equivalent_k_max % divisor), (divisor, self._k_max) + out = set() # type: typing.Set[int] for k in range(equivalent_k_max + 1): for el in itertools.combinations_with_replacement(single, k): - yield sum(el) % divisor + out.add(sum(el) % divisor) + return out @property def min(self) -> int: @@ -207,11 +225,14 @@ def min(self) -> int: def max(self) -> int: return self._child.max * self._k_max - def expand(self) -> typing.Iterable[int]: - ch = set(self._child.expand()) + def expand(self) -> typing.Set[int]: + ch = self._child.expand() + assert isinstance(ch, set) + out = set() # type: typing.Set[int] for k in range(self._k_max + 1): for el in itertools.combinations_with_replacement(ch, k): - yield sum(el) + out.add(sum(el)) + return out def __repr__(self) -> str: return "repeat(<=%d,%r)" % (self._k_max, self._child) @@ -223,9 +244,11 @@ def __init__(self, children: typing.Iterable[Operator]) -> None: if not self._children: raise ValueError("This operator is not defined on zero operands") - def modulo(self, divisor: int) -> typing.Iterable[int]: + def modulo(self, divisor: int) -> typing.Set[int]: + out = set() # type: typing.Set[int] for x in self._children: - yield from x.modulo(divisor) + out |= x.modulo(divisor) + return out @property def min(self) -> int: @@ -235,9 +258,11 @@ def min(self) -> int: def max(self) -> int: return max(x.max for x in self._children) - def expand(self) -> typing.Iterable[int]: + def expand(self) -> typing.Set[int]: + out = set() # type: typing.Set[int] for x in self._children: - yield from x.expand() + out |= x.expand() + return out def __repr__(self) -> str: return "(%s)" % "|".join(map(repr, self._children)) @@ -260,7 +285,7 @@ def modulo(self, divisor: int) -> typing.Set[int]: try: return self._modula[divisor] except LookupError: - self._modula[divisor] = set(self._child.modulo(divisor)) + self._modula[divisor] = self._child.modulo(divisor) return self._modula[divisor] @property @@ -283,7 +308,7 @@ def expand(self) -> typing.Set[int]: # that accidentally relies on numerical expansion. This is mainly intended to help us transition # Nunavut to the new solver API instead of numerical methods. It may be removed later. started_at = monotonic() - self._expansion = set(self._child.expand()) + self._expansion = self._child.expand() elapsed = monotonic() - started_at if elapsed > 2.0: _logger.info( @@ -319,11 +344,11 @@ def validate_numerically(op: Operator) -> None: The computational complexity may be prohibitively high for some inputs due to combinatorial explosion. In case of a divergence the function triggers an assertion failure. """ - s = set(op.expand()) + s = op.expand() assert min(s) == op.min assert max(s) == op.max for div in range(1, 65): - assert set(op.modulo(div)) == {x % div for x in s} + assert op.modulo(div) == {x % div for x in s}, div _POISON_SLOW_EXPANSION = float(os.environ.get("PYDSDL_POISON_SLOW_EXPANSION", "999999999")) From c2f009230790c01ff5e5d0802179ae146ad7e1f5 Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Wed, 7 Apr 2021 16:06:46 +0300 Subject: [PATCH 16/21] Log the stack trace if slow expansion is found --- pydsdl/_bit_length_set/_symbolic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pydsdl/_bit_length_set/_symbolic.py b/pydsdl/_bit_length_set/_symbolic.py index bbf9f2a..dbe1a2e 100644 --- a/pydsdl/_bit_length_set/_symbolic.py +++ b/pydsdl/_bit_length_set/_symbolic.py @@ -316,6 +316,7 @@ def expand(self) -> typing.Set[int]: elapsed, len(self._expansion), self._child, + stack_info=True, ) assert elapsed < _POISON_SLOW_EXPANSION From efc7997f334725d60d5cdd57c08d971493e210bb Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Wed, 7 Apr 2021 16:37:30 +0300 Subject: [PATCH 17/21] Make __eq__ and __hash__ constant-time, too, since type comparison depends on that --- pydsdl/_bit_length_set/_bit_length_set.py | 26 +++++++++++++++++------ pydsdl/_serializable/_serializable.py | 2 +- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/pydsdl/_bit_length_set/_bit_length_set.py b/pydsdl/_bit_length_set/_bit_length_set.py index 3d30850..29fe9dd 100644 --- a/pydsdl/_bit_length_set/_bit_length_set.py +++ b/pydsdl/_bit_length_set/_bit_length_set.py @@ -276,22 +276,36 @@ def __len__(self) -> int: """ return len(self._op.expand()) + # ======================================== AUXILIARY METHODS ======================================== + def __eq__(self, other: typing.Any) -> bool: """ - .. attention:: - This method triggers slow numerical expansion. + Currently, this method performs an approximate comparison that may yield a false-positive for some operands. + This is done to avoid performing the costly numerical expansion of the operands. + The implementation may be changed to perform exact comparison in the future if the underlying solver is + updated accordingly. - >>> BitLengthSet([1, 2, 3]) == {1, 2, 3} + >>> BitLengthSet([1, 2, 4]) == {1, 2, 4} True - >>> BitLengthSet([123]) == 123 + >>> BitLengthSet([1, 2, 4]) == {1, 3, 4} + False + >>> BitLengthSet([123]) == BitLengthSet(123) True """ try: - return set(self) == set(BitLengthSet(other)) + other = BitLengthSet(other) except TypeError: return NotImplemented + return self.min == other.min and self.max == other.max and set(self % 64) == set(other % 64) - # ======================================== AUXILIARY METHODS ======================================== + def __hash__(self) -> int: + """ + Hash is computed in (nearly) constant time (numerical expansion is not performed). + + >>> hash(BitLengthSet({1, 2, 3})) != hash(BitLengthSet({1, 3})) + True + """ + return hash((self.min, self.max, frozenset(self % 64))) def __bool__(self) -> bool: """ diff --git a/pydsdl/_serializable/_serializable.py b/pydsdl/_serializable/_serializable.py index eed1595..ba860b0 100644 --- a/pydsdl/_serializable/_serializable.py +++ b/pydsdl/_serializable/_serializable.py @@ -76,7 +76,7 @@ def __hash__(self) -> int: bls = self.bit_length_set except TypeError: # If the type is non-serializable. bls = BitLengthSet(0) - return hash(str(self) + str(bls)) + return hash((str(self), bls)) def __eq__(self, other: object) -> bool: if isinstance(other, SerializableType): From 2a0d5a58b9c9a71e6c4c92eb4476a5e0a1c40ff6 Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Wed, 7 Apr 2021 16:39:57 +0300 Subject: [PATCH 18/21] Do not cover the diagnostic --- pydsdl/_bit_length_set/_symbolic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydsdl/_bit_length_set/_symbolic.py b/pydsdl/_bit_length_set/_symbolic.py index dbe1a2e..4269e8e 100644 --- a/pydsdl/_bit_length_set/_symbolic.py +++ b/pydsdl/_bit_length_set/_symbolic.py @@ -310,7 +310,7 @@ def expand(self) -> typing.Set[int]: started_at = monotonic() self._expansion = self._child.expand() elapsed = monotonic() - started_at - if elapsed > 2.0: + if elapsed > 2.0: # pragma: no cover _logger.info( "Numerical expansion took %.1f seconds; the result contains %d items:\n%s", elapsed, From 2ff2f64600e2c558f6992115c91403410cd088a0 Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Wed, 7 Apr 2021 17:06:34 +0300 Subject: [PATCH 19/21] Reduce divisor used in __eq__ to reduce complexity --- pydsdl/_bit_length_set/_bit_length_set.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/pydsdl/_bit_length_set/_bit_length_set.py b/pydsdl/_bit_length_set/_bit_length_set.py index 29fe9dd..cce8069 100644 --- a/pydsdl/_bit_length_set/_bit_length_set.py +++ b/pydsdl/_bit_length_set/_bit_length_set.py @@ -20,6 +20,19 @@ class BitLengthSet: so production systems should not rely on them. Instances are guaranteed to be immutable. + + >>> b = 16 + BitLengthSet(8).repeat_range(256) + >>> b + BitLengthSet(concat({16},repeat(<=256,{8}))) + >>> b = 32 + b.repeat_range(65536) + >>> b + BitLengthSet(concat({32},repeat(<=65536,concat({16},repeat(<=256,{8}))))) + >>> b.min, b.max + (32, 135266336) + >>> sorted(b % 16) + [0, 8] + >>> sorted(b % 32) + [0, 8, 16, 24] """ def __init__(self, value: typing.Union[typing.Iterable[int], int, Operator, "BitLengthSet"]): @@ -296,16 +309,17 @@ def __eq__(self, other: typing.Any) -> bool: other = BitLengthSet(other) except TypeError: return NotImplemented - return self.min == other.min and self.max == other.max and set(self % 64) == set(other % 64) + divisor = 32 + return self.min == other.min and self.max == other.max and set(self % divisor) == set(other % divisor) def __hash__(self) -> int: """ - Hash is computed in (nearly) constant time (numerical expansion is not performed). + Hash is computed in constant time (numerical expansion is not performed). - >>> hash(BitLengthSet({1, 2, 3})) != hash(BitLengthSet({1, 3})) + >>> hash(BitLengthSet({1, 4})) != hash(BitLengthSet({1, 3})) True """ - return hash((self.min, self.max, frozenset(self % 64))) + return hash((self.min, self.max)) def __bool__(self) -> bool: """ From 27d0b808dd591a344b217ff9c5bfa7e160e5b7e5 Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Thu, 8 Apr 2021 00:03:12 +0300 Subject: [PATCH 20/21] Review --- pydsdl/_bit_length_set/_symbolic.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pydsdl/_bit_length_set/_symbolic.py b/pydsdl/_bit_length_set/_symbolic.py index 4269e8e..6bbffee 100644 --- a/pydsdl/_bit_length_set/_symbolic.py +++ b/pydsdl/_bit_length_set/_symbolic.py @@ -17,9 +17,6 @@ class Operator(abc.ABC): @abc.abstractmethod def modulo(self, divisor: int) -> typing.Set[int]: - """ - May return duplicates. - """ raise NotImplementedError @property @@ -38,7 +35,6 @@ def expand(self) -> typing.Set[int]: Transform the symbolic form into numerical form. This is useful for cross-checking derived solutions and for DSDL expression evaluation. For complex expressions this may be incomputable due to combinatorial explosion or memory limits. - May return duplicates. """ raise NotImplementedError @@ -318,7 +314,7 @@ def expand(self) -> typing.Set[int]: self._child, stack_info=True, ) - assert elapsed < _POISON_SLOW_EXPANSION + assert elapsed < _POISON_SLOW_EXPANSION_SECONDS # Since we did an expansion anyway, the set must be compact, # so we use this opportunity to validate the correctness of the solver. @@ -352,7 +348,7 @@ def validate_numerically(op: Operator) -> None: assert op.modulo(div) == {x % div for x in s}, div -_POISON_SLOW_EXPANSION = float(os.environ.get("PYDSDL_POISON_SLOW_EXPANSION", "999999999")) +_POISON_SLOW_EXPANSION_SECONDS = float(os.environ.get("PYDSDL_POISON_SLOW_EXPANSION_SECONDS", "999999999")) """ This is intended for developers only so it is not mentioned in the public documentation. The purpose is to trigger an assertion failure if a numerical expansion takes more than this many seconds. From a1be9d99f88ce34d964d480ac5afeedaaa48a4d9 Mon Sep 17 00:00:00 2001 From: Pavel Kirienko Date: Thu, 8 Apr 2021 00:06:20 +0300 Subject: [PATCH 21/21] Fix deprecation warning syntax --- pydsdl/_bit_length_set/_bit_length_set.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pydsdl/_bit_length_set/_bit_length_set.py b/pydsdl/_bit_length_set/_bit_length_set.py index cce8069..e1b1676 100644 --- a/pydsdl/_bit_length_set/_bit_length_set.py +++ b/pydsdl/_bit_length_set/_bit_length_set.py @@ -342,7 +342,7 @@ def elementwise_sum_k_multicombinations(self, k: int) -> "BitLengthSet": # prag """ :meta private: """ - warnings.warn(DeprecationWarning("Use repeat() instead")) + warnings.warn("Use repeat() instead", DeprecationWarning) return self.repeat(k) @staticmethod @@ -352,7 +352,7 @@ def elementwise_sum_cartesian_product( """ :meta private: """ - warnings.warn(DeprecationWarning("Use concatenate() instead")) + warnings.warn("Use concatenate() instead", DeprecationWarning) return BitLengthSet.concatenate(sets)