"""
Serialization and deserialization of the Sigsum ASCII format (sigsum/ascii.py).

The format is line oriented: every line has the form ``key=value``.  A key may
appear on several lines, in which case it carries several values.
"""
import io


def dumps(data):
    """
    Serialize a key/values mapping to its ASCII representation.

    Each value of *data* is either a single item or a list of items, an item
    being a str, an int or bytes.  One ``key=value`` line is written per item,
    following the iteration order of *data*; bytes items are hex-encoded.

    Raises TypeError if an item is of any other type.
    """
    res = io.StringIO()
    # Iterate keys and index (rather than calling .items()) so that any object
    # offering __iter__ and __getitem__ is accepted, not only dict.
    for key in data:
        values = data[key]
        if not isinstance(values, list):
            # Single-value shortcut: {"foo": "bar"} means {"foo": ["bar"]}.
            values = [values]
        for val in values:
            if isinstance(val, (int, str)):
                res.write(f"{key}={val}\n")
            elif isinstance(val, bytes):
                res.write(f"{key}={val.hex()}\n")
            else:
                raise TypeError(
                    f"Object of type {type(val).__name__} is not ASCII serializable"
                )
    return res.getvalue()


def loads(txt):
    """
    Deserialize the given string into an ASCIIValue.

    Every line of *txt* must have the form ``key=value`` with a non-empty
    value; only the first '=' of a line separates the key from the value, and
    trailing whitespace is stripped from the line.

    Raises ASCIIDecodeError, naming the 1-based number of the offending line,
    if a line has no '=' delimiter or has nothing after it.
    """
    kv = []
    for lno, line in enumerate(txt.splitlines(), 1):
        key, sep, val = line.rstrip().partition("=")
        if not sep:
            raise ASCIIDecodeError(f"Expecting '=' delimiter line {lno}")
        if val == "":
            raise ASCIIDecodeError(f"Expecting value after '=' line {lno}")
        kv.append((key, val))
    return ASCIIValue(kv)


class ASCIIDecodeError(Exception):
    """
    ASCIIDecodeError indicates that loads couldn't deserialize the given input.
    """


class ASCIIValue:
    """
    ASCIIValue maps each key (str) to the list of its values (List[str]) and
    offers convenience getters that convert the raw strings.

    It is built from an iterable of (key, value) pairs; the values of a
    repeated key are collected in order of appearance.
    """

    # Instances compare by content (see __eq__), so they are deliberately
    # unhashable.
    __hash__ = None

    def __init__(self, data):
        self._d = {}
        for k, v in data:
            self._d.setdefault(k, []).append(v)

    def __getitem__(self, k):
        return self._d[k]

    def __len__(self):
        return len(self._d)

    def __iter__(self):
        return iter(self._d)

    def getone(self, k):
        """
        Return the single value stored under *k*.

        Raises KeyError if *k* is missing and ValueError if it has more than
        one value.
        """
        v = self._d[k]
        if len(v) > 1:
            raise ValueError(f"{k}: expected a single value, got {len(v)}")
        return v[0]

    def getint(self, k, many=False):
        """
        Return the value of *k* converted with int(), or the list of all its
        converted values when *many* is true.

        Raises KeyError if *k* is missing and ValueError if a value is not an
        integer or if *many* is false and *k* has more than one value.
        """
        if many:
            return [int(x) for x in self._d[k]]
        return int(self.getone(k))

    def getbytes(self, k, many=False):
        """
        Return the value of *k* decoded from hexadecimal, or the list of all
        its decoded values when *many* is true.

        Raises KeyError if *k* is missing and ValueError if a value is not
        valid hexadecimal or if *many* is false and *k* has more than one
        value.
        """
        if many:
            return [bytes.fromhex(x) for x in self._d[k]]
        return bytes.fromhex(self.getone(k))

    def __repr__(self):
        pairs = ", ".join(
            f"({k!r}, {v!r})" for k, vs in self._d.items() for v in vs
        )
        return f"ASCIIValue([{pairs}])"

    def __eq__(self, other):
        if isinstance(other, ASCIIValue):
            return self._d == other._d
        if isinstance(other, dict):
            return self._d == other
        return NotImplemented
"""Tests for the sigsum.ascii module (sigsum/ascii_test.py)."""
import io
import operator
from operator import methodcaller as M

import pytest

from . import ascii

# Fixtures shared by several parametrized cases below.
_FOO_BAR_BAZ = [("foo", "bar"), ("foo", "baz")]
_FOO_INTS = [("foo", "42"), ("foo", "0")]
_FOO_HEX = [("foo", "dead"), ("foo", "beef")]
_THREE_PAIRS = [("foo", "bar"), ("foo", "baz"), ("qux", "quux")]


def test():
    pass


@pytest.mark.parametrize(
    "txt, expected",
    [
        ("", {}),
        ("foo=bar", {"foo": ["bar"]}),
        ("foo=bar\nqux=42", {"foo": ["bar"], "qux": ["42"]}),
        ("foo=bar\nfoo=biz", {"foo": ["bar", "biz"]}),
        ("error=something went wrong", {"error": ["something went wrong"]}),
        ("error=a message with an = sign", {"error": ["a message with an = sign"]}),
    ],
)
def test_loads(txt, expected):
    """loads turns well-formed input into the expected key/values mapping."""
    parsed = ascii.loads(txt)
    assert parsed == expected


@pytest.mark.parametrize(
    "txt, message",
    [
        ("foo", "Expecting '=' delimiter line 1"),
        ("foo=", "Expecting value after '=' line 1"),
    ],
)
def test_loads_error(txt, message):
    """loads rejects malformed lines with an ASCIIDecodeError."""
    with pytest.raises(ascii.ASCIIDecodeError, match=message):
        ascii.loads(txt)


@pytest.mark.parametrize(
    "data, expected",
    [
        ({}, ""),
        ({"foo": ["bar"], "baz": ["biz"]}, "foo=bar\nbaz=biz\n"),
        ({"foo": ["bar", "baz"]}, "foo=bar\nfoo=baz\n"),
        ({"foo": [42]}, "foo=42\n"),
        ({"foo": [b"\xDE\xAD\xBE\xEF"]}, "foo=deadbeef\n"),
        ({"foo": "bar"}, "foo=bar\n"),
    ],
    ids=["empty", "simple", "list", "int", "bytes", "single-value-shortcut"],
)
def test_dumps(data, expected):
    """dumps writes one key=value line per item."""
    serialized = ascii.dumps(data)
    assert serialized == expected


def test_dumps_type_error():
    """dumps refuses items that are neither str, int nor bytes."""
    unsupported = {"foo": [object()]}
    with pytest.raises(
        TypeError, match="Object of type object is not ASCII serializable"
    ):
        ascii.dumps(unsupported)


@pytest.mark.parametrize(
    "data, func, expected",
    [
        # Check that it behaves like a Mapping[str, List[str]]
        (_FOO_BAR_BAZ, operator.itemgetter("foo"), ["bar", "baz"]),
        (_FOO_BAR_BAZ, len, 1),
        (_FOO_BAR_BAZ, list, ["foo"]),
        # Check accessors
        ([("foo", "bar")], M("getone", "foo"), "bar"),
        ([("foo", "42")], M("getint", "foo"), 42),
        ([("foo", "deadbeef")], M("getbytes", "foo"), b"\xDE\xAD\xBE\xEF"),
        (_FOO_INTS, M("getint", "foo", True), [42, 0]),
        (_FOO_HEX, M("getbytes", "foo", True), [b"\xDE\xAD", b"\xBE\xEF"]),
    ],
)
def test_asciivalue_getters(data, func, expected):
    """ASCIIValue supports mapping access and its typed getters."""
    value = ascii.ASCIIValue(data)
    assert func(value) == expected


@pytest.mark.parametrize(
    "data, func, error",
    [
        # missing key
        ([], M("getone", "foo"), KeyError),
        ([], M("getint", "foo"), KeyError),
        ([], M("getbytes", "foo"), KeyError),
        # too many values
        (_FOO_BAR_BAZ, M("getone", "foo"), ValueError),
        (_FOO_INTS, M("getint", "foo"), ValueError),
        (_FOO_HEX, M("getbytes", "foo"), ValueError),
        # strconv errors
        ([("foo", "xx")], M("getint", "foo"), ValueError),
        ([("foo", "xx")], M("getbytes", "foo"), ValueError),
    ],
)
def test_asciivalue_getters_errorrs(data, func, error):
    """The getters raise on missing keys, extra values and bad conversions."""
    value = ascii.ASCIIValue(data)
    with pytest.raises(error):
        func(value)


def test_asciivalue_repr():
    """repr lists the (key, value) pairs, one pair per stored value."""
    value = ascii.ASCIIValue(_THREE_PAIRS)
    wanted = "ASCIIValue([('foo', 'bar'), ('foo', 'baz'), ('qux', 'quux')])"
    assert repr(value) == wanted


def test_asciivalue_eq():
    """ASCIIValue compares equal to an identical ASCIIValue and to a dict."""
    value = ascii.ASCIIValue(_THREE_PAIRS)
    assert value == ascii.ASCIIValue(
        [("foo", "bar"), ("foo", "baz"), ("qux", "quux")]
    )
    assert value == {"foo": ["bar", "baz"], "qux": ["quux"]}