Skip to content

Commit 9f14681

Browse files
committed
Issue #17804: New function struct.iter_unpack allows for streaming struct unpacking.
1 parent 3da6707 commit 9f14681

5 files changed

Lines changed: 262 additions & 1 deletion

File tree

Doc/library/struct.rst

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,19 @@ The module defines the following exception and functions:
6666
format (``len(buffer[offset:])`` must be at least ``calcsize(fmt)``).
6767

6868

69+
.. function:: iter_unpack(fmt, buffer)
70+
71+
Iteratively unpack from the buffer *buffer* according to the format
72+
string *fmt*. This function returns an iterator which will read
73+
equally-sized chunks from the buffer until all its contents have been
74+
consumed. The buffer's size in bytes must be a multiple of the amount
75+
of data required by the format, as reflected by :func:`calcsize`.
76+
77+
Each iteration yields a tuple as specified by the format string.
78+
79+
.. versionadded:: 3.4
80+
81+
6982
.. function:: calcsize(fmt)
7083

7184
Return the size of the struct (and hence of the bytes object produced by
@@ -388,6 +401,13 @@ The :mod:`struct` module also defines the following type:
388401
(``len(buffer[offset:])`` must be at least :attr:`self.size`).
389402

390403

404+
.. method:: iter_unpack(buffer)
405+
406+
Identical to the :func:`iter_unpack` function, using the compiled format.
407+
(``len(buffer)`` must be a multiple of :attr:`self.size`).
408+
409+
.. versionadded:: 3.4
410+
391411
.. attribute:: format
392412

393413
The format string used to construct this Struct object.

Lib/struct.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
__all__ = [
22
# Functions
33
'calcsize', 'pack', 'pack_into', 'unpack', 'unpack_from',
4+
'iter_unpack',
45

56
# Classes
67
'Struct',

Lib/test/test_struct.py

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
from collections import abc
12
import array
3+
import operator
24
import unittest
35
import struct
46
import sys
@@ -593,8 +595,78 @@ def test__sizeof__(self):
593595
self.check_sizeof('0s', 1)
594596
self.check_sizeof('0c', 0)
595597

598+
599+
class UnpackIteratorTest(unittest.TestCase):
    """Exercise Struct.iter_unpack() and the module-level iter_unpack()."""

    def test_construct(self):
        def check_is_iterator(obj):
            self.assertIsInstance(obj, abc.Iterator)
            self.assertIsInstance(obj, abc.Iterable)

        packed = struct.Struct('>ibcp')
        # Buffers accepted at construction time: empty, or one full record.
        for data in (b"", b"1234567"):
            check_is_iterator(packed.iter_unpack(data))
        # Buffers one byte short / one byte long are rejected up front.
        for data in (b"123456", b"12345678"):
            with self.assertRaises(struct.error):
                packed.iter_unpack(data)
        # A zero-length struct is rejected whatever the buffer holds.
        empty = struct.Struct('>')
        for data in (b"", b"12"):
            with self.assertRaises(struct.error):
                empty.iter_unpack(data)

    def test_iterate(self):
        it = struct.Struct('>IB').iter_unpack(bytes(range(1, 16)))
        expected = [(0x01020304, 5), (0x06070809, 10), (0x0b0c0d0e, 15)]
        for record in expected:
            self.assertEqual(next(it), record)
        # Once exhausted, the iterator keeps raising StopIteration.
        for _ in range(2):
            with self.assertRaises(StopIteration):
                next(it)

    def test_arbitrary_buffer(self):
        # Any object exporting a buffer is accepted, not only bytes.
        view = memoryview(bytes(range(1, 11)))
        it = struct.Struct('>IB').iter_unpack(view)
        for record in [(0x01020304, 5), (0x06070809, 10)]:
            self.assertEqual(next(it), record)
        for _ in range(2):
            with self.assertRaises(StopIteration):
                next(it)

    def test_length_hint(self):
        hint = operator.length_hint
        it = struct.Struct('>IB').iter_unpack(bytes(range(1, 16)))
        # The hint counts down by one for every record consumed.
        for remaining in (3, 2, 1):
            self.assertEqual(hint(it), remaining)
            next(it)
        self.assertEqual(hint(it), 0)
        with self.assertRaises(StopIteration):
            next(it)
        # Still zero after the iterator has signalled exhaustion.
        self.assertEqual(hint(it), 0)

    def test_module_func(self):
        # Sanity check for the global struct.iter_unpack()
        it = struct.iter_unpack('>IB', bytes(range(1, 11)))
        for record in [(0x01020304, 5), (0x06070809, 10)]:
            self.assertEqual(next(it), record)
        for _ in range(2):
            with self.assertRaises(StopIteration):
                next(it)
666+
667+
596668
def test_main():
597-
support.run_unittest(StructTest)
669+
support.run_unittest(__name__)
598670

599671
if __name__ == '__main__':
600672
test_main()

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ Core and Builtins
4949
Library
5050
-------
5151

52+
- Issue #17804: New function ``struct.iter_unpack`` allows for streaming
53+
struct unpacking.
54+
5255
- Issue #17830: When keyword.py is used to update a keyword file, it now
5356
preserves the line endings of the original file.
5457

Modules/_struct.c

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1247,6 +1247,9 @@ align(Py_ssize_t size, char c, const formatdef *e)
12471247
return size;
12481248
}
12491249

1250+
/*
1251+
* Struct object implementation.
1252+
*/
12501253

12511254
/* calculate the size of a format string */
12521255

@@ -1556,6 +1559,142 @@ s_unpack_from(PyObject *self, PyObject *args, PyObject *kwds)
15561559
}
15571560

15581561

1562+
/* Unpack iterator type */
1563+
1564+
typedef struct {
1565+
PyObject_HEAD
1566+
PyStructObject *so;
1567+
Py_buffer buf;
1568+
Py_ssize_t index;
1569+
} unpackiterobject;
1570+
1571+
static void
1572+
unpackiter_dealloc(unpackiterobject *self)
1573+
{
1574+
Py_XDECREF(self->so);
1575+
PyBuffer_Release(&self->buf);
1576+
PyObject_GC_Del(self);
1577+
}
1578+
1579+
static int
1580+
unpackiter_traverse(unpackiterobject *self, visitproc visit, void *arg)
1581+
{
1582+
Py_VISIT(self->so);
1583+
Py_VISIT(self->buf.obj);
1584+
return 0;
1585+
}
1586+
1587+
static PyObject *
1588+
unpackiter_len(unpackiterobject *self)
1589+
{
1590+
Py_ssize_t len;
1591+
if (self->so == NULL)
1592+
len = 0;
1593+
else
1594+
len = (self->buf.len - self->index) / self->so->s_size;
1595+
return PyLong_FromSsize_t(len);
1596+
}
1597+
1598+
static PyMethodDef unpackiter_methods[] = {
1599+
{"__length_hint__", (PyCFunction) unpackiter_len, METH_NOARGS, NULL},
1600+
{NULL, NULL} /* sentinel */
1601+
};
1602+
1603+
static PyObject *
1604+
unpackiter_iternext(unpackiterobject *self)
1605+
{
1606+
PyObject *result;
1607+
if (self->so == NULL)
1608+
return NULL;
1609+
if (self->index >= self->buf.len) {
1610+
/* Iterator exhausted */
1611+
Py_CLEAR(self->so);
1612+
PyBuffer_Release(&self->buf);
1613+
return NULL;
1614+
}
1615+
assert(self->index + self->so->s_size <= self->buf.len);
1616+
result = s_unpack_internal(self->so,
1617+
(char*) self->buf.buf + self->index);
1618+
self->index += self->so->s_size;
1619+
return result;
1620+
}
1621+
1622+
PyTypeObject unpackiter_type = {
1623+
PyVarObject_HEAD_INIT(&PyType_Type, 0)
1624+
"unpack_iterator", /* tp_name */
1625+
sizeof(unpackiterobject), /* tp_basicsize */
1626+
0, /* tp_itemsize */
1627+
(destructor)unpackiter_dealloc, /* tp_dealloc */
1628+
0, /* tp_print */
1629+
0, /* tp_getattr */
1630+
0, /* tp_setattr */
1631+
0, /* tp_reserved */
1632+
0, /* tp_repr */
1633+
0, /* tp_as_number */
1634+
0, /* tp_as_sequence */
1635+
0, /* tp_as_mapping */
1636+
0, /* tp_hash */
1637+
0, /* tp_call */
1638+
0, /* tp_str */
1639+
PyObject_GenericGetAttr, /* tp_getattro */
1640+
0, /* tp_setattro */
1641+
0, /* tp_as_buffer */
1642+
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1643+
0, /* tp_doc */
1644+
(traverseproc)unpackiter_traverse, /* tp_traverse */
1645+
0, /* tp_clear */
1646+
0, /* tp_richcompare */
1647+
0, /* tp_weaklistoffset */
1648+
PyObject_SelfIter, /* tp_iter */
1649+
(iternextfunc)unpackiter_iternext, /* tp_iternext */
1650+
unpackiter_methods /* tp_methods */
1651+
};
1652+
1653+
PyDoc_STRVAR(s_iter_unpack__doc__,
1654+
"S.iter_unpack(buffer) -> iterator(v1, v2, ...)\n\
1655+
\n\
1656+
Return an iterator yielding tuples unpacked from the given bytes\n\
1657+
source, like a repeated invocation of unpack_from(). Requires\n\
1658+
that the bytes length be a multiple of the struct size.");
1659+
1660+
static PyObject *
1661+
s_iter_unpack(PyObject *_so, PyObject *input)
1662+
{
1663+
PyStructObject *so = (PyStructObject *) _so;
1664+
unpackiterobject *self;
1665+
1666+
assert(PyStruct_Check(_so));
1667+
assert(so->s_codes != NULL);
1668+
1669+
if (so->s_size == 0) {
1670+
PyErr_Format(StructError,
1671+
"cannot iteratively unpack with a struct of length 0");
1672+
return NULL;
1673+
}
1674+
1675+
self = (unpackiterobject *) PyType_GenericAlloc(&unpackiter_type, 0);
1676+
if (self == NULL)
1677+
return NULL;
1678+
1679+
if (PyObject_GetBuffer(input, &self->buf, PyBUF_SIMPLE) < 0) {
1680+
Py_DECREF(self);
1681+
return NULL;
1682+
}
1683+
if (self->buf.len % so->s_size != 0) {
1684+
PyErr_Format(StructError,
1685+
"iterative unpacking requires a bytes length "
1686+
"multiple of %zd",
1687+
so->s_size);
1688+
Py_DECREF(self);
1689+
return NULL;
1690+
}
1691+
Py_INCREF(so);
1692+
self->so = so;
1693+
self->index = 0;
1694+
return (PyObject *) self;
1695+
}
1696+
1697+
15591698
/*
15601699
* Guts of the pack function.
15611700
*
@@ -1776,6 +1915,7 @@ s_sizeof(PyStructObject *self, void *unused)
17761915
/* List of functions */
17771916

17781917
static struct PyMethodDef s_methods[] = {
1918+
{"iter_unpack", s_iter_unpack, METH_O, s_iter_unpack__doc__},
17791919
{"pack", s_pack, METH_VARARGS, s_pack__doc__},
17801920
{"pack_into", s_pack_into, METH_VARARGS, s_pack_into__doc__},
17811921
{"unpack", s_unpack, METH_O, s_unpack__doc__},
@@ -2025,9 +2165,34 @@ unpack_from(PyObject *self, PyObject *args, PyObject *kwds)
20252165
return result;
20262166
}
20272167

2168+
PyDoc_STRVAR(iter_unpack_doc,
2169+
"iter_unpack(fmt, buffer) -> iterator(v1, v2, ...)\n\
2170+
\n\
2171+
Return an iterator yielding tuples unpacked from the given bytes\n\
2172+
source according to the format string, like a repeated invocation of\n\
2173+
unpack_from(). Requires that the bytes length be a multiple of the\n\
2174+
format struct size.");
2175+
2176+
static PyObject *
2177+
iter_unpack(PyObject *self, PyObject *args)
2178+
{
2179+
PyObject *s_object, *fmt, *input, *result;
2180+
2181+
if (!PyArg_ParseTuple(args, "OO:iter_unpack", &fmt, &input))
2182+
return NULL;
2183+
2184+
s_object = cache_struct(fmt);
2185+
if (s_object == NULL)
2186+
return NULL;
2187+
result = s_iter_unpack(s_object, input);
2188+
Py_DECREF(s_object);
2189+
return result;
2190+
}
2191+
20282192
static struct PyMethodDef module_functions[] = {
20292193
{"_clearcache", (PyCFunction)clearcache, METH_NOARGS, clearcache_doc},
20302194
{"calcsize", calcsize, METH_O, calcsize_doc},
2195+
{"iter_unpack", iter_unpack, METH_VARARGS, iter_unpack_doc},
20312196
{"pack", pack, METH_VARARGS, pack_doc},
20322197
{"pack_into", pack_into, METH_VARARGS, pack_into_doc},
20332198
{"unpack", unpack, METH_VARARGS, unpack_doc},

0 commit comments

Comments
 (0)