Skip to content

LazyReader

Bases: LazyItem

Source code in src/msglc/reader.py
Python
class LazyReader(LazyItem):
    """
    Reader for a file or buffer that starts with the `LazyWriter` magic bytes.

    The magic bytes are followed by two 10-byte header fields which are
    unpacked into the start position and the size of the table of contents.
    The table of contents is read and passed through `self._child` to build
    the root object that backs the dictionary-like accessors (`get`, `keys`,
    `values`, `items`) and the path based readers (`read`, `visit` and their
    async counterparts).
    """

    def __init__(
        self,
        buffer_or_path: str | BufferReader,
        *,
        counter: LazyStats | None = None,
        cached: bool = True,
        unpacker: Unpacker | None = None,
    ):
        """
        It is possible to use a customized unpacker.
        Please inherit the `Unpacker` class from the `unpacker.py`.
        There are already several unpackers available using different libraries.

        ```py
        class CustomUnpacker(Unpacker):
            def decode(self, data: bytes):
                # provide the decoding logic
                ...

        with LazyReader("file.msg", unpacker=CustomUnpacker()) as reader:
            # read the data
            ...
        ```

        :param buffer_or_path: the buffer or path to the file
        :param counter: the counter object for tracking the number of bytes read
        :param cached: whether to cache the data
        :param unpacker: the unpacker object for reading the data
        :raises ValueError: if `buffer_or_path` is neither a path nor a supported
            buffer, or if the data does not start with the expected magic bytes
        """
        self._buffer_or_path: str | BufferReader = buffer_or_path

        # a file opened here is owned by the reader and must not outlive a failed setup
        owns_buffer: bool = isinstance(self._buffer_or_path, str)

        buffer: BufferReader
        if owns_buffer:
            buffer = open(self._buffer_or_path, "rb", buffering=config.read_buffer_size)
        elif isinstance(self._buffer_or_path, (BytesIO, BufferedReader, MockIO)):
            buffer = self._buffer_or_path
        else:
            raise ValueError("Expecting a buffer or path.")

        # header layout: magic | 10 bytes toc start | 10 bytes toc size
        sep_a, sep_b, sep_c = (
            LazyWriter.magic_len(),
            LazyWriter.magic_len() + 10,
            LazyWriter.magic_len() + 20,
        )

        try:
            # keep the buffer unchanged in case of failure
            original_pos: int = buffer.tell()
            header: bytes = buffer.read(sep_c)
            buffer.seek(original_pos)

            if header[:sep_a] != LazyWriter.magic:
                raise ValueError("Invalid file format.")

            super().__init__(
                buffer,
                original_pos + sep_c,
                counter=counter,
                cached=cached,
                unpacker=unpacker,
            )

            # both fields are left-padded with NUL bytes up to 10 bytes
            toc_start: int = self._unpack(header[sep_a:sep_b].lstrip(b"\0"))
            toc_size: int = self._unpack(header[sep_b:sep_c].lstrip(b"\0"))

            self._obj = self._child(self._read(toc_start, toc_start + toc_size))
        except BaseException:
            # `__exit__` never runs when construction fails, so a file opened
            # above would otherwise stay open; caller supplied buffers are
            # left alone because the caller owns them
            if owns_buffer:
                buffer.close()
            raise

    def __repr__(self):
        """
        Return `LazyReader` (plus the path, when constructed from one) if the
        simple representation is configured or caching is off; otherwise
        return the representation of the fully loaded object.
        """
        file_path: str = ""
        if isinstance(self._buffer_or_path, str):
            file_path = " (" + self._buffer_or_path + ")"

        return (
            f"LazyReader{file_path}"
            if config.simple_repr or not self._cached
            else self.to_obj().__repr__()
        )

    def __enter__(self):
        """
        Enter the context: calls `increment_gc_counter()` and returns the reader.
        """
        increment_gc_counter()

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Leave the context: calls `decrement_gc_counter()` and closes the
        underlying file if it was opened by this reader from a path.
        """
        decrement_gc_counter()

        # buffers handed in by the caller stay open; only close what we opened
        if isinstance(self._buffer_or_path, str):
            self._buffer.close()

    def __getitem__(self, item):
        """
        Shorthand for `read(item)`.
        """
        return self.read(item)

    def __len__(self):
        """
        Length of the root object.
        """
        return len(self._obj)

    def __contains__(self, item):
        """
        Membership test against the root object.
        """
        return item in self._obj

    def get(self, key, default=None):
        """
        Mimics the `get` method for dictionaries.
        """
        return self._obj.get(key, default)

    def keys(self):
        """
        Mimics the `keys` method for dictionaries.
        """
        return self._obj.keys()

    def values(self):
        """
        Mimics the `values` method for dictionaries.
        """
        return self._obj.values()

    def items(self):
        """
        Mimics the `items` method for dictionaries.
        """
        return self._obj.items()

    def read(self, path: str | list | slice | None = None):
        """
        Reads the data from the given path.

        This method navigates through the data structure based on the provided path.
        The path can be a string or a list. If it's a string, it's split into a list
        using '/' as the separator. Each element of the list is used to navigate
        through the data structure. Any other value (for example a slice) is used
        as a single key. Empty string segments are skipped.

        If the path is None, it returns the root object.

        :param path: the path to the data to read
        :return: The data at the given path.
        """

        path_stack: list
        if path is None:
            path_stack = []
        elif isinstance(path, str):
            path_stack = path.split("/")
        elif isinstance(path, list):
            path_stack = path
        else:
            path_stack = [path]

        target = self._obj
        for key in (v for v in path_stack if v != ""):
            # string keys addressing a list are converted to positions first
            target = target[
                to_index(key, len(target))
                if isinstance(key, str) and isinstance(target, (list, LazyList))
                else key
            ]
        return target

    def visit(self, path: str | None = ""):
        """
        Reads the data from the given path.

        This method navigates through the data structure based on the provided path.
        The path can be a string of paths separated by '/'.
        Empty segments are skipped.

        If the path is empty or None, it returns the root object.

        :param path: the path to the data to read
        :return: The data at the given path.
        """
        # `None` is treated like the empty path instead of failing on `.split`
        segments = ("" if path is None else path).split("/")

        target = self._obj
        for key in (v for v in segments if v != ""):
            # every segment is a string here; lists need a converted index
            target = target[
                to_index(key, len(target))
                if isinstance(target, (list, LazyList))
                else key
            ]
        return target

    async def async_read(self, path: str | list | slice | None = None):
        """
        Reads the data from the given path.

        This method navigates through the data structure based on the provided path.
        The path can be a string or a list. If it's a string, it's split into a list
        using '/' as the separator. Each element of the list is used to navigate
        through the data structure. Any other value (for example a slice) is used
        as a single key. Empty string segments are skipped.

        If the path is None, it returns the root object.

        :param path: the path to the data to read
        :return: The data at the given path.
        """

        path_stack: list
        if path is None:
            path_stack = []
        elif isinstance(path, str):
            path_stack = path.split("/")
        elif isinstance(path, list):
            path_stack = path
        else:
            path_stack = [path]

        target = self._obj
        for key in (v for v in path_stack if v != ""):
            # string keys addressing a list are converted to positions first
            target = await async_get(
                target,
                to_index(key, len(target))
                if isinstance(key, str) and isinstance(target, (list, LazyList))
                else key,
            )
        return target

    async def async_visit(self, path: str | None = ""):
        """
        Reads the data from the given path.

        This method navigates through the data structure based on the provided path.
        The path can be a string of paths separated by '/'.
        Empty segments are skipped.

        If the path is empty or None, it returns the root object.

        :param path: the path to the data to read
        :return: The data at the given path.
        """
        # `None` is treated like the empty path instead of failing on `.split`
        segments = ("" if path is None else path).split("/")

        target = self._obj
        for key in (v for v in segments if v != ""):
            # every segment is a string here; lists need a converted index
            target = await async_get(
                target,
                to_index(key, len(target))
                if isinstance(target, (list, LazyList))
                else key,
            )
        return target

    def to_obj(self):
        """
        Converts the data structure to a JSON serializable object.
        This method will read the entire data structure into memory.
        Data returned by this method can leave the `LazyReader` context.
        """
        return to_obj(self._obj)

__init__(buffer_or_path, *, counter=None, cached=True, unpacker=None)

It is possible to use a customized unpacker. Please inherit the Unpacker class from the unpacker.py. There are already several unpackers available using different libraries.

Python
1
2
3
4
5
6
7
8
# Example: a custom unpacker is a subclass of `Unpacker` that supplies `decode`.
class CustomUnpacker(Unpacker):
    def decode(self, data: bytes):
        # provide the decoding logic
        ...

# The custom unpacker instance is handed to the reader via the `unpacker` keyword.
with LazyReader("file.msg", unpacker=CustomUnpacker()) as reader:
    # read the data
    ...

Parameters:

Name Type Description Default
buffer_or_path str | BufferReader

the buffer or path to the file

required
counter LazyStats | None

the counter object for tracking the number of bytes read

None
cached bool

whether to cache the data

True
unpacker Unpacker | None

the unpacker object for reading the data

None
Source code in src/msglc/reader.py
Python
def __init__(
    self,
    buffer_or_path: str | BufferReader,
    *,
    counter: LazyStats | None = None,
    cached: bool = True,
    unpacker: Unpacker | None = None,
):
    """
    It is possible to use a customized unpacker.
    Please inherit the `Unpacker` class from the `unpacker.py`.
    There are already several unpackers available using different libraries.

    ```py
    class CustomUnpacker(Unpacker):
        def decode(self, data: bytes):
            # provide the decoding logic
            ...

    with LazyReader("file.msg", unpacker=CustomUnpacker()) as reader:
        # read the data
        ...
    ```

    :param buffer_or_path: the buffer or path to the file
    :param counter: the counter object for tracking the number of bytes read
    :param cached: whether to cache the data
    :param unpacker: the unpacker object for reading the data
    :raises ValueError: if `buffer_or_path` is neither a path nor a supported
        buffer, or if the data does not start with the expected magic bytes
    """
    self._buffer_or_path: str | BufferReader = buffer_or_path

    # a file opened here is owned by the reader and must not outlive a failed setup
    owns_buffer: bool = isinstance(self._buffer_or_path, str)

    buffer: BufferReader
    if owns_buffer:
        buffer = open(self._buffer_or_path, "rb", buffering=config.read_buffer_size)
    elif isinstance(self._buffer_or_path, (BytesIO, BufferedReader, MockIO)):
        buffer = self._buffer_or_path
    else:
        raise ValueError("Expecting a buffer or path.")

    # header layout: magic | 10 bytes toc start | 10 bytes toc size
    sep_a, sep_b, sep_c = (
        LazyWriter.magic_len(),
        LazyWriter.magic_len() + 10,
        LazyWriter.magic_len() + 20,
    )

    try:
        # keep the buffer unchanged in case of failure
        original_pos: int = buffer.tell()
        header: bytes = buffer.read(sep_c)
        buffer.seek(original_pos)

        if header[:sep_a] != LazyWriter.magic:
            raise ValueError("Invalid file format.")

        super().__init__(
            buffer,
            original_pos + sep_c,
            counter=counter,
            cached=cached,
            unpacker=unpacker,
        )

        # both fields are left-padded with NUL bytes up to 10 bytes
        toc_start: int = self._unpack(header[sep_a:sep_b].lstrip(b"\0"))
        toc_size: int = self._unpack(header[sep_b:sep_c].lstrip(b"\0"))

        self._obj = self._child(self._read(toc_start, toc_start + toc_size))
    except BaseException:
        # the context manager exit never runs when construction fails, so a
        # file opened above would otherwise stay open; caller supplied
        # buffers are left alone because the caller owns them
        if owns_buffer:
            buffer.close()
        raise

async_read(path=None) async

Reads the data from the given path.

This method navigates through the data structure based on the provided path. The path can be a string or a list. If it's a string, it's split into a list using '/' as the separator. Each element of the list is used to navigate through the data structure.

If the path is None, it returns the root object.

Parameters:

Name Type Description Default
path str | list | slice | None

the path to the data to read

None

Returns:

Type Description

The data at the given path.

Source code in src/msglc/reader.py
Python
async def async_read(self, path: str | list | slice | None = None):
    """
    Reads the data from the given path.

    This method navigates through the data structure based on the provided path.
    The path can be a string or a list. If it's a string, it's split into a list
    using '/' as the separator. Each element of the list is used to navigate
    through the data structure.

    If the path is None, it returns the root object.

    :param path: the path to the data to read
    :return: The data at the given path.
    """

    path_stack: list
    if path is None:
        path_stack = []
    elif isinstance(path, str):
        path_stack = path.split("/")
    elif isinstance(path, list):
        path_stack = path
    else:
        path_stack = [path]

    target = self._obj
    for key in (v for v in path_stack if v != ""):
        target = await async_get(
            target,
            to_index(key, len(target))
            if isinstance(key, str) and isinstance(target, (list, LazyList))
            else key,
        )
    return target

async_visit(path='') async

Reads the data from the given path.

This method navigates through the data structure based on the provided path. The path can be a string of paths separated by '/'.

If the path is empty (the default), it returns the root object.

Parameters:

Name Type Description Default
path str

the path to the data to read

''

Returns:

Type Description

The data at the given path.

Source code in src/msglc/reader.py
Python
async def async_visit(self, path: str = ""):
    """
    Reads the data from the given path.

    This method navigates through the data structure based on the provided path.
    The path can be a string of paths separated by '/'.

    If the path is None, it returns the root object.

    :param path: the path to the data to read
    :return: The data at the given path.
    """
    target = self._obj
    for key in (v for v in path.split("/") if v != ""):
        target = await async_get(
            target,
            to_index(key, len(target))
            if isinstance(target, (list, LazyList))
            else key,
        )
    return target

get(key, default=None)

Mimics the get method for dictionaries.

Source code in src/msglc/reader.py
Python
def get(self, key, default=None):
    """
    Dictionary-style lookup: forwards `key` and `default` to the root object's `get`.

    :param key: the key to look up
    :param default: the fallback passed along to the root object's `get`
    :return: whatever the root object's `get` returns
    """
    root = self._obj
    return root.get(key, default)

items()

Mimics the items method for dictionaries.

Source code in src/msglc/reader.py
Python
def items(self):
    """
    Dictionary-style `items`: forwards the call to the root object.

    :return: whatever the root object's `items` returns
    """
    root = self._obj
    return root.items()

keys()

Mimics the keys method for dictionaries.

Source code in src/msglc/reader.py
Python
def keys(self):
    """
    Dictionary-style `keys`: forwards the call to the root object.

    :return: whatever the root object's `keys` returns
    """
    root = self._obj
    return root.keys()

read(path=None)

Reads the data from the given path.

This method navigates through the data structure based on the provided path. The path can be a string or a list. If it's a string, it's split into a list using '/' as the separator. Each element of the list is used to navigate through the data structure.

If the path is None, it returns the root object.

Parameters:

Name Type Description Default
path str | list | slice | None

the path to the data to read

None

Returns:

Type Description

The data at the given path.

Source code in src/msglc/reader.py
Python
def read(self, path: str | list | slice | None = None):
    """
    Reads the data from the given path.

    This method navigates through the data structure based on the provided path.
    The path can be a string or a list. If it's a string, it's split into a list
    using '/' as the separator. Each element of the list is used to navigate
    through the data structure.

    If the path is None, it returns the root object.

    :param path: the path to the data to read
    :return: The data at the given path.
    """

    path_stack: list
    if path is None:
        path_stack = []
    elif isinstance(path, str):
        path_stack = path.split("/")
    elif isinstance(path, list):
        path_stack = path
    else:
        path_stack = [path]

    target = self._obj
    for key in (v for v in path_stack if v != ""):
        target = target[
            to_index(key, len(target))
            if isinstance(key, str) and isinstance(target, (list, LazyList))
            else key
        ]
    return target

to_obj()

Converts the data structure to a JSON serializable object. This method will read the entire data structure into memory. Data returned by this method can leave the LazyReader context.

Source code in src/msglc/reader.py
Python
def to_obj(self):
    """
    Materialise the whole data structure as a JSON serializable object.
    Everything is read into memory by this call.
    The returned data may be used outside the `LazyReader` context.
    """
    root = self._obj
    return to_obj(root)

values()

Mimics the values method for dictionaries.

Source code in src/msglc/reader.py
Python
def values(self):
    """
    Dictionary-style `values`: forwards the call to the root object.

    :return: whatever the root object's `values` returns
    """
    root = self._obj
    return root.values()

visit(path='')

Reads the data from the given path.

This method navigates through the data structure based on the provided path. The path can be a string of paths separated by '/'.

If the path is empty (the default), it returns the root object.

Parameters:

Name Type Description Default
path str

the path to the data to read

''

Returns:

Type Description

The data at the given path.

Source code in src/msglc/reader.py
Python
def visit(self, path: str = ""):
    """
    Reads the data from the given path.

    This method navigates through the data structure based on the provided path.
    The path can be a string of paths separated by '/'.

    If the path is None, it returns the root object.

    :param path: the path to the data to read
    :return: The data at the given path.
    """
    target = self._obj
    for key in (v for v in path.split("/") if v != ""):
        target = target[
            to_index(key, len(target))
            if isinstance(target, (list, LazyList))
            else key
        ]
    return target