Skip to content

Replace diskcache client call with direct socket connection

The remote diskcache support introduced in "Enable getting the latest diskcache over a socket" (!79, merged) uses the diskcache client to talk to the remote endpoint. That client is quite a heavy dependency, and it can be replaced with direct `socket.socket` calls in Python.

The following is a proof-of-concept pure Python socket client for diskcache:

import socket
import struct

from ligo.segments import (segment, segmentlist)


def _read(s, count, start=0):
    """Read exactly ``count`` bytes from a socket, after an optional skip.

    Parameters
    ----------
    s : socket-like
        any object with a ``recv(nbytes)`` method that returns `bytes`
        and returns ``b""`` once the peer has closed the connection

    count : int
        number of bytes to return

    start : int, optional
        number of bytes to read and discard before the returned data

    Returns
    -------
    bytes
        exactly ``count`` bytes (``b""`` if ``count`` is not positive)

    Raises
    ------
    EOFError
        if the stream ends before ``start + count`` bytes were received
    """
    # recv() may hand back fewer bytes than requested, so the skip has to
    # loop just like the read; a single recv(start) can leave part of the
    # prefix in the stream and misalign everything that follows
    to_skip = start
    while to_skip > 0:
        skipped = s.recv(to_skip)
        if not skipped:
            raise EOFError("Unexpected end of socket data")
        to_skip -= len(skipped)

    # gather the chunks and join once rather than repeatedly re-copying
    chunks = []
    while count > 0:
        buf = s.recv(count)
        if not buf:
            raise EOFError("Unexpected end of socket data")
        count -= len(buf)
        chunks.append(buf)
    return b"".join(chunks)


def send(host, port, cmd, start=0):
    """Send a command to a diskcache server and return the decoded reply.

    The command is prefixed with its length (as decimal text) and written
    to a new TCP connection. The reply is read as a one-byte boolean flag
    followed, when the flag is set, by a 4-byte little-endian unsigned
    payload length and the payload itself.

    Parameters
    ----------
    host : str
        name or address of the server

    port : int
        TCP port of the server

    cmd : str
        the command text to send

    start : int, optional
        number of leading payload bytes to discard before decoding

    Returns
    -------
    str
        the payload, minus the first ``start`` bytes, decoded as UTF-8

    Raises
    ------
    RuntimeError
        if the reply flag indicates that no data follows
    """
    # NOTE(review): the prefix counts characters, not encoded bytes; the
    # two agree for ASCII commands -- confirm against the protocol
    query = f"{len(cmd)}{cmd}"

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.connect((host, port))
        # send() may write only part of the buffer; sendall() keeps going
        # until every byte has been handed to the kernel
        sock.sendall(query.encode("utf-8"))
        havedata = struct.unpack("<?", _read(sock, 1))[0]
        if havedata:
            datalen = struct.unpack("<I", _read(sock, 4))[0]
            # never skip past the end of the announced payload, which
            # would otherwise make the remaining byte count negative
            skip = min(start, datalen)
            return _read(sock, datalen - skip, start=skip).decode("utf-8")
        raise RuntimeError("No data returned")


def dump(host, port, version="0x101"):
    """Request an ASCII dump from a diskcache server and parse each line.

    Parameters
    ----------
    host : str
        name or address of the server

    port : int
        TCP port of the server

    version : str, optional
        value passed to the ``--version-ascii`` option; other types are
        converted with `str`

    Returns
    -------
    iterator
        a lazy `map` yielding one ``_parse_dump_line`` result per line of
        the reply
    """
    # normalise once so that a non-string version works both in the
    # command text and in the length arithmetic below
    version = str(version)
    cmd = " ".join((
        "dump",
        "--output-ascii", "-",
        "--version-ascii", version,
    ))
    # NOTE(review): the reply presumably opens with a header of 12 bytes
    # plus the version string, which is skipped here -- confirm against
    # the server's output format
    resp = send(host, port, cmd, start=12 + len(version)).strip()
    return map(_parse_dump_line, resp.splitlines())


def _parse_dump_line(line):
    """Break one line of an ASCII diskcache dump into its components.

    The line is expected to hold four space-separated parts: a
    comma-separated header of six fields, two further fields that are
    not used here, and the list of times.

    Parameters
    ----------
    line : str
        a single line of the dump

    Returns
    -------
    tuple
        ``(site, tag, path, dur, ext, segments)`` where ``dur`` is a
        `float`, ``ext`` has any leading dots removed, and ``segments``
        is the result of ``_parse_diskcache_times``

    Raises
    ------
    ValueError
        if the line or its header has the wrong number of fields, or
        the duration is not a valid float
    """
    # only the first three spaces delimit fields; everything after them
    # is the time list, which contains spaces of its own
    header, _modt, _count, timestr = line.strip().split(' ', 3)
    # the fifth header field is not used
    path, site, tag, suffix, _, duration = header.split(',')

    return (
        site,
        tag,
        path,
        float(duration),
        suffix.lstrip("."),
        _parse_diskcache_times(timestr),
    )


def _parse_diskcache_times(times):
    """Parse a diskcache-format list of times as a segmentlist.

    Parameters
    ----------
    times : str
        whitespace-separated integers, optionally wrapped in braces,
        e.g. ``"{0 10 20 30}"``; consecutive values are taken in pairs

    Returns
    -------
    segmentlist
        one `segment` per pair of values, empty if there are no values

    Raises
    ------
    ValueError
        if any token is not a valid integer
    """
    # split() with no argument splits on runs of whitespace and returns
    # an empty list for a blank string, so an empty list ("{}") or
    # repeated spaces no longer produce '' tokens that int() rejects
    values = [int(token) for token in times.strip("{}").split()]
    # an odd number of values leaves a final one-element slice, which is
    # handed to segment() unchanged
    return segmentlist(
        segment(values[i:i + 2]) for i in range(0, len(values), 2)
    )