python - 使用 numpy.fromfile 每隔 n 个元素加载一个元素

Question

我想使用 np.fromfile 从二进制文件中加载数据。该文件包含一个 3D 数组，我只关心每一帧中的某个单元格。

# Read width*height*frames int32 values from the file into one array.
x = np.fromfile(file, dtype='int32', count=width*height*frames)
# Keep flat index 5 and then every (width*height)-th value after it.
vals = x[5::width*height]

上面的代码理论上可以工作，但是我的文件非常大，把它全部读入 x 会导致内存错误。有没有办法让 fromfile 一开始就只读取 vals 所需的那些元素？

score 0 · Accepted Answer

这可能非常低效，但它有效：

import numpy as np

def read_in_chunks(fn, offset, step, steps_per_chunk, dtype=np.int32):
    """Read every ``step``-th element of a binary file, chunk by chunk.

    The file is consumed in chunks of ``steps_per_chunk * step`` elements so
    that only one chunk is held in memory at a time; from each chunk the
    elements at ``offset, offset + step, ...`` are kept.

    Parameters
    ----------
    fn : path of the binary file to read.
    offset : index of the wanted element inside each stride. It is applied
        to every chunk independently, so the result matches
        ``whole_file[offset::step]`` only when ``0 <= offset < step``.
    step : stride, in elements, between two wanted elements.
    steps_per_chunk : number of strides read from disk per iteration.
    dtype : element type stored in the file.

    Returns
    -------
    A 1-D array of ``dtype`` holding the selected elements (empty when the
    file yields none).
    """
    out = []
    # The context manager guarantees the handle is released even when
    # np.fromfile raises.
    with open(fn, 'rb') as fd:
        while True:
            block = np.fromfile(fd, dtype=dtype, count=steps_per_chunk * step)
            picked = block[offset::step]
            if picked.size == 0:
                break
            # ``picked`` is a strided view that would keep the whole chunk
            # buffer alive; copying lets each chunk be freed right away.
            out.append(picked.copy())
    if not out:
        # Nothing to concatenate: return an explicit empty array of the
        # requested dtype.
        return np.empty(0, dtype=dtype)
    return np.concatenate(out)

# Round-trip demo: dump the integers 0..99999 to disk, then read back every
# 100th value starting at index 2, six strides per chunk.
x = np.arange(100000)
x.tofile('test.bin')
b = read_in_chunks('test.bin', offset=2, step=100, steps_per_chunk=6, dtype=int)
print(b)

更新：

这是一个使用 seek 跳过不需要的数据的方法。它对我有效，但完全没有经过充分测试。

def skip_load(fn, offset, step, dtype=float, n=10**100):
    """Load every ``step``-th element of a binary file by seeking.

    Only the wanted elements are read; everything in between is skipped
    with ``seek``, so memory use is proportional to the result size.

    Parameters
    ----------
    fn : a path (``str``, ``bytes`` or ``os.PathLike``) or an already open
        binary file object. A file object is read from its current position
        and left open, positioned at the next element that would be read,
        so a later call can resume from there.
    offset : index of the wanted element inside each stride, in elements.
    step : stride between two wanted elements, in elements.
    dtype : element type stored in the file. The default is the builtin
        ``float``, which is what the removed ``np.float`` alias referred to.
    n : maximum number of elements to load.

    Returns
    -------
    A 1-D array of ``dtype`` built with ``np.frombuffer`` from the bytes
    that were read.
    """
    import os  # local import: only needed to recognise path-like arguments

    elsize = np.dtype(dtype).itemsize
    # From here on, step and offset are expressed in bytes.
    step *= elsize
    offset *= elsize
    opened_here = isinstance(fn, (str, bytes, os.PathLike))
    fd = open(fn, 'rb') if opened_here else fn
    try:
        out = []
        pos = fd.tell()
        # Smallest byte position >= pos that is congruent to offset mod step.
        target = ((pos - offset - 1) // step + 1) * step + offset
        fd.seek(target)
        while n > 0:
            if fd.tell() != target:
                # The seek did not land where requested; stop here.
                break
            item = fd.read(elsize)
            n -= 1
            if len(item) < elsize:
                # Short read (end of file): discard the partial element.
                break
            out.append(item)
            target += step
            fd.seek(target)
        return np.frombuffer(b"".join(out), dtype=dtype)
    finally:
        # Close only handles this function opened; a caller-supplied file
        # object remains the caller's responsibility.
        if opened_here:
            fd.close()

python - 使用 numpy.fromfile 每隔 n 个元素加载一个元素

1 回答 1

Related

Reference