Source: object-graph-bfs-iterator.ts

/* eslint-disable no-await-in-loop */

/**
 * @author Michael Hasenstein <hasenstein@yahoo.com>
 * @copyright REFINIO GmbH 2023
 * @license CC-BY-NC-SA-2.5; portions MIT License
 * @version 0.0.1
 */

/**
 * @module
 */

import {createError} from './errors';
import type {
    IteratorCbParam,
    IteratorCbParamBlobType,
    IteratorCbParamClobType,
    IteratorCbParamIdType,
    IteratorCbParamObjType,
    ObjectGraphIteratorOptions
} from './object-graph-bottom-up-iterator';
import {
    ID_LINK_ITERATION,
    ID_LINK_ITERATION_VALUES,
    STOP_ITERATOR_ERROR
} from './object-graph-bottom-up-iterator';
import {HashLinkType} from './object-to-microdata';
import type {BLOB, CLOB, HashTypes} from './recipes';
import {getObject} from './storage-unversioned-objects';
import {getIdObject} from './storage-versioned-objects';
import {fileSize} from './system/storage-base';
import {getAllHashesForIdHashes, getLatestHashesForIdHashes} from './util/hashes-from-id';
import {findLinkedHashesInObject} from './util/object-find-links';
import {createSimpleQueue} from './util/queue';
import type {SHA256Hash, SHA256IdHash} from './util/type-checks';

/**
 * Fully resolved option set consumed by the internal `iterate` function. Unlike the public
 * options object, every property is required here.
 */
interface IteratorOptions {
    /**
     * Invoked (and awaited) once for every node the first time its hash is taken off the
     * queue. Anything it throws aborts the walk.
     */
    cb: (param: IteratorCbParam) => void | Promise<void>;
    /**
     * One of the values of `ID_LINK_ITERATION`; selects how ID references found in an object
     * are followed (the ID object itself, the latest version, or all versions).
     */
    idLinkIteration: (typeof ID_LINK_ITERATION)[keyof typeof ID_LINK_ITERATION];
    /**
     * When `true`, `fileSize()` is awaited for each visited hash and the result is handed to
     * the callback as `size`; otherwise `size` is `undefined`.
     */
    includeFileSize: boolean;
}

/**
 * Queue entry for a node that is loaded with `getObject()`.
 */
interface QItemObj {
    type: typeof HashLinkType.OBJ;
    hash: SHA256Hash;
    // Hashes of the ancestors leading from the root to this node's parent; empty for the root.
    path: ReadonlyArray<SHA256Hash | SHA256IdHash>;
}

/**
 * Queue entry for an ID reference that is loaded with `getIdObject()`. Such entries are only
 * enqueued when the ID link iteration mode is `ID_LINK_ITERATION.ID_OBJECT`.
 */
interface QItemId {
    type: typeof HashLinkType.ID;
    hash: SHA256IdHash;
    // Hashes of the ancestors leading from the root to this node's parent.
    path: ReadonlyArray<SHA256Hash | SHA256IdHash>;
}

/**
 * Queue entry for a BLOB link. BLOB nodes are reported to the callback but never loaded or
 * searched for further links.
 */
interface QItemBlob {
    type: typeof HashLinkType.BLOB;
    hash: SHA256Hash<BLOB>;
    // Hashes of the ancestors leading from the root to the object that links to this BLOB.
    path: ReadonlyArray<SHA256Hash | SHA256IdHash>;
}

/**
 * Queue entry for a CLOB link. CLOB nodes are reported to the callback but never loaded or
 * searched for further links.
 */
interface QItemClob {
    type: typeof HashLinkType.CLOB;
    hash: SHA256Hash<CLOB>;
    // Hashes of the ancestors leading from the root to the object that links to this CLOB.
    path: ReadonlyArray<SHA256Hash | SHA256IdHash>;
}

/**
 * Any entry that can sit in the BFS work queue; the `type` property is the discriminant that
 * tells which kind of hash `hash` holds.
 */
type QItem = QItemObj | QItemId | QItemBlob | QItemClob;

/**
 * Walks the graph reachable from `root` in breadth-first order using a FIFO queue and calls
 * `cb` once for every hash the first time it is dequeued. Hashes that were already visited are
 * skipped, so shared sub-graphs and cycles are reported only once.
 *
 * For BLOB and CLOB nodes the callback receives `{type, hash, path, size}`. For object and ID
 * object nodes the object is loaded, its links are extracted with `findLinkedHashesInObject`,
 * and the callback receives `{type, hash, obj, links, path, size}`. The children are enqueued
 * only after the callback has returned.
 *
 * @param root - Hash of the object the iteration starts at.
 * @param options - Callback and iteration settings, see {@link IteratorOptions}.
 * @returns Resolves when the queue has been drained.
 * @throws The error created by `createError('OGI-WALK1')` if `root` is `undefined`, and the
 * one created by `createError('OGI-WALK2', …)` if `idLinkIteration` is not contained in
 * `ID_LINK_ITERATION_VALUES`. Errors thrown by the callback or by the storage functions are
 * not caught here and propagate to the caller.
 */
async function iterate(
    root: SHA256Hash,
    {cb, idLinkIteration, includeFileSize}: IteratorOptions
): Promise<void> {
    // Defensive check of the start hash: the static type rules out undefined, but the value
    // could still be missing at runtime if the function is called from untyped code.
    if (root === undefined) {
        throw createError('OGI-WALK1');
    }

    if (!ID_LINK_ITERATION_VALUES.has(idLinkIteration)) {
        throw createError('OGI-WALK2', {all: ID_LINK_ITERATION_VALUES, val: idLinkIteration});
    }

    const visited = new Set<SHA256Hash<HashTypes> | SHA256IdHash>();
    const q = createSimpleQueue<Readonly<QItem>>();

    q.enqueue({
        type: HashLinkType.OBJ,
        hash: root,
        path: []
    });

    while (!q.isEmpty()) {
        const {type, hash, path} = q.dequeue();

        // The same hash can be enqueued several times (by different parents). Only the first
        // occurrence that comes off the queue is processed.
        if (visited.has(hash)) {
            continue;
        }

        visited.add(hash);

        const size = includeFileSize ? await fileSize(hash) : undefined;

        // BLOBs and CLOBs are leaves: report them and move on without loading anything.
        if (type === HashLinkType.BLOB || type === HashLinkType.CLOB) {
            await cb({
                type,
                hash,
                path,
                size
            } as const as IteratorCbParamBlobType | IteratorCbParamClobType);
            continue;
        }

        // From here: type is one of "obj" or "id"

        const obj = type === HashLinkType.OBJ ? await getObject(hash) : await getIdObject(hash);
        const links = findLinkedHashesInObject(obj);

        await cb({
            type,
            hash,
            obj,
            links,
            path,
            size
        } as const as IteratorCbParamObjType | IteratorCbParamIdType);

        // All children of this node have the same path (ancestors plus this node). Build it
        // once, after the callback has run, and share the read-only array between all child
        // queue entries instead of copying the ancestor list again for every single child.
        const childPath: ReadonlyArray<SHA256Hash | SHA256IdHash> = [...path, hash];

        for (const ref of links.references) {
            q.enqueue({
                type: HashLinkType.OBJ,
                hash: ref,
                path: childPath
            });
        }

        if (idLinkIteration === ID_LINK_ITERATION.ID_OBJECT) {
            // Follow ID references to the ID objects themselves.
            for (const idRef of links.idReferences) {
                q.enqueue({
                    type: HashLinkType.ID,
                    hash: idRef,
                    path: childPath
                });
            }
        } else if (idLinkIteration === ID_LINK_ITERATION.LATEST_VERSION) {
            // Resolve each ID reference to concrete object hashes and follow those.
            const hashesForIdReferences = await getLatestHashesForIdHashes(links.idReferences);

            for (const {hash: childHash} of hashesForIdReferences) {
                q.enqueue({
                    type: HashLinkType.OBJ,
                    hash: childHash,
                    path: childPath
                });
            }
        } else if (idLinkIteration === ID_LINK_ITERATION.ALL_VERSIONS) {
            const hashesForIdReferences = await getAllHashesForIdHashes(links.idReferences);

            for (const {hash: childHash} of hashesForIdReferences) {
                q.enqueue({
                    type: HashLinkType.OBJ,
                    hash: childHash,
                    path: childPath
                });
            }
        }

        for (const ref of links.blobs) {
            q.enqueue({
                type: HashLinkType.BLOB,
                hash: ref,
                path: childPath
            });
        }

        for (const ref of links.clobs) {
            q.enqueue({
                type: HashLinkType.CLOB,
                hash: ref,
                path: childPath
            });
        }
    }
}

/**
 * Iterates over the object graph reachable from `hash` in breadth-first order and calls `cb`
 * for every node that is visited.
 *
 * The callback can end the iteration early by throwing an error whose `name` property equals
 * `STOP_ITERATOR_ERROR`. That is not treated as a failure: the returned promise still
 * resolves, with `false` instead of `true`.
 *
 * @param hash - Hash of the object the iteration starts at.
 * @param cb - Function called (and awaited) for each visited node.
 * @param options - Optional settings. `idLinkIteration` defaults to
 * `ID_LINK_ITERATION.ID_OBJECT`, `includeFileSize` defaults to `false`.
 * @returns `true` if the iteration ran to completion, `false` if the callback stopped it.
 * @throws Anything else thrown by the callback or by the iteration itself is rethrown
 * unchanged.
 */
export async function iterateGraphFromObjectBFS(
    hash: SHA256Hash,
    cb: (param: IteratorCbParam) => void | Promise<void>,
    {
        idLinkIteration = ID_LINK_ITERATION.ID_OBJECT,
        includeFileSize = false
    }: ObjectGraphIteratorOptions = {}
): Promise<boolean> {
    try {
        await iterate(hash, {
            cb,
            idLinkIteration,
            includeFileSize
        });
    } catch (err: unknown) {
        // Anything can be thrown, including null or undefined. Only read "name" from values
        // that allow property access, so that a nullish throw is rethrown as-is instead of
        // being replaced by a TypeError raised right here.
        const errName =
            err !== null && err !== undefined ? (err as {name?: unknown}).name : undefined;

        // If the callback function throws this particular error the iterator just stops, but
        // the overall promise status remains a success.
        if (errName === STOP_ITERATOR_ERROR) {
            return false;
        }

        throw err;
    }

    return true;
}