Source: version-map-updater.ts

/**
 * @author Michael Hasenstein <hasenstein@yahoo.com>
 * @copyright REFINIO GmbH 2017
 * @license CC-BY-NC-SA-2.5; portions MIT License
 * @version 0.0.1
 */

/**
 * **This module provides functions to other ONE core modules. It should not be used by
 * applications.**
 *
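 * VersionMaps are lists of timestamp/hash entries. Each entry is one line consisting of a
 * fixed-length millisecond timestamp and the SHA-256 hash of an object version, separated by
 * ','. Every entry is terminated by a newline.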
 *
 * VersionMaps are stored under the ID hash of the object whose versions they describe.
 *
 * VersionMaps are ordered if the versions are in time (the very last entry always points to the
 * one and only current version). They can be considered unordered if the versions are in space
 * (all versions are current).
 *
 * Also see module map-query
 *
 * ## What is a Version Map
 *
 * Version maps are used to make ONE objects that have properties marked with `isId: true` in
 * their recipe into versioned objects. They are stored under the ID hash of the object and contain
 * newline-separated lines of "Timestamp,Object-Hash\n" as values
 *
 * To find a particular version of an object one provides an ID object, from which an ID hash is
 * calculated, or an ID hash directly, which leads to the version map. The very last entry in
 * the map is the most recently created version of the object.
 *
 * ## Version Map Updates
 *
 * Version map updates are always performed if the object is new. There are two cases: 1) There
 * is no prior version of this object (create new version map), or 2) a prior version exists but
 * this version is new. In both cases version map update policy options are ignored. This means
 * that every version will have at least the version map entry for its original creation.
 *
 * **Version map updates always consist of adding entries**, no previous entries are ever
 * changed, moved or even removed.
 *
 * ## Versions in Time or in Space
 *
 * The version map in ONE can be interpreted by an application in either of two ways (but not
 * both simultaneously):
 *
 * - **Versions in time** means there is exactly one "current" version and all others are
 *   just "historical record".
 *
 * - **Versions in space** means that all versions are equally valid (and current).
 *
 * Applications have to decide which of those two meanings they want for any one versioned ONE
 * object. You cannot have an object with versions in both time and space unless the application
 * provides that interpretation itself using additional meta-data (for example an additional
 * ONE object versioned (in time) with a list of versions (in space) that are considered
 * "current"). We cannot have a situation where the 4th and 6th version (by version map
 * position) are "current" and the other versions are "history".
 *
 * If the versions are in space the positions of the values inside the map don't matter, the set
 * of unique values (object hashes) are all equal even if the positions provide information about
 * the order in which the versions were created.
 *
 * If the versions are in time only the very last position describes the currently valid object,
 * all other positions describe previous versions that are no longer valid. In this case we
 * probably don't want the creation of an already existing object, which is not the "current"
 * one, to skip the version map update. We probably want that object to become the current
 * one. This means we have to append a new version map entry for the object, but we can still
 * skip it if the object already is the value in the last (current) position.
 *
 * ## Available options
 *
 * The implicit condition is that the object already exists or these options are not even
 * considered.
 *
 * The first option is the simplest one:
 *
 * 1. **Skip the version map update &mdash; no additional checks**
 *
 *    This option leads to only the very first creation of any object having an entry in the
 *    version map.
 *
 *    The object hash exists as a value in the version map regardless of position.
 *
 *    This option may not be satisfactory for applications if the version map is used for versions
 *    in time and not versions in space because after creation of an object that can already be
 *    found on disk it is not made the "current" version unless its last version map entry (there
 *    is at least the one from when it was first created) already is at the very end of the version
 *    map file.
 *
 * The other options require that we read the existing version map and perform checks on it.
 * Both options are orthogonal, they serve completely different purposes, and you can use
 * each one alone or combine them.
 *
 * 2. **Skip version map update if the object already is the latest version**
 *
 *    This option is for versioned objects that are versions in time, where there is only one
 *    valid (current) version of the object, and its hash is always found at the very last
 *    position in the version map.
 *
 *    This option looks at the *value* in the key/value pair entry in the version map. It
 *    checks if the object hash exists as a value in the very last position in the version map.
 *
 * ## Cost
 *
 * Version map updates without any options are performed by using an `append` operation on
 * systems where it is available (node.js and react-native), i.e. no reading of the version map
 * file is required for an unconditional update. If you add an option to skip the version map
 * update except for the most basic one *the costs in terms of I/O (and also CPU) will
 * increase* because we have to read the current version map and examine its contents.
 *
 * An even lower cost than an unconditional update is the first option though because we don't
 * need to look at the version map at all. We know if the object already exists after saving the
 * object (or trying to) and with the first option can stop right there. This will generate the
 * lowest cost in terms of CPU, I/O and also space because it requires the least processing,
 * file operations and also the least amount of space since it writes the least amount into
 * version map files.
 *
 * @private
 * @module
 */

/**
 * The return value of version map updater module's function {@link createVersionMapOrAppendEntry}.
 * @private
 * @typedef {object} CreateVersionMapEntryResult
 * @property {FileCreationStatus} status - For the `ALWAYS` policy this is the status reported
 * by the append operation on the version map file, for every other policy it is
 * `CREATION_STATUS.EXISTS`
 * @property {number} timestamp - The timestamp of the version map entry: Either the one of the
 * entry that was just appended, or the one of the already existing entry if nothing was written
 */
export interface CreateVersionMapEntryResult {
    status: FileCreationStatus;
    timestamp: number;
}

/**
 * Result of listing all versions of a versioned ONE object. The version map that describes all
 * versions of a ONE object contains the millisecond timestamp of when the version map was
 * updated, and the hash of the actual object. Using the position index you can then ask for a
 * particular version of the versioned ONE object.
 * @global
 * @typedef {object} VersionMapEntry
 * @template T - The versioned ONE object type the hash refers to, by default any of
 * `OneVersionedObjectTypes`
 * @property {number} timestamp - Millisecond timestamp of when the entry was added to the
 * version map
 * @property {SHA256Hash} hash - Hash of the object version this entry refers to
 */
export interface VersionMapEntry<T extends OneVersionedObjectTypes = OneVersionedObjectTypes> {
    timestamp: number;
    hash: SHA256Hash<T>;
}

/**
 * These are the internal values for the enum `VERSION_UPDATES` defined in and exported by
 * storage-base-common. **Never use the numeric values,** always use the symbols in
 * `VERSION_UPDATES` exported by module `storage-base-common` and re-exported by the public
 * storage API module `storage`.
 *
 * NOTE(review): This module defines and exports a `VERSION_UPDATES` constant itself - confirm
 * which module is the canonical source.
 *
 * NOTE(review): `VERSION_UPDATES` as defined in this module only names the values 1, 2 and 3,
 * but this type also admits 4 and 5. {@link createVersionMapOrAppendEntry} has no dedicated
 * branch for those two values; it appends an entry and reports `CREATION_STATUS.EXISTS` for
 * them. Confirm whether 4 and 5 are still in use.
 * @global
 * @see {@link VERSION_UPDATES}
 * @typedef {(1|2|3|4|5)} VersionUpdatePolicyValues
 */
export type VersionUpdatePolicyValues = 1 | 2 | 3 | 4 | 5;

import {createError} from './errors';
import type {OneVersionedObjectTypes} from './recipes';
import type {FileCreationStatus} from './storage-base-common';
import {CREATION_STATUS, STORAGE} from './storage-base-common';
import {appendUTF8SystemMapFile} from './system/storage-base';
import {serializeWithType} from './util/promise';
import type {SHA256Hash, SHA256IdHash} from './util/type-checks';
import {looksLikeHash} from './util/type-checks';
import {getNthVersionMapEntry, getTimestampForHash} from './version-map-query';

/**
 * This structure provides meaningful names for a set of internal constants used to determine
 * the version map update policy when creating versioned objects.
 *
 * **Always use the names (keys) on this structure, never use the numeric values themselves!**
 *
 * ```
 * {
 *   ALWAYS: 1,
 *   NONE: 2,
 *   NONE_IF_LATEST: 3,
 * }
 * ```
 *
 * How {@link createVersionMapOrAppendEntry} interprets the policies:
 *
 * - `ALWAYS`: A new entry is appended unconditionally, without reading the version map. This
 *   is the default, and it is enforced whenever the object's creation status is "new".
 * - `NONE`: No entry is appended for an object that already existed; the timestamp of its
 *   existing entry is reported instead.
 * - `NONE_IF_LATEST`: No entry is appended if the object's hash already is the last entry of
 *   the version map, otherwise a new entry is appended.
 * @static
 * @type {object}
 * @property {1} ALWAYS
 * @property {2} NONE
 * @property {3} NONE_IF_LATEST
 */
export const VERSION_UPDATES = {
    ALWAYS: 1,
    NONE: 2,
    NONE_IF_LATEST: 3
} as const;

/**
 * The fixed string length of the timestamps written into version map entries.
 *
 * (Millisecond) timestamps converted to strings can be of variable length. We left-pad such
 * timestamp strings with "0" to get a fixed length for easier parsing.
 *
 * The timestamp records when the version map was updated. Its normal length currently is 13
 * characters. 16 characters will be enough until ca. the year 275760, assuming a
 * milliseconds-since-1970 timestamp as we have now
 * (https://stackoverflow.com/a/11526569/544779).
 * @type {number}
 */
export const TIMESTAMP_LENGTH = 16;

/**
 * Minimal sanity check for the two hashes that identify a version map entry: The ID hash, which
 * is the filename of the version map, and the hash of the concrete object version.
 *
 * The purpose is to have a cheap guard against programmer error. This function is only called
 * from other ONE code, so errors are unlikely, but these values are meta-data that ends up in
 * critical system files. Both values are only passed through `looksLikeHash`, nothing else is
 * verified here.
 * @private
 * @param {SHA256IdHash} idHash - The filename of the version map, which is an ID hash
 * @param {SHA256Hash} objectHash - The hash of the object version
 * @returns {undefined} Returns nothing
 * @throws {Error} Throws the `VMU-CP1` error if either value does not pass `looksLikeHash`
 */
function checkHashParameters(idHash: SHA256IdHash, objectHash: SHA256Hash): void {
    // The object hash is only inspected if the ID hash already passed
    const bothLookValid = looksLikeHash(idHash) && looksLikeHash(objectHash);

    if (bothLookValid) {
        return;
    }

    throw createError('VMU-CP1', {idHash, objectHash});
}

/**
 * Records an object version in the version map stored under the object's ID hash, subject to
 * the given version map update policy. The version map file is created by the append operation
 * if it does not exist yet.
 *
 * If `status` is `CREATION_STATUS.NEW` the given policy is ignored and `ALWAYS` is used, so
 * that every object has at least the entry for its original creation.
 *
 * - `ALWAYS`: The entry is appended without reading the map. The returned status is the one
 *   reported by the append operation.
 * - `NONE`: Nothing is written. The timestamp of the object's existing entry is looked up and
 *   returned with status `CREATION_STATUS.EXISTS`.
 * - `NONE_IF_LATEST`: The last entry of the map is read. If it already points to the object its
 *   timestamp is returned, otherwise a new entry is appended. The status is
 *   `CREATION_STATUS.EXISTS` in both cases.
 *
 * The whole operation is run through `serializeWithType` with a key derived from the ID hash,
 * i.e. serialization is limited to operations on one and the same version map.
 * @static
 * @async
 * @param {SHA256IdHash} idHash - The filename of the version map to append to, which should be an
 * ID hash
 * @param {SHA256Hash} objectHash - The hash of the object itself
 * @param {FileCreationStatus} status - The creation status of the object file
 * @param {VersionUpdatePolicyValues} [versionMapUpdatePolicy=VERSION_UPDATES.ALWAYS]
 * @returns {Promise<CreateVersionMapEntryResult>} Returns a {@link CreateVersionMapEntryResult}
 * object.
 * @throws {Error} The inner operation fails with `VMU-CP1` if one of the hashes does not look
 * like a hash (not checked for policy `NONE`), and with `VMU-CAVM1` if the policy is `NONE` but
 * no entry for the object can be found. Since the operation is asynchronous these errors are
 * expected to surface through the returned promise.
 */
export function createVersionMapOrAppendEntry(
    idHash: SHA256IdHash,
    objectHash: SHA256Hash<OneVersionedObjectTypes>,
    status: FileCreationStatus,
    versionMapUpdatePolicy: VersionUpdatePolicyValues = VERSION_UPDATES.ALWAYS
): Promise<CreateVersionMapEntryResult> {
    const updateMap = async (): Promise<CreateVersionMapEntryResult> => {
        // A new object must always get an entry or it could never be found through its ID
        // object, which is why the caller's policy only counts for already existing objects.
        const effectivePolicy =
            status === CREATION_STATUS.NEW ? VERSION_UPDATES.ALWAYS : versionMapUpdatePolicy;

        if (effectivePolicy === VERSION_UPDATES.NONE) {
            const existingTimestamp = await getTimestampForHash(idHash, objectHash);

            // The object existed before, so its creation must have left an entry in the map
            // (new objects are always recorded). Not finding one is an error.
            if (existingTimestamp === undefined) {
                throw createError('VMU-CAVM1', {idHash, objectHash, status});
            }

            return {status: CREATION_STATUS.EXISTS, timestamp: existingTimestamp};
        }

        // Everything below may write to the version map file
        checkHashParameters(idHash, objectHash);

        const now = Date.now();

        // Fixed-width timestamp, comma, hash, newline: Queries rely on the fixed string
        // lengths and on the file ending with a newline, so every character counts.
        const entryLine = String(now).padStart(TIMESTAMP_LENGTH, '0') + ',' + objectHash + '\n';

        // Only this policy needs to read the map. It can only be in effect for an object that
        // already existed (new objects are forced to ALWAYS above), so the map exists as well.
        if (effectivePolicy === VERSION_UPDATES.NONE_IF_LATEST) {
            const latest = await getNthVersionMapEntry(idHash);

            if (latest.hash === objectHash) {
                return {status: CREATION_STATUS.EXISTS, timestamp: latest.timestamp};
            }
        }

        // Appending never requires reading the file, and it creates the file if necessary
        const appendStatus = await appendUTF8SystemMapFile(entryLine, idHash, STORAGE.VMAPS);

        // Only for ALWAYS the map may have been created just now, so only then the status of
        // the append operation is passed on. In all other cases the map already existed.
        return {
            status:
                effectivePolicy === VERSION_UPDATES.ALWAYS ? appendStatus : CREATION_STATUS.EXISTS,
            timestamp: now
        };
    };

    // Guard against "lost writes" caused by interleaved updates of the same version map.
    // Interference with versioned-object creation or retrieval does not matter, so it is
    // sufficient to serialize only this operation, and only per map file.
    return serializeWithType('VersionMap ' + idHash, updateMap);
}