Source: crdt/LWWSet.ts

// noinspection RedundantIfStatementJS

/**
 * @author Maximilian Kallert <max@refinio.net>
 * @author Eduard Reimer <eduard@refinio.net>
 * @copyright REFINIO GmbH 2020
 * @license CC-BY-NC-SA-2.5; portions MIT License
 * @version 1.0.0
 */

/**
 * This module implements the CRDT LWWSet algorithm.
 * @module
 */

import {convertValue} from '../object-to-microdata';
import type {RecipeRule} from '../recipes';
import {constructItemRuleFromListRule} from '../util/recipe-checks';
import {isListItemType, ruleHasItemType} from '../util/type-checks';
import {isInteger, isObject} from '../util/type-checks-basic';
import type {CRDTImplementation} from './CRDTImplementation';

interface MetaListEntry {
    timestamp: number;
    value: unknown;
}

type MetaListEntryWithHash = MetaListEntry & {
    hash: string;
};

/**
 * The structure of the metadata of this crdt.
 */
interface LWWSetMetaData {
    addList: MetaListEntry[];
    removeList: MetaListEntry[];
}

// Sort the arrays by timestamp and content
function comparator(a: MetaListEntryWithHash, b: MetaListEntryWithHash): number {
    const value = a.timestamp - b.timestamp;

    if (value === 0) {
        if (a.hash > b.hash) {
            return 1;
        } else if (a.hash < b.hash) {
            return -1;
        } else {
            return 0;
        }
    }

    return value;
}

/**
 * Function that verifies the array types of the metadata.
 * @param {unknown} data - The data to check for compatibility.
 * @returns {boolean}
 */
function isLWWSetMetadataFastCheck(data: unknown): data is LWWSetMetaData {
    if (!isObject(data)) {
        return false;
    }

    if (!data || !Array.isArray(data.addList) || !Array.isArray(data.removeList)) {
        return false;
    }

    return true;
}

/**
 * Function that verifies that the metadata has the correct format, including each array element.
 *
 * @param {any} data - The data to check for compatibility.
 * @returns {boolean}
 */
function isLWWSetMetadata(data: unknown): data is LWWSetMetaData {
    if (!isLWWSetMetadataFastCheck(data)) {
        return false;
    }

    if (!data.addList.every(addEntry => isInteger(addEntry.timestamp))) {
        return false;
    }

    if (!data.removeList.every(removeEntry => isInteger(removeEntry.timestamp))) {
        return false;
    }

    return true;
}

/**
 * Implementation of the last writer wins set (LWW-Set)
 *
 * See paper:
 * A comprehensive study of Convergent and Commutative Replicated Data Types
 * Marc Shapiro, Nuno PreguiƧa, Carlos Baquero, Marek Zawirski
 * p. 24, Section 3.3.3
 * https://hal.inria.fr/inria-00555588/document
 */
export class LWWSet implements CRDTImplementation {
    private static computeAliveElements(
        addList: MetaListEntryWithHash[],
        removeList: MetaListEntryWithHash[]
    ): unknown[] {
        // This maps tha hash to elements.
        const map = new Map<string, MetaListEntry>();

        // Fill the map with all added values.
        for (const elem of addList) {
            map.set(elem.hash, {
                timestamp: elem.timestamp,
                value: elem.value
            });
        }

        // Remove the elements that have newer remove elements
        for (const elem of removeList) {
            const latestAdd = map.get(elem.hash);

            if (latestAdd && elem.timestamp > latestAdd.timestamp) {
                map.delete(elem.hash);
            }
        }

        return [...map.values()].map(e => e.value);
    }

    /**
     * Merge sort and remove duplicates.
     * Unique means that the timestamp and the hash are the same.
     * @param {MetaListEntryWithHash[]} list1
     * @param {MetaListEntryWithHash[]} list2
     * @returns {MetaListEntryWithHash[]}
     * @private
     */
    private static mergeSortDeduplicateList(
        list1: MetaListEntryWithHash[],
        list2: MetaListEntryWithHash[]
    ): MetaListEntryWithHash[] {
        // Merge and transform the arrays
        const mergedList = [...list1, ...list2];

        mergedList.sort(comparator);

        // Remove duplicates
        return mergedList.filter((elem, index) => {
            // Accept the element if it is the last element.
            if (index + 1 >= mergedList.length) {
                return true;
            }

            // Otherwise compare it with the next and skip if it is the same
            return comparator(elem, mergedList[index + 1]) !== 0;
        });
    }

    /**
     * Adds a hash to all entries that can used to identify equal content.
     *
     * Note: At the moment this isn't really a hash, just the stringified content
     *       because hashing is expensive and asynchronous on browsers. Perhaps
     *       we should rename it. Hashing is CPU
     *
     * @param {MetaListEntry[]} list - List for which to add the hash
     * @param {Readonly<RecipeRule>} valueRule - Rule used for serialization
     * @returns {MetaListEntryWithHash[]}
     * @private
     */
    private static addHash(
        list: MetaListEntry[],
        valueRule: Readonly<RecipeRule>
    ): MetaListEntryWithHash[] {
        return list.map(a => ({
            timestamp: a.timestamp,
            value: a.value,
            hash: convertValue(valueRule, a.value)
        }));
    }

    /**
     * Removes the hash that was added with addHash
     *
     * @param {MetaListEntryWithHash[]} list - The list from which to remove the hashes.
     * @returns {MetaListEntry[]}
     * @private
     */
    private static removeHash(list: MetaListEntryWithHash[]): MetaListEntry[] {
        return list.map(a => ({
            timestamp: a.timestamp,
            value: a.value
        }));
    }

    generateRecipeRules(rule: RecipeRule, path: string): RecipeRule[] {
        if (ruleHasItemType(rule) && !isListItemType(rule.itemtype)) {
            throw new Error(
                `generateRecipeRules: You cannot use the LWWSet implementation if the data type is
                not a set: ${rule}`
            );
        }

        return [
            {
                itemprop: 'addList',
                itemtype: {
                    type: 'bag',
                    item: {
                        type: 'object',
                        rules: [
                            {
                                itemprop: 'timestamp',
                                itemtype: {type: 'number'}
                            },
                            {
                                itemprop: 'value',
                                inheritFrom: {rule: path, extract: 'CollectionItemType'}
                            }
                        ]
                    }
                }
            },
            {
                itemprop: 'removeList',
                itemtype: {
                    type: 'bag',
                    item: {
                        type: 'object',
                        rules: [
                            {
                                itemprop: 'timestamp',
                                itemtype: {type: 'number'}
                            },
                            {
                                itemprop: 'value',
                                inheritFrom: {rule: path, extract: 'CollectionItemType'}
                            }
                        ]
                    }
                }
            }
        ];
    }

    async generateMetaData(
        dataOld: unknown,
        dataNew: unknown,
        metadataOld: unknown,
        rule: Readonly<RecipeRule>
    ): Promise<LWWSetMetaData> {
        // #### STEP 1: Sanity checks and rule adaption ####

        // If data old is undefined it means, that we don't have any data in it, it is
        // equivalent to an empty array.
        if (dataOld === undefined) {
            // eslint-disable-next-line no-param-reassign
            dataOld = [];

            if (metadataOld === undefined) {
                // eslint-disable-next-line no-param-reassign
                metadataOld = {
                    addList: [],
                    removeList: []
                };
            }
        }

        if (!isLWWSetMetadata(metadataOld)) {
            throw new Error(`Old medatata has invalid format: ${metadataOld}`);
        }

        if (!Array.isArray(dataOld)) {
            throw new Error(`The LWWSet needs dataOld to be of type array ${dataOld}`);
        }

        if (!Array.isArray(dataNew)) {
            throw new Error(`The LWWSet needs dataNew to be of type array ${dataNew}`);
        }

        // Create maps that have a unique identifier based on the content as key
        // and the value itself as value. They are used to determine added / removed elements

        // #### STEP 2: Convert to 'string content' -> 'js object content' ####

        const ruleNoList = constructItemRuleFromListRule(rule);

        // Leads to: [[string, content], ...]
        const dataOldSerialized: Array<[string, any]> = dataOld.map(elem => [
            convertValue(ruleNoList, elem),
            elem
        ]);
        const dataNewSerialized: Array<[string, any]> = dataNew.map(elem => [
            convertValue(ruleNoList, elem),
            elem
        ]);

        // Leads to a map: string -> content
        const dataOldMap = new Map(dataOldSerialized);
        const dataNewMap = new Map(dataNewSerialized);

        // #### STEP 3: Find elements that were added / removed and add this information ####

        // Fill the output maps based on the calculated differences
        const now = Date.now();
        const metadataNew: LWWSetMetaData = {
            addList: [...metadataOld.addList],
            removeList: [...metadataOld.removeList]
        };

        // Add added elements to addList
        for (const addedElement of dataNewSerialized.filter(elem => !dataOldMap.has(elem[0]))) {
            metadataNew.addList.push({
                timestamp: now,
                value: addedElement[1]
            });
        }

        // Add deleted elements to removeList
        for (const deletedElement of dataOldSerialized.filter(elem => !dataNewMap.has(elem[0]))) {
            metadataNew.removeList.push({
                timestamp: now,
                value: deletedElement[1]
            });
        }

        return metadataNew;
    }

    async mergeMetaData(
        objLatestVersion: unknown,
        metadataLatestVersion: unknown,
        objToMerge: unknown,
        metadataToMerge: unknown,
        rule: Readonly<RecipeRule>
    ): Promise<{metadata: LWWSetMetaData; data: unknown}> {
        // #### STEP 1: Sanity checks and rule adaption ####

        // If data old is undefined it means, that we don't have any data in it, it is
        // equivalent to an empty array.
        if (objLatestVersion === undefined) {
            // eslint-disable-next-line no-param-reassign
            objLatestVersion = [];

            if (metadataLatestVersion === undefined) {
                // eslint-disable-next-line no-param-reassign
                metadataLatestVersion = {
                    addList: [],
                    removeList: []
                };
            }
        }

        if (!isLWWSetMetadata(metadataLatestVersion)) {
            throw new Error(`Old medatata has invalid format: ${metadataLatestVersion}`);
        }

        if (!isLWWSetMetadata(metadataToMerge)) {
            throw new Error(`Merge metadata has invalid format: ${metadataLatestVersion}`);
        }

        if (!Array.isArray(objLatestVersion)) {
            throw new Error(`The LWWSet needs dataOld to be of type array: ${objLatestVersion}`);
        }

        if (!Array.isArray(objToMerge)) {
            throw new Error(`The LWWSet needs dataMerge to be of type array: ${objToMerge}`);
        }

        const ruleNoList = constructItemRuleFromListRule(rule);

        // #### STEP 2: Calculate new metadata by merging the old ones ####
        const mergedAddList = LWWSet.mergeSortDeduplicateList(
            LWWSet.addHash(metadataLatestVersion.addList, ruleNoList),
            LWWSet.addHash(metadataToMerge.addList, ruleNoList)
        );
        const mergedRemoveList = LWWSet.mergeSortDeduplicateList(
            LWWSet.addHash(metadataLatestVersion.removeList, ruleNoList),
            LWWSet.addHash(metadataToMerge.removeList, ruleNoList)
        );

        // #### STEP 3: Generate the new data object ####
        const data = LWWSet.computeAliveElements(mergedAddList, mergedRemoveList);

        // #### STEP 4: return metadata and data ####
        return {
            metadata: {
                addList: LWWSet.removeHash(mergedAddList),
                removeList: LWWSet.removeHash(mergedRemoveList)
            },
            data: data
        };
    }
}