Source: util/recipe-checks.ts

/**
 * @author Michael Hasenstein <hasenstein@yahoo.com>
 * @copyright REFINIO GmbH 2018
 * @license CC-BY-NC-SA-2.5; portions MIT License
 * @version 0.0.1
 */

/**
 * A module with functions that perform runtime type checks. There are very simple checks such
 * as `isString()`, there also are complex checks.
 * @module
 */

/**
 * Type of values in a dynamically created Map of possible rule property names as keys and the
 * properties of those properties. This is used to check RecipeRule objects in Recipe objects if
 * they adhere to the rules for the respective object type. Creating checks dynamically based on
 * those rules is better than hard-coding everything, in case of changes.
 * @private
 * @typedef {object} CachedRuleProperties
 * @property {ValueType['type']} valueType The `type` name of the rule property's value type (only
 * the name, not the whole ValueType object)
 * @property {boolean} optional Whether a rule property is optional
 */
interface CachedRuleProperties {
    valueType: ValueType['type'];
    optional: boolean;
}

import {createError} from '../errors';
import type {
    ArrayValue,
    BagValue,
    IntegerValue,
    MapValue,
    NumberValue,
    ObjectValue,
    Recipe,
    RecipeRule,
    ReferenceToIdValue,
    ReferenceToObjValue,
    RuleInheritanceWithOptions,
    SetValue,
    StringValue,
    ValueType
} from '../recipes';
import {CORE_RECIPES} from '../recipes';
import {stringifyWithCircles} from './sorted-stringify';
import {isListItemType, ruleHasItemType} from './type-checks';
import {getObjTypeName, isInteger, isNumber, isObject, isString} from './type-checks-basic';

/**
 * Validates the options of a "string" value type and revives its `regexp` option.
 *
 * If `regexp` is a string (the form it has after the recipe was loaded from microdata, where it
 * was stringified) it is replaced in place by a `RegExp` built from that string. A value that
 * already is a `RegExp`, or no `regexp` at all, is accepted unchanged. Anything else is rejected
 * instead of being silently coerced into a pattern by the `RegExp` constructor.
 * @private
 * @param {ValueType} typeObj - The value type object; treated as a StringValue and mutated when
 * the `regexp` option is revived
 * @param {number} index - Position of the rule, only used for the error
 * @returns {undefined}
 * @throws {Error} URC-ERECI22 if `regexp` is set but is neither a RegExp nor a string
 * @throws {SyntaxError} Thrown by the RegExp constructor if the string is not a valid pattern
 */
function ensureString(typeObj: ValueType, index: number): void {
    // This function is called when type is "string"
    const castedStringValue = typeObj as StringValue;

    // Widened to "unknown" so that the runtime checks below are made against the actual value
    // and not against whatever the static type promises.
    const regexp: unknown = castedStringValue.regexp;

    if (regexp === undefined || getObjTypeName(regexp) === 'RegExp') {
        return;
    }

    // Only a string can be a stringified RegExp. Numbers, plain objects etc. would be turned
    // into nonsensical patterns such as /[object Object]/ by "new RegExp()".
    if (typeof regexp !== 'string') {
        throw createError('URC-ERECI22', {index, castedStringValue});
    }

    // Revive the RegExp
    castedStringValue.regexp = new RegExp(regexp);
}

/**
 * Validates the optional `min` and `max` bounds of an "integer" value type.
 * @private
 * @param {ValueType} typeObj - The value type object; treated as an IntegerValue
 * @param {number} index - Position of the rule, only used for the error
 * @returns {undefined}
 * @throws {Error} URC-ERECI30-A if both bounds are set and `max` is not greater than `min`
 * @throws {Error} URC-ERECI30-B if a bound is set but is not an integer
 */
function ensureInteger(typeObj: ValueType, index: number): void {
    const integerType = typeObj as IntegerValue;
    const {max: upper, min: lower} = integerType;

    // With both bounds present "max" has to be strictly greater than "min"
    if (upper !== undefined && lower !== undefined && !(upper > lower)) {
        throw createError('URC-ERECI30-A', {index, thing: integerType});
    }

    // Every bound that is present has to be an integer
    if (
        (upper !== undefined && !isInteger(upper)) ||
        (lower !== undefined && !isInteger(lower))
    ) {
        throw createError('URC-ERECI30-B', {index, thing: integerType});
    }
}

/**
 * Validates the optional `min` and `max` bounds of a "number" value type.
 * @private
 * @param {ValueType} typeObj - The value type object; treated as a NumberValue
 * @param {number} index - Position of the rule, only used for the error
 * @returns {undefined}
 * @throws {Error} URC-ERECI30-C if both bounds are set and `max` is not greater than `min`
 * @throws {Error} URC-ERECI30-D if a bound is set but is not a number
 */
function ensureNumber(typeObj: ValueType, index: number): void {
    const numberType = typeObj as NumberValue;
    const {max: upper, min: lower} = numberType;

    // With both bounds present "max" has to be strictly greater than "min"
    if (upper !== undefined && lower !== undefined && !(upper > lower)) {
        throw createError('URC-ERECI30-C', {index, thing: numberType});
    }

    // Every bound that is present has to be a number
    if (
        (upper !== undefined && !isNumber(upper)) ||
        (lower !== undefined && !isNumber(lower))
    ) {
        throw createError('URC-ERECI30-D', {index, thing: numberType});
    }
}

/**
 * Validates the `allowedTypes` option of a "referenceToObj" value type. An array found in that
 * option is first replaced in place by a Set with the same entries.
 * @private
 * @param {ValueType} typeObj - The value type object; treated as a ReferenceToObjValue and
 * mutated when `allowedTypes` is converted
 * @param {number} index - Position of the rule, only used for the error
 * @returns {undefined}
 * @throws {Error} URC-ERECI14 if `allowedTypes` is not a Set or contains a non-string entry
 */
function ensureReferenceToObj(typeObj: ValueType, index: number): void {
    const refType = typeObj as ReferenceToObjValue;

    // REVIVER HACK
    // The Set is transformed into an array when the recipe is converted to microdata, so an
    // array is turned back into a Set here.
    if (Array.isArray(refType.allowedTypes)) {
        refType.allowedTypes = new Set(refType.allowedTypes);
    }

    const allowed = refType.allowedTypes;

    // Entries should be <OneObjectTypeNames | '*'>, but only "is it a string" is tested to
    // enable "lazy recipe registration", when a type is mentioned that is not yet registered.
    const isSetOfStrings =
        allowed instanceof Set && !Array.from(allowed).some(entry => !isString(entry));

    if (isSetOfStrings) {
        return;
    }

    throw createError('URC-ERECI14', {
        index,
        ref: refType.allowedTypes,
        oName: getObjTypeName(refType.allowedTypes)
    });
}

/**
 * Validates the `allowedTypes` option of a "referenceToId" value type. An array found in that
 * option is first replaced in place by a Set with the same entries.
 * @private
 * @param {ValueType} typeObj - The value type object; treated as a ReferenceToIdValue and
 * mutated when `allowedTypes` is converted
 * @param {number} index - Position of the rule, only used for the error
 * @returns {undefined}
 * @throws {Error} URC-ERECI14 if `allowedTypes` is not a Set or contains a non-string entry
 */
function ensureReferenceToId(typeObj: ValueType, index: number): void {
    const refType = typeObj as ReferenceToIdValue;

    // REVIVER HACK
    // The Set is transformed into an array when the recipe is converted to microdata, so an
    // array is turned back into a Set here.
    if (Array.isArray(refType.allowedTypes)) {
        refType.allowedTypes = new Set(refType.allowedTypes);
    }

    const allowed = refType.allowedTypes;

    // Entries should be <OneObjectTypeNames | '*'>, but only "is it a string" is tested to
    // enable "lazy recipe registration", when a type is mentioned that is not yet registered.
    const isSetOfStrings =
        allowed instanceof Set && !Array.from(allowed).some(entry => !isString(entry));

    if (isSetOfStrings) {
        return;
    }

    throw createError('URC-ERECI14', {
        index,
        ref: refType.allowedTypes,
        oName: getObjTypeName(refType.allowedTypes)
    });
}

/**
 * Value type names that must not be used for the keys of a "map" value type.
 *
 * Map object keys are not allowed to be complex (i.e. an object type). In Javascript those
 * would be based on being pointers to objects in memory, but - if we used them - they would be
 * saved as JSON string, making them content-based. That would make it impossible to reliably
 * reproduce the original situation in memory.
 * @type {Set<string>}
 */
const BANNED_MAP_KEY_TYPES = new Set([
    'array',
    'bag',
    'map',
    'object',
    'set',
    'stringifiable'
]);

/**
 * Validates a "map" value type: both `key` and `value` must be present, the key type must not
 * be one of the banned complex types, and both nested value types are checked in turn.
 * @private
 * @param {ValueType} typeObj - The value type object; treated as a MapValue
 * @param {number} index - Position of the rule, only used for the error
 * @returns {undefined}
 * @throws {Error} URC-ERECI31 if `key` or `value` is missing or the key type is banned
 */
function ensureMap(typeObj: ValueType, index: number): void {
    const mapType = typeObj as MapValue;
    const {key, value} = mapType;

    if (key === undefined || value === undefined) {
        throw createError('URC-ERECI31', {index, map: mapType});
    }

    if (BANNED_MAP_KEY_TYPES.has(key.type)) {
        throw createError('URC-ERECI31', {index, type: key.type});
    }

    // Key first, then value - both are value types with options of their own
    checkItemTypeField(key, index);
    checkItemTypeField(value, index);
}

/**
 * Validates a "bag" value type: `item` must be present and is checked as a value type itself.
 * @private
 * @param {ValueType} typeObj - The value type object; treated as a BagValue
 * @param {number} index - Position of the rule, only used for the error
 * @returns {undefined}
 * @throws {Error} URC-ERECI31 if `item` is missing
 */
function ensureBag(typeObj: ValueType, index: number): void {
    const bagType = typeObj as BagValue;
    const {item} = bagType;

    if (item === undefined) {
        throw createError('URC-ERECI31', {index, list: bagType});
    }

    checkItemTypeField(item, index);
}

/**
 * Validates an "array" value type: `item` must be present and is checked as a value type
 * itself.
 * @private
 * @param {ValueType} typeObj - The value type object; treated as an ArrayValue
 * @param {number} index - Position of the rule, only used for the error
 * @returns {undefined}
 * @throws {Error} URC-ERECI31 if `item` is missing
 */
function ensureArray(typeObj: ValueType, index: number): void {
    const arrayType = typeObj as ArrayValue;
    const {item} = arrayType;

    if (item === undefined) {
        throw createError('URC-ERECI31', {index, list: arrayType});
    }

    checkItemTypeField(item, index);
}

/**
 * Validates a "set" value type: `item` must be present and is checked as a value type itself.
 * @private
 * @param {ValueType} typeObj - The value type object; treated as a SetValue
 * @param {number} index - Position of the rule, only used for the error
 * @returns {undefined}
 * @throws {Error} URC-ERECI31 if `item` is missing
 */
function ensureSet(typeObj: ValueType, index: number): void {
    const setType = typeObj as SetValue;
    const {item} = setType;

    if (item === undefined) {
        throw createError('URC-ERECI31', {index, list: setType});
    }

    checkItemTypeField(item, index);
}

/**
 * Validates an "object" value type by running every rule of its nested `rules` array through
 * ensureRecipeRule().
 *
 * A missing or malformed `rules` value is deliberately not reported here. In this file this
 * function is only reached (through checkItemTypeField()) from ensureRecipeRule(), which
 * afterwards inspects every `rules` property itself and throws the dedicated errors
 * URC-ERECI25 (value is undefined) and URC-ERECI23 (not a non-empty array). Calling
 * `forEach()` unconditionally would raise a bare TypeError before those errors can be reported.
 * @private
 * @param {ValueType} typeObj - The value type object; treated as an ObjectValue
 * @param {number} index - Position of the rule, passed on for error reporting
 * @returns {undefined}
 * @throws {Error} Whatever ensureRecipeRule() throws for one of the nested rules
 */
function ensureObject(typeObj: ValueType, index: number): void {
    // This function is called when type is "object"
    const castedObjectValue = typeObj as ObjectValue;

    // Widened to "unknown": the static type promises an array, the runtime value may differ
    const nestedRules: unknown = castedObjectValue.rules;

    if (!Array.isArray(nestedRules)) {
        return;
    }

    nestedRules.forEach(rule => ensureRecipeRule(typeObj.type, rule, index));
}

/**
 * Ensurer for value types that have no options, i.e. whose object consists of nothing but the
 * "type" property:
 * - boolean
 * - referenceToBlob
 * - referenceToClob
 * - stringifiable
 * @private
 * @param {ValueType} typeObj - The value type object to check
 * @param {number} index - Position of the rule, only used for the error
 * @returns {undefined}
 * @throws {Error} URC-ERECI33 if the object does not have exactly one enumerable key or has no
 * own "type" property
 */
function ensureOptionFreeType(typeObj: ValueType, index: number): void {
    const ownKeys = Object.keys(typeObj);
    const hasOnlyTypeProp =
        ownKeys.length === 1 && Object.prototype.hasOwnProperty.call(typeObj, 'type');

    if (hasOnlyTypeProp) {
        return;
    }

    throw createError('URC-ERECI33', {index, type: typeObj.type});
}

/**
 * This map contains ensurer functions for every type in the {@link RecipeRule.itemtype}
 *
 * Keys are the `type` names of the value types, values are the functions that validate the
 * options of that type. The types that have no options all share `ensureOptionFreeType`.
 *
 * The insertion order is observable: checkItemTypeField() lists the keys, in this order, in the
 * payload of the URC-ERECI19 error for an unknown type.
 * @type {Map<string, function(ValueType, number):void>}
 */
const ITEM_TYPE_CHECKER = new Map([
    ['string', ensureString],
    ['integer', ensureInteger],
    ['number', ensureNumber],
    ['boolean', ensureOptionFreeType],
    ['referenceToObj', ensureReferenceToObj],
    ['referenceToId', ensureReferenceToId],
    ['referenceToClob', ensureOptionFreeType],
    ['referenceToBlob', ensureOptionFreeType],
    ['map', ensureMap],
    ['bag', ensureBag],
    ['array', ensureArray],
    ['set', ensureSet],
    ['object', ensureObject],
    ['stringifiable', ensureOptionFreeType]
]);

/**
 * Runs checks on the {@link RecipeRule.itemtype} field by dispatching to the ensurer function
 * registered for the value type's `type` name.
 * @param {ValueType} typeObj - The value type object to check
 * @param {number} index - Position of the rule, passed on for error reporting
 * @returns {undefined}
 * @throws {Error} URC-ERECI19 if no ensurer is registered for `typeObj.type`
 */
function checkItemTypeField(typeObj: ValueType, index: number): void {
    const ensureOptions = ITEM_TYPE_CHECKER.get(typeObj.type);

    if (ensureOptions !== undefined) {
        ensureOptions(typeObj, index);
        return;
    }

    // Unknown type name: report it together with all the names that would have been valid
    throw createError('URC-ERECI19', {
        index,
        jsTypeString: Array.from(ITEM_TYPE_CHECKER.keys()),
        valueType: typeObj.type
    });
}

/**
 * Recursively walks a value and collects every object that has a key named `originalKey`.
 *
 * Arrays are walked item by item. Of an object all keys are visited in order: the object itself
 * is added to `matches` for the matching key, the values of all other keys are walked in turn.
 * The value stored under the matching key is not descended into. Anything that is not an object
 * (including null and undefined) is ignored.
 * @param {Record<string, unknown>} thing - The value to search
 * @param {string} originalKey - The property name to look for
 * @param {unknown[]} matches - Accumulator the found objects are appended to
 * @returns {unknown[]} The `matches` array that was passed in (or a new one)
 */
function collectRulesInItemtype(
    thing: Record<string, unknown>,
    originalKey: string,
    matches: unknown[] = []
): unknown[] {
    // Primitives, null and undefined have nothing to search in
    if (thing === null || typeof thing !== 'object') {
        return matches;
    }

    if (Array.isArray(thing)) {
        for (const entry of thing) {
            collectRulesInItemtype(entry, originalKey, matches);
        }

        return matches;
    }

    for (const propName of Object.keys(thing)) {
        if (propName !== originalKey) {
            collectRulesInItemtype(
                thing[propName] as Record<string, unknown>,
                originalKey,
                matches
            );
            continue;
        }

        matches.push(thing);
    }

    return matches;
}

/**
 * A dual-use map of all property names that can be used in a RecipeRule object.
 * 1. Existence of the key is used to determine whether a given new RecipeRule only uses
 *    valid properties
 * 2. The two cached properties are used to determine if the RecipeRule definition for a new ONE
 *    object property has all required rule properties, and that they are of the expected type
 *
 * The only property every RecipeRule object *must* have is `itemprop`. Without anything else
 * this describes a string property. All other RecipeRule properties allow further customization
 * of the ONE object property they describe.
 *
 * The map is computed once, when this module is loaded, from the "Recipe" recipe found in
 * CORE_RECIPES. Module loading fails with one of the URC-RRP* errors if that recipe does not
 * have the expected shape.
 *
 * @example
 *
 * Map {
 *      'itemprop' => { valueType: 'string', optional: false },
 *      'optional' => { valueType: 'boolean', optional: true },
 *      'isId' => { valueType: 'boolean', optional: true },
 *      'type' => { valueType: 'stringifiable', optional: true },
 *      'inheritFrom' => { valueType: 'stringifiable', optional: true },
 *      'object' => { valueType: 'string', optional: true }
 * }
 * @private
 * @type {Map<string,CachedRuleProperties>}
 */
const RECIPE_RULE_PROPERTIES: Map<string, CachedRuleProperties> = (() => {
    // Dynamic creation of runtime constants for the RecipeRule type-checker: Try to infer the
    // properties dynamically from the recipe instead of having to hard-code them into the
    // function's code.
    const recipeRecipeObj = CORE_RECIPES.find(r => r.name === 'Recipe');

    // For TS: We know we hard-coded that recipe in core-types.js - and if it is not there a
    // crash further down is okay anyway.
    if (recipeRecipeObj === undefined) {
        throw createError('URC-RRP1');
    }

    // "Meta": Recipes are themselves described in a "Recipe" object. It has a property called
    // "rule" that describes an array of nested objects - the RecipeRule rules at the heart of each
    // recipe.
    const recipeRulesRule = recipeRecipeObj.rule.find(rule => rule.itemprop === 'rule');

    // NOTE(review): same error code as the "recipe not found" case above - confirm this is
    // intended
    if (recipeRulesRule === undefined) {
        throw createError('URC-RRP1');
    }

    if (recipeRulesRule.itemtype === undefined) {
        throw createError('URC-RRP3');
    }

    // The following three checks establish, step by step, that the "rule" property is a list
    // type whose items are nested objects with a "rules" array of their own. Their order
    // matters, each check relies on what the previous one established.
    if (!('item' in recipeRulesRule.itemtype)) {
        throw createError('URC-RRP4');
    }

    if (recipeRulesRule.itemtype.item.type !== 'object') {
        throw createError('URC-RRP4');
    }

    if (!('rules' in recipeRulesRule.itemtype.item)) {
        throw createError('URC-RRP4');
    }

    return new Map(
        recipeRulesRule.itemtype.item.rules.map(rule => [
            rule.itemprop,
            {
                // A rule without "itemtype" describes a string property
                valueType: rule.itemtype ? rule.itemtype.type : 'string',
                optional: rule.optional === true // Be explicit to convert undefined to false
            }
        ])
    );
})(); // IIFE

/**
 * Type guard for `RuleInheritanceWithOptions` objects: an object whose `rule` is a string with
 * at least two "."-separated parts and whose `extract` is the string "MapItemType" or
 * "CollectionItemType".
 * @param {*} thing - An argument that can be of any type
 * @returns {boolean} True if the argument is a `RuleInheritanceWithOptions` object, false if not
 */
export function isRuleInheritanceWithOptions(thing: unknown): thing is RuleInheritanceWithOptions {
    return (
        isObject(thing) &&
        isString(thing.rule) &&
        thing.rule.split('.').length >= 2 &&
        isString(thing.extract) &&
        ['MapItemType', 'CollectionItemType'].includes(thing.extract)
    );
}

/**
 * Checks that no "itemprop" name is used twice within one array of rules. This cannot be done
 * in ensureRecipeRule() because it requires knowledge of all rules simultaneously.
 *
 * ONE object property names are stored in microdata attribute "itemprop" and set in the
 * {@link RecipeRule} property with the same name. On the same level there may be no duplication
 * of "itemprop" names, but duplication between different nested objects and different nesting
 * levels is allowed. An example is the itemprop "name", which is a top level property in
 * {@link Recipe} objects but {@link RecipeRule} objects nested inside may have a "name"
 * property too (the top level name is for the recipe, the ones inside rule objects are for the
 * rule they are in).
 * @private
 * @param {RecipeRule[]} rules - The rules of one level
 * @returns {undefined}
 * @throws {Error} URC-CIT1 for the first itemprop that occurs a second time
 */
function checkItempropsForDuplicates(rules: readonly RecipeRule[]): void {
    // Deliberately flat: only the given level is looked at, nested rules are not followed
    const knownItemprops = new Set<string>();

    for (const {itemprop} of rules) {
        const sizeBefore = knownItemprops.size;

        knownItemprops.add(itemprop);

        // The Set did not grow, so the name was already in it
        if (knownItemprops.size === sizeBefore) {
            throw createError('URC-CIT1', {itemprop});
        }
    }
}

/**
 * Checks a single rule of a recipe, including the rules of nested objects found anywhere in its
 * "itemtype", and returns it typed as RecipeRule.
 *
 * The checks, in the order they are performed:
 * 1. The rule is an object, and if it is a nested rule it has no "isId" property
 * 2. It only has properties known from RECIPE_RULE_PROPERTIES and has all mandatory ones
 * 3. "itemprop" contains none of the characters "<", ">", "." and no whitespace
 * 4. "itemtype", if set, passes checkItemTypeField()
 * 5. Every object in the rule that has a "rules" property: the value is a non-empty array that
 *    was not seen before (circle detection), each of its rules passes this function, and the
 *    object is not an ID property at the same time
 * 6. The "itemprop" names of each such "rules" array are unique
 * 7. "inheritFrom", if set, is a path with at least two parts or a valid
 *    RuleInheritanceWithOptions object
 * @private
 * @param {string} objType - The type name of the Recipe the rule belongs to. In this function it
 * is only passed on to the recursive calls.
 * @param {*} thing - The particular rule to check
 * @param {number} index - The position in the array of rules that is the entire recipe, used
 * to produce more useful error messages. Nested rules are reported with the index of the top
 * level rule they are in.
 * @param {Set<RecipeRule[]>} [seenRulesArrays=new Set()] - To detect infinite recursion if
 * nested object definitions form a circle. The given Set is added to by this function.
 * @param {boolean} [isNested=false] - True when called for a rule of a nested object
 * @returns {RecipeRule} Returns the RecipeRule ONE object
 * @throws {Error} Throws an Error when there is an error in the recipe that our
 * (incomplete) tests detect
 */
function ensureRecipeRule(
    objType: string,
    thing: unknown,
    index: number,
    seenRulesArrays: Set<RecipeRule[]> = new Set(),
    isNested: boolean = false
): RecipeRule {
    if (!isObject(thing)) {
        throw createError('URC-ERECI1', {type: thing === null ? 'null' : typeof thing});
    }

    // All objects inside this rule (including the rule itself) that have a "rules" property.
    // They are collected up front and examined further below.
    const collectedRules: unknown[] = [];
    collectRulesInItemtype(thing, 'rules', collectedRules);

    // Only top level properties can be ID properties. They cannot be inherited either.
    // Not very flexible but since it complicates the parser, as long as this feature is not
    // urgently needed we place this restriction.
    if (isNested && thing.isId !== undefined) {
        throw createError('URC-ERECI2', {index});
    }

    // Make sure there are no additional properties that we don't know (and therefore would not
    // otherwise check)
    for (const prop of Object.keys(thing)) {
        if (!RECIPE_RULE_PROPERTIES.has(prop)) {
            throw createError('URC-ERECI3', {index, prop});
        }
    }

    // Checks based on dynamically created meta information from the rules for RecipeRules.
    // Only the "optional" flag is used here, the cached "valueType" is not checked.
    for (const [prop, {optional}] of RECIPE_RULE_PROPERTIES) {
        if (thing[prop] === undefined) {
            if (optional) {
                continue;
            }

            // This only catches "itemprop", the only mandatory RecipeRule property (right now)
            throw createError('URC-ERECI4', {index, prop});
        }
    }

    // Very basic check: We only guard against HTML-breaking characters "<", ">", whitespace and
    // ".". The dot is used in some pieces of code to express a path to an itemprop through a
    // nested object, where it is used to separate the "itemprop"s of each level.
    // NOTE(review): "itemprop" is not verified to be a string before this test, the RegExp
    // test coerces whatever value it is given to a string - confirm that this is acceptable.
    if (/[<>.\s]+/.test(thing.itemprop)) {
        throw createError('URC-ERECI8', {index});
    }

    if (thing.itemtype) {
        checkItemTypeField(thing.itemtype, index);
    }

    // The following comment belongs to the first check inside the loop (URC-ERECI25):
    // While this could be intentional, since it could also be an error we make the choice to
    // FORBID intentionally setting this value to undefined. We had the problem in one.recipes
    // where every ONE object recipe is in its own module, and the CommonJS module node.js
    // version of the code ended up with "undefined" for imported rules due to circular imports
    // (an issue with the timing-dependent require() imports).

    for (const collectedRule of collectedRules) {
        const ruleThing = collectedRule as Record<string, unknown>;

        // The property "rules" exists but its value is undefined
        if (
            Reflect.getOwnPropertyDescriptor(ruleThing, 'rules') !== undefined &&
            ruleThing.rules === undefined
        ) {
            throw createError('URC-ERECI25', {index, itemprop: ruleThing.itemprop});
        }

        // INFINITE RECURSION
        if (Array.isArray(ruleThing.rules) && seenRulesArrays.has(ruleThing.rules)) {
            throw createError('URC-ERECI26', {index, rule: ruleThing.rules});
        }

        // RECURSION: Rules for a nested object or an array of nested objects
        if (Array.isArray(ruleThing.rules)) {
            // This adds to the Set that was passed in, so the remaining iterations of this loop
            // see the array too
            seenRulesArrays.add(ruleThing.rules);
            // Parameter "seenRulesArrays": Each item creates a different branch and therefore needs a
            // copy, otherwise the branches would detect objects in another branch which do not form a
            // circle.
            ruleThing.rules.forEach(rule =>
                ensureRecipeRule(objType, rule, index, new Set(seenRulesArrays), true)
            );
        }

        // "rules" has to be an array with at least one rule
        if (
            ruleThing.rules !== undefined &&
            !(Array.isArray(ruleThing.rules) && ruleThing.rules.length > 0)
        ) {
            throw createError('URC-ERECI23', {index, rule: ruleThing.rules});
        }

        // An object that has nested rules cannot be an ID property
        if (Array.isArray(ruleThing.rules) && ruleThing.isId) {
            throw createError('URC-ERECI24', {
                index,
                thingKeys: Object.keys(ruleThing)
            });
        }
    }

    // AFTER all rules have been checked individually, perform these checks that require
    // looking at all rules together. Those functions presume that the rules are correct
    // individually (i.e. properties and their types are correct).
    for (const rules of collectedRules.map(collectedRule => (collectedRule as ObjectValue).rules)) {
        checkItempropsForDuplicates(rules);
    }

    // This property defines a path starting with a recipe followed by a list of at least one
    // itemprop within that recipe, or multiple to find an itemprop on a deeper level of a
    // nested object. The path separator is "."
    // We don't check that the path leads to a recipe in memory _right now_, because it is okay
    // if the target is not there at this point. It just has to be there when this rule that
    // inherits from another rule, possibly in another recipe, is actually being used.
    // That's why we only perform a very rudimentary test to catch the most basic error(s).
    if (isString(thing.inheritFrom) && thing.inheritFrom.split('.').length < 2) {
        throw createError('URC-ERECI27', {index, inheritFrom: thing.inheritFrom});
    }

    // Similar to when it is a string, but it can also be an object that has options in addition
    // to the path-string (see isRuleInheritanceWithOptions())
    if (isObject(thing.inheritFrom) && !isRuleInheritanceWithOptions(thing.inheritFrom)) {
        throw createError('URC-ERECI28', {index, inheritFrom: thing.inheritFrom});
    }

    return thing as unknown as RecipeRule;
}

/**
 * A test performed internally before adding new recipes for ONE objects.
 *
 * Checks that the argument is an object with `$type$` "Recipe", a usable `name` and a `rule`
 * array, then runs every rule through ensureRecipeRule(). When a rule check fails with an Error
 * the stringified recipe is appended to the error's message before it is rethrown.
 * @static
 * @param {*} thing
 * @returns {Recipe} Returns the Recipe ONE object
 * @throws {Error} Throws an Error when there is an error in the recipe that our
 * (incomplete) tests detect
 */
export function ensureRecipeObj(thing: unknown): Recipe {
    if (!isObject(thing)) {
        throw createError('URC-ERCP1', {type: thing === null ? 'null' : typeof thing});
    }

    if (thing.$type$ !== 'Recipe') {
        throw createError('URC-ERCP2', {type: thing.$type$});
    }

    const recipeName = thing.name;

    // Very basic check: We only guard against HTML-breaking characters "<", ">", whitespace and
    // ".". The dot is used in some pieces of code to express a path to an itemprop through a
    // nested object, where it is used to separate the "itemprop"s of each level.
    // The JSON.stringify() comparison additionally rejects names containing characters that
    // JSON has to escape (e.g. quotes or backslashes).
    if (
        !isString(recipeName) ||
        recipeName === '' ||
        /[<>.\s]+/.test(recipeName) ||
        JSON.stringify(recipeName) !== `"${recipeName}"`
    ) {
        throw createError('URC-ERCP3', {recipeName});
    }

    if (!Array.isArray(thing.rule)) {
        throw createError('URC-ERCP4', {recipeName});
    }

    // NOTE(review): the top level "rule" array itself is not passed to
    // checkItempropsForDuplicates() here - confirm that duplicate top level itemprops are
    // caught elsewhere.

    // Catch errors to add the whole recipe object to the error output, otherwise the caller
    // will only see the specific rule that caused the error and may have to guess to which
    // recipe it belongs.
    try {
        thing.rule.forEach((rule: unknown, index: number) =>
            ensureRecipeRule(recipeName, rule, index)
        );
    } catch (err) {
        // No need to createError() here, this already is one - it has this function in its
        // stack trace too. But the message thus far only has the one offending rule.
        // Only Error objects are amended: anything else that may have been thrown has no
        // message to extend, and trying to would replace the original failure with a TypeError.
        if (err instanceof Error) {
            err.message += ';\n  Recipe: ' + stringifyWithCircles(thing);
        }

        throw err;
    }

    return thing as unknown as Recipe;
}

/**
 * Construct a recipeRule based on the `itemtype.item` from the list rule. This is used for
 * constructing CRDT types.
 *
 * The returned rule has the same `itemprop` as the list rule and the list's item type as its
 * `itemtype`. No other property of the list rule is carried over.
 * @param {RecipeRule} rule - A rule whose itemtype is a list type
 * @returns {RecipeRule}
 * @throws {Error} If the rule has no itemtype or its itemtype is not a list type
 */
export function constructItemRuleFromListRule(rule: RecipeRule): RecipeRule {
    if (!ruleHasItemType(rule) || !isListItemType(rule.itemtype)) {
        // Interpolating the rule object directly would only print "[object Object]", so it is
        // serialized first. Sets and RegExps are converted because JSON would render both as
        // "{}".
        let ruleText: string;

        try {
            ruleText = JSON.stringify(rule, (_key, value) => {
                if (value instanceof Set) {
                    return Array.from(value);
                }

                return value instanceof RegExp ? String(value) : value;
            });
        } catch (ignored) {
            // E.g. a circular structure - fall back to the plain string conversion
            ruleText = String(rule);
        }

        throw new Error(`You cannot extract an item rule from a no list rule: ${ruleText}`);
    }

    return {
        itemprop: rule.itemprop,
        itemtype: rule.itemtype.item
    };
}