strcomp.h | documentation |
#charset "us-ascii"
#pragma once

/*
 *   Copyright 2000, 2006 Michael J. Roberts.
 *
 *   This file is part of TADS 3.
 *
 *   This header defines the StringComparator intrinsic class.
 */

/* include our base class definition */
#include "systype.h"

/*
 *   StringComparator intrinsic class.  This class provides support for
 *   dictionaries based on complex string matches, including:
 *
 *   - truncation (matching an input word to a dictionary word when the
 *   input word is at least some minimum length, and matches the
 *   dictionary word up to the full length of the input word, but the
 *   input word is shorter than the dictionary word);
 *
 *   - case folding (matching upper-case letters to lower-case letters and
 *   vice versa); and
 *
 *   - character equivalences (for matching accented characters to
 *   non-accented equivalents, or matching special characters to
 *   multi-character equivalents, such as matching a German "ess-zet"
 *   ("sharp-s") ligature to a pair of lower-case "s" characters in input).
 *
 *   NOTE(review): the order in which the methods are declared below
 *   presumably has to agree with the VM's implementation of this
 *   metaclass - confirm before reordering or inserting declarations.
 */
intrinsic class StringComparator 'string-comparator/030000': Object
{
    /*
     *   Constructor:
     *
     *   new StringComparator(truncLen, caseSensitive, mappings)
     *
     *   truncLen = the minimum truncation length.  An input string that
     *   matches a dictionary string up to the full length of the input
     *   string, and is shorter than the dictionary string but at least this
     *   truncation length, will match the dictionary string.  If truncLen is
     *   zero or nil, no truncated matches are allowed.
     *
     *   caseSensitive = true if matches are to be sensitive to case, nil if
     *   not.  If this parameter is nil, then an upper-case letter in an
     *   input string will match a lower-case letter in a dictionary string,
     *   and vice versa.  If this parameter is true, each character must
     *   match exactly.
     *
     *   mappings is a list of equivalent character mappings.  Each mapping
     *   in the list is a sublist in this format:
     *
     *.    ['dictChar', 'inputString', ucFlags, lcFlags]
     *
     *   'dictChar' is a one-character string giving the character to be
     *   mapped in dictionary strings.  'inputString' is a string of one or
     *   more characters that is to be considered equivalent to the
     *   dictionary character when the inputString appears in an input
     *   string.  ucFlags and lcFlags are integer values giving the flag
     *   values to bitwise-OR into the results when this mapping is used to
     *   match an upper-case or lower-case input string, respectively.
     *
     *   For example, a mapping to allow the German ess-zet character (whose
     *   Unicode value is 0x00DF) to match "ss" sequences in input strings,
     *   with no result flag additions, would look like this:
     *
     *.    ['\u00DF', 'ss', 0, 0]
     *
     *   Only one mapping is allowed for each dictionary character.  If more
     *   than one mapping is given for a single dictionary character, only
     *   the last one in the list is actually used.
     *
     *   Flag values 0x0001 through 0x0080 are reserved for use by
     *   StringComparator itself.  Callers are free to use any flag values
     *   0x0100 and above.  Note that the system flag values are used as
     *   bitwise OR'd values, so callers should not define any flag values
     *   'f' for which (f & 0xFF) != 0.
     */

    /*
     *   Calculate a hash value.
     *
     *   str = the string to hash.
     *
     *   Returns an integer giving the hash value for the given string.
     */
    calcHash(str);

    /*
     *   Match two values.
     *
     *   inputStr = the input string.
     *
     *   dictStr = the dictionary string.
     *
     *   Each character in the dictionary string can match the corresponding
     *   input string character exactly (with or without case sensitivity,
     *   as specified in our constructor), or can match the equivalence
     *   mapping sequence for the dictionary character.
     *
     *   The return value is zero if the values do not match.  If the values
     *   do match, the return value is a non-zero integer, which will be a
     *   bitwise OR combination of all of the flag values applicable to the
     *   match.  This is a combination of pre-defined flag values (see the
     *   StrCompXxx definitions in this header) and any flag values from
     *   equivalence mappings.  The flag values from ALL equivalence
     *   mappings that were actually used to make the match are included.
     */
    matchValues(inputStr, dictStr);
}

/*
 *   Pre-defined matchValues result flags.  These are set when applicable in
 *   the return value of matchValues().
 *
 *   This class reserves flag values 0x0001 through 0x0080.  Callers should
 *   not use any flag values with any of these bits set.  Even though we
 *   don't define values for all of these flags currently, the ones we don't
 *   use are reserved for possible use in future versions; to ensure
 *   compatibility with future versions, callers should not use any of the
 *   reserved flags for their own purposes.
 */

/*
 *   Match - this flag is set in the return code for all matching strings.
 *   (This flag isn't as useless as it might sound; its purpose is to ensure
 *   that the return value from matchValues() is non-zero for all matches,
 *   even when no other flag values are applicable.)
 */
#define StrCompMatch 0x0001

/*
 *   Case folding - this flag is set when the two values match, but one or
 *   more characters differ in case (in other words, an upper-case letter in
 *   the input string matched a lower-case letter in the dictionary string,
 *   or vice versa).
 */
#define StrCompCaseFold 0x0002

/*
 *   Truncation - this flag is set when the input string is shorter than the
 *   dictionary string (but matches the dictionary string completely up to
 *   the input string's full length, and is at least as long as the
 *   truncation length specified in the constructor).  This flag can only be
 *   returned when truncation is allowed (as indicated by a non-zero
 *   truncation length in the constructor), because truncated strings will
 *   never match at all when truncation isn't allowed.
 */
#define StrCompTrunc 0x0004
TADS 3 Library Manual
Generated on 5/16/2013 from TADS version 3.1.3