Fixed value tests

hed-standard · Sep 21, 2024 · a5ddb21 · a5ddb21
1 parent c93d310
commit a5ddb21
Show file tree

Hide file tree

Showing 16 changed files with 2,409 additions and 2,055 deletions.
diff --git a/.gitignore b/.gitignore
@@ -123,3 +123,5 @@ Desktop.ini
 schema_cache_test/
 hed_cache/
 spec_tests/hed-specification/tests
+spec_tests/hed-examples
+spec_tests/*.json
diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py
@@ -51,6 +51,15 @@ def val_error_element_deprecatedr(tag):
 def val_error_invalid_tag_character(tag, problem_tag):
     return f"Invalid character '{problem_tag}' in tag '{tag}'"
 
+@hed_tag_error(ValidationErrors.INVALID_VALUE_CLASS_CHARACTER, has_sub_tag=True,
+               actual_code=ValidationErrors.CHARACTER_INVALID)
+def val_error_INVALID_VALUE_CLASS_CHARACTER(tag, problem_tag, value_class):
+    return f"Invalid character '{problem_tag}' in tag '{tag}' for value class '{value_class}'"
+
+@hed_tag_error(ValidationErrors.INVALID_VALUE_CLASS_VALUE, has_sub_tag=True,
+               actual_code=ValidationErrors.VALUE_INVALID)
+def val_error_INVALID_VALUE_CLASS_VALUE(tag, problem_tag, value_class):
+    return f"'{tag}' has an invalid value portion for value class '{value_class}'"
 
 @hed_error(ValidationErrors.TILDES_UNSUPPORTED)
 def val_error_tildes_not_supported(source_string, char_index):
@@ -124,8 +133,11 @@ def val_error_no_valid_tag(tag, problem_tag):
 
 
 @hed_tag_error(ValidationErrors.VALUE_INVALID)
-def val_error_no_value(tag):
-    return f"'{tag}' has an invalid value portion."
+def val_error_no_value(tag, value_class=''):
+    if value_class:
+        return f"'{tag}' has an invalid value portion because it is not a valid '{value_class}' value."
+    else:
+        return f"'{tag}' has an invalid value portion."
 
 
 @hed_error(ValidationErrors.HED_MISSING_REQUIRED_COLUMN, default_severity=ErrorSeverity.WARNING)

diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py
@@ -88,7 +88,8 @@ class ValidationErrors:
     DUPLICATE_COLUMN_BETWEEN_SOURCES = "DUPLICATE_COLUMN_BETWEEN_SOURCES"
     HED_BLANK_COLUMN = "HED_BLANK_COLUMN"
 
-
+    INVALID_VALUE_CLASS_CHARACTER = 'INVALID_VALUE_CLASS_CHARACTER'
+    INVALID_VALUE_CLASS_VALUE = 'INVALID_VALUE_CLASS_VALUE'
     INVALID_TAG_CHARACTER = 'invalidTagCharacter'
 
     CURLY_BRACE_UNSUPPORTED_HERE = "CURLY_BRACE_UNSUPPORTED_HERE"

diff --git a/hed/schema/schema_validation_util.py b/hed/schema/schema_validation_util.py
@@ -1,164 +1,164 @@
-"""Utilities used in HED validation/loading using a HED schema."""
-
-from hed.errors.error_reporter import ErrorHandler
-from hed.errors.error_types import SchemaWarnings
-from hed.schema import hed_schema_constants as constants
-from hed.schema.hed_schema_constants import character_types
-from hed.schema.hed_schema import HedSchema
-
-
-def validate_schema_tag_new(hed_entry):
-    """ Check tag entry for capitalization and illegal characters.
-
-    Parameters:
-        hed_entry (HedTagEntry): A single tag entry
-
-    Returns:
-        list: A list of all formatting issues found in the term. Each issue is a dictionary.
-    """
-    issues_list = []
-    hed_term = hed_entry.short_tag_name
-    # Any # terms will have already been validated as the previous entry.
-    if hed_term == "#":
-        return issues_list
-
-    if hed_term and hed_term[0] and not (hed_term[0].isdigit() or hed_term[0].isupper()):
-        issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION,
-                                                 hed_term, char_index=0, problem_char=hed_term[0])
-    issues_list += validate_schema_term_new(hed_entry, hed_term)
-    return issues_list
-
-
-def validate_schema_term_new(hed_entry, hed_term=None):
-    """ Check the term for invalid character issues
-
-    Parameters:
-        hed_entry (HedSchemaEntry): A single schema entry
-        hed_term (str or None): Use instead of hed_entry.name if present.
-
-    Returns:
-        list: A list of all formatting issues found in the term. Each issue is a dictionary.
-    """
-    if not hed_term:
-        hed_term = hed_entry.name
-    issues_list = []
-    # todo: potentially optimize this someday, as most values are the same
-    character_set = get_allowed_characters_by_name(["name"] +
-                                                   hed_entry.attributes.get("allowedCharacter", "").split(","))
-    indexes = get_problem_indexes(hed_term, character_set)
-    for char, index in indexes:
-        issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG,
-                                                 hed_term, char_index=index, problem_char=char)
-    return issues_list
-
-
-def validate_schema_description_new(hed_entry):
-    """ Check the description of the entry for invalid character issues
-
-    Parameters:
-        hed_entry (HedSchemaEntry): A single schema entry
-
-    Returns:
-        list: A list of all invalid characters found in description. Each issue is a dictionary.
-    """
-    if not hed_entry.description:
-        return []
-    issues_list = []
-    character_set = get_allowed_characters_by_name(["text", "comma"])
-    indexes = get_problem_indexes(hed_entry.description, character_set)
-    # Kludge, just get short name here if we have it for error reporting
-    name = hed_entry.name
-    if hasattr(hed_entry, "short_tag_name"):
-        name = hed_entry.short_tag_name
-    for char, index in indexes:
-
-        issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC,
-                                                 hed_entry.description, name, problem_char=char, char_index=index)
-    return issues_list
-
-
-def schema_version_for_library(hed_schema, library_name):
-    """ Given the library name and hed schema object, return the version
-
-    Parameters:
-        hed_schema (HedSchema): the schema object
-        library_name (str or None): The library name you're interested in.  "" for the standard schema.
-
-    Returns:
-        version_number (str): The version number of the given library name.  Returns None if unknown library_name.
-    """
-    if library_name is None:
-        library_name = ""
-    names = hed_schema.library.split(",")
-    versions = hed_schema.version_number.split(",")
-    for name, version in zip(names, versions):
-        if name == library_name:
-            return version
-
-    # Return the partnered schema version
-    if library_name == "" and hed_schema.with_standard:
-        return hed_schema.with_standard
-    return None
-
-
-def get_allowed_characters(value_classes):
-    """Returns the allowed characters in a given container of value classes
-
-    Parameters:
-        value_classes(list of HedSchemaEntry): A list of schema entries that should have the allowedCharacter attribute
-
-    Returns:
-        character_set(set): The set of all characters from the given classes
-    """
-    # This could be pre-computed
-    character_set_names = []
-
-    for value_class in value_classes:
-        allowed_types = value_class.attributes.get(constants.HedKey.AllowedCharacter, "").split(",")
-        character_set_names.extend(allowed_types)
-
-    character_set = get_allowed_characters_by_name(character_set_names)
-    # for now, just always allow these special cases(it's validated extensively elsewhere)
-    character_set.update("#/")
-    return character_set
-
-
-def get_allowed_characters_by_name(character_set_names):
-    """Returns the allowed characters from a list of character set names
-
-    Note: "nonascii" is a special case "character" that can be included as well
-
-    Parameters:
-        character_set_names(list of str): A list of character sets to allow.  See hed_schema_constants.character_types
-
-    Returns:
-        character_set(set): The set of all characters from the names
-    """
-    character_set = set()
-    for name in character_set_names:
-        if name in character_types and name != "nonascii":
-            character_set.update(character_types[name])
-        else:
-            character_set.add(name)
-    return character_set
-
-
-def get_problem_indexes(validation_string, character_set, index_adj=0):
-    """Finds indexes with values not in character set
-
-    Parameters:
-        validation_string(str): The string to check characters in
-        character_set(set): the list of valid characters(or the value "nonascii" as a set entry)
-        index_adj(int): the value to adjust the reported indices by, if this isn't the start of a string.
-
-    Returns:
-        index_list(tuple of (str, int)): The list of problematic characters and indices
-    """
-    if not character_set:
-        return []
-
-    indexes = [(char, index + index_adj) for index, char in enumerate(validation_string) if char not in character_set]
-    if "nonascii" in character_set:
-        indexes = [(char, index) for char, index in indexes if not ord(char) > 127]
-
-    return indexes
+"""Utilities used in HED validation/loading using a HED schema."""
+
+from hed.errors.error_reporter import ErrorHandler
+from hed.errors.error_types import SchemaWarnings
+from hed.schema import hed_schema_constants as constants
+from hed.schema.hed_schema_constants import character_types
+from hed.schema.hed_schema import HedSchema
+
+
+def validate_schema_tag_new(hed_entry):
+    """ Check tag entry for capitalization and illegal characters.
+
+    Parameters:
+        hed_entry (HedTagEntry): A single tag entry
+
+    Returns:
+        list: A list of all formatting issues found in the term. Each issue is a dictionary.
+    """
+    issues_list = []
+    hed_term = hed_entry.short_tag_name
+    # Any # terms will have already been validated as the previous entry.
+    if hed_term == "#":
+        return issues_list
+
+    if hed_term and hed_term[0] and not (hed_term[0].isdigit() or hed_term[0].isupper()):
+        issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION,
+                                                 hed_term, char_index=0, problem_char=hed_term[0])
+    issues_list += validate_schema_term_new(hed_entry, hed_term)
+    return issues_list
+
+
+def validate_schema_term_new(hed_entry, hed_term=None):
+    """ Check the term for invalid character issues
+
+    Parameters:
+        hed_entry (HedSchemaEntry): A single schema entry
+        hed_term (str or None): Use instead of hed_entry.name if present.
+
+    Returns:
+        list: A list of all formatting issues found in the term. Each issue is a dictionary.
+    """
+    if not hed_term:
+        hed_term = hed_entry.name
+    issues_list = []
+    # todo: potentially optimize this someday, as most values are the same
+    character_set = get_allowed_characters_by_name(["name"] +
+                                                   hed_entry.attributes.get("allowedCharacter", "").split(","))
+    indexes = get_problem_indexes(hed_term, character_set)
+    for char, index in indexes:
+        issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG,
+                                                 hed_term, char_index=index, problem_char=char)
+    return issues_list
+
+
+def validate_schema_description_new(hed_entry):
+    """ Check the description of the entry for invalid character issues
+
+    Parameters:
+        hed_entry (HedSchemaEntry): A single schema entry
+
+    Returns:
+        list: A list of all invalid characters found in description. Each issue is a dictionary.
+    """
+    if not hed_entry.description:
+        return []
+    issues_list = []
+    character_set = get_allowed_characters_by_name(["text", "comma"])
+    indexes = get_problem_indexes(hed_entry.description, character_set)
+    # Kludge, just get short name here if we have it for error reporting
+    name = hed_entry.name
+    if hasattr(hed_entry, "short_tag_name"):
+        name = hed_entry.short_tag_name
+    for char, index in indexes:
+
+        issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC,
+                                                 hed_entry.description, name, problem_char=char, char_index=index)
+    return issues_list
+
+
+def schema_version_for_library(hed_schema, library_name):
+    """ Given the library name and hed schema object, return the version
+
+    Parameters:
+        hed_schema (HedSchema): the schema object
+        library_name (str or None): The library name you're interested in.  "" for the standard schema.
+
+    Returns:
+        version_number (str): The version number of the given library name.  Returns None if unknown library_name.
+    """
+    if library_name is None:
+        library_name = ""
+    names = hed_schema.library.split(",")
+    versions = hed_schema.version_number.split(",")
+    for name, version in zip(names, versions):
+        if name == library_name:
+            return version
+
+    # Return the partnered schema version
+    if library_name == "" and hed_schema.with_standard:
+        return hed_schema.with_standard
+    return None
+
+
+def get_allowed_characters(value_classes):
+    """Returns the allowed characters in a given container of value classes
+
+    Parameters:
+        value_classes(list of HedSchemaEntry): A list of schema entries that should have the allowedCharacter attribute
+
+    Returns:
+        character_set(set): The set of all characters from the given classes
+    """
+    # This could be pre-computed
+    character_set_names = []
+
+    for value_class in value_classes:
+        allowed_types = value_class.attributes.get(constants.HedKey.AllowedCharacter, "").split(",")
+        character_set_names.extend(allowed_types)
+
+    character_set = get_allowed_characters_by_name(character_set_names)
+    # For now, always allow these special characters (they are validated extensively elsewhere).
+    character_set.update("#/")
+    return character_set
+
+
+def get_allowed_characters_by_name(character_set_names):
+    """Returns the allowed characters from a list of character set names
+
+    Note: "nonascii" is a special case "character" that can be included as well
+
+    Parameters:
+        character_set_names(list of str): A list of character sets to allow.  See hed_schema_constants.character_types
+
+    Returns:
+        character_set(set): The set of all characters from the names
+    """
+    character_set = set()
+    for name in character_set_names:
+        if name in character_types and name != "nonascii":
+            character_set.update(character_types[name])
+        else:
+            character_set.add(name)
+    return character_set
+
+
+def get_problem_indexes(validation_string, character_set, index_adj=0):
+    """Finds indexes with values not in character set
+
+    Parameters:
+        validation_string(str): The string to check characters in
+        character_set(set): The set of valid characters (or the value "nonascii" as a set entry).
+        index_adj(int): the value to adjust the reported indices by, if this isn't the start of a string.
+
+    Returns:
+        index_list(list of (str, int) tuples): The problematic characters paired with their adjusted indices.
+    """
+    if not character_set:
+        return []
+
+    indexes = [(char, index + index_adj) for index, char in enumerate(validation_string) if char not in character_set]
+    if "nonascii" in character_set:
+        indexes = [(char, index) for char, index in indexes if not ord(char) > 127]
+
+    return indexes