Skip to content

Commit

Permalink
Fixed value tests
Browse files Browse the repository at this point in the history
  • Loading branch information
VisLab committed Sep 21, 2024
1 parent c93d310 commit a5ddb21
Show file tree
Hide file tree
Showing 16 changed files with 2,409 additions and 2,055 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,5 @@ Desktop.ini
schema_cache_test/
hed_cache/
spec_tests/hed-specification/tests
spec_tests/hed-examples
spec_tests/*.json
16 changes: 14 additions & 2 deletions hed/errors/error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,15 @@ def val_error_element_deprecatedr(tag):
def val_error_invalid_tag_character(tag, problem_tag):
    """ Build the message for an invalid character found in a tag.

    Parameters:
        tag: The tag being reported (interpolated into the message).
        problem_tag: The offending character/portion (interpolated into the message).

    Returns:
        str: The formatted error message.
    """
    message = f"Invalid character '{problem_tag}' in tag '{tag}'"
    return message

@hed_tag_error(ValidationErrors.INVALID_VALUE_CLASS_CHARACTER, has_sub_tag=True,
               actual_code=ValidationErrors.CHARACTER_INVALID)
def val_error_INVALID_VALUE_CLASS_CHARACTER(tag, problem_tag, value_class):
    """ Build the message for a character that is invalid for a tag's value class.

    Parameters:
        tag: The tag being reported (interpolated into the message).
        problem_tag: The offending character/portion (interpolated into the message).
        value_class: The name of the value class the character is invalid for.

    Returns:
        str: The formatted error message.
    """
    message = f"Invalid character '{problem_tag}' in tag '{tag}' for value class '{value_class}'"
    return message

@hed_tag_error(ValidationErrors.INVALID_VALUE_CLASS_VALUE, has_sub_tag=True,
               actual_code=ValidationErrors.VALUE_INVALID)
def val_error_INVALID_VALUE_CLASS_VALUE(tag, problem_tag, value_class):
    """ Build the message for a tag whose value is invalid for its value class.

    Parameters:
        tag: The tag being reported (interpolated into the message).
        problem_tag: Accepted as part of the signature but not used in the message text.
        value_class: The name of the value class the value failed to satisfy.

    Returns:
        str: The formatted error message.
    """
    message = f"'{tag}' has an invalid value portion for value class '{value_class}'"
    return message

@hed_error(ValidationErrors.TILDES_UNSUPPORTED)
def val_error_tildes_not_supported(source_string, char_index):
Expand Down Expand Up @@ -124,8 +133,11 @@ def val_error_no_valid_tag(tag, problem_tag):


@hed_tag_error(ValidationErrors.VALUE_INVALID)
def val_error_no_value(tag, value_class=''):
    """ Build the message for a tag whose value portion is invalid.

    A single decorated definition replaces the two consecutive definitions of this name:
    the value_class parameter is optional, so callers that pass only the tag still work.

    Parameters:
        tag: The tag being reported (interpolated into the message).
        value_class (str): Optional name of the value class the value failed to satisfy.
            When empty (the default), the generic message is produced.

    Returns:
        str: The formatted error message.
    """
    if value_class:
        return f"'{tag}' has an invalid value portion because it is not a valid '{value_class}' value."
    return f"'{tag}' has an invalid value portion."


@hed_error(ValidationErrors.HED_MISSING_REQUIRED_COLUMN, default_severity=ErrorSeverity.WARNING)
Expand Down
3 changes: 2 additions & 1 deletion hed/errors/error_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ class ValidationErrors:
DUPLICATE_COLUMN_BETWEEN_SOURCES = "DUPLICATE_COLUMN_BETWEEN_SOURCES"
HED_BLANK_COLUMN = "HED_BLANK_COLUMN"


INVALID_VALUE_CLASS_CHARACTER = 'INVALID_VALUE_CLASS_CHARACTER'
INVALID_VALUE_CLASS_VALUE = 'INVALID_VALUE_CLASS_VALUE'
INVALID_TAG_CHARACTER = 'invalidTagCharacter'

CURLY_BRACE_UNSUPPORTED_HERE = "CURLY_BRACE_UNSUPPORTED_HERE"
Expand Down
328 changes: 164 additions & 164 deletions hed/schema/schema_validation_util.py
Original file line number Diff line number Diff line change
@@ -1,164 +1,164 @@
"""Utilities used in HED validation/loading using a HED schema."""

from hed.errors.error_reporter import ErrorHandler
from hed.errors.error_types import SchemaWarnings
from hed.schema import hed_schema_constants as constants
from hed.schema.hed_schema_constants import character_types
from hed.schema.hed_schema import HedSchema


def validate_schema_tag_new(hed_entry):
    """ Validate the short name of a tag entry for capitalization and illegal characters.

    The first character must be a digit or an uppercase letter; the character checks
    are delegated to validate_schema_term_new.

    Parameters:
        hed_entry (HedTagEntry): A single tag entry; its short_tag_name is checked.

    Returns:
        list: A list of all formatting issues found in the term. Each issue is a dictionary.
    """
    short_name = hed_entry.short_tag_name
    # Any # terms will have already been validated as the previous entry.
    if short_name == "#":
        return []

    found_issues = []
    leading_char = short_name[0] if short_name else ""
    if leading_char and not leading_char.isdigit() and not leading_char.isupper():
        found_issues.extend(ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION,
                                                      short_name, char_index=0, problem_char=leading_char))
    found_issues.extend(validate_schema_term_new(hed_entry, short_name))
    return found_issues


def validate_schema_term_new(hed_entry, hed_term=None):
    """ Check a term for characters outside its allowed character set.

    The allowed set is the "name" character set plus any sets listed in the entry's
    "allowedCharacter" attribute (a comma-separated string).

    Parameters:
        hed_entry (HedSchemaEntry): A single schema entry.
        hed_term (str or None): Use instead of hed_entry.name if present (non-empty).

    Returns:
        list: A list of all formatting issues found in the term. Each issue is a dictionary.
    """
    term = hed_term or hed_entry.name
    # todo: potentially optimize this someday, as most values are the same
    extra_set_names = hed_entry.attributes.get("allowedCharacter", "").split(",")
    allowed = get_allowed_characters_by_name(["name"] + extra_set_names)

    found_issues = []
    for bad_char, position in get_problem_indexes(term, allowed):
        found_issues.extend(ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG,
                                                      term, char_index=position, problem_char=bad_char))
    return found_issues


def validate_schema_description_new(hed_entry):
    """ Check the description of an entry for characters outside the "text" and "comma" sets.

    Parameters:
        hed_entry (HedSchemaEntry): A single schema entry.

    Returns:
        list: A list of all invalid characters found in description. Each issue is a dictionary.
            Empty when the entry has no description.
    """
    description = hed_entry.description
    if not description:
        return []

    allowed = get_allowed_characters_by_name(["text", "comma"])
    problems = get_problem_indexes(description, allowed)
    # Kludge, just get short name here if we have it for error reporting
    report_name = getattr(hed_entry, "short_tag_name", hed_entry.name)

    found_issues = []
    for bad_char, position in problems:
        found_issues.extend(ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC,
                                                      hed_entry.description, report_name,
                                                      problem_char=bad_char, char_index=position))
    return found_issues


def schema_version_for_library(hed_schema, library_name):
    """ Look up the version of a library within a schema object.

    The schema's library and version_number attributes are comma-separated strings that
    are paired up positionally; the first pair whose name matches is used.

    Parameters:
        hed_schema (HedSchema): the schema object
        library_name (str or None): The library name you're interested in. "" (or None) for the standard schema.

    Returns:
        version_number (str): The version number of the given library name. Returns None if unknown library_name.
    """
    wanted = "" if library_name is None else library_name
    library_names = hed_schema.library.split(",")
    version_numbers = hed_schema.version_number.split(",")
    matched = next((version for name, version in zip(library_names, version_numbers) if name == wanted), None)
    if matched is not None:
        return matched

    # Return the partnered schema version
    if wanted == "" and hed_schema.with_standard:
        return hed_schema.with_standard
    return None


def get_allowed_characters(value_classes):
    """Returns the allowed characters for a container of value classes

    Each entry's allowedCharacter attribute (a comma-separated string, "" when absent)
    is split into character set names, and the names from all entries are combined.

    Parameters:
        value_classes(list of HedSchemaEntry): A list of schema entries that should have the allowedCharacter attribute

    Returns:
        character_set(set): The set of all characters from the given classes, plus "#" and "/"
    """
    # This could be pre-computed
    set_names = [
        set_name
        for entry in value_classes
        for set_name in entry.attributes.get(constants.HedKey.AllowedCharacter, "").split(",")
    ]
    allowed = get_allowed_characters_by_name(set_names)
    # for now, just always allow these special cases(it's validated extensively elsewhere)
    allowed.update("#/")
    return allowed


def get_allowed_characters_by_name(character_set_names):
    """Returns the allowed characters from a list of character set names

    Names found in character_types are expanded to their characters. Any other name,
    and the special name "nonascii", is added to the result as-is.

    Parameters:
        character_set_names(list of str): A list of character sets to allow. See hed_schema_constants.character_types

    Returns:
        character_set(set): The set of all characters from the names
    """
    allowed = set()
    for set_name in character_set_names:
        if set_name not in character_types or set_name == "nonascii":
            # Not an expandable set name: keep the name itself as a set member.
            allowed.add(set_name)
            continue
        allowed.update(character_types[set_name])
    return allowed


def get_problem_indexes(validation_string, character_set, index_adj=0):
    """Finds characters (and their indexes) that are not in the character set

    An empty character set disables checking and yields no problems. When the set
    contains the entry "nonascii", characters with a code point above 127 are accepted.

    Parameters:
        validation_string(str): The string to check characters in
        character_set(set): the set of valid characters(or the value "nonascii" as a set entry)
        index_adj(int): the value to adjust the reported indices by, if this isn't the start of a string.

    Returns:
        index_list(list of (str, int)): The problematic characters paired with their adjusted indices
    """
    if not character_set:
        return []

    nonascii_allowed = "nonascii" in character_set
    problems = []
    for position, char in enumerate(validation_string):
        if char in character_set:
            continue
        if nonascii_allowed and ord(char) > 127:
            continue
        problems.append((char, position + index_adj))
    return problems
"""Utilities used in HED validation/loading using a HED schema."""

from hed.errors.error_reporter import ErrorHandler
from hed.errors.error_types import SchemaWarnings
from hed.schema import hed_schema_constants as constants
from hed.schema.hed_schema_constants import character_types
from hed.schema.hed_schema import HedSchema


def validate_schema_tag_new(hed_entry):
    """ Validate the short name of a tag entry for capitalization and illegal characters.

    The first character must be a digit or an uppercase letter; the character checks
    are delegated to validate_schema_term_new.

    Parameters:
        hed_entry (HedTagEntry): A single tag entry; its short_tag_name is checked.

    Returns:
        list: A list of all formatting issues found in the term. Each issue is a dictionary.
    """
    short_name = hed_entry.short_tag_name
    # Any # terms will have already been validated as the previous entry.
    if short_name == "#":
        return []

    found_issues = []
    leading_char = short_name[0] if short_name else ""
    if leading_char and not leading_char.isdigit() and not leading_char.isupper():
        found_issues.extend(ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION,
                                                      short_name, char_index=0, problem_char=leading_char))
    found_issues.extend(validate_schema_term_new(hed_entry, short_name))
    return found_issues


def validate_schema_term_new(hed_entry, hed_term=None):
    """ Check a term for characters outside its allowed character set.

    The allowed set is the "name" character set plus any sets listed in the entry's
    "allowedCharacter" attribute (a comma-separated string).

    Parameters:
        hed_entry (HedSchemaEntry): A single schema entry.
        hed_term (str or None): Use instead of hed_entry.name if present (non-empty).

    Returns:
        list: A list of all formatting issues found in the term. Each issue is a dictionary.
    """
    term = hed_term or hed_entry.name
    # todo: potentially optimize this someday, as most values are the same
    extra_set_names = hed_entry.attributes.get("allowedCharacter", "").split(",")
    allowed = get_allowed_characters_by_name(["name"] + extra_set_names)

    found_issues = []
    for bad_char, position in get_problem_indexes(term, allowed):
        found_issues.extend(ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG,
                                                      term, char_index=position, problem_char=bad_char))
    return found_issues


def validate_schema_description_new(hed_entry):
    """ Check the description of an entry for characters outside the "text" and "comma" sets.

    Parameters:
        hed_entry (HedSchemaEntry): A single schema entry.

    Returns:
        list: A list of all invalid characters found in description. Each issue is a dictionary.
            Empty when the entry has no description.
    """
    description = hed_entry.description
    if not description:
        return []

    allowed = get_allowed_characters_by_name(["text", "comma"])
    problems = get_problem_indexes(description, allowed)
    # Kludge, just get short name here if we have it for error reporting
    report_name = getattr(hed_entry, "short_tag_name", hed_entry.name)

    found_issues = []
    for bad_char, position in problems:
        found_issues.extend(ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC,
                                                      hed_entry.description, report_name,
                                                      problem_char=bad_char, char_index=position))
    return found_issues


def schema_version_for_library(hed_schema, library_name):
    """ Look up the version of a library within a schema object.

    The schema's library and version_number attributes are comma-separated strings that
    are paired up positionally; the first pair whose name matches is used.

    Parameters:
        hed_schema (HedSchema): the schema object
        library_name (str or None): The library name you're interested in. "" (or None) for the standard schema.

    Returns:
        version_number (str): The version number of the given library name. Returns None if unknown library_name.
    """
    wanted = "" if library_name is None else library_name
    library_names = hed_schema.library.split(",")
    version_numbers = hed_schema.version_number.split(",")
    matched = next((version for name, version in zip(library_names, version_numbers) if name == wanted), None)
    if matched is not None:
        return matched

    # Return the partnered schema version
    if wanted == "" and hed_schema.with_standard:
        return hed_schema.with_standard
    return None


def get_allowed_characters(value_classes):
    """Returns the allowed characters for a container of value classes

    Each entry's allowedCharacter attribute (a comma-separated string, "" when absent)
    is split into character set names, and the names from all entries are combined.

    Parameters:
        value_classes(list of HedSchemaEntry): A list of schema entries that should have the allowedCharacter attribute

    Returns:
        character_set(set): The set of all characters from the given classes, plus "#" and "/"
    """
    # This could be pre-computed
    set_names = [
        set_name
        for entry in value_classes
        for set_name in entry.attributes.get(constants.HedKey.AllowedCharacter, "").split(",")
    ]
    allowed = get_allowed_characters_by_name(set_names)
    # for now, just always allow these special cases(it's validated extensively elsewhere)
    allowed.update("#/")
    return allowed


def get_allowed_characters_by_name(character_set_names):
    """Returns the allowed characters from a list of character set names

    Names found in character_types are expanded to their characters. Any other name,
    and the special name "nonascii", is added to the result as-is.

    Parameters:
        character_set_names(list of str): A list of character sets to allow. See hed_schema_constants.character_types

    Returns:
        character_set(set): The set of all characters from the names
    """
    allowed = set()
    for set_name in character_set_names:
        if set_name not in character_types or set_name == "nonascii":
            # Not an expandable set name: keep the name itself as a set member.
            allowed.add(set_name)
            continue
        allowed.update(character_types[set_name])
    return allowed


def get_problem_indexes(validation_string, character_set, index_adj=0):
    """Finds characters (and their indexes) that are not in the character set

    An empty character set disables checking and yields no problems. When the set
    contains the entry "nonascii", characters with a code point above 127 are accepted.

    Parameters:
        validation_string(str): The string to check characters in
        character_set(set): the set of valid characters(or the value "nonascii" as a set entry)
        index_adj(int): the value to adjust the reported indices by, if this isn't the start of a string.

    Returns:
        index_list(list of (str, int)): The problematic characters paired with their adjusted indices
    """
    if not character_set:
        return []

    nonascii_allowed = "nonascii" in character_set
    problems = []
    for position, char in enumerate(validation_string):
        if char in character_set:
            continue
        if nonascii_allowed and ord(char) > 127:
            continue
        problems.append((char, position + index_adj))
    return problems
Loading

0 comments on commit a5ddb21

Please sign in to comment.