Skip to content

Commit

Permalink
Refactor the slugify function
Browse files Browse the repository at this point in the history
  • Loading branch information
pudo committed May 3, 2021
1 parent 74dd46d commit 00ab0e1
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 17 deletions.
19 changes: 3 additions & 16 deletions normality/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from normality.encoding import DEFAULT_ENCODING
from normality.stringify import stringify
from normality.paths import safe_filename
from normality.slugify import slugify
from normality.util import Categories, Encoding

__all__ = [
Expand Down Expand Up @@ -78,24 +79,10 @@ def normalize(
# Perform unicode category-based character replacement. This is
# used to filter out whole classes of characters, such as symbols,
# punctuation, or whitespace-like characters.
text = category_replace(text, replace_categories)

if text is None:
return None
if replace_categories is not None:
text = category_replace(text, replace_categories)

if collapse:
# Remove consecutive whitespace.
text = collapse_spaces(text)
return text


def slugify(value: Any, sep: str = "-") -> Optional[str]:
    """Build a simple slug from ``value``.

    The value is converted with ``stringify``; occurrences of ``sep`` are
    turned into ``WS`` so they are treated like any other whitespace, the
    text is passed through ``normalize`` with ``ascii=True``, and finally
    every ``WS`` is replaced by ``sep``.

    Returns ``None`` when either ``stringify`` or ``normalize`` yields
    ``None``.
    """
    raw = stringify(value)
    if raw is not None:
        normalized = normalize(raw.replace(sep, WS), ascii=True)
        if normalized is not None:
            return normalized.replace(WS, sep)
    return None
33 changes: 32 additions & 1 deletion normality/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
"Zs": WS,
"Zl": WS,
"Zp": WS,
"Pc": None,
"Pc": WS, # TODO: figure out if this wants to be None
"Pd": WS,
"Ps": WS,
"Pe": WS,
Expand All @@ -46,4 +46,35 @@
"Zp": WS,
}

# Replacement table, keyed by Unicode general category, that slugify
# passes to category_replace. A value of WS turns the character into
# whitespace; None presumably drops the character entirely (confirm
# against category_replace).
SLUG_CATEGORIES: Categories = {
    # Other: control, format, surrogate, private use, unassigned.
    "Cc": None,
    "Cf": None,
    "Cs": None,
    "Co": None,
    "Cn": None,
    # Modifier letters (Lm) and non-spacing marks (Mn) are deliberately
    # left without a rule; the entries below were already disabled.
    # "Lm": None,
    # "Mn": None,
    # Marks: spacing combining, enclosing.
    "Mc": WS,
    "Me": None,
    # Other number.
    "No": None,
    # Separators: space, line, paragraph. Each key is listed once only;
    # a repeated key in a dict literal silently overrides the earlier one.
    "Zs": WS,
    "Zl": WS,
    "Zp": WS,
    # Punctuation: connector, dash, open, close, initial quote,
    # final quote, other.
    "Pc": WS,
    "Pd": WS,
    "Ps": WS,
    "Pe": WS,
    "Pi": WS,
    "Pf": WS,
    "Po": WS,
    # Symbols: math, currency, modifier, other.
    "Sm": WS,
    "Sc": None,
    "Sk": None,
    "So": WS,
}


# Every "Other" (C*) Unicode general category, each mapped to WS.
CONTROL_CODES: Categories = {
    "Cc": WS,  # control
    "Cf": WS,  # format
    "Cs": WS,  # surrogate
    "Co": WS,  # private use
    "Cn": WS,  # unassigned
}
29 changes: 29 additions & 0 deletions normality/slugify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import string
from typing import Any, Optional

from normality.cleaning import collapse_spaces, category_replace
from normality.constants import SLUG_CATEGORIES, WS
from normality.transliteration import latinize_text
from normality.stringify import stringify

# Characters allowed to survive in a slug before WS is swapped for the
# separator: ASCII lowercase letters, digits and the whitespace marker.
VALID_CHARS = string.ascii_lowercase + string.digits + WS


def slugify(value: Any, sep: str = "-") -> Optional[str]:
    """Generate a slug from an arbitrary value.

    Slugs are pure ASCII lowercase strings that can be used in URLs and
    other places where a name has to be machine-safe.

    Args:
        value: The object to slugify; it is converted with ``stringify``.
        sep: The string placed between the words of the slug. Existing
            occurrences of it in the input are treated as whitespace.

    Returns:
        The slug, or ``None`` if any processing step yields ``None``.
    """
    text = stringify(value)
    if text is None:
        return None
    # Existing separators become whitespace so that they are collapsed
    # together with real whitespace further down.
    text = text.replace(sep, WS)
    # Run this first because it'll give better results on special
    # characters.
    text = category_replace(text, SLUG_CATEGORIES)
    if text is None:
        return None
    text = latinize_text(text, ascii=True)
    # The helpers are treated as optional-returning (the result of
    # collapse_spaces is checked below), so guard before calling a str
    # method instead of risking an AttributeError on None.
    if text is None:
        return None
    text = text.lower()
    # Drop everything that is not a lowercase ASCII letter, digit or WS.
    text = "".join([c for c in text if c in VALID_CHARS])
    text = collapse_spaces(text)
    if text is None:
        return None
    return text.replace(WS, sep)
3 changes: 3 additions & 0 deletions tests/test_normality.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ def test_azeri(self):
def test_slugify(self):
    """Check slugify against inputs with punctuation, an apostrophe-like
    non-ASCII character and an underscore."""
    cases = (
        ("baby-camel-is-good", u"BABY! camel-is good"),
        ("tests", "testʼs"),
        ("test-s", "test_s"),
    )
    for expected, raw in cases:
        self.assertEqual(expected, slugify(raw, sep="-"))

def test_georgian(self):
text = u"ავლაბრის ფონდი"
Expand All @@ -40,6 +42,7 @@ def test_georgian(self):
def test_german(self):
    """German text with an umlaut and a sharp s goes through ascii_text
    and slugify."""
    german = u"Häschen Spaß"
    transliterated = ascii_text(german)
    self.assertEqual("Haschen Spass", transliterated)
    slug = slugify(german, sep="-")
    self.assertEqual("haschen-spass", slug)

def test_stringify(self):
self.assertEqual(".", stringify(" . "))
Expand Down

0 comments on commit 00ab0e1

Please sign in to comment.