Skip to content

Commit

Permalink
Fixing up origin lookups and where colour gets calculated
Browse files Browse the repository at this point in the history
  • Loading branch information
Emily-RoseSteyn committed Feb 26, 2024
1 parent f546d1c commit c694114
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 25 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ rsync -avm --include='*/' --include='*.osm.pbf' --exclude='*' ./data ${REMOTE}:$

## Dictionary Generation

[//]: # (TODO: Document this some more)

#### 1. List OpenStreetMap Locations

## Relevant Notebooks
Expand Down
14 changes: 13 additions & 1 deletion src/lookup_language/lookup_word.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,19 @@

class LanguageDetector:
def __init__(self):
    """Set the candidate languages and build the detector from them."""
    # Single source of truth for the candidate languages, kept in
    # alphabetical order. An earlier five-language list used to be assigned
    # here and was immediately overwritten; only this list ever reached the
    # builder, so the redundant assignment has been dropped.
    self._languages = [
        Language.AFRIKAANS,
        Language.DUTCH,
        Language.ENGLISH,
        Language.FRENCH,
        Language.GERMAN,
        Language.ITALIAN,
        Language.PORTUGUESE,
        Language.SOTHO,
        Language.SPANISH,
        Language.XHOSA,
        Language.ZULU,
    ]
    self._detector = LanguageDetectorBuilder.from_languages(*self._languages).build()

def detect(self, term):
Expand Down
32 changes: 19 additions & 13 deletions src/mapping/country_colour_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,28 @@ def country_colour_map_dynamic():

def country_colour_map_static() -> dict[str, str]:
    """Return the fixed origin-to-colour lookup.

    Keys are lower-case snake_case origin names: first the countries, then
    the languages. Values are ``#rrggbb`` hex colour strings.

    Returns:
        A new dict on every call, so callers may mutate the result freely.
    """
    return {
        # Countries.
        "south_africa": "#e5c494",
        "belgium": "#ffd000",
        "england": "#66c2a5",
        "france": "#ffa886",
        "germany": "#cd212a",
        "ireland": "#b3f469",
        "italy": "#e78ac3",
        "netherlands": "#ff9325",
        "northern_ireland": "#009a61",
        "portugal": "#ffec4f",
        "scotland": "#91a4ff",
        "spain": "#800080",
        "wales": "#ccebc5",
        # Languages. Dutch, English, French, German, Italian, Portuguese and
        # Spanish deliberately reuse the colour of the matching country above
        # so the two map variants stay visually comparable.
        "afrikaans": "#a65628",
        "dutch": "#ff9325",
        "english": "#66c2a5",
        "french": "#ffa886",
        "german": "#cd212a",
        "italian": "#e78ac3",
        "portuguese": "#ffec4f",
        "sotho": "#a6761d",
        "spanish": "#800080",
        "xhosa": "#d95f02",
        "zulu": "#f781bf",
    }
Expand Down
31 changes: 25 additions & 6 deletions src/mapping/lookup_origin.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import sqlite3
from typing import Tuple, Any
from typing import Any

import pandas as pd
from numpy import ndarray
from pandas import Series, DataFrame

from mapping.country_colour_map import get_colour
from lookup_language.lookup_word import LanguageDetector
from mapping.stop_terms import STOP_TERMS
from utils.env_variables import SQLITE_DB, TERMS_DICTIONARY_TABLE, TERMS_LANGUAGE_DICTIONARY_TABLE

Expand Down Expand Up @@ -49,14 +47,35 @@ def lookup_origin(street_name: str, map_language: bool = False, country: str = "
return origin, primary_term


def lookup_language(
        street_name: str,
        map_language: bool = False,
        country: str = "south_africa",
) -> tuple[None, None] | tuple[Any, Any]:
    """Detect the language of a street name once stop terms are removed.

    Args:
        street_name: Raw street name; it is lower-cased before processing.
        map_language: Unused here; accepted so the signature mirrors
            ``lookup_origin``.
        country: Unused here; accepted so the signature mirrors
            ``lookup_origin``.

    Returns:
        ``(origin, primary_term)`` where ``primary_term`` is the street name
        without stop terms and ``origin`` is the first entry of the
        detector's ``"language"`` result, or ``(None, None)`` when nothing is
        left to classify after stop-term removal.
    """
    terms = street_name.lower().split(' ')
    primary_term = ' '.join(term for term in terms if term not in STOP_TERMS)

    # A name made up solely of stop terms (e.g. "The Avenue") leaves nothing
    # to classify; return the documented (None, None) instead of asking the
    # detector about an empty string and indexing its result.
    if not primary_term.strip():
        return None, None

    language_result = LanguageDetector().detect(primary_term)
    # NOTE(review): assumes detect() orders its "language" entries with the
    # most likely candidate first -- confirm against LanguageDetector.detect.
    origin = language_result["language"][0]

    return origin, primary_term


def map_street_to_origin(x, map_language: bool = False):
    """Annotate one street row with its origin and primary term.

    Args:
        x: Row-like mapping with a ``"name"`` entry (for example a row passed
            by ``DataFrame.apply(..., axis=1)``). It is modified in place.
        map_language: When true, classify the name by detected language via
            ``lookup_language``; otherwise use ``lookup_origin``.

    Returns:
        The same ``x``, with ``"origin"`` and ``"primary_term"`` set. Both are
        ``None`` when the name is not a string. Colour is not assigned here;
        callers derive it from ``"origin"``.
    """
    street_name = x["name"]

    # Names that are not plain strings (missing values and the like) cannot
    # be looked up.
    if not isinstance(street_name, str):
        x["origin"] = None
        x["primary_term"] = None
        return x

    if map_language:
        origin, primary_term = lookup_language(street_name)
    else:
        origin, primary_term = lookup_origin(street_name, map_language)

    x["origin"] = origin
    x["primary_term"] = primary_term
    return x
15 changes: 11 additions & 4 deletions src/mapping/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import geopandas as gpd
import osmnx as ox

from mapping.country_colour_map import DEFAULT_BACKGROUND_COLOUR, get_custom_legend
from mapping.country_colour_map import DEFAULT_BACKGROUND_COLOUR, get_custom_legend, get_colour
from mapping.lookup_origin import map_street_to_origin
from utils.env_variables import OUTPUT_IMAGES_DIR, PUNCTUATION, OUTPUT_GDF_DIR
from utils.logger import get_logger
Expand All @@ -16,6 +16,7 @@


def get_gdf(address, graph, processed_place_name, dist, map_language):
logger.info(f"Getting geodataframe for {address}")
gdf_output_dir = OUTPUT_GDF_DIR
if not os.path.exists(gdf_output_dir):
os.makedirs(gdf_output_dir)
Expand All @@ -34,7 +35,7 @@ def get_gdf(address, graph, processed_place_name, dist, map_language):
gdf = gdf.apply(lambda x: map_street_to_origin(x, map_language), axis=1)

# Save
gdf = gpd.GeoDataFrame(gdf[["origin", "colour", "geometry"]], index=gdf.index)
gdf = gpd.GeoDataFrame(gdf[["origin", "name", "geometry"]], index=gdf.index)
gdf.to_parquet(gdf_output_path)

logger.info(f"Mapped origins and colours for {address}.")
Expand All @@ -53,9 +54,12 @@ def map_origin_of_address(address: str, dist: int = 1000, edge_linewidth: int =

logger.info(f"Retrieved graph for address {address}")

# Getting geodataframe with colours
# Getting dictionary geodataframe
gdf = get_gdf(address, graph, processed_place_name, dist, map_language)

# Get colours
gdf = gdf.apply(lambda x: get_colour(x["origin"]), axis=1)

logger.info(f"Plotting...")
# Map coloured streets on graph
map_fig, map_ax = ox.plot_graph(graph, node_size=0,
Expand All @@ -76,7 +80,10 @@ def map_origin_of_address(address: str, dist: int = 1000, edge_linewidth: int =

# Save figure
timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M')
output_path = os.path.join(output_dir, f"{timestamp}_{processed_place_name}.png")

filename = f"{timestamp}_{processed_place_name}_language.png" if map_language \
else f"{timestamp}_{processed_place_name}.png"
output_path = os.path.join(output_dir, filename)
map_fig.savefig(output_path, dpi=300, bbox_inches='tight', format="png",
facecolor=DEFAULT_BACKGROUND_COLOUR, transparent=False)

Expand Down
11 changes: 10 additions & 1 deletion src/mapping/stop_terms.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
# Constructed based on frequency of terms in SOUTH AFRICA's streets.
# Generic road-type words (plus "the") that are stripped from a street name
# before its origin is looked up. Every entry needs a trailing comma: adjacent
# string literals are silently concatenated, which would merge two terms into
# one and drop both from the list.
STOP_TERMS = [
    "avenue",
    "boulevard",
    "close",
    "crescent",
    "drive",
    "lane",
    "place",
    "road",
    "route",
    "street",
    "terrace",
    "the",
    "track",
    "trail",
    "way",
]

0 comments on commit c694114

Please sign in to comment.