Skip to content

Commit

Permalink
Merge pull request sfu-db#916 from khoatxp/feat/create-db-report-add-…
Browse files Browse the repository at this point in the history
…interactive-diagram

feat(eda.create_db_report): add interactive diagram functionality
  • Loading branch information
jinglinpeng committed Jul 28, 2022
2 parents ad1afc7 + 3a96e44 commit a10a04b
Show file tree
Hide file tree
Showing 29 changed files with 9,381 additions and 684 deletions.
14 changes: 13 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@ jobs:
matrix:
python: ["3.8", "3.9"]
os: [ubuntu-latest, macos-latest, windows-latest]
include:
- os: ubuntu-latest
install_graphviz:
sudo apt install graphviz graphviz-dev
- os: macos-latest
install_graphviz: brew install graphviz
- os: windows-latest
install_graphviz:
choco install graphviz --version=2.48.0;
poetry run pip install --global-option=build_ext --global-option="-IC:\Program Files\Graphviz\include" --global-option="-LC:\Program Files\Graphviz\lib" pygraphviz;
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
Expand All @@ -35,9 +45,11 @@ jobs:

- name: Install dependencies
run: |
echo "Cache Version ${{ secrets.CACHE_VERSION }}"
pip install poetry
${{ matrix.install_graphviz }}
echo "Cache Version ${{ secrets.CACHE_VERSION }}"
poetry install
poetry run pip install ERAlchemy
poetry config --list
- name: Print tool versions
Expand Down
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,9 @@ report.xml
dataprep/eda/create_db_report/layout/index.html
dataprep/eda/create_db_report/layout/columns.html
dataprep/eda/create_db_report/layout/constraints.html
dataprep/eda/create_db_report/layout/relationships.html
dataprep/eda/create_db_report/layout/diagrams/*
dataprep/eda/create_db_report/layout/tables/*
!dataprep/eda/create_db_report/layout/tables/table.html
!dataprep/eda/create_db_report/layout/tables/table.js
!dataprep/eda/create_db_report/layout/bower/admin-lte/dist
!dataprep/eda/create_db_report/layout/bower
20 changes: 13 additions & 7 deletions dataprep/eda/create_db_report/db_models/constraint.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,19 @@


class ForeignKeyConstraint:
imported_key_cascade = 0
imported_key_restrict = 1
imported_key_set_null = 2
imported_key_no_action = 3
imported_key_cascade = "0"
imported_key_restrict = "1"
imported_key_set_null = "2"
imported_key_no_action = "3"

def __init__(self, child: Table, name: str, delete_rule: str, update_rule: int):
self.name = name
self.child_table = child
def __init__(self, child: Table, name: str, delete_rule: str, update_rule: str):
self.name = name.replace("'", "")
self.delete_rule = delete_rule
self.update_rule = update_rule
self.parent_columns = []
self.child_columns = []
self.parent_table = None
self.child_table = child

def add_parent_column(self, column):
if column is not None:
Expand All @@ -25,6 +25,12 @@ def add_child_column(self, column):
if column is not None:
self.child_columns.append(column)

def get_parent_table(self):
return self.parent_table

def get_child_table(self):
return self.child_table

def is_cascade_on_delete(self):
return self.delete_rule == self.imported_key_cascade

Expand Down
8 changes: 7 additions & 1 deletion dataprep/eda/create_db_report/db_models/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

class Database:
def __init__(self, name: str, schema: str, stats: DbMeta) -> None:
self.name = name
self.name = name.replace("'", "")
self.schema = schema
self.tables = {}
self.views = {}
Expand All @@ -20,5 +20,11 @@ def add_view(self, view_name: str, view_object):
def get_tables(self):
return self.tables.values()

def get_tables_dict(self):
return self.tables

def get_views(self):
return self.views.values()

def get_name(self):
return self.name
2 changes: 2 additions & 0 deletions dataprep/eda/create_db_report/db_models/db_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@ def __init__(
num_of_pk: int,
num_of_tables: int,
product_version: str,
connection_url: str,
) -> None:
self.num_of_views = num_of_views
self.num_of_schemas = num_of_schemas
self.num_of_fk = num_of_fk
self.num_of_uk = num_of_uk
self.num_of_pk = num_of_pk
self.num_of_table = num_of_tables
self.connection_url = connection_url
self.database_product = self.engine_name_dict[engine_name] + " - " + product_version
17 changes: 15 additions & 2 deletions dataprep/eda/create_db_report/db_models/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
class Table:
def __init__(self, database: Database, schema: str, name: str) -> None:
self.database = database
self.schema = schema
self.name = name
self.schema = schema.replace("'", "")
self.name = name.replace("'", "")
self.foreign_keys = {}
self.columns = {}
self.primary_keys = []
self.indexes = {}
self.referenced_by = {}
self.id = None
self.check_constraints = {}
self.num_of_rows = 0
Expand Down Expand Up @@ -50,6 +51,12 @@ def add_max_parents(self):
def add_max_children(self):
self.max_children += 1

def add_referenced_by_table(self, table):
self.referenced_by[table.get_name()] = table

def get_referenced_by_tables(self):
return self.referenced_by

def get_view_definition(self):
return None

Expand All @@ -70,5 +77,11 @@ def add_check_constraint(self, constraint_name: str, text: str):
def get_foreign_keys(self):
return self.foreign_keys.values()

def get_foreign_keys_dict(self):
return self.foreign_keys

def add_foreign_key(self, foreign_key):
self.foreign_keys[foreign_key.name] = foreign_key

def get_name(self):
return self.name
9 changes: 6 additions & 3 deletions dataprep/eda/create_db_report/db_models/table_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ def __init__(
comments: str,
):
self.table = table
self.name = name
self.name = name.replace("'", "")
self.type_name = type_name
self.not_null = not_null
self.default_value = default_value if default_value is not None else ""
self.comments = comments
self.default_value = default_value.replace("'", "") if default_value else ""
self.comments = comments.replace("'", "") if default_value else ""
self.parents = {}
self.children = {}
self.type = 0
Expand Down Expand Up @@ -60,3 +60,6 @@ def get_parents(self):

def get_children(self):
return list(self.children.values())

def get_name(self):
return self.name
2 changes: 1 addition & 1 deletion dataprep/eda/create_db_report/db_models/table_index.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
class TableIndex:
def __init__(self, name: str, index_type: str) -> None:
self.name = name
self.name = name.replace("'", "")
self.is_unique = False
self.is_primary = False
self.columns = {}
Expand Down
164 changes: 164 additions & 0 deletions dataprep/eda/create_db_report/diagram_factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import re
import os
import platform
import pydot
import json
import shutil
from typing import Dict, Any
from .db_models.database import Database

# Directory holding the Graphviz binaries; overridable through the
# GRAPHVIZ_PATH environment variable.
GRAPHVIZ_PATH = os.environ.get("GRAPHVIZ_PATH", "C:/Program Files/Graphviz/bin")

# On Windows, register the Graphviz directory for DLL lookup before the
# optional import below is attempted.
if platform.system() == "Windows" and os.path.exists(GRAPHVIZ_PATH):
    os.add_dll_directory(GRAPHVIZ_PATH)

# ERAlchemy is optional: remember whether it imported so that callers can
# raise a helpful error only when a diagram is actually requested.
try:
    from eralchemy import render_er
except ImportError:
    _WITH_GV = False
else:
    _WITH_GV = True


class DiagramFactory:
    """Produce the table and relationship data used by the interactive diagrams.

    ERAlchemy's ``render_er`` writes Graphviz ``.dot`` files into
    ``<output_dir>/diagrams``; the edges are then parsed back with ``pydot``
    and returned, together with per-table column descriptions, as plain
    lists/dicts.
    """

    # Edge labels are written as HTML-like strings, e.g. ``<<FONT>0..N</FONT>>``.
    _LABEL_PATTERN = re.compile(r"<<FONT>(.*?)</FONT>>", re.S | re.M)
    # Display order of columns: primary keys, then foreign keys, then the rest.
    _COLOR_PRIORITY = {"red": 0, "purple": 1, "green": 2}

    def __init__(self, output_dir: str):
        """Remember the working directory and (re)create the diagram folders.

        Args:
            output_dir: Directory under which the ``diagrams`` folder lives.
        """
        self.cwd = os.getcwd()
        self.diagram_dir = output_dir + "/diagrams"
        self.dirs = {
            "table": self.diagram_dir + "/tables",
            "summary": self.diagram_dir + "/summary",
            "orphan": self.diagram_dir + "/orphans",
        }
        self.import_err = ImportError(
            "ERAlchemy is not installed."
            " Please run pip install ERAlchemy"
            "\nThis package also requires sub-dependency pygraphviz."
            " Please refer to https://pygraphviz.github.io/documentation/stable/install.html to install pygraphviz."
            f"\nFor Windows users, make sure that Graphviz is installed under {GRAPHVIZ_PATH}"
            "\nIf Graphviz was installed in a different directory, set path environment variable GRAPHVIZ_PATH to that directory."
        )
        self.create_dirs()

    def create_dirs(self):
        """Create the diagram folders, wiping any output of a previous run."""
        # makedirs so that a not-yet-existing output directory is not an error.
        os.makedirs(self.diagram_dir, exist_ok=True)
        for directory in self.dirs.values():
            if os.path.isdir(directory):
                shutil.rmtree(directory)
            os.makedirs(directory)

    def _render_dot(self, database_url: str, target_dir: str, file_name: str, **render_options):
        """Render ``file_name`` inside ``target_dir`` and return its absolute path.

        The working directory is switched to ``target_dir`` only for the
        duration of the ``render_er`` call and is always restored afterwards.

        Raises:
            ImportError: If ERAlchemy could not be imported.
        """
        if not _WITH_GV:
            raise self.import_err
        # The folders were created relative to the directory captured in
        # __init__, so resolve against that one rather than the current cwd.
        absolute_dir = os.path.abspath(os.path.join(self.cwd, target_dir))
        os.chdir(absolute_dir)
        try:
            render_er(database_url, file_name, **render_options)
        finally:
            os.chdir(self.cwd)
        return os.path.join(absolute_dir, file_name)

    def generate_summary_diagram(self, database_object: "Database", database_url: str):
        """Build the diagram data covering every table of the database.

        Args:
            database_object: Model whose ``get_tables_dict()`` maps table
                names to table objects.
            database_url: Connection URL handed to ``render_er``.

        Returns:
            A ``(json_tables, json_relationships)`` tuple.

        Raises:
            ImportError: If ERAlchemy could not be imported.
        """
        dot_file = self._render_dot(database_url, self.dirs["summary"], "relationships.dot")
        json_tables = self.generate_diagram_tables(database_object.get_tables_dict())
        json_relationships = self.generate_diagram_relationships(dot_file)
        return json_tables, json_relationships

    def generate_table_diagrams(self, database_object: "Database", database_url: str):
        """Build, for each table, the diagram data of its first-degree neighbours.

        A table's neighbours are the tables referencing it plus the parent
        tables of its own foreign keys.

        Args:
            database_object: Model whose ``get_tables_dict()`` maps table
                names to table objects.
            database_url: Connection URL handed to ``render_er``.

        Returns:
            Dict mapping each table name to
            ``{"json_tables": [...], "json_relationships": [...]}``.

        Raises:
            ImportError: If ERAlchemy could not be imported.
        """
        database_tables = database_object.get_tables_dict()
        result_tables = {}
        for table_name, table in database_tables.items():
            related_table_names = {table_name}
            related_table_names.update(table.get_referenced_by_tables())
            for foreign_key in table.get_foreign_keys_dict().values():
                parent_table = foreign_key.get_parent_table()
                # A constraint whose parent was never resolved has no
                # neighbour to contribute.
                if parent_table is not None:
                    related_table_names.add(parent_table.get_name())
            dot_file = self._render_dot(
                database_url,
                self.dirs["table"],
                f"{table_name}.dot",
                include_tables=" ".join(sorted(related_table_names)),
            )
            first_degree_tables = {
                key: value for key, value in database_tables.items() if key in related_table_names
            }
            result_tables[table_name] = {
                "json_tables": self.generate_diagram_tables(first_degree_tables),
                "json_relationships": self.generate_diagram_relationships(dot_file),
            }
        return result_tables

    def generate_diagram_tables(self, tables: Dict[str, Any]):
        """Describe every table and its columns as diagram nodes.

        Args:
            tables: Mapping of table name to table object.

        Returns:
            A list, in the mapping's iteration order, of
            ``{"key": <table name>, "items": [<column dict>, ...]}`` with the
            columns ordered by ``sort_by_priority``.
        """
        json_tables = []
        for table_name, table in tables.items():
            table_items = []
            for column in table.get_columns():
                current_column = {
                    "name": column.get_name(),
                    "type": column.type_name,
                    "default_value": column.default_value,
                    # NOTE(review): "nullable" carries the not_null flag
                    # unchanged - confirm the front end expects that.
                    "nullable": column.not_null,
                }
                if column.is_primary():
                    current_column["iskey"] = True
                    current_column["figure"] = "Decision"
                    current_column["color"] = "red"
                elif column.is_foreign_key():
                    current_column["iskey"] = True
                    current_column["figure"] = "Decision"
                    current_column["color"] = "purple"
                    current_column["ref"] = ",".join(
                        [f"{x.name} in {x.table.name}" for x in column.parents]
                    )
                else:
                    current_column["iskey"] = False
                    current_column["figure"] = "Circle"
                    current_column["color"] = "green"
                table_items.append(current_column)
            json_tables.append(
                {"key": table_name, "items": self.sort_by_priority(table_items)}
            )
        return json_tables

    def generate_diagram_relationships(self, dot_file: str):
        """Parse the edges of a ``.dot`` file into diagram links.

        Args:
            dot_file: Path of the Graphviz file to read.

        Returns:
            A list of dicts shaped like
            ``{"from": "Products", "to": "Suppliers", "text": "0..N", "toText": "1"}``;
            ``text``/``toText`` are omitted when the edge has no matching
            ``taillabel``/``headlabel``.
        """
        json_relationships = []
        graph = pydot.graph_from_dot_file(dot_file)
        for edge in graph[0].get_edge_list():
            edge_attr = edge.get_attributes()
            # Read the endpoints from the edge itself instead of splitting its
            # string form, which breaks on node names containing whitespace.
            current_edge = {
                "from": str(edge.get_source()).replace('"', ""),
                "to": str(edge.get_destination()).replace('"', ""),
            }
            for attr_name, json_key in (("taillabel", "text"), ("headlabel", "toText")):
                match = self._LABEL_PATTERN.match(str(edge_attr.get(attr_name) or ""))
                if match:
                    current_edge[json_key] = match.group(1).strip()
            json_relationships.append(current_edge)
        return json_relationships

    def sort_by_priority(self, values):
        """Order column dicts: primary keys, then foreign keys, then the rest.

        Each dict gets a ``"priority"`` entry derived from its ``"color"``;
        the sort is stable, so columns of equal priority keep their order.

        Raises:
            KeyError: If a ``"color"`` is not red, purple or green.
        """
        for value in values:
            value["priority"] = self._COLOR_PRIORITY[value["color"]]
        return sorted(values, key=lambda item: item["priority"])
Loading

0 comments on commit a10a04b

Please sign in to comment.