forked from konfuzio-ai/ai-comedy-club
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into rodrigo-lucchesi
- Loading branch information
Showing
178 changed files
with
867,526 additions
and
91 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,7 @@ | ||
# Virtual environment and IDE metadata
venv/*
.idea/*
# Local environment variables (may contain secrets) — never commit
.env
# macOS Finder metadata
.DS_Store
# Python bytecode and pytest caches
__pycache__/
.pytest_cache/
*.py[cod]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
{ | ||
"[python]": { | ||
"editor.defaultFormatter": "ms-python.autopep8" | ||
}, | ||
"python.formatting.provider": "none" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import uvicorn | ||
from fastapi import FastAPI, Request | ||
from fastapi.templating import Jinja2Templates | ||
from fastapi.responses import HTMLResponse | ||
import importlib.util | ||
import os | ||
from pathlib import Path | ||
|
||
# This is a simple backend with an HTML template to display ONLY jokes
# (no ratings) from every bot.

# The directory where the bots are located.
# NOTE(review): resolved relative to the current working directory's parent —
# assumes the server is launched from a sibling directory of "bots"; confirm.
bots_dir = os.path.join(Path(os.getcwd()).parent, "bots")


app = FastAPI()
# Jinja2 templates are looked up in "templates", relative to the working directory.
templates = Jinja2Templates(directory="templates")
|
||
|
||
@app.get('/', response_class=HTMLResponse)
def index(request: Request):
    """Render the index page with one joke from every bot in ``bots_dir``.

    Each subdirectory of ``bots_dir`` is expected to contain a ``joke_bot.py``
    defining a ``Bot`` class with a ``name`` attribute and a ``tell_joke()``
    method. Directories without a ``joke_bot.py`` are skipped instead of
    crashing the request with a FileNotFoundError.

    :param request: incoming request, forwarded to the template context.
    :return: rendered ``index.html`` with a ``jokes`` mapping of bot name -> joke.
    """
    jokes = {}
    bot_directories = [d for d in os.listdir(bots_dir)
                       if os.path.isdir(os.path.join(bots_dir, d))]
    for bot_dir in bot_directories:
        module_path = os.path.join(bots_dir, bot_dir, "joke_bot.py")
        # Robustness fix: skip directories that ship no bot implementation.
        if not os.path.isfile(module_path):
            continue

        # Dynamically load the bot's module from its file path.
        spec = importlib.util.spec_from_file_location("bot", module_path)
        bot_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(bot_module)

        # Instantiate the bot and collect one joke, keyed by the bot's name
        # (single pass — no intermediate list of bot instances needed).
        bot = bot_module.Bot()
        jokes[bot.name] = bot.tell_joke()

    return templates.TemplateResponse("index.html", {"request": request, "title": "Comedy-club",
                                                     "placeholder": "Welcome", "jokes": jokes})
|
||
|
||
if __name__ == "__main__":
    # Run the dev server with auto-reload; the "api:app" import string
    # assumes this file is named api.py — TODO confirm filename.
    uvicorn.run("api:app", host='127.0.0.1', port=8001, reload=True)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
<!doctype html>

<html lang="en">

<head>
    <title>{{title}}</title>
</head>

<body>
    <h1>Want some jokes?</h1>
    <!-- "jokes" is a mapping of bot name -> joke text; one line per bot. -->
    {% for key, value in jokes.items() %}
    <b>Joke from {{ key }}: {{ value }}</b><br>
    {% endfor %}
</body>

</html>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
models/model_v04/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Jupyter/JupyterLab working files
.ipynb_checkpoints/
.virtual_documents/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
## Konfuzio "AI Comedy Club" Challenge

### Introduction

Humour, even for us humans, can be mysterious. It is no wonder that it poses a unique challenge for AI systems as well. Let us think about it - we all have that one friend who effortlessly brings laughter into our lives. They have a natural knack for timing, delivery, and a deep understanding of what makes something funny. But have we ever tried to explain why they are funny?

In this project, we aim to tackle the challenging task of creating an AI bot that excels at generating new jokes. This task is particularly difficult due to the complexities of humour. Humour is subjective and context-dependent, making it challenging for an AI system to understand and replicate effectively.

Additionally, jokes often rely on wordplay, sarcasm, and cultural references, which further complicates the task of generating original and funny jokes. Despite these challenges, we are determined to push the boundaries of AI and humour, striving to create a bot that can bring joy and laughter to users worldwide.

Meet **ChuckleChief**, our enthusiastic and curious novice AI companion, eager to unravel the mysteries of humour.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import random | ||
|
||
import torch | ||
from transformers import (AutoModelForCausalLM, AutoTokenizer, | ||
DistilBertForSequenceClassification, | ||
DistilBertTokenizer) | ||
|
||
|
||
class Bot:
    """A joke-telling bot backed by two fine-tuned transformer models.

    Uses a causal language model to sample joke text from a fixed prompt and
    a DistilBERT sequence-classification head (single logit, treated as a
    regression score) to rate jokes on a 1-10 scale.
    """

    name = "ChuckleChief"

    def __init__(self):
        """Load the generator and rater models from the Hugging Face hub.

        NOTE(review): downloads weights over the network on first use.
        """
        self.max_length = 30
        self.joke_prefixes = [
            "My best joke is: ",
            "Here is a joke filled with harmless humour: "
        ]

        generator_name = "botbrain/ChuckleWhiz"
        rater_name = "botbrain/ChuckleWhizRater"

        # Both models are put in eval mode: inference only, no training here.
        self.generator = AutoModelForCausalLM.from_pretrained(generator_name).eval()
        self.generator_tokeniser = AutoTokenizer.from_pretrained(generator_name)

        self.rater = DistilBertForSequenceClassification.from_pretrained(rater_name).eval()
        self.rater_tokeniser = DistilBertTokenizer.from_pretrained(rater_name)

    def tell_joke(self) -> str:
        """Sample a short joke from the generator model.

        A random prompt from ``joke_prefixes`` seeds the generation; the
        sampled text is then truncated at the last sentence-ending
        punctuation mark so the joke does not stop mid-sentence.

        :return: generated joke text (prompt included in the output).
        """
        prompt = random.choice(self.joke_prefixes)
        prompt_ids = self.generator_tokeniser.encode(prompt, return_tensors="pt")

        with torch.no_grad():
            sampled = self.generator.generate(
                prompt_ids,
                max_length=self.max_length,
                repetition_penalty=1.2,
                temperature=0.75,
                do_sample=True
            )

        text = self.generator_tokeniser.decode(sampled[0], skip_special_tokens=True)

        # Cut after the last '.', '!' or '?'; if none occurs, keep everything.
        cut = max(text.rfind(mark) for mark in (".", "!", "?"))
        return text if cut == -1 else text[:cut + 1]

    def rate_joke(self, joke: str) -> int:
        """Score *joke* with the rater model.

        :param joke: joke text to evaluate.
        :return: integer rating clamped to the inclusive range 1-10.
        """
        encoded = self.rater_tokeniser.encode_plus(
            joke,
            add_special_tokens=True,
            truncation=True,
            padding="longest",
            return_tensors="pt"
        )

        with torch.no_grad():
            outputs = self.rater(encoded["input_ids"], encoded["attention_mask"])

        # Single regression logit -> clamp into [1, 10], then round to int.
        score = outputs.logits.item()
        return round(min(max(score, 1), 10))
Oops, something went wrong.