forked from konfuzio-ai/ai-comedy-club
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into rodrigo-lucchesi
- Loading branch information
Showing
178 changed files
with
867,526 additions
and
91 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,7 @@ | ||
# Virtual environment and IDE metadata
venv/*
.idea/*
# Local environment variables (may contain secrets) — never commit
.env
# macOS Finder metadata
.DS_Store
# Python bytecode and pytest caches
__pycache__/
.pytest_cache/
*.py[cod]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
{ | ||
"[python]": { | ||
"editor.defaultFormatter": "ms-python.autopep8" | ||
}, | ||
"python.formatting.provider": "none" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import uvicorn | ||
from fastapi import FastAPI, Request | ||
from fastapi.templating import Jinja2Templates | ||
from fastapi.responses import HTMLResponse | ||
import importlib.util | ||
import os | ||
from pathlib import Path | ||
|
||
# This is a simple backend with an HTML template to display ONLY jokes
# (no ratings) from every bot.

# The directory where the bots are located.
# NOTE(review): resolved relative to the current working directory's parent —
# assumes the server is launched from a sibling directory of "bots"; confirm.
bots_dir = os.path.join(Path(os.getcwd()).parent, "bots")


app = FastAPI()
# Jinja2 templates are looked up in "templates", relative to the working directory.
templates = Jinja2Templates(directory="templates")
|
||
|
||
@app.get('/', response_class=HTMLResponse)
def index(request: Request):
    """Render the index page with one joke from every bot in ``bots_dir``.

    Each subdirectory of ``bots_dir`` is expected to contain a ``joke_bot.py``
    defining a ``Bot`` class with a ``name`` attribute and a ``tell_joke()``
    method. Directories without a ``joke_bot.py`` are skipped instead of
    crashing the request with a FileNotFoundError.

    :param request: incoming request, forwarded to the template context.
    :return: rendered ``index.html`` with a ``jokes`` mapping of bot name -> joke.
    """
    jokes = {}
    bot_directories = [d for d in os.listdir(bots_dir)
                       if os.path.isdir(os.path.join(bots_dir, d))]
    for bot_dir in bot_directories:
        module_path = os.path.join(bots_dir, bot_dir, "joke_bot.py")
        # Robustness fix: skip directories that ship no bot implementation.
        if not os.path.isfile(module_path):
            continue

        # Dynamically load the bot's module from its file path.
        spec = importlib.util.spec_from_file_location("bot", module_path)
        bot_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(bot_module)

        # Instantiate the bot and collect one joke, keyed by the bot's name
        # (single pass — no intermediate list of bot instances needed).
        bot = bot_module.Bot()
        jokes[bot.name] = bot.tell_joke()

    return templates.TemplateResponse("index.html", {"request": request, "title": "Comedy-club",
                                                     "placeholder": "Welcome", "jokes": jokes})
|
||
|
||
if __name__ == "__main__":
    # Run the dev server with auto-reload; the "api:app" import string
    # assumes this file is named api.py — TODO confirm filename.
    uvicorn.run("api:app", host='127.0.0.1', port=8001, reload=True)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
<!doctype html>

<html lang="en">

<head>
    <title>{{title}}</title>
</head>

<body>
    <h1>Want some jokes?</h1>
    <!-- "jokes" is a mapping of bot name -> joke text; one line per bot. -->
    {% for key, value in jokes.items() %}
    <b>Joke from {{ key }}: {{ value }}</b><br>
    {% endfor %}
</body>

</html>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
models/model_v04/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Jupyter/JupyterLab working files
.ipynb_checkpoints/
.virtual_documents/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
## Konfuzio "AI Comedy Club" Challenge

### Introduction

Humour, even for us humans, can be mysterious. It is no wonder that it poses a unique challenge for AI systems as well. Let us think about it - we all have that one friend who effortlessly brings laughter into our lives. They have a natural knack for timing, delivery, and a deep understanding of what makes something funny. But have we ever tried to explain why they are funny?

In this project, we aim to tackle the challenging task of creating an AI bot that excels at generating new jokes. This task is particularly difficult due to the complexities of humour. Humour is subjective and context-dependent, making it challenging for an AI system to understand and replicate effectively.

Additionally, jokes often rely on wordplay, sarcasm, and cultural references, which further complicates the task of generating original and funny jokes. Despite these challenges, we are determined to push the boundaries of AI and humour, striving to create a bot that can bring joy and laughter to users worldwide.

Meet **ChuckleChief**, our enthusiastic and curious novice AI companion, eager to unravel the mysteries of humour.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import random | ||
|
||
import torch | ||
from transformers import (AutoModelForCausalLM, AutoTokenizer, | ||
DistilBertForSequenceClassification, | ||
DistilBertTokenizer) | ||
|
||
|
||
class Bot:
    """A joke-telling bot backed by two fine-tuned transformer models.

    Uses a causal language model to sample joke text from a fixed prompt and
    a DistilBERT sequence-classification head (single logit, treated as a
    regression score) to rate jokes on a 1-10 scale.
    """

    name = "ChuckleChief"

    def __init__(self):
        """Load the generator and rater models from the Hugging Face hub.

        NOTE(review): downloads weights over the network on first use.
        """
        self.max_length = 30
        self.joke_prefixes = [
            "My best joke is: ",
            "Here is a joke filled with harmless humour: "
        ]

        generator_name = "botbrain/ChuckleWhiz"
        rater_name = "botbrain/ChuckleWhizRater"

        # Both models are put in eval mode: inference only, no training here.
        self.generator = AutoModelForCausalLM.from_pretrained(generator_name).eval()
        self.generator_tokeniser = AutoTokenizer.from_pretrained(generator_name)

        self.rater = DistilBertForSequenceClassification.from_pretrained(rater_name).eval()
        self.rater_tokeniser = DistilBertTokenizer.from_pretrained(rater_name)

    def tell_joke(self) -> str:
        """Sample a short joke from the generator model.

        A random prompt from ``joke_prefixes`` seeds the generation; the
        sampled text is then truncated at the last sentence-ending
        punctuation mark so the joke does not stop mid-sentence.

        :return: generated joke text (prompt included in the output).
        """
        prompt = random.choice(self.joke_prefixes)
        prompt_ids = self.generator_tokeniser.encode(prompt, return_tensors="pt")

        with torch.no_grad():
            sampled = self.generator.generate(
                prompt_ids,
                max_length=self.max_length,
                repetition_penalty=1.2,
                temperature=0.75,
                do_sample=True
            )

        text = self.generator_tokeniser.decode(sampled[0], skip_special_tokens=True)

        # Cut after the last '.', '!' or '?'; if none occurs, keep everything.
        cut = max(text.rfind(mark) for mark in (".", "!", "?"))
        return text if cut == -1 else text[:cut + 1]

    def rate_joke(self, joke: str) -> int:
        """Score *joke* with the rater model.

        :param joke: joke text to evaluate.
        :return: integer rating clamped to the inclusive range 1-10.
        """
        encoded = self.rater_tokeniser.encode_plus(
            joke,
            add_special_tokens=True,
            truncation=True,
            padding="longest",
            return_tensors="pt"
        )

        with torch.no_grad():
            outputs = self.rater(encoded["input_ids"], encoded["attention_mask"])

        # Single regression logit -> clamp into [1, 10], then round to int.
        score = outputs.logits.item()
        return round(min(max(score, 1), 10))
Oops, something went wrong.