From 475671d1e846a370a38e2044a73aaa1da417fd9a Mon Sep 17 00:00:00 2001
From: douglas
Date: Mon, 3 Apr 2023 20:31:01 -0400
Subject: [PATCH] Pinecone memory and memory usage tracking

---
 .env.template           |  2 ++
 README.md               | 29 ++++++++++++++++++++
 requirements.txt        |  3 +-
 scripts/chat.py         | 37 ++++++++++++++++--------
 scripts/commands.py     | 38 ++-----------------------
 scripts/config.py       | 12 +++++++-
 scripts/data/prompt.txt | 38 ++++++++++++-------------
 scripts/main.py         | 17 +++++++++--
 scripts/memory.py       | 58 +++++++++++++++++++++++++++++++++++--
 9 files changed, 163 insertions(+), 71 deletions(-)

diff --git a/.env.template b/.env.template
index c64d85028f02..9fbffbcd4da2 100644
--- a/.env.template
+++ b/.env.template
@@ -1,3 +1,5 @@
+PINECONE_API_KEY=your-pinecone-api-key
+PINECONE_ENV=your-pinecone-region
 OPENAI_API_KEY=your-openai-api-key
 ELEVENLABS_API_KEY=your-elevenlabs-api-key
 SMART_LLM_MODEL="gpt-4"
diff --git a/README.md b/README.md
index d66a60222449..3af539c85943 100644
--- a/README.md
+++ b/README.md
@@ -139,6 +139,35 @@
 export CUSTOM_SEARCH_ENGINE_ID="YOUR_CUSTOM_SEARCH_ENGINE_ID"
 ```
 
+## 🌲 Pinecone API Key Setup
+
+Pinecone enables vector-based memory, so a vast memory can be stored and only the relevant memories
+are loaded for the agent at any given time.
+
+1. Go to app.pinecone.io and make an account if you don't already have one.
+2. Choose the `Starter` plan to avoid being charged.
+3. Find your API key and region under the default project in the left sidebar.
+
+### Setting up environment variables
+For Windows users:
+```
+setx PINECONE_API_KEY "YOUR_PINECONE_API_KEY"
+setx PINECONE_ENV "Your region" # something like: us-east4-gcp
+
+```
+For macOS and Linux users:
+```
+export PINECONE_API_KEY="YOUR_PINECONE_API_KEY"
+export PINECONE_ENV="Your region" # something like: us-east4-gcp
+
+```
+
+Or you can set them in the `.env` file.
+
+## View Memory Usage
+
+1. View memory usage by using the `--debug` flag :)
+
 ## 💀 Continuous Mode ⚠️
 Run the AI **without** user authorisation, 100% automated.
 Continuous mode is not recommended.
diff --git a/requirements.txt b/requirements.txt
index 158e9324196e..2efb371cc018 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,4 +10,5 @@ tiktoken==0.3.3
 gTTS==2.3.1
 docker
 duckduckgo-search
-google-api-python-client #(https://developers.google.com/custom-search/v1/overview)
+google-api-python-client #(https://developers.google.com/custom-search/v1/overview)
+pinecone-client==2.2.1
diff --git a/scripts/chat.py b/scripts/chat.py
index 86a70b093347..8da074c6bfbf 100644
--- a/scripts/chat.py
+++ b/scripts/chat.py
@@ -23,6 +23,19 @@ def create_chat_message(role, content):
     return {"role": role, "content": content}
 
 
+def generate_context(prompt, relevant_memory, full_message_history, model):
+    current_context = [
+        create_chat_message(
+            "system", prompt), create_chat_message(
+            "system", f"Permanent memory: {relevant_memory}")]
+
+    # Add messages from the full message history until we reach the token limit
+    next_message_to_add_index = len(full_message_history) - 1
+    insertion_index = len(current_context)
+    # Count the currently used tokens
+    current_tokens_used = token_counter.count_message_tokens(current_context, model)
+    return next_message_to_add_index, current_tokens_used, insertion_index, current_context
+
+
 # TODO: Change debug from hardcode to argument
 def chat_with_ai(
@@ -41,7 +54,7 @@
         prompt (str): The prompt explaining the rules to the AI.
         user_input (str): The input from the user.
         full_message_history (list): The list of all messages sent between the user and the AI.
-        permanent_memory (list): The list of items in the AI's permanent memory.
+        permanent_memory (PineconeMemory): The memory object containing the permanent memory.
         token_limit (int): The maximum number of tokens allowed in the API call.
 
     Returns:
@@ -53,18 +66,20 @@
         print(f"Token limit: {token_limit}")
     send_token_limit = token_limit - 1000
 
-    current_context = [
-        create_chat_message(
-            "system", prompt), create_chat_message(
-            "system", f"Permanent memory: {permanent_memory}")]
+    relevant_memory = permanent_memory.get_relevant(str(full_message_history[-5:]), 10)
+
+    if debug:
+        print('Memory Stats: ', permanent_memory.get_stats())
+
+    next_message_to_add_index, current_tokens_used, insertion_index, current_context = generate_context(
+        prompt, relevant_memory, full_message_history, model)
 
-    # Add messages from the full message history until we reach the token limit
-    next_message_to_add_index = len(full_message_history) - 1
-    current_tokens_used = 0
-    insertion_index = len(current_context)
+    while current_tokens_used > 2500:
+        # remove memories until we are under 2500 tokens
+        relevant_memory = relevant_memory[1:]
+        next_message_to_add_index, current_tokens_used, insertion_index, current_context = generate_context(
+            prompt, relevant_memory, full_message_history, model)
 
-    # Count the currently used tokens
-    current_tokens_used = token_counter.count_message_tokens(current_context, model)
     current_tokens_used += token_counter.count_message_tokens([create_chat_message("user", user_input)], model) # Account for user input (appended later)
 
     while next_message_to_add_index >= 0:
diff --git a/scripts/commands.py b/scripts/commands.py
index f8f96fe72cd8..0a7d27f49dc2 100644
--- a/scripts/commands.py
+++ b/scripts/commands.py
@@ -1,6 +1,6 @@
 import browse
 import json
-import memory as mem
+from memory import PineconeMemory
 import datetime
 import agent_manager as agents
 import speak
@@ -45,6 +45,7 @@ def get_command(response):
 
 
 def execute_command(command_name, arguments):
+    memory = PineconeMemory()
     try:
         if command_name == "google":
 
@@ -55,11 +56,7 @@
             else:
                 return google_search(arguments["input"])
         elif command_name == "memory_add":
-            return commit_memory(arguments["string"])
-        elif command_name == "memory_del":
-            return delete_memory(arguments["key"])
-        elif command_name == "memory_ovr":
-            return overwrite_memory(arguments["key"], arguments["string"])
+            return memory.add(arguments["string"])
         elif command_name == "start_agent":
             return start_agent(
                 arguments["name"],
@@ -176,35 +173,6 @@ def get_hyperlinks(url):
     return link_list
 
 
-def commit_memory(string):
-    _text = f"""Committing memory with string "{string}" """
-    mem.permanent_memory.append(string)
-    return _text
-
-
-def delete_memory(key):
-    if key >= 0 and key < len(mem.permanent_memory):
-        _text = "Deleting memory with key " + str(key)
-        del mem.permanent_memory[key]
-        print(_text)
-        return _text
-    else:
-        print("Invalid key, cannot delete memory.")
-        return None
-
-
-def overwrite_memory(key, string):
-    if int(key) >= 0 and key < len(mem.permanent_memory):
-        _text = "Overwriting memory with key " + \
-            str(key) + " and string " + string
-        mem.permanent_memory[key] = string
-        print(_text)
-        return _text
-    else:
-        print("Invalid key, cannot overwrite memory.")
-        return None
-
-
 def shutdown():
     print("Shutting down...")
     quit()
diff --git a/scripts/config.py b/scripts/config.py
index 766cb94f415f..4334f03acffc 100644
--- a/scripts/config.py
+++ b/scripts/config.py
@@ -4,6 +4,7 @@
 # Load environment variables from .env file
 load_dotenv()
 
+
 class Singleton(type):
     """
     Singleton metaclass for ensuring only one instance of a class.
@@ -39,6 +40,9 @@ def __init__(self):
         self.google_api_key = os.getenv("GOOGLE_API_KEY")
         self.custom_search_engine_id = os.getenv("CUSTOM_SEARCH_ENGINE_ID")
 
+        self.pinecone_api_key = os.getenv("PINECONE_API_KEY")
+        self.pinecone_region = os.getenv("PINECONE_ENV")
+
         # Initialize the OpenAI API client
         openai.api_key = self.openai_api_key
@@ -70,4 +74,10 @@ def set_google_api_key(self, value: str):
         self.google_api_key = value
 
     def set_custom_search_engine_id(self, value: str):
-        self.custom_search_engine_id = value
\ No newline at end of file
+        self.custom_search_engine_id = value
+
+    def set_pinecone_api_key(self, value: str):
+        self.pinecone_api_key = value
+
+    def set_pinecone_region(self, value: str):
+        self.pinecone_region = value
diff --git a/scripts/data/prompt.txt b/scripts/data/prompt.txt
index a93e783eb3ac..9f31fad79539 100644
--- a/scripts/data/prompt.txt
+++ b/scripts/data/prompt.txt
@@ -1,29 +1,27 @@
 CONSTRAINTS:
 
-1. ~4000 word limit for memory. Your memory is short, so immediately save important information to long term memory and code to files.
-2. No user assistance
-3. Exclusively use the commands listed in double quotes e.g. "command name"
+1. ~4000 word limit for short term memory. Your short term memory is limited, so immediately save important information to files.
+2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.
+3. No user assistance
+4. Exclusively use the commands listed in double quotes e.g. "command name"
 
 COMMANDS:
 
 1. Google Search: "google", args: "input": ""
-2. Memory Add: "memory_add", args: "string": ""
-3. Memory Delete: "memory_del", args: "key": ""
-4. Memory Overwrite: "memory_ovr", args: "key": "", "string": ""
-5. Browse Website: "browse_website", args: "url": "", "question": ""
-6. Start GPT Agent: "start_agent", args: "name": , "task": "", "prompt": ""
-7. Message GPT Agent: "message_agent", args: "key": "", "message": ""
-8. List GPT Agents: "list_agents", args: ""
-9. Delete GPT Agent: "delete_agent", args: "key": ""
-10. Write to file: "write_to_file", args: "file": "", "text": ""
-11. Read file: "read_file", args: "file": ""
-12. Append to file: "append_to_file", args: "file": "", "text": ""
-13. Delete file: "delete_file", args: "file": ""
-14. Evaluate Code: "evaluate_code", args: "code": ""
-15. Get Improved Code: "improve_code", args: "suggestions": "", "code": ""
-16. Write Tests: "write_tests", args: "code": "", "focus": ""
-17. Execute Python File: "execute_python_file", args: "file": ""
-18. Task Complete (Shutdown): "task_complete", args: "reason": ""
+2. Browse Website: "browse_website", args: "url": "", "question": ""
+3. Start GPT Agent: "start_agent", args: "name": , "task": "", "prompt": ""
+4. Message GPT Agent: "message_agent", args: "key": "", "message": ""
+5. List GPT Agents: "list_agents", args: ""
+6. Delete GPT Agent: "delete_agent", args: "key": ""
+7. Write to file: "write_to_file", args: "file": "", "text": ""
+8. Read file: "read_file", args: "file": ""
+9. Append to file: "append_to_file", args: "file": "", "text": ""
+10. Delete file: "delete_file", args: "file": ""
+11. Evaluate Code: "evaluate_code", args: "code": ""
+12. Get Improved Code: "improve_code", args: "suggestions": "", "code": ""
+13. Write Tests: "write_tests", args: "code": "", "focus": ""
Write Tests: "write_tests", args: "code": "", "focus": "" +14. Execute Python File: "execute_python_file", args: "file": "" +15. Task Complete (Shutdown): "task_complete", args: "reason": "" RESOURCES: diff --git a/scripts/main.py b/scripts/main.py index 93124234e6cd..2b76842f5139 100644 --- a/scripts/main.py +++ b/scripts/main.py @@ -1,7 +1,7 @@ import json import random import commands as cmd -import memory as mem +from memory import PineconeMemory import data import chat from colorama import Fore, Style @@ -280,6 +280,13 @@ def parse_arguments(): # Make a constant: user_input = "Determine which next command to use, and respond using the format specified above:" +# Initialize memory and make sure it is empty. +# this is particularly important for indexing and referencing pinecone memory +memory = PineconeMemory() +memory.clear() + +print('Using memory of type: ' + memory.__class__.__name__) + # Interaction Loop while True: # Send message to AI, get response @@ -288,7 +295,7 @@ def parse_arguments(): prompt, user_input, full_message_history, - mem.permanent_memory, + memory, cfg.fast_token_limit) # TODO: This hardcodes the model to use GPT3.5. Make this an argument # print("assistant reply: "+assistant_reply) @@ -349,6 +356,12 @@ def parse_arguments(): else: result = f"Command {command_name} returned: {cmd.execute_command(command_name, arguments)}" + memory_to_add = f"Assistant Reply: {assistant_reply} " \ + f"\nResult: {result} " \ + f"\nHuman Feedback: {user_input} " + + memory.add(memory_to_add) + # Check if there's a result from the command append it to the message # history if result is not None: diff --git a/scripts/memory.py b/scripts/memory.py index 0dc5b76663e1..0d265a31d8f4 100644 --- a/scripts/memory.py +++ b/scripts/memory.py @@ -1 +1,61 @@ -permanent_memory = [] +from config import Config, Singleton +import pinecone +import openai + +cfg = Config() + + +def get_ada_embedding(text): + text = text.replace("\n", " ") + return openai.Embedding.create(input=[text], model="text-embedding-ada-002")["data"][0]["embedding"] + + +def get_text_from_embedding(embedding): + return openai.Embedding.retrieve(embedding, model="text-embedding-ada-002")["data"][0]["text"] + + +class PineconeMemory(metaclass=Singleton): + def __init__(self): + pinecone_api_key = cfg.pinecone_api_key + pinecone_region = cfg.pinecone_region + pinecone.init(api_key=pinecone_api_key, environment=pinecone_region) + dimension = 1536 + metric = "cosine" + pod_type = "p1" + table_name = "auto-gpt" + # this assumes we don't start with memory. + # for now this works. + # we'll need a more complicated and robust system if we want to start with memory. + self.vec_num = 0 + if table_name not in pinecone.list_indexes(): + pinecone.create_index(table_name, dimension=dimension, metric=metric, pod_type=pod_type) + self.index = pinecone.Index(table_name) + + def add(self, data): + vector = get_ada_embedding(data) + # no metadata here. We may wish to change that long term. + resp = self.index.upsert([(str(self.vec_num), vector, {"raw_text": data})]) + _text = f"Inserting data into memory at index: {self.vec_num}:\n data: {data}" + self.vec_num += 1 + return _text + + def get(self, data): + return self.get_relevant(data, 1) + + def clear(self): + self.index.delete(deleteAll=True) + return "Obliviated" + + def get_relevant(self, data, num_relevant=5): + """ + Returns all the data in the memory that is relevant to the given data. + :param data: The data to compare to. + :param num_relevant: The number of relevant data to return. 
+        """
+        query_embedding = get_ada_embedding(data)
+        results = self.index.query(query_embedding, top_k=num_relevant, include_metadata=True)
+        sorted_results = sorted(results.matches, key=lambda x: x.score)
+        return [str(item["metadata"]["raw_text"]) for item in sorted_results]
+
+    def get_stats(self):
+        return self.index.describe_index_stats()
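
To sanity-check the new memory backend end to end, a short driver script along these lines can be run against the class this patch adds in scripts/memory.py. This is a minimal sketch, not part of the patch: it assumes PINECONE_API_KEY, PINECONE_ENV, and OPENAI_API_KEY are set (for example via `.env`), that pinecone-client==2.2.1 is installed, that it runs from inside scripts/ so `memory` is importable, and that the stored strings are made-up placeholders.

```python
# Minimal sketch exercising PineconeMemory from scripts/memory.py.
from memory import PineconeMemory

memory = PineconeMemory()  # Singleton metaclass: repeated calls return the same instance
memory.clear()             # start from an empty index, exactly as main.py now does

# add() embeds the text with text-embedding-ada-002 and upserts it under an
# incrementing integer id, keeping the raw text as "raw_text" metadata.
memory.add("Assistant Reply: wrote the summary to summary.txt")  # placeholder content
memory.add("Human Feedback: the summary was too long")           # placeholder content

# get_relevant() embeds the query and returns up to num_relevant matches.
for text in memory.get_relevant("where was the summary saved?", num_relevant=2):
    print(text)

print(memory.get_stats())  # the same index stats chat.py prints under --debug
```

Note that get_relevant sorts matches ascending by cosine score, i.e. least relevant first; chat_with_ai relies on that ordering when it drops relevant_memory[0] (via relevant_memory[1:]) to get the context back under its 2500-token budget.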