NBs and README

noahshinn · May 21, 2023 · c2159d4 · c2159d4
1 parent e531a5c
commit c2159d4
Show file tree

Hide file tree

Showing 14 changed files with 751 additions and 15,480 deletions.
diff --git a/README.md b/README.md
@@ -36,11 +36,50 @@ Run the trial
 The logs will be sent to `./root/<run_name>`.
 
 ### To Run: reasoning (HotPotQA)
-Clone this repo and move to the HotPotQA directory
+We provide a set of notebooks to easily run, explore, and interact with the results of the reasoning experiments. Each experiment consists of a random sample of 100 questions from the HotPotQA distractor dataset. Each question in the sample is attempted by an agent with a specific type and reflexion strategy.
+
+#### Setup
+
+To get started:
+
+1. Clone this repo and move to the HotPotQA directory:
 ```bash
 git clone https://github.com/noahshinn024/reflexion && cd ./reflexion/hotpotqa_runs
 ```
 
+2. Install the module dependencies into your environment:
+```bash
+pip install -r requirements.txt
+```
+
+3. Set the `OPENAI_API_KEY` environment variable to your OpenAI API key:
+```bash
+export OPENAI_API_KEY=<your key>
+```
+
+#### Agent Types
+
+Agent type is determined by the notebook you choose to run. The available agent types include:
+ - `ReAct` - ReAct Agent
+
+ - `CoT_context` - CoT Agent given supporting context about the question 
+
+ - `CoT_no_context` - CoT Agent given no supporting context about the question
+
+The notebook for each agent type is located in the `./hotpotqa_runs/notebooks` directory.
+
+#### Reflexion Strategies
+
+Each notebook allows you to specify the reflexion strategy to be used by the agents. The available reflexion strategies, which are defined in an `Enum`, include:
+
+ - `ReflexionStrategy.NONE` - The agent is not given any information about its last attempt. 
+
+ - `ReflexionStrategy.LAST_ATTEMPT` - The agent is given its reasoning trace from its last attempt on the question as context.
+
+ - `ReflexionStrategy.REFLEXION` - The agent is given its self-reflection on the last attempt as context. 
+
+ - `ReflexionStrategy.LAST_ATTEMPT_AND_REFLEXION` -  The agent is given both its reasoning trace and self-reflection on the last attempt as context.
+
 ### Another Note
 
 Due to the nature of these experiments, it may not be feasible for individual developers to rerun the results as GPT-4 has limited access and significant API charges. All runs from the paper and additional results are logged in `./alfworld_runs/root` for decision-making and `./hotpotqa_runs/root` for reasoning. 
@@ -64,4 +103,4 @@ For all questions, contact [[email protected]]([email protected])
   journal={arXiv preprint arXiv:2303.11366},
   year={2023}
 }
-```
+```
diff --git a/hotpotqa_runs/react_cls.py → hotpotqa_runs/agents.py b/hotpotqa_runs/react_cls.py → hotpotqa_runs/agents.py
@@ -1,6 +1,6 @@
 import re, string, os
 from typing import List, Union, Literal
-
+from enum import Enum
 import tiktoken
 from langchain import OpenAI, Wikipedia
 from langchain.llms.base import BaseLLM
@@ -11,14 +11,26 @@
 from prompts import cot_agent_prompt, cot_reflect_agent_prompt, cot_reflect_prompt, COT_INSTRUCTION, COT_REFLECT_INSTRUCTION
 from fewshots import WEBTHINK_SIMPLE6, REFLECTIONS, COT, COT_REFLECT
 
+class ReflexionStrategy(Enum):
+    """
+    NONE: No reflection
+    LAST_ATTEMPT: Use last reasoning trace in context 
+    REFLEXION: Apply reflexion to the next reasoning trace 
+    LAST_ATTEMPT_AND_REFLEXION: Use last reasoning trace in context and apply reflexion to the next reasoning trace 
+    """
+    NONE = 'base'
+    LAST_ATTEMPT = 'last_trial' 
+    REFLEXION = 'reflexion'
+    LAST_ATTEMPT_AND_REFLEXION = 'last_trial_and_reflexion'
+
+
 class CoTAgent:
     def __init__(self,
                     question: str,
                     context: str,
                     key: str,
                     agent_prompt: PromptTemplate = cot_reflect_agent_prompt,
                     reflect_prompt: PromptTemplate = cot_reflect_prompt,
-                    reflect_header: str = REFLECTION_HEADER,
                     cot_examples: str = COT,
                     reflect_examples: str = COT_REFLECT,
                     self_reflect_llm: BaseLLM = OpenAI(
@@ -34,13 +46,11 @@ def __init__(self,
                                             model_kwargs={"stop": "\n"},
                                             openai_api_key=os.environ['OPENAI_API_KEY']),
                     ) -> None:
-
         self.question = question
         self.context = context
         self.key = key
         self.agent_prompt = agent_prompt
         self.reflect_prompt = reflect_prompt
-        self.reflect_header = reflect_header
         self.cot_examples = cot_examples 
         self.reflect_examples = reflect_examples
         self.self_reflect_llm = self_reflect_llm
@@ -51,12 +61,10 @@ def __init__(self,
         self.step_n: int = 0
         self.reset()
 
-    def run(self, reflect: bool = True,
-            reflect_strategy: Union[Literal['last_attempt'],
-                                    Literal['reflexion'],
-                                    Literal['last_attempt + reflexion']] = 'reflexion') -> None:
-        if self.step_n > 0 and not self.is_correct() and reflect:
-            self.reflect(reflect_strategy)
+    def run(self,
+            reflexion_strategy: ReflexionStrategy = ReflexionStrategy.REFLEXION) -> None:
+        if self.step_n > 0 and not self.is_correct() and reflexion_strategy != ReflexionStrategy.NONE:
+            self.reflect(reflexion_strategy)
         self.reset()
         self.step()
         self.step_n += 1
@@ -87,17 +95,15 @@ def step(self) -> None:
             print('Invalid action type, please try again.')
 
     def reflect(self,
-                strategy: Union[Literal['last_attempt'],
-                                Literal['reflexion'],
-                                Literal['last_attempt + reflexion']]) -> None:
-        print('Reflecting...')
-        if strategy == 'last_attempt':
+                strategy: ReflexionStrategy) -> None:
+        print('Running Reflexion strategy...')
+        if strategy == ReflexionStrategy.LAST_ATTEMPT:
             self.reflections = [self.scratchpad]
             self.reflections_str = format_last_attempt(self.question , self.reflections[0])
-        elif strategy == 'reflexion':
+        elif strategy == ReflexionStrategy.REFLEXION:
             self.reflections += [self.prompt_reflection()]
             self.reflections_str = format_reflections(self.reflections)
-        elif strategy == 'last_attempt + reflexion':
+        elif strategy == ReflexionStrategy.LAST_ATTEMPT_AND_REFLEXION:
             self.reflections_str = format_last_attempt(self.question , self.scratchpad)
             self.reflections = [self.prompt_reflection()]
             self.reflections_str += '\n'+ format_reflections(self.reflections, header = REFLECTION_AFTER_LAST_TRIAL_HEADER)
@@ -253,7 +259,6 @@ def __init__(self,
                  max_steps: int = 6,
                  agent_prompt: PromptTemplate = react_reflect_agent_prompt,
                  reflect_prompt: PromptTemplate = reflect_prompt,
-                 reflect_header: str = REFLECTION_HEADER,
                  docstore: Docstore = Wikipedia(),
                  react_llm: BaseLLM = OpenAI(
                                              temperature=0,
@@ -269,29 +274,28 @@ def __init__(self,
                  ) -> None:
 
         super().__init__(question, key, max_steps, agent_prompt, docstore, react_llm)
-        self.reflect_header = reflect_header
         self.reflect_llm = reflect_llm
         self.reflect_prompt = reflect_prompt
         self.reflect_examples = REFLECTIONS
         self.reflections: List[str] = []
         self.reflections_str: str = ''
 
-    def run(self, reset = True, reflect_strategy: Union[Literal['last_attempt'], Literal['reflexion'], Literal['last_attempt + reflexion']] = 'reflexion') -> None:
+    def run(self, reset = True, reflect_strategy: ReflexionStrategy = ReflexionStrategy.REFLEXION) -> None:
         if (self.is_finished() or self.is_halted()) and not self.is_correct():
             self.reflect(reflect_strategy)
 
         ReactAgent.run(self, reset)
 
     def reflect(self,
-                strategy: Union[Literal['last_attempt'], Literal['reflexion'], Literal['last_attempt + reflexion']]) -> None:
+                strategy: ReflexionStrategy) -> None:
         print('Reflecting...')
-        if strategy == 'last_attempt':
+        if strategy == ReflexionStrategy.LAST_ATTEMPT:
             self.reflections = [self.scratchpad]
             self.reflections_str = format_last_attempt(self.question, self.reflections[0])
-        elif strategy == 'reflexion':
+        elif strategy == ReflexionStrategy.REFLEXION: 
             self.reflections += [self.prompt_reflection()]
             self.reflections_str = format_reflections(self.reflections)
-        elif strategy == 'last_attempt + reflexion':
+        elif strategy == ReflexionStrategy.LAST_ATTEMPT_AND_REFLEXION: 
             self.reflections_str = format_last_attempt(self.question, self.scratchpad)
             self.reflections = [self.prompt_reflection()]
             self.reflections_str += format_reflections(self.reflections, header = REFLECTION_AFTER_LAST_TRIAL_HEADER)