From 904f758b72df74cce900ea1fa1643fd72e60244a Mon Sep 17 00:00:00 2001
From: Piotr Gawlowicz
Date: Fri, 20 Mar 2020 14:59:52 +0100
Subject: [PATCH] multi-agent env example

---
 scratch/multi-agent/agent1.py |  56 ++++++++
 scratch/multi-agent/agent2.py |  56 ++++++++
 scratch/multi-agent/mygym.cc  | 235 ++++++++++++++++++++++++++++++++++
 scratch/multi-agent/mygym.h   |  62 +++++++++
 scratch/multi-agent/readme.md |  22 ++++
 scratch/multi-agent/sim.cc    |  82 ++++++++++++
 6 files changed, 513 insertions(+)
 create mode 100755 scratch/multi-agent/agent1.py
 create mode 100755 scratch/multi-agent/agent2.py
 create mode 100644 scratch/multi-agent/mygym.cc
 create mode 100644 scratch/multi-agent/mygym.h
 create mode 100644 scratch/multi-agent/readme.md
 create mode 100644 scratch/multi-agent/sim.cc

diff --git a/scratch/multi-agent/agent1.py b/scratch/multi-agent/agent1.py
new file mode 100755
index 0000000000..5cb88141c1
--- /dev/null
+++ b/scratch/multi-agent/agent1.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import argparse
+from ns3gym import ns3env
+
+__author__ = "Piotr Gawlowicz"
+__copyright__ = "Copyright (c) 2020, Technische Universität Berlin"
+__version__ = "0.1.0"
+__email__ = "gawlowicz@tkn.tu-berlin.de"
+
+
+port = 5555
+env = ns3env.Ns3Env(port=port, startSim=False)
+env.reset()
+
+ob_space = env.observation_space
+ac_space = env.action_space
+print("Observation space: ", ob_space, ob_space.dtype)
+print("Action space: ", ac_space, ac_space.dtype)
+
+
+stepIdx = 0
+currIt = 0
+iterationNum = 3
+
+try:
+    while True:
+        obs = env.reset()
+        print("Step: ", stepIdx)
+        print("---obs: ", obs)
+
+        while True:
+            stepIdx += 1
+            action = env.action_space.sample()
+            print("---action: ", action)
+
+            print("Step: ", stepIdx)
+            obs, reward, done, info = env.step(action)
+            print("---obs, reward, done, info: ", obs, reward, done, info)
+
+            input("press enter....")
+
+            if done:
+                break
+
+        currIt += 1
+        if currIt == iterationNum:
+            break
+
+
+except KeyboardInterrupt:
+    print("Ctrl-C -> Exit")
+finally:
+    env.close()
+    print("Done")
\ No newline at end of file
diff --git a/scratch/multi-agent/agent2.py b/scratch/multi-agent/agent2.py
new file mode 100755
index 0000000000..42c184234d
--- /dev/null
+++ b/scratch/multi-agent/agent2.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import argparse
+from ns3gym import ns3env
+
+__author__ = "Piotr Gawlowicz"
+__copyright__ = "Copyright (c) 2020, Technische Universität Berlin"
+__version__ = "0.1.0"
+__email__ = "gawlowicz@tkn.tu-berlin.de"
+
+
+port = 5556
+env = ns3env.Ns3Env(port=port, startSim=False)
+env.reset()
+
+ob_space = env.observation_space
+ac_space = env.action_space
+print("Observation space: ", ob_space, ob_space.dtype)
+print("Action space: ", ac_space, ac_space.dtype)
+
+
+stepIdx = 0
+currIt = 0
+iterationNum = 3
+
+try:
+    while True:
+        obs = env.reset()
+        print("Step: ", stepIdx)
+        print("---obs: ", obs)
+
+        while True:
+            stepIdx += 1
+            action = env.action_space.sample()
+            print("---action: ", action)
+
+            print("Step: ", stepIdx)
+            obs, reward, done, info = env.step(action)
+            print("---obs, reward, done, info: ", obs, reward, done, info)
+
+            input("press enter....")
+
+            if done:
+                break
+
+        currIt += 1
+        if currIt == iterationNum:
+            break
+
+
+except KeyboardInterrupt:
+    print("Ctrl-C -> Exit")
+finally:
+    env.close()
+    print("Done")
\ No newline at end of file
diff --git a/scratch/multi-agent/mygym.cc b/scratch/multi-agent/mygym.cc
new file mode 100644
index 0000000000..d618b5f504
--- /dev/null
+++ b/scratch/multi-agent/mygym.cc
@@ -0,0 +1,235 @@
+/* -*- Mode: C++; c-file-style: "gnu"; indent-tabs-mode:nil; -*- */
+/*
+ * Copyright (c) 2018 Technische Universität Berlin
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Piotr Gawlowicz
+ */
+
+#include "mygym.h"
+#include "ns3/object.h"
+#include "ns3/core-module.h"
+#include "ns3/wifi-module.h"
+#include "ns3/node-list.h"
+#include "ns3/log.h"
+#include <sstream>
+#include <iostream>
+
+namespace ns3 {
+
+NS_LOG_COMPONENT_DEFINE ("MyGymEnv");
+
+NS_OBJECT_ENSURE_REGISTERED (MyGymEnv);
+
+/*
+Default constructor: agent id 0 (in-class initializer), 0.1 s step interval
+*/
+MyGymEnv::MyGymEnv ()
+{
+  NS_LOG_FUNCTION (this);
+  m_interval = Seconds(0.1);
+
+  Simulator::Schedule (Seconds(0.0), &MyGymEnv::ScheduleNextStateRead, this);
+}
+
+/*
+Create the environment of agent agentId, stepped every stepTime
+*/
+MyGymEnv::MyGymEnv (uint32_t agentId, Time stepTime)
+{
+  NS_LOG_FUNCTION (this);
+  m_agentId = agentId;
+  m_interval = stepTime;
+
+  Simulator::Schedule (Seconds(0.0), &MyGymEnv::ScheduleNextStateRead, this);
+}
+
+/*
+Periodic step: schedule the next step after m_interval, then call Notify()
+*/
+void
+MyGymEnv::ScheduleNextStateRead ()
+{
+  NS_LOG_FUNCTION (this);
+  Simulator::Schedule (m_interval, &MyGymEnv::ScheduleNextStateRead, this);
+  Notify();
+}
+
+MyGymEnv::~MyGymEnv ()
+{
+  NS_LOG_FUNCTION (this);
+}
+
+/*
+Describe the type (parent, group, default constructor) to the ns-3 TypeId system
+*/
+TypeId
+MyGymEnv::GetTypeId (void)
+{
+  static TypeId tid = TypeId ("MyGymEnv")
+    .SetParent<OpenGymEnv> ()
+    .SetGroupName ("OpenGym")
+    .AddConstructor<MyGymEnv> ()
+  ;
+  return tid;
+}
+
+void
+MyGymEnv::DoDispose ()
+{
+  NS_LOG_FUNCTION (this);
+}
+
+/*
+Define observation space: dict with a "box" (low 0, high 10, shape {5}) and a "discrete" (5) entry
+*/
+Ptr<OpenGymSpace>
+MyGymEnv::GetObservationSpace()
+{
+  uint32_t nodeNum = 5;
+  float low = 0.0;
+  float high = 10.0;
+  std::vector<uint32_t> shape = {nodeNum,};
+  std::string dtype = TypeNameGet<uint32_t> ();
+
+  Ptr<OpenGymDiscreteSpace> discrete = CreateObject<OpenGymDiscreteSpace> (nodeNum);
+  Ptr<OpenGymBoxSpace> box = CreateObject<OpenGymBoxSpace> (low, high, shape, dtype);
+
+  Ptr<OpenGymDictSpace> space = CreateObject<OpenGymDictSpace> ();
+  space->Add("box", box);
+  space->Add("discrete", discrete);
+
+  NS_LOG_UNCOND ("AgentID: " << m_agentId << " MyGetObservationSpace: " << space);
+  return space;
+}
+
+/*
+Define action space (built exactly like the observation space)
+*/
+Ptr<OpenGymSpace>
+MyGymEnv::GetActionSpace()
+{
+  uint32_t nodeNum = 5;
+  float low = 0.0;
+  float high = 10.0;
+  std::vector<uint32_t> shape = {nodeNum,};
+  std::string dtype = TypeNameGet<uint32_t> ();
+
+  Ptr<OpenGymDiscreteSpace> discrete = CreateObject<OpenGymDiscreteSpace> (nodeNum);
+  Ptr<OpenGymBoxSpace> box = CreateObject<OpenGymBoxSpace> (low, high, shape, dtype);
+
+  Ptr<OpenGymDictSpace> space = CreateObject<OpenGymDictSpace> ();
+  space->Add("box", box);
+  space->Add("discrete", discrete);
+
+  NS_LOG_UNCOND ("AgentID: " << m_agentId << " MyGetActionSpace: " << space);
+  return space;
+}
+
+/*
+Define game over condition (never true while the local "test" switch is false)
+*/
+bool
+MyGymEnv::GetGameOver()
+{
+  bool isGameOver = false;
+  bool test = false;
+  // per-instance counter: a function-local static would be shared by all agents
+  m_stepCounter += 1;
+  if (m_stepCounter == 10 && test) {
+      isGameOver = true;
+  }
+  NS_LOG_UNCOND ("AgentID: " << m_agentId << " MyGetGameOver: " << isGameOver);
+  return isGameOver;
+}
+
+/*
+Collect observations: a tuple of (box with 5 random integers, one random discrete value)
+*/
+Ptr<OpenGymDataContainer>
+MyGymEnv::GetObservation()
+{
+  uint32_t nodeNum = 5;
+  uint32_t low = 0.0;
+  uint32_t high = 10.0;
+  Ptr<UniformRandomVariable> rngInt = CreateObject<UniformRandomVariable> ();
+
+  std::vector<uint32_t> shape = {nodeNum,};
+  Ptr<OpenGymBoxContainer<uint32_t> > box = CreateObject<OpenGymBoxContainer<uint32_t> >(shape);
+
+  // generate random data
+  for (uint32_t i = 0; i<nodeNum; i++){
+    uint32_t value = rngInt->GetInteger(low, high);
+    box->AddValue(value);
+  }
+
+  Ptr<OpenGymDiscreteContainer> discrete = CreateObject<OpenGymDiscreteContainer>(nodeNum);
+  uint32_t value = rngInt->GetInteger(low, high);
+  discrete->SetValue(value);
+
+  Ptr<OpenGymTupleContainer> data = CreateObject<OpenGymTupleContainer> ();
+  data->Add(box);
+  data->Add(discrete);
+
+  // Print data from tuple
+  Ptr<OpenGymBoxContainer<uint32_t> > mbox = DynamicCast<OpenGymBoxContainer<uint32_t> >(data->Get(0));
+  Ptr<OpenGymDiscreteContainer> mdiscrete = DynamicCast<OpenGymDiscreteContainer>(data->Get(1));
+  NS_LOG_UNCOND ("AgentID: " << m_agentId << " MyGetObservation: " << data);
+  NS_LOG_UNCOND ("---" << mbox);
+  NS_LOG_UNCOND ("---" << mdiscrete);
+
+  return data;
+}
+
+/*
+Define reward function: grows by one on every call
+*/
+float
+MyGymEnv::GetReward()
+{
+  // per-instance reward: a function-local static would be shared by all agents
+  m_reward += 1;
+  return m_reward;
+}
+
+/*
+Define extra info. Optional
+*/
+std::string
+MyGymEnv::GetExtraInfo()
+{
+  std::string myInfo = "testInfo";
+  myInfo += "|123";
+  NS_LOG_UNCOND("AgentID: " << m_agentId << " MyGetExtraInfo: " << myInfo);
+  return myInfo;
+}
+
+/*
+Execute received actions (here they are only unpacked and logged)
+*/
+bool
+MyGymEnv::ExecuteActions(Ptr<OpenGymDataContainer> action)
+{
+  Ptr<OpenGymDictContainer> dict = DynamicCast<OpenGymDictContainer>(action);
+  Ptr<OpenGymBoxContainer<uint32_t> > box = DynamicCast<OpenGymBoxContainer<uint32_t> >(dict->Get("box"));
+  Ptr<OpenGymDiscreteContainer> discrete = DynamicCast<OpenGymDiscreteContainer>(dict->Get("discrete"));
+
+  NS_LOG_UNCOND ("AgentID: " << m_agentId << " MyExecuteActions: " << action);
+  NS_LOG_UNCOND ("---" << box);
+  NS_LOG_UNCOND ("---" << discrete);
+  return true;
+}
+
+} // ns3 namespace
\ No newline at end of file
diff --git a/scratch/multi-agent/mygym.h b/scratch/multi-agent/mygym.h
new file mode 100644
index 0000000000..c24c77d126
--- /dev/null
+++ b/scratch/multi-agent/mygym.h
@@ -0,0 +1,62 @@
+/* -*- Mode: C++; c-file-style: "gnu"; indent-tabs-mode:nil; -*- */
+/*
+ * Copyright (c) 2018 Technische Universität Berlin
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Piotr Gawlowicz
+ */
+
+
+#ifndef MY_GYM_ENTITY_H
+#define MY_GYM_ENTITY_H
+
+#include "ns3/opengym-module.h"
+#include "ns3/nstime.h"
+
+namespace ns3 {
+
+/**
+ * Example gym environment serving one agent of the multi-agent scenario.
+ */
+class MyGymEnv : public OpenGymEnv
+{
+public:
+  MyGymEnv ();
+  MyGymEnv (uint32_t agentId, Time stepTime);
+  virtual ~MyGymEnv ();
+  static TypeId GetTypeId (void);
+  virtual void DoDispose ();
+
+  Ptr<OpenGymSpace> GetActionSpace();
+  Ptr<OpenGymSpace> GetObservationSpace();
+  bool GetGameOver();
+  Ptr<OpenGymDataContainer> GetObservation();
+  float GetReward();
+  std::string GetExtraInfo();
+  bool ExecuteActions(Ptr<OpenGymDataContainer> action);
+
+private:
+  void ScheduleNextStateRead();
+
+  uint32_t m_agentId = 0;      // agent identifier, printed in every log line
+  Time m_interval;             // time between two environment steps
+  uint32_t m_stepCounter = 0;  // number of GetGameOver() calls on this instance
+  float m_reward = 0.0;        // value returned by the last GetReward() call
+};
+
+}
+
+
+#endif // MY_GYM_ENTITY_H
diff --git a/scratch/multi-agent/readme.md b/scratch/multi-agent/readme.md
new file mode 100644
index 0000000000..a23e000f98
--- /dev/null
+++ b/scratch/multi-agent/readme.md
@@ -0,0 +1,22 @@
+multi-agent example
+===================
+
+This example shows how to create an ns3-gym environment with multiple agents and connect them to multiple independent Python processes.
+Note that for each agent an independent ns3-gym gateway is created.
+Each gateway binds its socket to a different port number.
+Here, agent 1 communicates over port number 5555, while agent 2 uses port number 5556.
+
+In order to run the example:
+
+```
+# Terminal 1
+./waf --run "multi-agent"
+
+# Terminal 2
+cd ./scratch/multi-agent
+./agent1.py
+
+# Terminal 3
+cd ./scratch/multi-agent
+./agent2.py
+```
\ No newline at end of file
diff --git a/scratch/multi-agent/sim.cc b/scratch/multi-agent/sim.cc
new file mode 100644
index 0000000000..9749cabc72
--- /dev/null
+++ b/scratch/multi-agent/sim.cc
@@ -0,0 +1,82 @@
+/* -*- Mode: C++; c-file-style: "gnu"; indent-tabs-mode:nil; -*- */
+/*
+ * Copyright (c) 2018 Piotr Gawlowicz
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Piotr Gawlowicz
+ *
+ */
+
+#include "ns3/core-module.h"
+#include "ns3/opengym-module.h"
+#include "mygym.h"
+
+using namespace ns3;
+
+NS_LOG_COMPONENT_DEFINE ("OpenGym");
+
+int
+main (int argc, char *argv[])
+{
+  // Parameters of the scenario
+  uint32_t simSeed = 1;
+  double simulationTime = 1; //seconds
+  double envStepTime = 0.1; //seconds, ns3gym env step time interval
+  uint32_t openGymPort = 5555; // port of agent 1; agent 2 uses the next port
+  uint32_t testArg = 0;
+
+  CommandLine cmd;
+  // required parameters for OpenGym interface
+  cmd.AddValue ("openGymPort", "Port number for OpenGym env of agent 1 (agent 2 uses the next port). Default: 5555", openGymPort);
+  cmd.AddValue ("simSeed", "Seed for random generator. Default: 1", simSeed);
+  // optional parameters
+  cmd.AddValue ("simTime", "Simulation time in seconds. Default: 1s", simulationTime);
+  cmd.AddValue ("stepTime", "Gym Env step time in seconds. Default: 0.1s", envStepTime);
+  cmd.AddValue ("testArg", "Extra simulation argument. Default: 0", testArg);
+  cmd.Parse (argc, argv);
+
+  NS_LOG_UNCOND("Ns3Env parameters:");
+  NS_LOG_UNCOND("--simulationTime: " << simulationTime);
+  NS_LOG_UNCOND("--openGymPort: " << openGymPort);
+  NS_LOG_UNCOND("--envStepTime: " << envStepTime);
+  NS_LOG_UNCOND("--seed: " << simSeed);
+  NS_LOG_UNCOND("--testArg: " << testArg);
+
+  RngSeedManager::SetSeed (1);
+  RngSeedManager::SetRun (simSeed);
+
+  // OpenGym Env for agent 1 (port given by --openGymPort, 5555 by default)
+  uint32_t agentId = 1;
+  Ptr<OpenGymInterface> openGymInterface1 = CreateObject<OpenGymInterface> (openGymPort);
+  Ptr<MyGymEnv> myGymEnv1 = CreateObject<MyGymEnv> (agentId, Seconds(envStepTime));
+  myGymEnv1->SetOpenGymInterface(openGymInterface1);
+
+  // OpenGym Env for agent 2 (next port, 5556 by default)
+  agentId = 2;
+  openGymPort += 1;
+  Ptr<OpenGymInterface> openGymInterface2 = CreateObject<OpenGymInterface> (openGymPort);
+  Ptr<MyGymEnv> myGymEnv2 = CreateObject<MyGymEnv> (agentId, Seconds(envStepTime));
+  myGymEnv2->SetOpenGymInterface(openGymInterface2);
+
+  NS_LOG_UNCOND ("Simulation start");
+  Simulator::Stop (Seconds (simulationTime));
+  Simulator::Run ();
+  NS_LOG_UNCOND ("Simulation stop");
+
+  openGymInterface1->NotifySimulationEnd();
+  openGymInterface2->NotifySimulationEnd();
+  Simulator::Destroy ();
+
+}