Skip to content

Commit

Permalink
add share option
Browse files Browse the repository at this point in the history
Signed-off-by: Yi Dong <[email protected]>
  • Loading branch information
doyend committed Oct 5, 2022
1 parent a5c8339 commit 125e499
Show file tree
Hide file tree
Showing 5 changed files with 6 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,6 @@ prompts: # prompts for GPT inference
server: False # whether to launch the API server
port: 5555 # the port number for the inference server
web_server: False # whether to launch the web inference server
share: False # whether to create a public URL
username: test # user name for web client
password: test2 # password for web client
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,6 @@ batch_size: 16
server: False
port: 5555
web_server: False # whether to launch the web inference server
share: False # whether to create a public URL
username: test # user name for web client
password: test2 # password for web client
2 changes: 1 addition & 1 deletion examples/nlp/language_modeling/megatron_gpt_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def main(cfg) -> None:
if cfg.server:
if parallel_state.is_pipeline_first_stage() and parallel_state.get_tensor_model_parallel_rank() == 0:
if cfg.web_server:
thread = threading.Thread(target=get_demo, daemon=True, args=(cfg.username, cfg.password))
thread = threading.Thread(target=get_demo, daemon=True, args=(cfg.share, cfg.username, cfg.password))
thread.start()
server = MegatronServer(model.cuda())
server.run("0.0.0.0", port=cfg.port)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def placeholder():
if cfg.server:
if parallel_state.is_pipeline_first_stage() and parallel_state.get_tensor_model_parallel_rank() == 0:
if cfg.web_server:
thread = threading.Thread(target=get_demo, daemon=True, args=(cfg.username, cfg.password))
thread = threading.Thread(target=get_demo, daemon=True, args=(cfg.share, cfg.username, cfg.password))
thread.start()
server = MegatronServer(model.cuda())
server.run("0.0.0.0", port=cfg.port)
Expand Down
4 changes: 2 additions & 2 deletions nemo/collections/nlp/modules/common/megatron_web_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def get_generation(prompt, greedy, add_BOS, token_to_gen, min_tokens, temp, top_
return sentences[0]


def get_demo(username, password):
def get_demo(share, username, password):
with gr.Blocks() as demo:
with gr.Row():
with gr.Column(scale=2, width=200):
Expand All @@ -62,4 +62,4 @@ def get_demo(username, password):
output_box = gr.Textbox(value="", label="Output")
btn = gr.Button(value="Submit")
btn.click(get_generation, inputs=[input_prompt, greedy_flag, add_BOS, token_to_gen, min_token_to_gen, temperature, top_p, top_k, repetition_penality], outputs=[output_box])
demo.launch(share=True, server_port=13570, server_name='0.0.0.0', auth=(username, password))
demo.launch(share=share, server_port=13570, server_name='0.0.0.0', auth=(username, password))

0 comments on commit 125e499

Please sign in to comment.