New v0.3 configuration. Missing config validation
It somehow works with validation disabled; validation is partially implemented
but not yet working properly.
Examples have been partially updated, but the docs have not.
Carlo Antonio Venditti committed May 14, 2022
1 parent 26c8cdd commit 41542dc
Showing 8 changed files with 340 additions and 245 deletions.
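The `-S`/`--skip-validation` flag added below in `src/yapp/cli/__init__.py` is the switch the commit message refers to. A minimal sketch of exercising it from Python, assuming `yapp` is installed and the working directory is `example/` (the `simple_task` pipeline comes from `example/pipelines.yml`; driving `main()` through `sys.argv` is an assumption, not a documented entry point):

```python
# Hedged sketch: invoke the CLI entry point with validation skipped.
# `main` and the flag exist in the diff below; argv handling is assumed.
import sys
from yapp.cli import main

sys.argv = ["yapp", "--skip-validation", "simple_task"]
main()
```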
52 changes: 50 additions & 2 deletions docs/roadmap.md
@@ -22,7 +22,9 @@ $pipeline: # pipeline name or "+all"
       source: $input
       $args
       expose:
-        - $data: $alias
+        - use: $data
+          as: $alias
          $args
 
   - $input: $input_name # input_name is optional
     $args
@@ -69,7 +71,7 @@ $pipeline: # pipeline name or "+all"
   # required
   steps:
     - do: $step
-      input:
+      inputs:
         - from: $input_name
           use: $data
           as: $alias
@@ -79,3 +81,49 @@ $pipeline: # pipeline name or "+all"
 One might also think about a kind of hybrid approach: allowing aliases for inputs to be
 specified inside the definition and used automatically inside the jobs, while also allowing
 them to be defined inside steps.
+
+Enclosing all parameters in a `with` field:
+
+``` yaml title="alt-alt pipelines.yml specification"
+$pipeline: # pipeline name or "+all"
+  # optional
+  inputs:
+    - name: $input_name # name is optional
+      from: $input
+      with:
+        $args
+      expose:
+        - use: $data
+          as: $alias
+          with:
+            $args
+
+  # optional
+  outputs:
+    - name: $output_name # name is optional
+      to: $output
+      with:
+        $args
+
+  # optional
+  hooks:
+    - run: $hook_func
+      on: $hook_event
+      with:
+        $args
+
+  # required
+  steps:
+    - do: $step
+      with:
+        $args
+      inputs:
+        - from: $input_name
+          use: $data
+          with:
+            $args
+          as: $alias
+      after: $other_steps
+```
+
+This feels more consistent.
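Since the alt-alt specification is written entirely with `$placeholder` fields, a concrete instance may help. The sketch below is illustrative only: the names are borrowed from `example/pipelines.yml`, the `verbose` argument is hypothetical, and PyYAML stands in for whatever loader yapp actually uses.

```python
# A hypothetical concrete instance of the alt-alt layout, parsed with
# PyYAML to show the structure the spec above implies.
import yaml

doc = """
simple_task:
  inputs:
    - from: file.CsvInput
      with:
        directory: "./data"
  steps:
    - do: common.PreProcessor
    - do: MainTask
      after: common.PreProcessor
      with:
        verbose: true   # hypothetical step argument
"""

config = yaml.safe_load(doc)
steps = config["simple_task"]["steps"]
assert steps[1]["after"] == "common.PreProcessor"
assert steps[1]["with"] == {"verbose": True}
```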
88 changes: 53 additions & 35 deletions example/pipelines.yml
@@ -1,68 +1,86 @@
 #
 # This is an example yapp-pipelines configuration file
-# You can also look at pipelines_annotated.yml for an extensively annotated version of this file
+# You can also look at pipelines_annotated.yml
+# for an extensively annotated version of this file
 #
 
 +all:
   hooks:
-    on_job_start:
-      - common.UselessHook.whatever
+    - run: common.UselessHook.whatever
+      on: job_start
 
 
 simple_task:
   inputs:
-    - file.CsvInput:
+    - from: file.CsvInput
+      with:
         directory: "./data"
-    - pandas.read_csv
-
-  expose:
-    - pandas.read_csv:
-      - "./data/a_csv_file.csv": example_csv
+    - from: pandas.read_csv
+      expose:
+        - use: "./data/a_csv_file.csv"
+          as: example_csv
 
   hooks:
-    on_job_start:
-      - common.UselessHook.whatever
-    on_pipeline_start:
-      - common.UselessHook.whatever
+    - run: common.UselessHook.whatever
+      on: job_start
+    - run: common.UselessHook.whatever
+      on: pipeline_start
 
   steps:
-    - common.PreProcessor
-    - MainTask: common.PreProcessor
-    - Later.print_something: MainTask
+    - run: common.PreProcessor
+    - run: MainTask
+      after: common.PreProcessor
+    - run: Later.print_something
+      after: MainTask
 
 
-sligthly_more_complex :
+sligthly_more_complex:
   inputs:
-    - pgsql.PgSqlInput:
+    - name: pg_input
+      from: pgsql.PgSqlInput
+      with:
         username: carlo
         password:
         host: localhost
         port: 5432
         database: postgres
-    - utils.DummyInput
-    - file.CsvInput:
+      expose:
+        - use: dataset
+          as: dataset
+    - from: utils.DummyInput
+      expose:
+        - use: "whatever"
+          as: anempytdataframe
+    - from: file.CsvInput
+      expose:
+        - use: "a_csv_file.csv"
+          as: example_csv
+      with:
         directory: "./data"
 
   outputs:
-    - pgsql.PgSqlOutput:
+    - to: pgsql.PgSqlOutput
+      with:
         username: carlo
         password: !env PG_PWD
         host: localhost
         port: 5432
         database: postgres
 
-  expose:
-    - PgSqlInput:
-      - dataset: dataset
-    - file.CsvInput:
-      - "a_csv_file.csv": example_csv
-    - utils.DummyInput:
-      - "whatever" : anempytdataframe
 
   steps:
-    - common.PreProcessor
-    - PredictorA: common.PreProcessor
-    - PredictorB: common.PreProcessor
-    - PredictorC: common.PreProcessor
-    - Ensemble: [PredictorA, PredictorB, PredictorC]
-    - common.PostProcessor: Ensemble
+    - run: common.PreProcessor
+
+    - run: PredictorA
+      after: common.PreProcessor
+
+    - run: PredictorB
+      after: common.PreProcessor
+
+    - run: PredictorC
+      after: common.PreProcessor
+
+    - run: Ensemble
+      after: [PredictorA, PredictorB, PredictorC]
+
+    - run: common.PostProcessor
+      after: Ensemble
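The `password: !env PG_PWD` value above implies a custom YAML tag that resolves environment variables at load time. How yapp registers that tag is not part of this diff; the following is a generic PyYAML sketch of one way such a tag can work.

```python
# Generic sketch of an "!env VAR" tag with PyYAML (an assumption about
# the mechanism, not yapp's actual implementation).
import os
import yaml

def env_constructor(loader, node):
    # Resolve "!env VAR" to the value of $VAR at load time.
    return os.environ.get(loader.construct_scalar(node))

yaml.SafeLoader.add_constructor("!env", env_constructor)

print(yaml.safe_load("password: !env PG_PWD"))  # {'password': <value of PG_PWD>}
```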
12 changes: 11 additions & 1 deletion src/yapp/cli/__init__.py
@@ -65,6 +65,16 @@ def main():
         help="Print colored output for logs",
     )
 
+    parser.add_argument(
+        "-S",
+        "--skip-validation",
+        action="store_const",
+        dest="skip_validation",
+        const=True,
+        default=False,
+        help="Skip configuration validation, used for test purposes",
+    )
+
     parser.add_argument("pipeline", type=str, help="Pipeline name")
 
     args = parser.parse_args()
@@ -80,7 +90,7 @@ def main():
 
     # Read configuration and create a new pipeline
     try:
-        pipeline = config_parser.parse()
+        pipeline = config_parser.parse(skip_validation=args.skip_validation)
     except YappFatalError as error:
         error.log_and_exit()
     except Exception as error:  # pylint: disable=broad-except
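The new flag uses `action="store_const"` with `const=True` and `default=False`, which behaves like a boolean switch (`store_true` is argparse's shorthand for exactly this pattern). A standalone demo, independent of yapp:

```python
# store_const with const=True/default=False acts as an on/off switch.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-S", "--skip-validation", action="store_const",
                    dest="skip_validation", const=True, default=False)

print(parser.parse_args([]).skip_validation)      # False
print(parser.parse_args(["-S"]).skip_validation)  # True
```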
9 changes: 5 additions & 4 deletions src/yapp/cli/logs.py
@@ -133,14 +133,15 @@ def format(self, record):
         msg = str(record.msg) % record.args
 
         if record.exc_info:
-            msg = "> " + self.formatException(record.exc_info)
+            msg = ">> " + self.formatException(record.exc_info)
 
         # TODO use loglevel
-        # This could be done using levelno instead of logging into the message but that way there
-        # would be no highlight in the file output which has no color output
-        if msg.startswith("> "):
+        if msg.startswith(">> ") or record.levelno >= logging.WARNING:
             head = self.get_color(record.levelno) + head
+            # msg = self.get_color(logging.DEBUG) + msg[2:] + self.get_color()
+            msg = msg[2:] + self.get_color()
+        elif msg.startswith("> "):
             msg = self.get_color(logging.DEBUG) + msg[2:] + self.get_color()
         else:
             head = self.get_color(record.levelno) + head + self.get_color()
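The formatter now keys its colors on both the record level and a message-prefix convention. A minimal sketch of how calling code would trigger each branch, assuming messages go through the standard logging module with this formatter installed on the handler:

```python
# Assumed usage of the prefix convention: WARNING and above (and ">> "
# exception dumps) get the highlight color; a "> " prefix asks for the
# low-key DEBUG color on the message body.
import logging

logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("yapp")

log.warning("config validation failed")  # levelno >= WARNING: highlighted
log.debug("> parsed 8 pipelines")        # "> ": DEBUG-colored body
```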
