Adding to_dataframe method for multidim measurements. (google#702)

* Adding to_dataframe method to Measurement and DimensionedMeasuredValue to convert to a pandas.DataFrame * adding pandas as a dependency for unit tests * added an example for multidim measurements including conversion to dataframes * cleaning up formatting in examples/measurements.py * making minor changes suggested by code review * adding nice printout to examples/measurements.py * adding test case of pandas not present * patch measurements.pandas for existing test cases
toddwalstad · Jan 26, 2018 · 567afe0 · 567afe0
1 parent bce5b3b
commit 567afe0
Show file tree

Hide file tree

Showing 4 changed files with 122 additions and 2 deletions.
diff --git a/examples/measurements.py b/examples/measurements.py
@@ -47,6 +47,8 @@
 # Import openhtf with an abbreviated name, as we'll be using a bunch of stuff
 # from it throughout our test scripts. See __all__ at the top of
 # openhtf/__init__.py for details on what's in top-of-module namespace.
+import random
+
 import openhtf as htf
 
 # Import this output mechanism as it's the specific one we want to use.
@@ -118,10 +120,46 @@ def inline_phase(test):
   test.logger.info('Set inline_kwargs to a failing value, test should FAIL!')
 
 
+# A multidim measurement including how to convert to a pandas dataframe and
+# a numpy array.
+@htf.measures(htf.Measurement('power_time_series')
+              .with_dimensions('ms', 'V', 'A'))
+@htf.measures(htf.Measurement('average_voltage').with_units('V'))
+@htf.measures(htf.Measurement('average_current').with_units('A'))
+@htf.measures(htf.Measurement('resistance').with_units('ohm').in_range(9, 11))
+def multdim_measurements(test):
+  """Record a fake power time series and derive summary measurements.
+
+  Fills the dimensioned 'power_time_series' measurement with ten
+  (time, voltage, current) coordinates, converts it to a dataframe and a
+  numpy array, and uses those to set 'average_voltage', 'average_current'
+  and 'resistance'.
+
+  Args:
+    test: The test API object handed to every phase; used here for its
+      `measurements` and `logger` attributes.
+  """
+  # Create some fake current and voltage over time data
+  resistance = 10
+  for t in range(10):
+    voltage = 10 + 10.0*t
+    # A little random noise keeps the estimated resistance from being exact.
+    current = voltage/resistance + .01*random.random()
+    dimensions = (t, voltage, current)
+    test.measurements['power_time_series'][dimensions] = 0
+
+  # When accessing your multi-dim measurement a DimensionedMeasuredValue
+  # is returned.
+  dim_measured_value = test.measurements['power_time_series']
+
+  # Let's convert that to a pandas dataframe
+  power_df = dim_measured_value.to_dataframe(columns=['ms', 'V', 'A', 'n/a'])
+  test.logger.info('This is what a dataframe looks like:\n%s', power_df)
+  test.measurements['average_voltage'] = power_df['V'].mean()
+
+  # We can convert the dataframe to a numpy array as well. `.values` is used
+  # instead of `as_matrix()`, which newer pandas releases no longer provide.
+  power_array = power_df.values
+  test.logger.info('This is the same data in a numpy array:\n%s', power_array)
+  # Column 2 of the array is the 'A' (current) column.
+  test.measurements['average_current'] = power_array.mean(axis=0)[2]
+
+  # Finally, let's estimate the resistance
+  test.measurements['resistance'] = (
+      test.measurements['average_voltage'] /
+      test.measurements['average_current'])
+
+
 if __name__ == '__main__':
   # We instantiate our OpenHTF test with the phases we want to run as args.
   test = htf.Test(hello_phase, again_phase, lots_of_measurements,
-                  measure_seconds, inline_phase)
+                  measure_seconds, inline_phase, multdim_measurements)
 
   # In order to view the result of the test, we have to output it somewhere,
   # and a local JSON file is a convenient way to do this.  Custom output

diff --git a/openhtf/core/measurements.py b/openhtf/core/measurements.py
@@ -73,6 +73,11 @@ def WidgetTestPhase(test):
 from openhtf.util import validators
 from openhtf.util import units
 
+try:
+  import pandas
+except ImportError:
+  pandas = None
+
 _LOG = logging.getLogger(__name__)
 
 
@@ -194,7 +199,7 @@ def _maybe_make_dimension(self, dimension):
     if isinstance(dimension, units.UnitDescriptor):
       return Dimension.from_unit_descriptor(dimension)
     if isinstance(dimension, str):
-      return Dimension.from_string(string)
+      return Dimension.from_string(dimension)
 
     raise TypeError('Cannot convert %s to a dimension', dimension)
 
@@ -267,6 +272,21 @@ def _asdict(self):
         retval[attr] = getattr(self, attr)
     return retval
 
+  def to_dataframe(self, columns=None):
+    """Convert a multi-dim measurement to a pandas dataframe.
+
+    Args:
+      columns: Optional list of column names. When None, the names of the
+        dimensions are used, followed by the unit name (or 'value' when
+        the measurement has no units).
+
+    Returns:
+      Whatever `to_dataframe` of the underlying measured value returns.
+
+    Raises:
+      TypeError: If the measured value is not a DimensionedMeasuredValue.
+    """
+    if not isinstance(self.measured_value, DimensionedMeasuredValue):
+      raise TypeError(
+          'Only a dimensioned measurement can be converted to a DataFrame')
+
+    if columns is None:
+      # One column per dimension, then a final column for the value itself.
+      columns = [dim.name for dim in self.dimensions]
+      columns.append(self.units.name if self.units else 'value')
+
+    return self.measured_value.to_dataframe(columns)
+
 
 class MeasuredValue(
     mutablerecords.Record('MeasuredValue', ['name'],
@@ -430,6 +450,15 @@ def value(self):
     return [dimensions + (value,) for dimensions, value in
             self.value_dict.items()]
 
+  def to_dataframe(self, columns=None):
+    """Converts to a `pandas.DataFrame`.
+
+    Args:
+      columns: Optional column names, forwarded to
+        `pandas.DataFrame.from_records`.
+
+    Returns:
+      A `pandas.DataFrame` built from the records in `self.value`.
+
+    Raises:
+      ValueError: If no value has been set yet.
+      RuntimeError: If the module-level `pandas` is not available.
+    """
+    if not self.is_value_set:
+      raise ValueError('Value must be set before converting to a DataFrame.')
+    if pandas is None:
+      raise RuntimeError('Install pandas to convert to pandas.DataFrame')
+    records = self.value
+    return pandas.DataFrame.from_records(records, columns=columns)
+
+
 
 class Collection(mutablerecords.Record('Collection', ['_measurements'])):
   """Encapsulates a collection of measurements.

diff --git a/setup.py b/setup.py
@@ -208,6 +208,7 @@ def run_tests(self):
     ],
     tests_require=[
         'mock>=2.0.0',
+        'pandas>=0.22.0',
         'pytest>=2.9.2',
         'pytest-cov>=2.2.1',
     ],

diff --git a/test/core/measurements_test.py b/test/core/measurements_test.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 # Copyright 2016 Google Inc. All Rights Reserved.
 
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,8 +19,13 @@
 actually care about.
 """
 
+from openhtf.core import measurements
+
+import mock
+
 from examples import all_the_things
 import openhtf as htf
+from openhtf.core.measurements import Outcome
 from openhtf.util import test as htf_test
 
 
@@ -30,6 +36,12 @@
 
 class TestMeasurements(htf_test.TestCase):
 
+  def setUp(self):
+    """Patch `measurements.pandas` to None for each test in this class."""
+    # Ensure most measurements features work without pandas.
+    no_pandas = mock.patch.object(measurements, 'pandas', None)
+    no_pandas.start()
+    # Undo the patch once the test finishes.
+    self.addCleanup(no_pandas.stop)
+
   def test_unit_enforcement(self):
     """Creating a measurement with invalid units should raise."""
     self.assertRaises(TypeError, htf.Measurement('bad_units').with_units, 1701)
@@ -78,3 +90,43 @@ def test_measurement_order(self):
     self.assertEqual(list(record.measurements.keys()),
                      ['replaced_min_only', 'replaced_max_only',
                       'replaced_min_max'])
+
+
+class TestMeasurement(htf_test.TestCase):
+  """Tests for converting a dimensioned Measurement to a DataFrame."""
+
+  @mock.patch.object(measurements, 'pandas', None)
+  def test_to_dataframe__no_pandas(self):
+    """With `measurements.pandas` set to None, RuntimeError is raised."""
+    with self.assertRaises(RuntimeError):
+      self.test_to_dataframe(units=True)
+
+  def test_to_dataframe(self, units=True):
+    """A stored value can be found in the DataFrame by its coordinates.
+
+    Args:
+      units: When True the measurement is given units and the value column
+        is looked up as 'degree Celsius'; otherwise it is looked up as
+        'value'.
+    """
+    measurement = htf.Measurement('test_multidim')
+    measurement.with_dimensions('ms', 'assembly',
+                                htf.Dimension('my_zone', 'zone'))
+
+    measure_column_name = 'value'
+    if units:
+      measurement.with_units('°C')
+      measure_column_name = 'degree Celsius'
+
+    # Fill every (time, assembly, zone) coordinate with a known temperature.
+    for t in range(5):
+      for assembly in ['A', 'B', 'C']:
+        for zone in range(3):
+          measurement.measured_value[(t, assembly, zone)] = zone + t
+
+    measurement.outcome = Outcome.PASS
+
+    df = measurement.to_dataframe()
+    coordinates = (1, 'A', 2)
+    query = '(ms == %s) & (assembly == "%s") & (my_zone == %s)' % coordinates
+
+    expected = measurement.measured_value[coordinates]
+    actual = df.query(query)[measure_column_name].values[0]
+    self.assertEqual(expected, actual)
+
+  def test_to_dataframe__no_units(self):
+    """Runs the same check with no units set on the measurement."""
+    self.test_to_dataframe(units=False)