Skip to content

Commit

Permalink
source: csv: Enable setting repo src_url
Browse files Browse the repository at this point in the history
  • Loading branch information
sudharsana-kjl authored and pdxjohnny committed Jun 14, 2019
1 parent 418c826 commit 0972929
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
config for them.
- shouldi example uses updated `MemoryOrchestrator.basic_config` method and
includes more explanation in comments.
- CSVSource allows for setting the Repo's `src_url` from a CSV column.
### Fixed
- Docs get version from dffml.version.VERSION.

Expand Down
9 changes: 6 additions & 3 deletions dffml/source/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ async def load_fd(self, fd):
repo_data["features"][key] = ast.literal_eval(value)
except (SyntaxError, ValueError):
repo_data["features"][key] = value
if self.config.key is not None and self.config.key == key:
src_url = value
if self.config.key is None:
src_url = str(i)
i += 1
# Correct types and structure of repo data from csv_meta
if "classification" in csv_meta:
repo_data.update(
Expand All @@ -59,9 +64,7 @@ async def load_fd(self, fd):
}
}
)
# Create the repo with the source URL being the row index
repo = Repo(str(i), data=repo_data)
i += 1
repo = Repo(src_url, data=repo_data)
self.mem[repo.src_url] = repo
self.logger.debug("%r loaded %d records", self, len(self.mem))

Expand Down
3 changes: 3 additions & 0 deletions dffml/source/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

class FileSourceConfig(BaseConfig, NamedTuple):
    """Configuration values shared by file-backed sources."""

    # Name of the file the source is backed by.
    filename: str
    # Optional name of a column/field. CSVSource compares it against each
    # column header and uses the matching cell as the repo's src_url; when
    # it is None the row index is used instead.
    key: str = None
    # Boolean flag, off by default.
    # NOTE(review): presumably stops the source from writing back to the
    # file -- confirm against FileSource.
    readonly: bool = False


Expand Down Expand Up @@ -114,11 +115,13 @@ def args(cls, args, *above) -> Dict[str, Arg]:
"readonly",
Arg(type=bool, action="store_true", default=False),
)
cls.config_set(args, above, "key", Arg(type=str, default=None))
return args

@classmethod
def config(cls, config, *above):
    """
    Build a FileSourceConfig from ``config``.

    Each field is looked up with ``cls.config_get(config, above, name)``,
    in the order filename, readonly, key, and passed by keyword to
    FileSourceConfig.
    """
    field_names = ("filename", "readonly", "key")
    looked_up = {
        name: cls.config_get(config, above, name) for name in field_names
    }
    return FileSourceConfig(**looked_up)
22 changes: 22 additions & 0 deletions tests/source/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,30 @@
from dffml.source.csv import CSVSource
from dffml.util.testing.source import SourceTest
from dffml.util.asynctestcase import AsyncTestCase
from dffml.repo import Repo
import tempfile


class TestCSVSource(SourceTest, AsyncTestCase):
    """Supplies a CSVSource to the tests inherited from SourceTest."""

    async def setUpSource(self, fileobj):
        """Return a CSVSource configured with the name of ``fileobj``."""
        source_config = FileSourceConfig(filename=fileobj.name)
        return CSVSource(source_config)


class CSVTest(SourceTest, AsyncTestCase):
    """CSVSource tests, including lookup of repos by a key column."""

    async def setUpSource(self, fileobj):
        """Return a CSVSource configured with the name of ``fileobj``."""
        source_config = FileSourceConfig(filename=fileobj.name)
        return CSVSource(source_config)

    async def test_key(self):
        """Repos are retrievable by the values found in the key column."""
        csv_rows = (
            b"KeyHeader,ValueColumn\n",
            b"a,42\n",
            b"b,420\n",
        )
        with tempfile.NamedTemporaryFile() as fileobj:
            for row in csv_rows:
                fileobj.write(row)
            # Rewind once everything has been written.
            fileobj.seek(0)
            keyed_config = FileSourceConfig(
                filename=fileobj.name, key="KeyHeader"
            )
            async with CSVSource(keyed_config) as source:
                async with source() as sctx:
                    # Values from the KeyHeader column act as the src_url
                    repo_a = await sctx.repo("a")
                    repo_b = await sctx.repo("b")
                    features_a = repo_a.data.features
                    features_b = repo_b.data.features
                    self.assertEqual(features_a["ValueColumn"], 42)
                    self.assertEqual(features_b["ValueColumn"], 420)
16 changes: 13 additions & 3 deletions tests/source/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ def test_args(self):
),
"config": {},
},
"key": {
"arg": Arg(type=str, default=None),
"config": {},
},
},
}
},
Expand All @@ -82,19 +86,25 @@ def test_config_readonly_default(self):
parse_unknown("--source-file-filename", "feedface")
)
self.assertEqual(config.filename, "feedface")
self.assertEqual(config.key, None)
self.assertFalse(config.readonly)

def test_config_readonly_set(self):
config = FileSource.config(
parse_unknown(
"--source-file-filename", "feedface", "--source-file-readonly"
"--source-file-filename",
"feedface",
"--source-file-key",
"default-key",
"--source-file-readonly",
)
)
self.assertEqual(config.filename, "feedface")
self.assertEqual(config.key, "default-key")
self.assertTrue(config.readonly)

def config(self, filename, readonly=False):
return FileSourceConfig(filename=filename, readonly=readonly)
def config(self, filename, key=None, readonly=False):
return FileSourceConfig(filename=filename, readonly=readonly, key=key)

async def test_open(self):
m_open = mock_open()
Expand Down

0 comments on commit 0972929

Please sign in to comment.