From 59ee035963bccecffd00aac0e212791b2b969c0b Mon Sep 17 00:00:00 2001 From: Kyle Bowman Date: Mon, 20 Jan 2025 14:21:37 -0500 Subject: [PATCH] add set-like operations to nomlist --- src/nom/base.py | 41 ++++++++++--- src/nom/entry.py | 27 +++------ src/nom/feed.py | 7 +++ tests/data/entry_multi.csv | 3 + tests/data/{entry.csv => entry_single.csv} | 1 - tests/test_entry.py | 70 +++++++++++++++------- 6 files changed, 97 insertions(+), 52 deletions(-) create mode 100644 tests/data/entry_multi.csv rename tests/data/{entry.csv => entry_single.csv} (65%) diff --git a/src/nom/base.py b/src/nom/base.py index 9b0ac0a..9897c9f 100644 --- a/src/nom/base.py +++ b/src/nom/base.py @@ -1,12 +1,17 @@ -from pathlib import Path -from csv import DictReader, DictWriter, excel_tab from abc import abstractmethod -from typing import Sequence +from csv import DictReader, DictWriter, excel_tab +from copy import copy +from pathlib import Path from nom.utils import NomError class NomListItem: + + @abstractmethod + def __hash__(self): + # Needed because line items are contained in sets, not lists. + raise NotImplementedError @classmethod def get_fieldnames(cls): @@ -17,7 +22,7 @@ class NomListItem: return delimiter.join([v for v in self.__dict__.values()]) def to_dict(self): - return self.__dict__ + return vars(self) @classmethod def from_dict(cls, dct: dict): @@ -26,10 +31,27 @@ class NomListItem: class NomList: - # TODO: - def __init__(self, items=[], delimiter: str="|"): + def __init__(self, items=set(), delimiter: str="|"): self.delimiter=delimiter - self.items : list[NomListItem] = items + self.items : set[NomListItem] = items + + def __add__(self, other): + dct = copy(vars(self)) + dct['items'] = self.items.union(other.items) + return self.__class__(**dct) + + def __contains__(self, value): + return value in self.items + + def __eq__(self, other): + return self.items == other.items + + def __len__(self): + return len(self.items) + + def update(self, other): + self.items.update(other.items) + # NOTE: To get the interface that I want (i.e `from_csv(path)`) # each subclass must override from_csv and pass in the constructor @@ -47,15 +69,16 @@ class NomList: with open(file, "r") as f: reader = DictReader(f,dialect=dialect) for row in reader: + # TODO: This should be from_dict() item = constructor(**row) items.append(item) - return cls(items=items, delimiter=delimiter) + return cls(items=set(items), delimiter=delimiter) def to_csv(self, file: Path): if not self.items: raise NomError("There are no entries to write.") - fieldnames=self.items[0].get_fieldnames() + fieldnames=next(iter(self.items)).get_fieldnames() dialect = excel_tab dialect.delimiter=self.delimiter diff --git a/src/nom/entry.py b/src/nom/entry.py index 0597a07..c3eb15d 100644 --- a/src/nom/entry.py +++ b/src/nom/entry.py @@ -11,13 +11,16 @@ from nom.base import NomList, NomListItem class EntryListItem(NomListItem): id_: str title: str - url: Optional[str] = "" + url: str = "" date: Optional[str] = "test" feed_url: Optional[str] = "" feed_alias: Optional[str] = "" viewed: Optional[bool] = "False" summary: Optional[str] = "" # TODO: Add this when you feel like stripping HTML + def __hash__(self): + return hash(self.url) + class EntryList(NomList): @@ -25,21 +28,7 @@ class EntryList(NomList): def from_csv(cls, path: Path): return super().from_csv(path, EntryListItem) - def update_from_feeds(self, feedlist): - pass - - def update_from_feed(self, feed): - pass - - def from_stdout(self): - pass - - -if __name__ == "__main__": - dct = dict(id_="1", title="Entry One", url="https://path/to/entry1.html") - path=Path("/home/kyle/projects/nom/tests/data/entry.csv") - #elist = EntryList() - #entry = EntryListItem.from_dict(dct) - #elist.add(entry) - #elist.to_csv(path) - elist=EntryList.from_csv(path) + # I think I like from_feed/update in place of update_from_feeds + @classmethod + def from_feed(cls, feed): + pass \ No newline at end of file diff --git a/src/nom/feed.py b/src/nom/feed.py index 1baa8d3..ba3e70b 100644 --- a/src/nom/feed.py +++ b/src/nom/feed.py @@ -36,6 +36,13 @@ class FeedListItem(NomListItem): url: str alias: str + # Where do fetch/parse belong? + def fetch(self, url): + pass + + def parse(self, url)->list[EntryListItem]: + pass + class FeedList(NomList): diff --git a/tests/data/entry_multi.csv b/tests/data/entry_multi.csv new file mode 100644 index 0000000..cfa5b87 --- /dev/null +++ b/tests/data/entry_multi.csv @@ -0,0 +1,3 @@ +id_|title|url|date|feed_url|feed_alias|viewed|summary +2|Entry One|https://path/to/entry2.html|test|||False| +3|Entry Two|https://path/to/entry3.html|test|||True| diff --git a/tests/data/entry.csv b/tests/data/entry_single.csv similarity index 65% rename from tests/data/entry.csv rename to tests/data/entry_single.csv index 92966eb..c3d8f78 100644 --- a/tests/data/entry.csv +++ b/tests/data/entry_single.csv @@ -1,3 +1,2 @@ id_|title|url|date|feed_url|feed_alias|viewed|summary 1|Entry One|https://path/to/entry1.html|test|||False| -2|Entry Two|https://path/to/entry2.html|test|||False| diff --git a/tests/test_entry.py b/tests/test_entry.py index 679d19b..e5cc8d2 100644 --- a/tests/test_entry.py +++ b/tests/test_entry.py @@ -1,25 +1,49 @@ from tempfile import NamedTemporaryFile +from pathlib import Path +from copy import copy +import pytest + +from nom.entry import EntryList, EntryListItem + + +@pytest.fixture +def elist_single(): + path = Path(__file__).parent / "data" / "entry_single.csv" + return EntryList.from_csv(path) + +@pytest.fixture +def elist_multi(): + path = Path(__file__).parent / "data" / "entry_multi.csv" + return EntryList.from_csv(path) + +@pytest.fixture +def elist_item(elist_single): + return next(iter(elist_single.items)) + + +def test_elist_constructors(elist_single): + assert elist_single is not None + +def test_elist_to_from_csv_idempotency(elist_single,tmp_path): + path = tmp_path / "delete-me.csv" + elist_single.to_csv(path) + remade = EntryList.from_csv(path) + assert remade == elist_single + +def test_from_feed(): + pass + +def test_eli_to_from_dict_idempotency(elist_item): + remade = elist_item.from_dict(elist_item.to_dict()) + assert remade == elist_item + +def test_elist_update(elist_multi, elist_single): + original_length = len(elist_multi) + elist_multi.update(elist_single) + assert len(elist_multi) == original_length + 1 + +def test_elist_addition(elist_multi, elist_single): + sum_ = elist_multi + elist_single + assert len(sum_) == len(elist_multi) + len(elist_single) + assert isinstance(sum_,EntryList) -from nom.entry import EntryListItem - -e1=EntryListItem( - id_="1", - title="Entry One", - url="https://path/to/entry1.html", - date="dummy-date", - feed_url="https://path/to/feed1.xml", - feed_alias=None, - viewed=False, - summary="Summary of entry one." -) - - -def test_eli_constructors(): - assert EntryListItem.from_dict(e1.to_dict()) == e1 - - #with NamedTemporaryFile(delete_on_close=False) as tmp_file: - # to_file(tmp_file.name, data) - # tmp_file.close() - # with open(tmp_file.name, 'r') as f: - # reader = csv.DictReader(f, delimiter='|') - # rows = list(reader) \ No newline at end of file -- 2.39.5