diff --git a/src/undate/converters/combined.py b/src/undate/converters/combined.py index 4c7767c..3d07c4a 100644 --- a/src/undate/converters/combined.py +++ b/src/undate/converters/combined.py @@ -1,7 +1,7 @@ """ -**Experimental** combined parser. Supports EDTF, Gregorian, Hebrew, and Hijri -where dates are unambiguous. Year-only dates are parsed as EDTF in -Gregorian calendar. +Combined parser. Supports EDTF, Gregorian, Hebrew, Hijri, and Christian +liturgical dates where dates are unambiguous. Year-only dates are parsed +as EDTF in Gregorian calendar. """ from typing import Union @@ -16,6 +16,7 @@ from undate.converters.calendars.gregorian.transformer import GregorianDateTransformer from undate.converters.calendars.hebrew.transformer import HebrewDateTransformer from undate.converters.calendars.islamic.transformer import IslamicDateTransformer +from undate.converters.holidays import HolidayTransformer class CombinedDateTransformer(Transformer): @@ -35,6 +36,7 @@ def start(self, children): hebrew=HebrewDateTransformer(), islamic=IslamicDateTransformer(), gregorian=GregorianDateTransformer(), + holidays=HolidayTransformer(), ) @@ -47,14 +49,16 @@ def start(self, children): class OmnibusDateConverter(BaseDateConverter): """ Combination parser that aggregates existing parser grammars. - Currently supports EDTF, Gregorian, Hebrew, and Hijri where dates are unambiguous. - (Year-only dates are parsed as EDTF in Gregorian calendar.) + Supports EDTF, Gregorian, Hebrew, Hijri, and Christian liturgical dates + where dates are unambiguous. Year-only dates are parsed as EDTF in + Gregorian calendar. Does not support serialization. 
Example usage:: - Undate.parse("Tammuz 4816", "omnibus") + Undate.parse("Tammuz 4812", "omnibus") + Undate.parse("Easter 1916", "omnibus") """ diff --git a/src/undate/converters/grammars/combined.lark b/src/undate/converters/grammars/combined.lark index 3f6a568..72fbf97 100644 --- a/src/undate/converters/grammars/combined.lark +++ b/src/undate/converters/grammars/combined.lark @@ -5,7 +5,7 @@ %import .undate_common.DATE_PUNCTUATION %ignore DATE_PUNCTUATION -start: (edtf__start | hebrew__hebrew_date | islamic__islamic_date | gregorian__gregorian_date ) +start: (edtf__start | hebrew__hebrew_date | islamic__islamic_date | gregorian__gregorian_date | holidays__holiday_date) // Renaming of the import variables is required, as they receive the namespace of this file. // See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565 @@ -30,6 +30,8 @@ start: (edtf__start | hebrew__hebrew_date | islamic__islamic_date | gregorian__ // gregorian calendar, in multiple languages %import .gregorian.gregorian_date -> gregorian__gregorian_date +// relative import from holidays.lark +%import .holidays.holiday_date -> holidays__holiday_date // override hebrew date to omit year-only, since year without calendar is ambiguous // NOTE: potentially support year with calendar label diff --git a/src/undate/converters/grammars/holidays.lark b/src/undate/converters/grammars/holidays.lark new file mode 100644 index 0000000..f257425 --- /dev/null +++ b/src/undate/converters/grammars/holidays.lark @@ -0,0 +1,37 @@ +%import common.WS +%ignore WS + +%import .undate_common.DATE_PUNCTUATION +%ignore DATE_PUNCTUATION + + +holiday_date: movable_feast year | fixed_date year? 
+ +// holidays that shift depending on the year +movable_feast: EASTER | EASTER_MONDAY | HOLY_SATURDAY | ASCENSION + | PENTECOST | WHIT_MONDAY | TRINITY | ASH_WEDNESDAY | SHROVE_TUESDAY + +// holidays that are always on the same date +fixed_date: EPIPHANY | CANDLEMASS | ST_PATRICKS | ALL_FOOLS | ST_CYPRIANS + +year: /\d{4}/ + +// all patterns use case-insensitive regex + +// Fixed-date holidays +EPIPHANY: /epiphany/i +CANDLEMASS: /candlemass?/i // matches the spelling with one s or two +ST_PATRICKS: /st\.?\s*patrick'?s?\s*day/i +ALL_FOOLS: /(april|all)\s*fools?\s*day/i +ST_CYPRIANS: /st\.?\s*cyprian'?s?\s*day/i + +// Moveable feasts +EASTER: /easter/i +EASTER_MONDAY: /easter\s*monday/i +HOLY_SATURDAY: /holy\s*saturday/i +ASCENSION: /ascension\s*day|ascension/i +PENTECOST: /pentecost/i +WHIT_MONDAY: /whit\s*monday|whitsun\s*monday/i +TRINITY: /trinity\s*sunday|trinity/i +ASH_WEDNESDAY: /ash\s*wednesday/i +SHROVE_TUESDAY: /shrove\s*tuesday/i diff --git a/src/undate/converters/holidays.py b/src/undate/converters/holidays.py new file mode 100644 index 0000000..00ce865 --- /dev/null +++ b/src/undate/converters/holidays.py @@ -0,0 +1,171 @@ +""" +Holiday date Converter: parse Christian liturgical dates and convert to Gregorian. +""" + +import datetime + +from lark import Lark, Transformer, Tree, Token +from lark.exceptions import UnexpectedInput + +from convertdate import holidays +from undate import Undate, Calendar +from undate.converters.base import BaseDateConverter, GRAMMAR_FILE_PATH + +# To add a new holiday: +# 1. Add a name and pattern to holidays.lark grammar file +# 2. Include it in the appropriate section (fixed or movable) +# 3. 
# Every holiday recognized by the grammar needs an entry in FIXED_HOLIDAYS or
# MOVEABLE_FEASTS below; the key must match the grammar terminal name.


# holidays that fall on the same date every year
# key must match grammar term; value is tuple of numeric month, day
FIXED_HOLIDAYS = {
    "EPIPHANY": (1, 6),  # January 6
    "CANDLEMASS": (2, 2),  # February 2; 40th day & end of epiphany
    "ST_PATRICKS": (3, 17),  # March 17
    "ALL_FOOLS": (4, 1),  # All / April fools day, April 1
    "ST_CYPRIANS": (9, 16),  # St. Cyprian's Feast day: September 16
}

# holidays that shift depending on the year; value is days relative to Easter
MOVEABLE_FEASTS = {
    "EASTER": 0,  # Easter, no offset
    "HOLY_SATURDAY": -1,  # day before Easter
    "EASTER_MONDAY": 1,  # day after Easter
    "ASCENSION": 39,  # fortieth day of Easter
    "PENTECOST": 49,  # 7 weeks after Easter
    "WHIT_MONDAY": 50,  # Monday after Pentecost
    "TRINITY": 56,  # first Sunday after Pentecost
    "ASH_WEDNESDAY": -46,  # Wednesday of the 7th week before Easter
    "SHROVE_TUESDAY": -47,  # day before Ash Wednesday
}


parser = Lark.open(
    str(GRAMMAR_FILE_PATH / "holidays.lark"), rel_to=__file__, start="holiday_date"
)


class HolidayTransformer(Transformer):
    """Transform a parse tree from the holidays grammar into an :class:`Undate`.

    Fixed-date holidays are resolved to a month and day through
    ``FIXED_HOLIDAYS``; moveable feasts are resolved to a month and day by
    offsetting the date of Easter for the parsed year by the number of days
    given in ``MOVEABLE_FEASTS``.
    """

    # NOTE(review): not read by any method of this class; presumably consulted
    # by code that combines calendar transformers -- confirm before removing.
    calendar = Calendar.GREGORIAN

    def year(self, items):
        """Collapse the matched year text into a single ``year`` token."""
        return Token("year", "".join(str(i) for i in items))

    def movable_feast(self, items):
        """Pass the feast terminal through unchanged.

        The date of a moveable feast can't be calculated without the year,
        so resolution is deferred to :meth:`_get_date_parts`.
        """
        return items[0]

    def fixed_date(self, items):
        """Replace a fixed-date holiday terminal with month and day tokens.

        :raises ValueError: if the terminal has no entry in ``FIXED_HOLIDAYS``
        """
        token = items[0]
        # terminal names carry a "<namespace>__" prefix when this grammar is
        # imported into the combined grammar; look up by the bare name
        holiday_name = token.type.split("__")[-1]
        try:
            month, day = FIXED_HOLIDAYS[holiday_name]
        except KeyError:
            raise ValueError(f"Unknown fixed holiday: {token.type}") from None
        return Tree("fixed_date", [Token("month", month), Token("day", day)])

    def holiday_date(self, items):
        """Build an :class:`Undate` from the parsed holiday and optional year."""
        return Undate(**self._get_date_parts(items))

    def _get_date_parts(self, items) -> dict[str, int]:
        """Collect year, month, and day from transformed tokens and trees.

        Recurses into nested trees (the month/day pair of a fixed-date
        holiday) and, once all items at this level have been seen, resolves
        a moveable feast relative to Easter of the collected year.

        :param items: tokens and/or trees produced by the other callbacks
        :returns: dictionary of integer date parts keyed by
            ``year`` / ``month`` / ``day``, for initializing an Undate
        :raises ValueError: if a moveable feast is present without a year
        """
        date_fields = ("year", "month", "day")
        parts: dict[str, int] = {}
        movable_feast = None

        for child in items:
            field = value = None
            if isinstance(child, Token):
                # bare terminal name, with any combined-grammar namespace removed
                token_name = child.type.split("__")[-1]
                if child.type in date_fields:
                    # year, or month/day from a fixed date holiday
                    field, value = child.type, child.value
                elif token_name in MOVEABLE_FEASTS:
                    # collect but don't handle until we know the year
                    movable_feast = token_name
            elif isinstance(child, Tree):
                if child.data in date_fields:
                    # tree is a date field; we expect exactly one child value
                    field, value = child.data, child.children[0]
                elif child.children:
                    parts.update(self._get_date_parts(child.children))

            if field and value:
                # currently all date parts are integer only
                parts[str(field)] = int(value)

        if movable_feast is not None:
            if "year" not in parts:
                raise ValueError(f"Cannot calculate {movable_feast} without a year")
            # the result of easter() is unpacked as the year, month, day
            # arguments of datetime.date
            easter_day = datetime.date(*holidays.easter(parts["year"]))
            feast_day = easter_day + datetime.timedelta(
                days=MOVEABLE_FEASTS[movable_feast]
            )
            parts.update({"month": feast_day.month, "day": feast_day.day})

        return parts


class HolidayDateConverter(BaseDateConverter):
    """
    Converter for Christian liturgical dates.

    Supports fixed-date holidays (Epiphany, Candlemass, etc.) and
    Easter-relative moveable feasts (Easter, Ash Wednesday, Pentecost, etc.).

    Example usage::

        Undate.parse("Easter 1942", "holidays")
        Undate.parse("Ash Wednesday 1942", "holidays")
        Undate.parse("Epiphany", "holidays")

    Does not support serialization.
    """

    name = "holidays"

    def __init__(self):
        self.transformer = HolidayTransformer()

    def parse(self, value: str) -> Undate:
        """Parse a holiday name with optional year and return an Undate.

        The input text is stored as the label of the returned object.

        :raises ValueError: if the string is empty, does not match the
            grammar, or cannot be converted to a date (for example, a year
            that :class:`datetime.date` rejects)
        """
        # imported locally so this block does not depend on a change to the
        # module-level import list
        from lark.exceptions import VisitError

        if not value:
            raise ValueError("Parsing empty string is not supported")

        try:
            parsetree = parser.parse(value)
            # transform the parse tree into an undate; lark wraps any
            # exception raised inside a transformer callback in VisitError
            undate_obj = self.transformer.transform(parsetree)
        except (UnexpectedInput, VisitError) as err:
            raise ValueError(f"Could not parse '{value}' as a holiday date") from err

        # set the input holiday text as a label on the undate object
        undate_obj.label = value
        return undate_obj

    def to_string(self, undate: Undate) -> str:
        """Serialization is unsupported; always raises :class:`ValueError`."""
        raise ValueError("Holiday converter does not support serialization")
class TestHolidayConverter:
    """Parsing checks for HolidayDateConverter: fixed dates, moveable feasts, errors."""

    converter = HolidayDateConverter()

    # (input text, expected date) for holidays with a fixed month and day
    FIXED_CASES = [
        ("Epiphany 1921", Undate(1921, 1, 6)),
        ("candlemas 1913", Undate(1913, 2, 2)),
        ("Candlemass 1862", Undate(1862, 2, 2)),
        ("st. patrick's day 1823", Undate(1823, 3, 17)),
        ("st patrick's day 1901", Undate(1901, 3, 17)),
        ("all fools day 1933", Undate(1933, 4, 1)),
        ("st. cyprian's day 1902", Undate(1902, 9, 16)),
    ]

    # (input text, expected date, expected weekday) for Easter-relative feasts
    MOVEABLE_CASES = [
        ("Easter 1900", Undate(1900, 4, 15), Weekday.SUNDAY),
        ("easter monday 1925", Undate(1925, 4, 13), Weekday.MONDAY),
        ("holy saturday 2018", Undate(2018, 3, 31), Weekday.SATURDAY),
        ("Ash Wednesday 2000", Undate(2000, 3, 8), Weekday.WEDNESDAY),
        ("shrove tuesday 1940", Undate(1940, 2, 6), Weekday.TUESDAY),
        ("Ascension 1988", Undate(1988, 5, 12), Weekday.THURSDAY),
        ("Ascension Day 1999", Undate(1999, 5, 13), Weekday.THURSDAY),
        ("Pentecost 2016", Undate(2016, 5, 15), Weekday.SUNDAY),
        ("whit monday 2005", Undate(2005, 5, 16), Weekday.MONDAY),
        ("whitsun monday 2023", Undate(2023, 5, 29), Weekday.MONDAY),
        ("trinity 1978", Undate(1978, 5, 21), Weekday.SUNDAY),
        ("Trinity Sunday 1967", Undate(1967, 5, 21), Weekday.SUNDAY),
    ]

    @pytest.mark.parametrize("input_string,expected", FIXED_CASES)
    def test_fixed_holidays(self, input_string, expected):
        parsed = self.converter.parse(input_string)
        assert parsed == expected

    @pytest.mark.parametrize(
        "input_string,expected,expected_weekday", MOVEABLE_CASES
    )
    def test_moveable_feasts(self, input_string, expected, expected_weekday):
        parsed = self.converter.parse(input_string)
        assert parsed == expected
        # the original input text is kept as the label
        assert parsed.label == input_string
        # the calculated date lands on the weekday the feast is named for
        assert parsed.earliest.weekday == expected_weekday

    def test_holiday_without_year(self):
        # a fixed-date holiday parses without a year; the year stays unknown
        parsed = self.converter.parse("Epiphany")
        assert parsed.label == "Epiphany"
        assert parsed.format("EDTF") == "XXXX-01-06"
        assert not parsed.known_year
        assert parsed.calendar == Calendar.GREGORIAN

    def test_undate_parse(self):
        # accessible through main undate parse method
        parsed = Undate.parse("Epiphany 1942", "holidays")
        assert parsed == Undate(1942, 1, 6)

    def test_parse_empty(self):
        with pytest.raises(ValueError, match="empty string"):
            self.converter.parse("")

    def test_parse_error(self):
        with pytest.raises(ValueError, match="Could not parse"):
            self.converter.parse("Not a holiday")

    def test_moveable_without_year(self):
        # a moveable feast with no year is rejected at parse time
        with pytest.raises(ValueError, match="Could not parse"):
            self.converter.parse("Easter")

    def test_to_string_error(self):
        with pytest.raises(ValueError, match="does not support"):
            self.converter.to_string(Undate(1916))