Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions src/undate/converters/combined.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
**Experimental** combined parser. Supports EDTF, Gregorian, Hebrew, and Hijri
where dates are unambiguous. Year-only dates are parsed as EDTF in
Gregorian calendar.
Combined parser. Supports EDTF, Gregorian, Hebrew, Hijri, and Christian
liturgical dates where dates are unambiguous. Year-only dates are parsed
as EDTF in Gregorian calendar.
"""

from typing import Union
Expand All @@ -16,6 +16,7 @@
from undate.converters.calendars.gregorian.transformer import GregorianDateTransformer
from undate.converters.calendars.hebrew.transformer import HebrewDateTransformer
from undate.converters.calendars.islamic.transformer import IslamicDateTransformer
from undate.converters.holidays import HolidayTransformer


class CombinedDateTransformer(Transformer):
Expand All @@ -35,6 +36,7 @@ def start(self, children):
hebrew=HebrewDateTransformer(),
islamic=IslamicDateTransformer(),
gregorian=GregorianDateTransformer(),
holidays=HolidayTransformer(),
)


Expand All @@ -47,14 +49,16 @@ def start(self, children):
class OmnibusDateConverter(BaseDateConverter):
"""
Combination parser that aggregates existing parser grammars.
Currently supports EDTF, Gregorian, Hebrew, and Hijri where dates are unambiguous.
(Year-only dates are parsed as EDTF in Gregorian calendar.)
Supports EDTF, Gregorian, Hebrew, Hijri, and Christian liturgical dates
where dates are unambiguous. Year-only dates are parsed as EDTF in
Gregorian calendar.

Does not support serialization.

Example usage::

Undate.parse("Tammuz 4816", "omnibus")
Undate.parse("Tammuz 4812", "omnibus")
Undate.parse("Easter 1916", "omnibus")

"""

Expand Down
4 changes: 3 additions & 1 deletion src/undate/converters/grammars/combined.lark
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
%import .undate_common.DATE_PUNCTUATION
%ignore DATE_PUNCTUATION

start: (edtf__start | hebrew__hebrew_date | islamic__islamic_date | gregorian__gregorian_date )
start: (edtf__start | hebrew__hebrew_date | islamic__islamic_date | gregorian__gregorian_date | holidays__holiday_date)

// Renaming of the import variables is required, as they receive the namespace of this file.
// See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565
Expand All @@ -30,6 +30,8 @@ start: (edtf__start | hebrew__hebrew_date | islamic__islamic_date | gregorian__
// gregorian calendar, in multiple languages
%import .gregorian.gregorian_date -> gregorian__gregorian_date

// relative import from holidays.lark
%import .holidays.holiday_date -> holidays__holiday_date

// override hebrew date to omit year-only, since year without calendar is ambiguous
// NOTE: potentially support year with calendar label
Expand Down
37 changes: 37 additions & 0 deletions src/undate/converters/grammars/holidays.lark
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
%import common.WS
%ignore WS

%import .undate_common.DATE_PUNCTUATION
%ignore DATE_PUNCTUATION


// A holiday date is either a movable feast followed by a required year,
// or a fixed-date holiday followed by an optional year.
holiday_date: movable_feast year | fixed_date year?

// holidays that shift depending on the year
movable_feast: EASTER | EASTER_MONDAY | HOLY_SATURDAY | ASCENSION
| PENTECOST | WHIT_MONDAY | TRINITY | ASH_WEDNESDAY | SHROVE_TUESDAY

// holidays that are always on the same date
fixed_date: EPIPHANY | CANDLEMASS | ST_PATRICKS | ALL_FOOLS | ST_CYPRIANS

// exactly four digits
// NOTE(review): this also matches "0000". The movable-feast calculation in
// holidays.py builds a datetime.date from the year, and datetime.MINYEAR
// is 1, so a movable feast in year 0000 cannot be resolved to a date.
year: /\d{4}/
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
python - <<'PY'
import datetime

for year in (0, 1):
    try:
        print(year, datetime.date(year, 1, 1).isoformat())
    except Exception as exc:
        print(year, type(exc).__name__, exc)
PY

Repository: dh-tech/undate-python

Length of output: 113


🏁 Script executed:

# Find and examine the holidays.py file
fd -t f holidays.py

Repository: dh-tech/undate-python

Length of output: 137


🏁 Script executed:

# Look at the resolver code around the mentioned lines
find . -name "holidays.py" -type f | head -5 | xargs wc -l

Repository: dh-tech/undate-python

Length of output: 104


🏁 Script executed:

# Search for the resolver code that uses datetime.date
rg -A 5 -B 5 "datetime.date\|holidays.easter" --type py

Repository: dh-tech/undate-python

Length of output: 47


🏁 Script executed:

cat -n src/undate/converters/holidays.py | head -150

Repository: dh-tech/undate-python

Length of output: 6947


🏁 Script executed:

python - <<'PY'
from convertdate import holidays
import datetime

# Test what happens when calling holidays.easter(0)
for year in (0, 1, 9999, 10000):
    try:
        easter_result = holidays.easter(year)
        print(f"holidays.easter({year}) = {easter_result}")
        # Now try to create datetime.date with it
        try:
            d = datetime.date(*easter_result)
            print(f"  datetime.date(*{easter_result}) = {d.isoformat()}")
        except Exception as e:
            print(f"  datetime.date(*{easter_result}) raises {type(e).__name__}: {e}")
    except Exception as e:
        print(f"holidays.easter({year}) raises {type(e).__name__}: {e}")
PY

Repository: dh-tech/undate-python

Length of output: 188


🏁 Script executed:

# Find project configuration files
fd -t f -e "toml" -e "txt" -e "cfg" | grep -i "setup\|requirements\|pyproject"

Repository: dh-tech/undate-python

Length of output: 79


🏁 Script executed:

# Test datetime.date year boundaries
python - <<'PY'
import datetime

# Test valid year boundaries
test_years = [1, 9999, 10000, -1]
for year in test_years:
    try:
        d = datetime.date(year, 1, 1)
        print(f"datetime.date({year}, 1, 1) = {d.isoformat()}")
    except Exception as e:
        print(f"datetime.date({year}, 1, 1) raises {type(e).__name__}: {e}")
PY

Repository: dh-tech/undate-python

Length of output: 279


🏁 Script executed:

cat -n src/undate/converters/grammars/holidays.lark

Repository: dh-tech/undate-python

Length of output: 1407


Reject 0000 before it reaches the movable-feast calculation.

The grammar currently allows year: /\d{4}/ to match 0000, but the movable-feast resolver at line 127 uses datetime.date(*holidays.easter(parts["year"])), which only supports years 1–9999. Parsing inputs like Easter 0000 will therefore result in a ValueError at transformation time instead of a normal parse failure.

Grammar fix
-year: /\d{4}/
+year: /(?!0000)\d{4}/
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
year: /\d{4}/
year: /(?!0000)\d{4}/
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/undate/converters/grammars/holidays.lark` at line 17, The grammar rule
year: /\d{4}/ currently accepts "0000", which later causes
datetime.date(*holidays.easter(parts["year"])) in the movable-feast resolver to
raise ValueError; update the year rule to disallow "0000" (e.g. use a regex that
rejects all-zero year such as a negative lookahead or require the first digit
1-9) so that inputs like "0000" fail during parsing rather than during
transformation.


// all patterns use case-insensitive regex;
// \s* between words means whitespace is optional (zero or more characters)

// Fixed-date holidays
EPIPHANY: /epiphany/i
CANDLEMASS: /candlemass?/i // matches both "Candlemas" and "Candlemass"
// "st" with optional period; apostrophe and trailing "s" are optional
ST_PATRICKS: /st\.?\s*patrick'?s?\s*day/i
// matches "April Fools Day" and "All Fools Day"; "fool" or "fools"
ALL_FOOLS: /(april|all)\s*fools?\s*day/i
ST_CYPRIANS: /st\.?\s*cyprian'?s?\s*day/i

// Moveable feasts
EASTER: /easter/i
EASTER_MONDAY: /easter\s*monday/i
HOLY_SATURDAY: /holy\s*saturday/i
// "Ascension Day" or just "Ascension"
ASCENSION: /ascension\s*day|ascension/i
PENTECOST: /pentecost/i
// "Whit Monday" or "Whitsun Monday"
WHIT_MONDAY: /whit\s*monday|whitsun\s*monday/i
// "Trinity Sunday" or just "Trinity"
TRINITY: /trinity\s*sunday|trinity/i
ASH_WEDNESDAY: /ash\s*wednesday/i
SHROVE_TUESDAY: /shrove\s*tuesday/i
171 changes: 171 additions & 0 deletions src/undate/converters/holidays.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
"""
Holiday date Converter: parse Christian liturgical dates and convert to Gregorian.
"""

import datetime

from convertdate import holidays
from lark import Lark, Token, Transformer, Tree
from lark.exceptions import UnexpectedInput, VisitError

from undate import Calendar, Undate
from undate.converters.base import GRAMMAR_FILE_PATH, BaseDateConverter

# To add a new holiday:
# 1. Add a terminal name and pattern to the holidays.lark grammar file
# 2. Include the new terminal in the appropriate grammar rule (fixed_date or movable_feast)
# 3. Add an entry to FIXED_HOLIDAYS or MOVEABLE_FEASTS; the key must match the grammar terminal name


# Holidays observed on the same calendar date every year.
# Each key must match a terminal name in the holidays grammar;
# each value is a (month, day) tuple of integers.
FIXED_HOLIDAYS = {
    # January 6
    "EPIPHANY": (1, 6),
    # February 2; 40th day & end of epiphany
    "CANDLEMASS": (2, 2),
    # March 17
    "ST_PATRICKS": (3, 17),
    # All / April fools day, April 1
    "ALL_FOOLS": (4, 1),
    # St. Cyprian's feast day, September 16
    "ST_CYPRIANS": (9, 16),
}

# Holidays whose date depends on the year.
# Each key must match a terminal name in the holidays grammar;
# each value is the number of days to add to the date of Easter.
MOVEABLE_FEASTS = {
    # Easter itself, no offset
    "EASTER": 0,
    # day before Easter
    "HOLY_SATURDAY": -1,
    # day after Easter
    "EASTER_MONDAY": 1,
    # fortieth day of Easter, counting Easter Sunday as the first
    "ASCENSION": 39,
    # 7 weeks after Easter
    "PENTECOST": 49,
    # Monday after Pentecost
    "WHIT_MONDAY": 50,
    # first Sunday after Pentecost
    "TRINITY": 56,
    # Wednesday of the 7th week before Easter
    "ASH_WEDNESDAY": -46,
    # day before Ash Wednesday
    "SHROVE_TUESDAY": -47,
}


# Module-level parser for holiday dates, created when this module is imported.
# Loads the holidays.lark grammar file and starts parsing at the
# ``holiday_date`` rule.
parser = Lark.open(
str(GRAMMAR_FILE_PATH / "holidays.lark"), rel_to=__file__, start="holiday_date"
)


class HolidayTransformer(Transformer):
    """Transform a parsed holiday date into an :class:`~undate.Undate`.

    Fixed-date holidays are converted to a month and day using
    ``FIXED_HOLIDAYS``. Movable feasts are calculated as an offset in
    days from the date of Easter in the parsed year, using
    ``MOVEABLE_FEASTS``.

    Note that lark wraps exceptions raised inside transformer callbacks
    in ``lark.exceptions.VisitError``, so the ``ValueError`` exceptions
    documented below reach callers of ``transform`` in that wrapped form.
    """

    calendar = Calendar.GREGORIAN

    def year(self, items):
        """Join the parsed year into a single token of type ``year``."""
        value = "".join(str(i) for i in items)
        return Token("year", value)

    def movable_feast(self, items):
        """Pass the movable feast terminal through unchanged.

        The date of a movable feast can't be calculated without the
        year, so the calculation is deferred to ``_get_date_parts``.
        """
        return items[0]

    def fixed_date(self, items):
        """Convert a fixed-date holiday terminal to month and day tokens.

        :raises ValueError: if the terminal name has no entry in
            ``FIXED_HOLIDAYS``
        """
        item = items[0]
        # the token type is the holiday name; strip any namespace prefix
        # (present when the rule is imported into another grammar)
        holiday_name = item.type.split("__")[-1]
        try:
            month, day = FIXED_HOLIDAYS[holiday_name]
        except KeyError:
            raise ValueError(f"Unknown fixed holiday: {item.type}") from None
        return Tree("fixed_date", [Token("month", month), Token("day", day)])

    def holiday_date(self, items):
        """Build an :class:`~undate.Undate` from the parsed date parts."""
        parts = self._get_date_parts(items)
        return Undate(**parts)

    def _get_date_parts(self, items) -> dict[str, int | str]:
        """Collect year, month, and day from parsed tokens and trees.

        Recurses into nested trees (e.g. the month/day tree produced for
        fixed-date holidays). Once all children have been inspected, a
        movable feast found among them is resolved to a month and day
        relative to Easter in the collected year.

        :param items: tokens and trees from the parsed holiday date
        :returns: dictionary of date parts suitable for initializing
            an :class:`~undate.Undate`
        :raises ValueError: if a movable feast is present but the year
            is missing or outside the range supported by
            :class:`datetime.date`
        """
        parts = {}
        date_parts = ["year", "month", "day"]
        movable_feast = None

        for child in items:
            field = value = None

            if isinstance(child, Token):
                # feast name without namespace prefix; tokens are
                # namespaced when parsed via an importing grammar
                token_name = child.type.split("__")[-1]
                if child.type in date_parts:
                    # year, or month/day from a fixed-date holiday
                    field, value = child.type, child.value
                elif token_name in MOVEABLE_FEASTS:
                    # collect but don't handle until we know the year
                    movable_feast = token_name

            elif isinstance(child, Tree):
                if child.data in date_parts:
                    # tree is a date field; we expect a single child
                    # holding the value
                    field = child.data
                    value = child.children[0]
                elif child.children:
                    # any other tree: recurse to find nested date parts
                    parts.update(self._get_date_parts(child.children))

            if field and value:
                # currently all date parts are integer only
                parts[str(field)] = int(value)

        if movable_feast is not None:
            year = parts.get("year")
            if not isinstance(year, int):
                raise ValueError(
                    f"Cannot calculate date for {movable_feast} without a year"
                )
            # datetime.date only supports years MINYEAR through MAXYEAR;
            # check up front so the error names the actual problem
            if not datetime.MINYEAR <= year <= datetime.MAXYEAR:
                raise ValueError(
                    f"Cannot calculate date for {movable_feast} in year {year}"
                )
            offset = MOVEABLE_FEASTS[movable_feast]
            feast_day = datetime.date(*holidays.easter(year)) + datetime.timedelta(
                days=offset
            )
            parts.update({"month": feast_day.month, "day": feast_day.day})

        return parts


class HolidayDateConverter(BaseDateConverter):
    """
    Converter for Christian liturgical dates.

    Supports fixed-date holidays (Epiphany, Candlemass, etc.) and
    Easter-relative moveable feasts (Easter, Ash Wednesday, Pentecost, etc.).

    Example usage::

        Undate.parse("Easter 1942", "holidays")
        Undate.parse("Ash Wednesday 1942", "holidays")
        Undate.parse("Epiphany", "holidays")

    Does not support serialization.
    """

    name = "holidays"

    def __init__(self):
        self.transformer = HolidayTransformer()

    def parse(self, value: str) -> Undate:
        """Parse a holiday name, with or without a year, as an
        :class:`~undate.Undate`.

        The input string is set as the label on the returned object.

        :param value: holiday text to parse, e.g. ``"Easter 1942"``
        :raises ValueError: if the string is empty, does not match the
            holiday grammar, or cannot be converted to a date
        """
        if not value:
            raise ValueError("Parsing empty string is not supported")

        try:
            parsetree = parser.parse(value)
            # transform the parse tree into an undate
            undate_obj = self.transformer.transform(parsetree)
        except (UnexpectedInput, VisitError) as err:
            # UnexpectedInput: text does not match the grammar.
            # VisitError: lark's wrapper for any exception raised inside
            # a transformer callback, e.g. a year that cannot be
            # converted to a date. Report both as ValueError.
            raise ValueError(f"Could not parse '{value}' as a holiday date") from err

        # set the input holiday text as a label on the undate object
        undate_obj.label = value
        return undate_obj

    def to_string(self, undate: Undate) -> str:
        """Not supported; always raises :class:`ValueError`."""
        raise ValueError("Holiday converter does not support serialization")
14 changes: 13 additions & 1 deletion src/undate/date.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def weekday(self) -> Optional[int]:
thursday_week = self.astype("datetime64[W]")
days_from_thursday = (self - thursday_week).astype(int)
# if monday is 0, thursday is 3
return (days_from_thursday + 3) % 7
return int((days_from_thursday + 3) % 7)

return None

Expand All @@ -280,6 +280,18 @@ def __sub__(self, other):
# NOTE: add should not be subclassed because we want to return a Date, not a delta


class Weekday(IntEnum):
    """Day of the week as an integer, Monday (0) through Sunday (6).

    Uses the same numbering as :meth:`datetime.date.weekday`.
    """

    # start of the week
    MONDAY = 0
    TUESDAY = 1
    WEDNESDAY = 2
    THURSDAY = 3
    FRIDAY = 4
    # weekend
    SATURDAY = 5
    SUNDAY = 6


class DatePrecision(IntEnum):
"""date precision, to indicate date precision independent from how much
of the date is known."""
Expand Down
6 changes: 6 additions & 0 deletions tests/test_converters/test_combined_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@
("13 Jan 1602", Undate(1602, 1, 13, calendar="Gregorian")),
("2022 ugu. 4", Undate(2022, 11, 4, calendar="Gregorian")),
("18 avril", Undate(month=4, day=18, calendar="Gregorian")),
# Christian liturgical dates
("Easter 1942", Undate(1942, 4, 5)),
("Epiphany 1921", Undate(1921, 1, 6)),
("Pentecost 2016", Undate(2016, 5, 15)),
("Ash Wednesday 2000", Undate(2000, 3, 8)),
("Whit Monday 2023", Undate(2023, 5, 29)),
]


Expand Down
74 changes: 74 additions & 0 deletions tests/test_converters/test_holidays.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import pytest

from undate import Undate, Calendar
from undate.date import Weekday
from undate.converters.holidays import HolidayDateConverter


class TestHolidayConverter:
    """Tests for parsing holiday names with :class:`HolidayDateConverter`."""

    converter = HolidayDateConverter()

    # (input text, expected date) for holidays on a fixed month and day
    fixed_holiday_cases = [
        ("Epiphany 1921", Undate(1921, 1, 6)),
        ("candlemas 1913", Undate(1913, 2, 2)),
        ("Candlemass 1862", Undate(1862, 2, 2)),
        ("st. patrick's day 1823", Undate(1823, 3, 17)),
        ("st patrick's day 1901", Undate(1901, 3, 17)),
        ("all fools day 1933", Undate(1933, 4, 1)),
        ("st. cyprian's day 1902", Undate(1902, 9, 16)),
    ]

    # (input text, expected date, expected weekday) for Easter-relative feasts
    moveable_feast_cases = [
        ("Easter 1900", Undate(1900, 4, 15), Weekday.SUNDAY),
        ("easter monday 1925", Undate(1925, 4, 13), Weekday.MONDAY),
        ("holy saturday 2018", Undate(2018, 3, 31), Weekday.SATURDAY),
        ("Ash Wednesday 2000", Undate(2000, 3, 8), Weekday.WEDNESDAY),
        ("shrove tuesday 1940", Undate(1940, 2, 6), Weekday.TUESDAY),
        ("Ascension 1988", Undate(1988, 5, 12), Weekday.THURSDAY),
        ("Ascension Day 1999", Undate(1999, 5, 13), Weekday.THURSDAY),
        ("Pentecost 2016", Undate(2016, 5, 15), Weekday.SUNDAY),
        ("whit monday 2005", Undate(2005, 5, 16), Weekday.MONDAY),
        ("whitsun monday 2023", Undate(2023, 5, 29), Weekday.MONDAY),
        ("trinity 1978", Undate(1978, 5, 21), Weekday.SUNDAY),
        ("Trinity Sunday 1967", Undate(1967, 5, 21), Weekday.SUNDAY),
    ]

    @pytest.mark.parametrize("input_string,expected", fixed_holiday_cases)
    def test_fixed_holidays(self, input_string, expected):
        """Fixed-date holidays parse to the expected year, month, and day."""
        parsed = self.converter.parse(input_string)
        assert parsed == expected

    @pytest.mark.parametrize(
        "input_string,expected,expected_weekday", moveable_feast_cases
    )
    def test_moveable_feasts(self, input_string, expected, expected_weekday):
        """Moveable feasts resolve to the right date, label, and weekday."""
        parsed = self.converter.parse(input_string)
        assert parsed == expected
        assert parsed.label == input_string
        assert parsed.earliest.weekday == expected_weekday

    def test_holiday_without_year(self):
        """A fixed-date holiday without a year parses with unknown year."""
        parsed = self.converter.parse("Epiphany")
        assert parsed.label == "Epiphany"
        assert parsed.format("EDTF") == "XXXX-01-06"
        assert not parsed.known_year
        assert parsed.calendar == Calendar.GREGORIAN

    def test_undate_parse(self):
        """The converter is accessible through the main undate parse method."""
        parsed = Undate.parse("Epiphany 1942", "holidays")
        assert parsed == Undate(1942, 1, 6)

    def test_parse_empty(self):
        """Parsing an empty string raises a ValueError."""
        with pytest.raises(ValueError, match="empty string"):
            self.converter.parse("")

    def test_parse_error(self):
        """Text that is not a holiday raises a ValueError."""
        with pytest.raises(ValueError, match="Could not parse"):
            self.converter.parse("Not a holiday")

    def test_moveable_without_year(self):
        """A moveable feast without a year raises a ValueError."""
        with pytest.raises(ValueError, match="Could not parse"):
            self.converter.parse("Easter")

    def test_to_string_error(self):
        """Serialization is not supported and raises a ValueError."""
        with pytest.raises(ValueError, match="does not support"):
            self.converter.to_string(Undate(1916))
Loading