From 119b900422e91cfbd004660552ce93d200c77723 Mon Sep 17 00:00:00 2001 From: Serhii A Date: Fri, 20 Dec 2024 09:15:06 +0100 Subject: [PATCH 1/6] Fix tests --- .github/workflows/cifuzz.yml | 2 +- .../custom_language_detection/fasttext.py | 5 ++- .../fasttext_wrapper.py | 34 +++++++++++++++++++ fuzzing/requirements.txt | 1 - setup.cfg | 23 ++++--------- tests/test_clean_api.py | 2 +- tests/test_search.py | 9 ++++- tox.ini | 9 +++-- 8 files changed, 56 insertions(+), 29 deletions(-) create mode 100644 dateparser/custom_language_detection/fasttext_wrapper.py delete mode 100644 fuzzing/requirements.txt diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml index 979b81116..111913019 100644 --- a/.github/workflows/cifuzz.yml +++ b/.github/workflows/cifuzz.yml @@ -21,7 +21,7 @@ jobs: fuzz-seconds: 600 output-sarif: true - name: Upload Crash - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: failure() && steps.build.outcome == 'success' with: name: artifacts diff --git a/dateparser/custom_language_detection/fasttext.py b/dateparser/custom_language_detection/fasttext.py index 713b014c3..6dab11109 100644 --- a/dateparser/custom_language_detection/fasttext.py +++ b/dateparser/custom_language_detection/fasttext.py @@ -1,7 +1,6 @@ import os -import fasttext - +from dateparser.custom_language_detection.fasttext_wrapper import load_model from dateparser_cli.exceptions import FastTextModelNotFoundException from dateparser_cli.fasttext_manager import fasttext_downloader from dateparser_cli.utils import create_data_model_home, dateparser_model_home @@ -27,7 +26,7 @@ def _load_fasttext_model(): model_path = os.path.join(dateparser_model_home, downloaded_models[0]) if not os.path.isfile(model_path): raise FastTextModelNotFoundException("Fasttext model file not found") - _FastTextCache.model = fasttext.load_model(model_path) + _FastTextCache.model = load_model(model_path) return _FastTextCache.model diff --git 
a/dateparser/custom_language_detection/fasttext_wrapper.py b/dateparser/custom_language_detection/fasttext_wrapper.py new file mode 100644 index 000000000..ffcea9439 --- /dev/null +++ b/dateparser/custom_language_detection/fasttext_wrapper.py @@ -0,0 +1,34 @@ +import fasttext +import numpy as np + + +class FastTextWrapper: + def __init__(self, model_path): + self.model = fasttext.load_model(model_path) + + def predict(self, text, k=1, threshold=0.0, on_unicode_error="strict"): + def check(entry): + if entry.find("\n") != -1: + raise ValueError("predict processes one line at a time (remove '\\n')") + entry += "\n" + return entry + + if isinstance(text, list): + text = [check(entry) for entry in text] + all_labels, all_probs = self.model.f.multilinePredict( + text, k, threshold, on_unicode_error + ) + return all_labels, all_probs + else: + text = check(text) + predictions = self.model.f.predict(text, k, threshold, on_unicode_error) + if predictions: + probs, labels = zip(*predictions) + else: + probs, labels = ([], ()) + + return labels, np.asarray(probs) + + +def load_model(model_path): + return FastTextWrapper(model_path) diff --git a/fuzzing/requirements.txt b/fuzzing/requirements.txt deleted file mode 100644 index 58397a6ef..000000000 --- a/fuzzing/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -atheris diff --git a/setup.cfg b/setup.cfg index e04aecc8f..982dbc8a1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,28 +4,17 @@ universal = 1 [flake8] max-line-length = 119 ignore = - # This rule goes against the PEP 8 recommended style and it's incompatible - # with W504 W503 - # Exclude automatically generated files - # E501: Line too long - dateparser/data/date_translation_data/* E501 - - # Exclude files that are meant to provide top-level imports - # F401: Module imported but unused - dateparser/data/__init__.py F401 - dateparser/languages/__init__.py F401 - - # Issues pending a review: - dateparser/freshness_date_parser.py E722 - dateparser/parser.py E722 - 
dateparser/docs/conf.py E402 - - # Additional ignored codes E203 E501 E722 F401 E701 E704 + +exclude = + dateparser/data/date_translation_data/* + dateparser/data/__init__.py + dateparser/languages/__init__.py + docs/conf.py diff --git a/tests/test_clean_api.py b/tests/test_clean_api.py index 62e3b218a..4ed18a7b3 100644 --- a/tests/test_clean_api.py +++ b/tests/test_clean_api.py @@ -119,7 +119,7 @@ def test_dates_which_match_locales_are_parsed( languages=["en"], region="", date_formats=["%a", "%a", "%a", "%a"], - expected_date=datetime(1969, 1, 31, 14, 4), + expected_date=datetime(1969, 1, 31, 13, 4), ) ] ) diff --git a/tests/test_search.py b/tests/test_search.py index ba5a1ea3c..cbd157ce0 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -642,7 +642,14 @@ def test_search_and_parse(self, shortname, string, expected, settings=None): ( "June 23th 5 pm EST", datetime.datetime( - 2023, 6, 23, 17, 0, tzinfo=pytz.timezone("EST") + 2023, + 6, + 23, + 17, + 0, + tzinfo=datetime.timezone( + datetime.timedelta(hours=-5), name="EST" + ), ), ), ("May 31", datetime.datetime(2023, 5, 31, 0, 0)), diff --git a/tox.ini b/tox.ini index e62b86828..5206021db 100644 --- a/tox.ini +++ b/tox.ini @@ -5,7 +5,7 @@ envlist = flake8, py3 deps = -rdateparser_scripts/requirements.txt -rtests/requirements.txt - -rfuzzing/requirements.txt + atheris; python_version < '3.12' [testenv] deps = @@ -40,10 +40,9 @@ commands = [testenv:twinecheck] basepython = python3 -extras = [] deps = - twine==4.0.2 - build==1.0.3 + build + twine commands = - python -m build --sdist + python -m build --sdist --wheel twine check dist/* From c0dfbd73d88686f8824db7b376e90dc696ae5030 Mon Sep 17 00:00:00 2001 From: Serhii A Date: Thu, 30 Jan 2025 16:15:48 +0100 Subject: [PATCH 2/6] Add comment to explain why we need this file --- dateparser/custom_language_detection/fasttext_wrapper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dateparser/custom_language_detection/fasttext_wrapper.py 
b/dateparser/custom_language_detection/fasttext_wrapper.py index ffcea9439..70eadc276 100644 --- a/dateparser/custom_language_detection/fasttext_wrapper.py +++ b/dateparser/custom_language_detection/fasttext_wrapper.py @@ -27,6 +27,7 @@ def check(entry): else: probs, labels = ([], ()) + # Using np.asarray(probs) to avoid errors in the test return labels, np.asarray(probs) From c3f6f370506d4864b1d0e6b75ccc33c75eda2f26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Fri, 31 Jan 2025 11:08:30 +0100 Subject: [PATCH 3/6] WIP --- .flake8 | 7 --- .github/workflows/main.yml | 48 ++++--------------- .pre-commit-config.yaml | 18 ++----- dateparser/calendars/jalali_parser.py | 1 - .../custom_language_detection/fasttext.py | 5 +- .../fasttext_wrapper.py | 35 -------------- dateparser/data/__init__.py | 3 +- dateparser/date.py | 1 - dateparser/freshness_date_parser.py | 2 +- dateparser/parser.py | 16 +++---- docs/conf.py | 4 +- pyproject.toml | 6 +++ setup.cfg | 20 -------- setup.py | 4 +- tests/requirements.txt | 4 -- tests/test_date.py | 2 +- tests/test_date_parser.py | 8 +--- tests/test_dateparser_data_integrity.py | 4 ++ tests/test_hijri.py | 6 ++- tests/test_jalali.py | 4 ++ tests/test_language_detect.py | 4 ++ tests/test_languages.py | 14 ++---- tests/test_search.py | 3 +- tests/test_timezone_parser.py | 6 +-- tox.ini | 34 ++++++++----- 25 files changed, 83 insertions(+), 176 deletions(-) delete mode 100644 .flake8 delete mode 100644 dateparser/custom_language_detection/fasttext_wrapper.py create mode 100644 pyproject.toml delete mode 100644 setup.cfg delete mode 100644 tests/requirements.txt diff --git a/.flake8 b/.flake8 deleted file mode 100644 index cda1b919f..000000000 --- a/.flake8 +++ /dev/null @@ -1,7 +0,0 @@ -[flake8] - -max-line-length = 119 -ignore = W503, E203, E501, E722, F401, E701, E704 - -exclude = - docs/conf.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 76634bf49..839e30cd3 100644 --- 
a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -14,42 +14,17 @@ jobs: fail-fast: false matrix: include: - - python-version: 3.9 - env: - TOXENV: flake8 - - python-version: 3.9 - env: - TOXENV: py - - python-version: "3.10" - env: - TOXENV: py + - python-version: "3.13" + toxenv: pre-commit + - python-version: "3.9" - python-version: "3.10" - env: - TOXENV: latest - - python-version: "3.11" - env: - TOXENV: py - python-version: "3.11" - env: - TOXENV: latest - python-version: "3.12" - env: - TOXENV: py - - python-version: "3.12" - env: - TOXENV: latest - - python-version: "3.13" - env: - TOXENV: py - python-version: "3.13" - env: - TOXENV: latest - - python-version: "3.13" - env: - TOXENV: twinecheck - python-version: "3.12" # Keep in sync with tox.ini - env: - TOXENV: docs + toxenv: docs + - python-version: "3.13" + toxenv: twinecheck steps: - uses: actions/checkout@v3 - name: 'Set up Python ${{ matrix.python-version }}' @@ -63,13 +38,6 @@ jobs: python -m pip install --upgrade pip pip install tox - name: Run tests - run: tox -e ${{ matrix.env.TOXENV }} + run: tox -e ${{ matrix.toxenv || 'py' }} - name: Upload coverage.xml to codecov - if: ${{ matrix.env.python-version == '3.9' && matrix.env.TOXENV == 'latest' }} - uses: codecov/codecov-action@v3 - - pre-commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: pre-commit/action@v3.0.0 + uses: codecov/codecov-action@v4.0.1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e660bf66a..dbfbfa834 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,15 +1,7 @@ repos: -- repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 # 6.0.0 drops Python 3.7 support +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.9.3 hooks: - - id: flake8 - args: ['--config=.flake8'] -- repo: https://github.com/psf/black.git - rev: 23.3.0 - hooks: - - id: black - exclude: ^dateparser/data/date_translation_data/ -- repo: https://github.com/pycqa/isort - 
rev: 5.12.0 # 5.12 drops Python 3.7 support - hooks: - - id: isort + - id: ruff + args: [ --fix ] + - id: ruff-format diff --git a/dateparser/calendars/jalali_parser.py b/dateparser/calendars/jalali_parser.py index 4395fc101..ef437d5fb 100644 --- a/dateparser/calendars/jalali_parser.py +++ b/dateparser/calendars/jalali_parser.py @@ -1,6 +1,5 @@ import re from collections import OrderedDict -from datetime import datetime from functools import reduce from convertdate import persian diff --git a/dateparser/custom_language_detection/fasttext.py b/dateparser/custom_language_detection/fasttext.py index 6dab11109..713b014c3 100644 --- a/dateparser/custom_language_detection/fasttext.py +++ b/dateparser/custom_language_detection/fasttext.py @@ -1,6 +1,7 @@ import os -from dateparser.custom_language_detection.fasttext_wrapper import load_model +import fasttext + from dateparser_cli.exceptions import FastTextModelNotFoundException from dateparser_cli.fasttext_manager import fasttext_downloader from dateparser_cli.utils import create_data_model_home, dateparser_model_home @@ -26,7 +27,7 @@ def _load_fasttext_model(): model_path = os.path.join(dateparser_model_home, downloaded_models[0]) if not os.path.isfile(model_path): raise FastTextModelNotFoundException("Fasttext model file not found") - _FastTextCache.model = load_model(model_path) + _FastTextCache.model = fasttext.load_model(model_path) return _FastTextCache.model diff --git a/dateparser/custom_language_detection/fasttext_wrapper.py b/dateparser/custom_language_detection/fasttext_wrapper.py deleted file mode 100644 index 70eadc276..000000000 --- a/dateparser/custom_language_detection/fasttext_wrapper.py +++ /dev/null @@ -1,35 +0,0 @@ -import fasttext -import numpy as np - - -class FastTextWrapper: - def __init__(self, model_path): - self.model = fasttext.load_model(model_path) - - def predict(self, text, k=1, threshold=0.0, on_unicode_error="strict"): - def check(entry): - if entry.find("\n") != -1: - raise 
ValueError("predict processes one line at a time (remove '\\n')") - entry += "\n" - return entry - - if isinstance(text, list): - text = [check(entry) for entry in text] - all_labels, all_probs = self.model.f.multilinePredict( - text, k, threshold, on_unicode_error - ) - return all_labels, all_probs - else: - text = check(text) - predictions = self.model.f.predict(text, k, threshold, on_unicode_error) - if predictions: - probs, labels = zip(*predictions) - else: - probs, labels = ([], ()) - - # Using np.asarray(probs) to avoid errors in the test - return labels, np.asarray(probs) - - -def load_model(model_path): - return FastTextWrapper(model_path) diff --git a/dateparser/data/__init__.py b/dateparser/data/__init__.py index d09e0eb7a..0bd6b7d25 100644 --- a/dateparser/data/__init__.py +++ b/dateparser/data/__init__.py @@ -1,3 +1,2 @@ from dateparser.data import date_translation_data - -from .languages_info import language_locale_dict, language_order +from .languages_info import language_order, language_locale_dict diff --git a/dateparser/date.py b/dateparser/date.py index d53907a75..5d8f9a63a 100644 --- a/dateparser/date.py +++ b/dateparser/date.py @@ -1,5 +1,4 @@ import collections -import sys from collections.abc import Set from datetime import datetime, timedelta diff --git a/dateparser/freshness_date_parser.py b/dateparser/freshness_date_parser.py index 649824ae4..024fe023e 100644 --- a/dateparser/freshness_date_parser.py +++ b/dateparser/freshness_date_parser.py @@ -31,7 +31,7 @@ def _parse_time(self, date_string, settings): date_string = re.sub(r"\b(?:ago|in)\b", "", date_string) try: return time_parser(date_string) - except: + except Exception: pass def get_local_tz(self): diff --git a/dateparser/parser.py b/dateparser/parser.py index 40aa8ff7b..62b22a9ef 100644 --- a/dateparser/parser.py +++ b/dateparser/parser.py @@ -182,7 +182,7 @@ def _find_best_matching_date(cls, datestring): dt = strptime(datestring, fmt), cls._get_period(fmt) if len(str(dt[0].year)) 
== 4: return dt - except: + except Exception: pass return None @@ -216,7 +216,7 @@ def parse(cls, datestring, settings): missing = _get_missing_parts(fmt) _check_strict_parsing(missing, settings) return dt - except: + except Exception: pass else: if ambiguous_date: @@ -332,7 +332,7 @@ def __init__(self, tokens, settings): token.index(":") # Is after period? raise ValueError if '.' can't be found: self.tokens[self.tokens.index((token, 0)) + 1][0].index(".") - except: + except Exception: microsecond = None if microsecond: @@ -342,7 +342,7 @@ def __init__(self, tokens, settings): meridian = MERIDIAN.search( self.filtered_tokens[meridian_index][0] ).group() - except: + except Exception: meridian = None if any([":" in token, meridian, microsecond]): @@ -514,9 +514,9 @@ def _correct_for_time_frame(self, dateobj, tz): # NOTE: If this assert fires, self.now needs to be made offset-aware in a similar # way that dateobj is temporarily made offset-aware. - assert not ( - self.now.tzinfo is None and dateobj.tzinfo is not None - ), "`self.now` doesn't have `tzinfo`. Review comment in code for details." + assert not (self.now.tzinfo is None and dateobj.tzinfo is not None), ( + "`self.now` doesn't have `tzinfo`. Review comment in code for details." + ) # Store the original dateobj values so that upon subsequent parsing everything is not # treated as offset-aware if offset awareness is changed. @@ -686,7 +686,7 @@ def parse_alpha(token, skip_component=None): (component, getattr(do, component)), ("day", prev_value), ] - except: + except Exception: pass else: raise ValueError("Unable to parse: %s" % token) diff --git a/docs/conf.py b/docs/conf.py index 28a10ef44..3035675d7 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -13,6 +13,8 @@ import os import sys +import dateparser + # Get the project root dir, which is the parent dir of this cwd = os.getcwd() project_root = os.path.dirname(cwd) @@ -22,8 +24,6 @@ # version is used. 
sys.path.insert(0, project_root) -import dateparser - # -- General configuration --------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..4c90b4713 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,6 @@ +[tool.ruff] +exclude = ["date_translation_data"] + +[tool.ruff.lint.per-file-ignores] +"dateparser/data/__init__.py" = ["F401"] +"dateparser/languages/__init__.py" = ["F401"] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 982dbc8a1..000000000 --- a/setup.cfg +++ /dev/null @@ -1,20 +0,0 @@ -[wheel] -universal = 1 - -[flake8] -max-line-length = 119 -ignore = - W503 - - E203 - E501 - E722 - F401 - E701 - E704 - -exclude = - dateparser/data/date_translation_data/* - dateparser/data/__init__.py - dateparser/languages/__init__.py - docs/conf.py diff --git a/setup.py b/setup.py index fce81acc4..c4bc21406 100644 --- a/setup.py +++ b/setup.py @@ -15,8 +15,6 @@ r":mod:|:class:|:func:", "", open("HISTORY.rst", encoding="utf-8").read() ) -test_requirements = open("tests/requirements.txt").read().splitlines() - setup( name="dateparser", version=__version__, @@ -42,7 +40,7 @@ }, extras_require={ "calendars": ["hijridate", "convertdate"], - "fasttext": ["fasttext"], + "fasttext": ["fasttext", "numpy<2"], "langdetect": ["langdetect"], }, license="BSD", diff --git a/tests/requirements.txt b/tests/requirements.txt deleted file mode 100644 index 5367d85e6..000000000 --- a/tests/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -flake8 -parameterized -pytest -pytest-cov diff --git a/tests/test_date.py b/tests/test_date.py index 34f7be3a4..1657feabe 100644 --- a/tests/test_date.py +++ b/tests/test_date.py @@ -81,7 +81,7 @@ def test_should_reject_easily_mistaken_dateutil_arguments(self, invalid_period): self.when_date_range_generated( begin=datetime(2014, 6, 15), end=datetime(2014, 6, 25), - **{invalid_period: 1} + **{invalid_period: 
1}, ) self.then_period_was_rejected(invalid_period) diff --git a/tests/test_date_parser.py b/tests/test_date_parser.py index 2d2795045..9be297847 100644 --- a/tests/test_date_parser.py +++ b/tests/test_date_parser.py @@ -198,9 +198,7 @@ def setUp(self): param("[সেপ্টেম্বর] 04, 2014.", datetime(2014, 9, 4)), param("মঙ্গলবার জুলাই 22, 2014", datetime(2014, 7, 22)), param("শুক্রবার", datetime(2012, 11, 9)), - param( - "শুক্র, 12 ডিসেম্বর 2014 10:55:50", datetime(2014, 12, 12, 10, 55, 50) - ), + param("শুক্র, 12 ডিসেম্বর 2014 10:55:50", datetime(2014, 12, 12, 10, 55, 50)), param("1লা জানুয়ারী 2015", datetime(2015, 1, 1)), param("25শে মার্চ 1971", datetime(1971, 3, 25)), param("8ই মে 2002", datetime(2002, 5, 8)), @@ -418,9 +416,7 @@ def test_stringified_datetime_should_parse_fine(self): param("[সেপ্টেম্বর] 04, 2014.", datetime(2014, 9, 4)), param("মঙ্গলবার জুলাই 22, 2014", datetime(2014, 7, 22)), param("শুক্রবার", datetime(2012, 11, 9)), - param( - "শুক্র, 12 ডিসেম্বর 2014 10:55:50", datetime(2014, 12, 12, 10, 55, 50) - ), + param("শুক্র, 12 ডিসেম্বর 2014 10:55:50", datetime(2014, 12, 12, 10, 55, 50)), param("1লা জানুয়ারী 2015", datetime(2015, 1, 1)), param("25শে মার্চ 1971", datetime(1971, 3, 25)), param("8ই মে 2002", datetime(2002, 5, 8)), diff --git a/tests/test_dateparser_data_integrity.py b/tests/test_dateparser_data_integrity.py index f5c59e612..7e58e81d4 100644 --- a/tests/test_dateparser_data_integrity.py +++ b/tests/test_dateparser_data_integrity.py @@ -1,3 +1,7 @@ +import pytest + +pytest.importorskip("ruamel") + from dateparser_scripts.write_complete_data import write_complete_data diff --git a/tests/test_hijri.py b/tests/test_hijri.py index 508e3562e..00b58da61 100644 --- a/tests/test_hijri.py +++ b/tests/test_hijri.py @@ -1,5 +1,7 @@ -import sys -import unittest +import pytest + +pytest.importorskip("hijridate") + from datetime import datetime from parameterized import param, parameterized diff --git a/tests/test_jalali.py b/tests/test_jalali.py index 
a336d67b2..7511f1661 100644 --- a/tests/test_jalali.py +++ b/tests/test_jalali.py @@ -1,3 +1,7 @@ +import pytest + +pytest.importorskip("convertdate") + from datetime import datetime from parameterized import param, parameterized diff --git a/tests/test_language_detect.py b/tests/test_language_detect.py index 1ecb5fad0..fe82c1db4 100644 --- a/tests/test_language_detect.py +++ b/tests/test_language_detect.py @@ -1,3 +1,7 @@ +import pytest + +pytest.importorskip("fasttext") + import unittest from datetime import datetime from unittest.mock import Mock diff --git a/tests/test_languages.py b/tests/test_languages.py index f1525b446..111ef266b 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -145,9 +145,7 @@ def setUp(self): param("be", "3 стд 2015 г. у 10:33", "3 january 2015 year. 10:33"), # Arabic param("ar", "6 يناير، 2015، الساعة 05:16 مساءً", "6 january 2015 05:16 pm"), - param( - "ar", "7 يناير، 2015، الساعة 11:00 صباحاً", "7 january 2015 11:00 am" - ), + param("ar", "7 يناير، 2015، الساعة 11:00 صباحاً", "7 january 2015 11:00 am"), # Vietnamese param("vi", "Thứ Năm, ngày 8 tháng 1 năm 2015", "thursday 8 january 2015"), param("vi", "Thứ Tư, 07/01/2015 | 22:34", "wednesday 07/01/2015 22:34"), @@ -385,9 +383,7 @@ def setUp(self): param("dyo", "arjuma de 10", "friday december 10"), # dz param("dz", "ཟླ་བཅུ་གཅིག་པ་ 10 གཟའ་ཉི་མ་", "november 10 saturday"), - param( - "dz", "མིར་ 2 སྤྱི་ཟླ་དྲུག་པ 2009 2 ཕྱི་ཆ་", "monday 2 june 2009 2 pm" - ), + param("dz", "མིར་ 2 སྤྱི་ཟླ་དྲུག་པ 2009 2 ཕྱི་ཆ་", "monday 2 june 2009 2 pm"), # ebu param( "ebu", "mweri wa gatantatũ 11 maa 08:05 ut", "june 11 friday 08:05 pm" @@ -761,9 +757,7 @@ def setUp(self): param("nl", "4 augustus 1678 zaterdag", "4 august 1678 saturday"), param("nl", "vr 27 juni 1997", "friday 27 june 1997"), # nmg - param( - "nmg", "5 ngwɛn ńna 1897 sɔ́ndɔ mafú málal", "5 april 1897 wednesday" - ), + param("nmg", "5 ngwɛn ńna 1897 sɔ́ndɔ mafú málal", "5 april 1897 wednesday"), param( "nmg", "mɔ́ndɔ 
1 ng11 1678 04:15 kugú", @@ -2598,7 +2592,7 @@ def get_log_str(self): param( "en", "string instead of dict", - log_msg="Language 'en' info expected to be dict, " "but have got str", + log_msg="Language 'en' info expected to be dict, but have got str", ), ] ) diff --git a/tests/test_search.py b/tests/test_search.py index cbd157ce0..252195503 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -576,8 +576,7 @@ def test_search_and_parse(self, shortname, string, expected, settings=None): ), param( "en", - "2014 was good! October was excellent!" - " Friday, 21 was especially good!", + "2014 was good! October was excellent! Friday, 21 was especially good!", [ ("2014", datetime.datetime(2014, today.month, today.day, 0, 0)), ("October", datetime.datetime(2014, 10, today.day, 0, 0)), diff --git a/tests/test_timezone_parser.py b/tests/test_timezone_parser.py index b8d2d0d46..82b853cb6 100644 --- a/tests/test_timezone_parser.py +++ b/tests/test_timezone_parser.py @@ -19,9 +19,9 @@ class TestTZPopping(BaseTestCase): def setUp(self): super().setUp() - self.initial_string = ( - self.datetime_string - ) = self.timezone_offset = NotImplemented + self.initial_string = self.datetime_string = self.timezone_offset = ( + NotImplemented + ) @parameterized.expand( [ diff --git a/tox.ini b/tox.ini index 5206021db..eb3f4bfaa 100644 --- a/tox.ini +++ b/tox.ini @@ -1,33 +1,41 @@ [tox] -envlist = flake8, py3 +; envlist = pre-commit,min,min-all,py39,py310,py311,py312,py313,all,scripts,docs,twinecheck +; TODO: Setup min envs +envlist = pre-commit,py39,py310,py311,py312,py313,all,scripts,docs,twinecheck [base] deps = - -rdateparser_scripts/requirements.txt - -rtests/requirements.txt - atheris; python_version < '3.12' + pytest + pytest-cov [testenv] deps = {[base]deps} - tzlocal<3.0b1 -extras = calendars, fasttext, langdetect + parameterized + atheris; python_version < '3.12' commands = pytest --cov=dateparser --cov-report=xml {posargs: tests} -[testenv:latest] 
+basepython = python3.13 +extras = + calendars + fasttext + langdetect + +[testenv:scripts] deps = {[base]deps} - tzlocal>=3.0b1 + -rdateparser_scripts/requirements.txt +commands = + pytest --cov=dateparser --cov-report=xml {posargs:tests/test_dateparser_data_integrity.py} -[testenv:flake8] +[testenv:pre-commit] basepython = python3 -extras = calendars, fasttext, langdetect deps = - {[testenv]deps} - pytest-flake8 + pre-commit commands = - pytest --flake8 + pre-commit run {posargs:--all-files} [testenv:docs] changedir = docs From 171925803e59c4ef765a744fb6c26500a8a38b8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Fri, 31 Jan 2025 11:13:15 +0100 Subject: [PATCH 4/6] Fix the TestLocalTZOffset test --- tests/test_timezone_parser.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_timezone_parser.py b/tests/test_timezone_parser.py index 82b853cb6..d837efc64 100644 --- a/tests/test_timezone_parser.py +++ b/tests/test_timezone_parser.py @@ -157,9 +157,7 @@ def given_time(self, utc_dt_string, local_dt_string): datetime_cls = dateparser.timezone_parser.datetime if not isinstance(datetime_cls, Mock): datetime_cls = Mock(wraps=datetime) - utc_dt_obj = datetime.strptime(utc_dt_string, "%Y-%m-%d %H:%M").astimezone( - dt.timezone.utc - ) + utc_dt_obj = datetime.strptime(utc_dt_string, "%Y-%m-%d %H:%M").replace(tzinfo=dt.timezone.utc) local_dt_obj = datetime.strptime(local_dt_string, "%Y-%m-%d %H:%M") def _dt_now(tz=None): From e7a8127e650ec4620f8100ded7b4568282ee26f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Fri, 31 Jan 2025 12:14:49 +0100 Subject: [PATCH 5/6] Setup tests for minimum deps --- .github/workflows/main.yml | 7 +++++++ setup.py | 15 +++++++-------- tox.ini | 24 +++++++++++++++++++++--- 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 839e30cd3..d3977cf1e 100644 --- a/.github/workflows/main.yml +++ 
b/.github/workflows/main.yml @@ -16,11 +16,18 @@ jobs: include: - python-version: "3.13" toxenv: pre-commit + - python-version: "3.9" + toxenv: min + - python-version: "3.9" + toxenv: min-all - python-version: "3.9" - python-version: "3.10" - python-version: "3.11" - python-version: "3.12" - python-version: "3.13" + toxenv: all + - python-version: "3.13" + toxenv: scripts - python-version: "3.12" # Keep in sync with tox.ini toxenv: docs - python-version: "3.13" diff --git a/setup.py b/setup.py index c4bc21406..edab943b3 100644 --- a/setup.py +++ b/setup.py @@ -29,19 +29,18 @@ packages=find_packages(exclude=("tests", "tests.*")), include_package_data=True, install_requires=[ - "python-dateutil", - "pytz", - # https://bitbucket.org/mrabarnett/mrab-regex/issues/314/import-error-no-module-named - "regex !=2019.02.19,!=2021.8.27", - "tzlocal", + "python-dateutil>=2.7.0", + "pytz>=2024.2", + "regex>=2015.06.24,!=2019.02.19,!=2021.8.27", + "tzlocal>=0.2", ], entry_points={ "console_scripts": ["dateparser-download = dateparser_cli.cli:entrance"], }, extras_require={ - "calendars": ["hijridate", "convertdate"], - "fasttext": ["fasttext", "numpy<2"], - "langdetect": ["langdetect"], + "calendars": ["convertdate>=2.2.1", "hijridate"], + "fasttext": ["fasttext>=0.9.1", "numpy>=1.19.3,<2"], + "langdetect": ["langdetect>=1.0.0"], }, license="BSD", zip_safe=False, diff --git a/tox.ini b/tox.ini index eb3f4bfaa..312b4b3ae 100644 --- a/tox.ini +++ b/tox.ini @@ -1,7 +1,5 @@ [tox] -; envlist = pre-commit,min,min-all,py39,py310,py311,py312,py313,all,scripts,docs,twinecheck -; TODO: Setup min envs -envlist = pre-commit,py39,py310,py311,py312,py313,all,scripts,docs,twinecheck +envlist = pre-commit,min,min-all,py39,py310,py311,py312,py313,all,scripts,docs,twinecheck [base] deps = @@ -23,6 +21,26 @@ extras = fasttext langdetect +[testenv:min] +basepython = python3.9 +deps = + {[testenv]deps} + python-dateutil==2.7.0 + pytz==2024.2 + regex==2015.06.24 + tzlocal==0.2 + +[testenv:min-all] 
+basepython = {[testenv:min]basepython} +extras = {[testenv:all]extras} +deps = + {[testenv:min]deps} + convertdate==2.2.1 + fasttext==0.9.1 + hijridate==2.3.0 + langdetect==1.0.0 + numpy==1.19.3 + [testenv:scripts] deps = {[base]deps} From 2d79a4e13859b96918edd0bbb142e33f9aeca168 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Fri, 31 Jan 2025 12:18:44 +0100 Subject: [PATCH 6/6] Run pre-commit --- tests/test_timezone_parser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_timezone_parser.py b/tests/test_timezone_parser.py index d837efc64..5279fa622 100644 --- a/tests/test_timezone_parser.py +++ b/tests/test_timezone_parser.py @@ -157,7 +157,9 @@ def given_time(self, utc_dt_string, local_dt_string): datetime_cls = dateparser.timezone_parser.datetime if not isinstance(datetime_cls, Mock): datetime_cls = Mock(wraps=datetime) - utc_dt_obj = datetime.strptime(utc_dt_string, "%Y-%m-%d %H:%M").replace(tzinfo=dt.timezone.utc) + utc_dt_obj = datetime.strptime(utc_dt_string, "%Y-%m-%d %H:%M").replace( + tzinfo=dt.timezone.utc + ) local_dt_obj = datetime.strptime(local_dt_string, "%Y-%m-%d %H:%M") def _dt_now(tz=None):