diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..cc6b873 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,2 @@ +# Apply black format +627f3bd9ea5210f40dbd5697eff9351bb5af019c diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ddb5115..e701aca 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -3,6 +3,15 @@ on: - pull_request - push jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v3 + with: + python-version: "3.11" + - uses: pre-commit/action@v3.0.0 + tests: runs-on: ubuntu-latest strategy: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..57fee0e --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,24 @@ +default_language_version: + python: python3.10 +repos: + - hooks: + - id: black + language_version: python3 + repo: https://github.com/ambv/black + rev: 23.3.0 + - hooks: + - id: isort + language_version: python3 + repo: https://github.com/PyCQA/isort + rev: 5.12.0 + - hooks: + - id: flake8 + language_version: python3 + additional_dependencies: + - flake8-bugbear + - flake8-comprehensions + - flake8-debugger + - flake8-docstrings + - flake8-string-format + repo: https://github.com/pycqa/flake8 + rev: 6.0.0 diff --git a/README.rst b/README.rst index 81751ad..a33b041 100644 --- a/README.rst +++ b/README.rst @@ -73,4 +73,5 @@ All contributions are welcome! * File an `issue here `_, if there isn't one yet * Fork this repository * Create a branch to work on your changes + * Run `pre-commit install` to install pre-commit hooks * Push your local branch and submit a Pull Request diff --git a/docs/_ext/__init__.py b/docs/_ext/__init__.py index 8b13789..e69de29 100644 --- a/docs/_ext/__init__.py +++ b/docs/_ext/__init__.py @@ -1 +0,0 @@ - diff --git a/docs/_ext/github.py b/docs/_ext/github.py index e1adcfc..00f8783 100644 --- a/docs/_ext/github.py +++ b/docs/_ext/github.py @@ -1,19 +1,31 @@ +from typing import Optional + from docutils import nodes from docutils.parsers.rst.roles import set_classes def setup(app): - app.add_role('gh', github_role) + app.add_role("gh", github_role) -def github_role(name, rawtext, text, lineno, inliner, options={}, content=[]): +def github_role( + name, + rawtext, + text, + lineno, + inliner, + options: Optional[dict] = None, + content: Optional[list] = None, +): + options = options or {} + content = content or [] if text.isdigit(): - display_text = f'#{text}' - url = f'https://github.com/scrapy/itemloaders/issues/{text}' + display_text = f"#{text}" + url = f"https://github.com/scrapy/itemloaders/issues/{text}" else: short_commit = text[:7] display_text = short_commit - url = f'https://github.com/scrapy/itemloaders/commit/{short_commit}' + url = f"https://github.com/scrapy/itemloaders/commit/{short_commit}" set_classes(options) node = nodes.reference(rawtext, display_text, refuri=url, **options) diff --git a/docs/conf.py b/docs/conf.py index 9713489..862dde8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -15,6 +15,8 @@ from datetime import datetime from os import path +import sphinx_rtd_theme + # If your extensions are in another directory, add it here. If the directory # is relative to the documentation root, use os.path.abspath to make it # absolute, like shown here. @@ -27,72 +29,72 @@ # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = [ - '_ext.github', - 'sphinx.ext.autodoc', - 'sphinx.ext.coverage', - 'sphinx.ext.intersphinx', - 'sphinx.ext.viewcode', + "_ext.github", + "sphinx.ext.autodoc", + "sphinx.ext.coverage", + "sphinx.ext.intersphinx", + "sphinx.ext.viewcode", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8' +# source_encoding = 'utf-8' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'itemloaders' -copyright = '2020–{}, Zyte Group Ltd'.format(datetime.now().year) +project = "itemloaders" +copyright = "2020–{}, Zyte Group Ltd".format(datetime.now().year) # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '' -release = '' +version = "" +release = "" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -language = 'en' +language = "en" # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of documents that shouldn't be included in the build. -#unused_docs = [] +# unused_docs = [] -exclude_patterns = ['build'] +exclude_patterns = ["build"] # List of directories, relative to source directory, that shouldn't be searched # for source files. -exclude_trees = ['.build'] +exclude_trees = [".build"] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # Options for HTML output @@ -100,17 +102,17 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. # Add path to the RTD explicitly to robustify builds (otherwise might # fail in a clean Debian build env) -import sphinx_rtd_theme + html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] @@ -121,19 +123,19 @@ # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -142,23 +144,23 @@ # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -html_last_updated_fmt = '%b %d, %Y' +html_last_updated_fmt = "%b %d, %Y" # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_use_modindex = True +# html_use_modindex = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, the reST sources are included in the HTML build as _sources/. html_copy_source = True @@ -166,68 +168,68 @@ # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = '' +# html_file_suffix = '' # Output file base name for HTML help builder. -htmlhelp_basename = 'itemloadersdoc' +htmlhelp_basename = "itemloadersdoc" # Options for LaTeX output # ------------------------ # The paper size ('letter' or 'a4'). -#latex_paper_size = 'letter' +# latex_paper_size = 'letter' # The font size ('10pt', '11pt' or '12pt'). -#latex_font_size = '10pt' +# latex_font_size = '10pt' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, document class [howto/manual]). latex_documents = [ - ('index', 'itemloaders.tex', 'itemloaders Documentation', 'Zyte', 'manual'), + ("index", "itemloaders.tex", "itemloaders Documentation", "Zyte", "manual"), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # Additional stuff for the LaTeX preamble. -#latex_preamble = '' +# latex_preamble = '' # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_use_modindex = True +# latex_use_modindex = True # autodocs def setup(app): - app.connect('autodoc-skip-member', maybe_skip_member) + app.connect("autodoc-skip-member", maybe_skip_member) def maybe_skip_member(app, what, name, obj, skip, options): if not skip: # autodocs was generating a text "alias of" for the following members # https://github.com/sphinx-doc/sphinx/issues/4422 - return name in {'default_item_class', 'default_selector_class'} + return name in {"default_item_class", "default_selector_class"} return skip nitpicky = True intersphinx_mapping = { - 'parsel': ('https://parsel.readthedocs.io/en/stable/', None), - 'python': ('https://docs.python.org/3', None), - 'scrapy': ('https://docs.scrapy.org/en/latest/', None), - 'w3lib': ('https://w3lib.readthedocs.io/en/latest', None), + "parsel": ("https://parsel.readthedocs.io/en/stable/", None), + "python": ("https://docs.python.org/3", None), + "scrapy": ("https://docs.scrapy.org/en/latest/", None), + "w3lib": ("https://w3lib.readthedocs.io/en/latest", None), } diff --git a/itemloaders/__init__.py b/itemloaders/__init__.py index a84da84..deb80f4 100644 --- a/itemloaders/__init__.py +++ b/itemloaders/__init__.py @@ -245,7 +245,7 @@ def get_value(self, value, *processors, re=None, **kw): raise ValueError( "Error with processor %s value=%r error='%s: %s'" % (_proc.__class__.__name__, value, type(e).__name__, str(e)) - ) + ) from e return value def load_item(self): @@ -276,7 +276,7 @@ def get_output_value(self, field_name): raise ValueError( "Error with output processor: field=%r value=%r error='%s: %s'" % (field_name, value, type(e).__name__, str(e)) - ) + ) from e def get_collected_values(self, field_name): """Return the collected values for the given field.""" @@ -319,7 +319,7 @@ def _process_input_value(self, field_name, value): type(e).__name__, str(e), ) - ) + ) from e def _check_selector_method(self): if self.selector is None: diff --git a/itemloaders/common.py b/itemloaders/common.py index 4fc24c6..6c0b7fa 100644 --- a/itemloaders/common.py +++ b/itemloaders/common.py @@ -1,6 +1,7 @@ """Common functions used in Item Loaders code""" from functools import partial + from itemloaders.utils import get_func_args @@ -8,7 +9,7 @@ def wrap_loader_context(function, context): """Wrap functions that receive loader_context to contain the context "pre-loaded" and expose a interface that receives only one argument """ - if 'loader_context' in get_func_args(function): + if "loader_context" in get_func_args(function): return partial(function, loader_context=context) else: return function diff --git a/itemloaders/processors.py b/itemloaders/processors.py index 4bf3440..c4aa039 100644 --- a/itemloaders/processors.py +++ b/itemloaders/processors.py @@ -5,8 +5,8 @@ """ from collections import ChainMap -from itemloaders.utils import arg_to_iter from itemloaders.common import wrap_loader_context +from itemloaders.utils import arg_to_iter class MapCompose: @@ -51,7 +51,7 @@ class MapCompose: See :class:`Compose` processor for more info. .. _`parsel selectors`: https://parsel.readthedocs.io/en/latest/parsel.html#parsel.selector.Selector.extract - """ + """ # noqa def __init__(self, *functions, **default_loader_context): self.functions = functions @@ -70,10 +70,11 @@ def __call__(self, value, loader_context=None): try: next_values += arg_to_iter(func(v)) except Exception as e: - raise ValueError("Error in MapCompose with " - "%s value=%r error='%s: %s'" % - (str(func), value, type(e).__name__, - str(e))) + raise ValueError( + "Error in MapCompose with " + "%s value=%r error='%s: %s'" + % (str(func), value, type(e).__name__, str(e)) + ) from e values = next_values return values @@ -109,7 +110,7 @@ class Compose: def __init__(self, *functions, **default_loader_context): self.functions = functions - self.stop_on_none = default_loader_context.get('stop_on_none', True) + self.stop_on_none = default_loader_context.get("stop_on_none", True) self.default_loader_context = default_loader_context def __call__(self, value, loader_context=None): @@ -124,9 +125,11 @@ def __call__(self, value, loader_context=None): try: value = func(value) except Exception as e: - raise ValueError("Error in Compose with " - "%s value=%r error='%s: %s'" % - (str(func), value, type(e).__name__, str(e))) + raise ValueError( + "Error in Compose with " + "%s value=%r error='%s: %s'" + % (str(func), value, type(e).__name__, str(e)) + ) from e return value @@ -146,7 +149,7 @@ class TakeFirst: def __call__(self, values): for value in values: - if value is not None and value != '': + if value is not None and value != "": return value @@ -197,6 +200,7 @@ class SelectJmes: def __init__(self, json_path): self.json_path = json_path import jmespath + self.compiled_path = jmespath.compile(self.json_path) def __call__(self, value): @@ -226,7 +230,7 @@ class Join: 'one
two
three' """ - def __init__(self, separator=' '): + def __init__(self, separator=" "): self.separator = separator def __call__(self, values): diff --git a/itemloaders/utils.py b/itemloaders/utils.py index 85570a6..361814f 100644 --- a/itemloaders/utils.py +++ b/itemloaders/utils.py @@ -7,7 +7,6 @@ from itemadapter import is_item - _ITERABLE_SINGLE_VALUES = str, bytes @@ -20,7 +19,7 @@ def arg_to_iter(arg): if arg is None: return [] elif ( - hasattr(arg, '__iter__') + hasattr(arg, "__iter__") and not isinstance(arg, _ITERABLE_SINGLE_VALUES) and not is_item(arg) ): @@ -32,7 +31,7 @@ def arg_to_iter(arg): def get_func_args(func, stripself=False): """Return the argument name list of a callable object""" if not callable(func): - raise TypeError(f"func must be callable, got '{type(func).__name__}'") + raise TypeError(f"func must be callable, got {type(func).__name__!r}") args = [] try: diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index c17e1cf..0000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,7 +0,0 @@ -w3lib>=1.21.0 -parsel>=1.5.2 -jmespath>=0.9.5 -itemadapter>=0.1.0 - -pytest==5.4.1 -flake8==3.7.9 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index ab1cdc8..6e8d795 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,4 +2,7 @@ ignore = E266, E501, W503 max-line-length = 100 select = B,C,E,F,W,T4,B9 -exclude = .git,__pycache__,.venv \ No newline at end of file +exclude = .git,__pycache__,.venv + +[isort] +profile = black diff --git a/setup.py b/setup.py index 56135c1..f750571 100644 --- a/setup.py +++ b/setup.py @@ -1,48 +1,48 @@ -from setuptools import setup, find_packages +from setuptools import find_packages, setup -with open('README.rst') as f: +with open("README.rst") as f: long_description = f.read() setup( - name='itemloaders', - version='1.0.6', - url='https://github.com/scrapy/itemloaders', + name="itemloaders", + version="1.0.6", + url="https://github.com/scrapy/itemloaders", project_urls={ - 'Documentation': 'https://itemloaders.readthedocs.io/', - 'Source': 'https://github.com/scrapy/itemloaders', + "Documentation": "https://itemloaders.readthedocs.io/", + "Source": "https://github.com/scrapy/itemloaders", }, description="Base library for scrapy's ItemLoader", long_description=long_description, long_description_content_type="text/x-rst", - author='Zyte', - author_email='opensource@zyte.com', - license='BSD', - packages=find_packages(exclude=('tests', 'tests.*')), + author="Zyte", + author_email="opensource@zyte.com", + license="BSD", + packages=find_packages(exclude=("tests", "tests.*")), include_package_data=True, zip_safe=False, classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: BSD License', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', - 'Programming Language :: Python :: Implementation :: CPython', - 'Programming Language :: Python :: Implementation :: PyPy', + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", ], - python_requires='>=3.7', + python_requires=">=3.7", install_requires=[ # before updating these versions, be sure they are not higher than # scrapy's requirements - 'w3lib>=1.17.0', - 'parsel>=1.5.0', - 'jmespath>=0.9.5', - 'itemadapter>=0.1.0', + "w3lib>=1.17.0", + "parsel>=1.5.0", + "jmespath>=0.9.5", + "itemadapter>=0.1.0", ], # extras_require=extras_require, ) diff --git a/tests/test_base_loader.py b/tests/test_base_loader.py index b4ed396..c0bf007 100644 --- a/tests/test_base_loader.py +++ b/tests/test_base_loader.py @@ -1,5 +1,5 @@ -from functools import partial import unittest +from functools import partial from itemloaders import ItemLoader from itemloaders.processors import Compose, Identity, Join, MapCompose, TakeFirst @@ -15,27 +15,26 @@ class DefaultedItemLoader(ItemLoader): # test processors def processor_with_args(value, other=None, loader_context=None): - if 'key' in loader_context: - return loader_context['key'] + if "key" in loader_context: + return loader_context["key"] return value class BasicItemLoaderTest(unittest.TestCase): - def test_load_item_using_default_loader(self): - i = dict(summary='lala') + i = {"summary": "lala"} il = ItemLoader(item=i) - il.add_value('name', 'marta') + il.add_value("name", "marta") item = il.load_item() assert item is i - assert item['summary'] == ['lala'] - assert item['name'] == ['marta'] + assert item["summary"] == ["lala"] + assert item["name"] == ["marta"] def test_load_item_using_custom_loader(self): il = CustomItemLoader() - il.add_value('name', 'marta') + il.add_value("name", "marta") item = il.load_item() - assert item['name'] == ['Marta'] + assert item["name"] == ["Marta"] def test_load_item_ignore_none_field_values(self): def validate_sku(value): @@ -48,227 +47,239 @@ class MyLoader(ItemLoader): price_out = Compose(TakeFirst(), float) sku_out = Compose(TakeFirst(), validate_sku) - valid_fragment = 'SKU: 1234' - invalid_fragment = 'SKU: not available' - sku_re = 'SKU: (.+)' + valid_fragment = "SKU: 1234" + invalid_fragment = "SKU: not available" + sku_re = "SKU: (.+)" il = MyLoader(item={}) # Should not return "sku: None". - il.add_value('sku', [invalid_fragment], re=sku_re) + il.add_value("sku", [invalid_fragment], re=sku_re) # Should not ignore empty values. - il.add_value('name', '') - il.add_value('price', ['0']) - assert il.load_item() == {'name': '', 'price': 0.0} + il.add_value("name", "") + il.add_value("price", ["0"]) + assert il.load_item() == {"name": "", "price": 0.0} - il.replace_value('sku', [valid_fragment], re=sku_re) - self.assertEqual(il.load_item()['sku'], '1234') + il.replace_value("sku", [valid_fragment], re=sku_re) + self.assertEqual(il.load_item()["sku"], "1234") def test_self_referencing_loader(self): class MyLoader(ItemLoader): url_out = TakeFirst() def img_url_out(self, values): - return (self.get_output_value('url') or '') + values[0] + return (self.get_output_value("url") or "") + values[0] il = MyLoader(item={}) - il.add_value('url', 'http://example.com/') - il.add_value('img_url', '1234.png') + il.add_value("url", "http://example.com/") + il.add_value("img_url", "1234.png") assert il.load_item() == { - 'url': 'http://example.com/', - 'img_url': 'http://example.com/1234.png', + "url": "http://example.com/", + "img_url": "http://example.com/1234.png", } il = MyLoader(item={}) - il.add_value('img_url', '1234.png') - assert il.load_item() == {'img_url': '1234.png'} + il.add_value("img_url", "1234.png") + assert il.load_item() == {"img_url": "1234.png"} def test_add_value(self): il = CustomItemLoader() - il.add_value('name', 'marta') - assert il.get_collected_values('name') == ['Marta'] - assert il.get_output_value('name') == ['Marta'] + il.add_value("name", "marta") + assert il.get_collected_values("name") == ["Marta"] + assert il.get_output_value("name") == ["Marta"] - il.add_value('name', 'pepe') - assert il.get_collected_values('name') == ['Marta', 'Pepe'] - assert il.get_output_value('name') == ['Marta', 'Pepe'] + il.add_value("name", "pepe") + assert il.get_collected_values("name") == ["Marta", "Pepe"] + assert il.get_output_value("name") == ["Marta", "Pepe"] # test add object value - il.add_value('summary', {'key': 1}) - assert il.get_collected_values('summary') == [{'key': 1}] + il.add_value("summary", {"key": 1}) + assert il.get_collected_values("summary") == [{"key": 1}] - il.add_value(None, 'Jim', lambda x: {'name': x}) - assert il.get_collected_values('name') == ['Marta', 'Pepe', 'Jim'] + il.add_value(None, "Jim", lambda x: {"name": x}) + assert il.get_collected_values("name") == ["Marta", "Pepe", "Jim"] def test_add_zero(self): il = ItemLoader() - il.add_value('name', 0) - assert il.get_collected_values('name') == [0] + il.add_value("name", 0) + assert il.get_collected_values("name") == [0] def test_add_none(self): il = ItemLoader() - il.add_value('name', None) - assert il.get_collected_values('name') == [] + il.add_value("name", None) + assert il.get_collected_values("name") == [] def test_replace_value(self): il = CustomItemLoader() - il.replace_value('name', 'marta') - self.assertEqual(il.get_collected_values('name'), ['Marta']) - self.assertEqual(il.get_output_value('name'), ['Marta']) - il.replace_value('name', 'pepe') - self.assertEqual(il.get_collected_values('name'), ['Pepe']) - self.assertEqual(il.get_output_value('name'), ['Pepe']) + il.replace_value("name", "marta") + self.assertEqual(il.get_collected_values("name"), ["Marta"]) + self.assertEqual(il.get_output_value("name"), ["Marta"]) + il.replace_value("name", "pepe") + self.assertEqual(il.get_collected_values("name"), ["Pepe"]) + self.assertEqual(il.get_output_value("name"), ["Pepe"]) - il.replace_value(None, 'Jim', lambda x: {'name': x}) - self.assertEqual(il.get_collected_values('name'), ['Jim']) + il.replace_value(None, "Jim", lambda x: {"name": x}) + self.assertEqual(il.get_collected_values("name"), ["Jim"]) def test_replace_value_none(self): il = CustomItemLoader() - il.replace_value('name', None) - self.assertEqual(il.get_collected_values('name'), []) - il.replace_value('name', 'marta') - self.assertEqual(il.get_collected_values('name'), ['Marta']) - il.replace_value('name', None) # when replacing with `None` nothing should happen - self.assertEqual(il.get_collected_values('name'), ['Marta']) + il.replace_value("name", None) + self.assertEqual(il.get_collected_values("name"), []) + il.replace_value("name", "marta") + self.assertEqual(il.get_collected_values("name"), ["Marta"]) + il.replace_value( + "name", None + ) # when replacing with `None` nothing should happen + self.assertEqual(il.get_collected_values("name"), ["Marta"]) def test_get_value(self): il = ItemLoader() - self.assertEqual('FOO', il.get_value(['foo', 'bar'], TakeFirst(), str.upper)) - self.assertEqual(['foo', 'bar'], il.get_value(['name:foo', 'name:bar'], re='name:(.*)$')) - self.assertEqual('foo', il.get_value(['name:foo', 'name:bar'], TakeFirst(), re='name:(.*)$')) - self.assertEqual(None, il.get_value(['foo', 'bar'], TakeFirst(), re='name:(.*)$')) + self.assertEqual("FOO", il.get_value(["foo", "bar"], TakeFirst(), str.upper)) + self.assertEqual( + ["foo", "bar"], il.get_value(["name:foo", "name:bar"], re="name:(.*)$") + ) + self.assertEqual( + "foo", il.get_value(["name:foo", "name:bar"], TakeFirst(), re="name:(.*)$") + ) + self.assertEqual( + None, il.get_value(["foo", "bar"], TakeFirst(), re="name:(.*)$") + ) self.assertEqual(None, il.get_value(None, TakeFirst())) - il.add_value('name', ['name:foo', 'name:bar'], TakeFirst(), re='name:(.*)$') - self.assertEqual(['foo'], il.get_collected_values('name')) - il.replace_value('name', 'name:bar', re='name:(.*)$') - self.assertEqual(['bar'], il.get_collected_values('name')) + il.add_value("name", ["name:foo", "name:bar"], TakeFirst(), re="name:(.*)$") + self.assertEqual(["foo"], il.get_collected_values("name")) + il.replace_value("name", "name:bar", re="name:(.*)$") + self.assertEqual(["bar"], il.get_collected_values("name")) def test_iter_on_input_processor_input(self): class NameFirstItemLoader(ItemLoader): name_in = TakeFirst() il = NameFirstItemLoader() - il.add_value('name', 'marta') - self.assertEqual(il.get_collected_values('name'), ['marta']) + il.add_value("name", "marta") + self.assertEqual(il.get_collected_values("name"), ["marta"]) il = NameFirstItemLoader() - il.add_value('name', ['marta', 'jose']) - self.assertEqual(il.get_collected_values('name'), ['marta']) + il.add_value("name", ["marta", "jose"]) + self.assertEqual(il.get_collected_values("name"), ["marta"]) il = NameFirstItemLoader() - il.replace_value('name', 'marta') - self.assertEqual(il.get_collected_values('name'), ['marta']) + il.replace_value("name", "marta") + self.assertEqual(il.get_collected_values("name"), ["marta"]) il = NameFirstItemLoader() - il.replace_value('name', ['marta', 'jose']) - self.assertEqual(il.get_collected_values('name'), ['marta']) + il.replace_value("name", ["marta", "jose"]) + self.assertEqual(il.get_collected_values("name"), ["marta"]) il = NameFirstItemLoader() - il.add_value('name', 'marta') - il.add_value('name', ['jose', 'pedro']) - self.assertEqual(il.get_collected_values('name'), ['marta', 'jose']) + il.add_value("name", "marta") + il.add_value("name", ["jose", "pedro"]) + self.assertEqual(il.get_collected_values("name"), ["marta", "jose"]) def test_map_compose_filter(self): def filter_world(x): - return None if x == 'world' else x + return None if x == "world" else x proc = MapCompose(filter_world, str.upper) - self.assertEqual(proc(['hello', 'world', 'this', 'is', 'scrapy']), - ['HELLO', 'THIS', 'IS', 'SCRAPY']) + self.assertEqual( + proc(["hello", "world", "this", "is", "scrapy"]), + ["HELLO", "THIS", "IS", "SCRAPY"], + ) def test_map_compose_filter_multil(self): class CustomItemLoader(ItemLoader): name_in = MapCompose(lambda v: v.title(), lambda v: v[:-1]) il = CustomItemLoader() - il.add_value('name', 'marta') - self.assertEqual(il.get_output_value('name'), ['Mart']) + il.add_value("name", "marta") + self.assertEqual(il.get_output_value("name"), ["Mart"]) item = il.load_item() - self.assertEqual(item['name'], ['Mart']) + self.assertEqual(item["name"], ["Mart"]) def test_default_input_processor(self): il = DefaultedItemLoader() - il.add_value('name', 'marta') - self.assertEqual(il.get_output_value('name'), ['mart']) + il.add_value("name", "marta") + self.assertEqual(il.get_output_value("name"), ["mart"]) def test_inherited_default_input_processor(self): class InheritDefaultedItemLoader(DefaultedItemLoader): pass il = InheritDefaultedItemLoader() - il.add_value('name', 'marta') - self.assertEqual(il.get_output_value('name'), ['mart']) + il.add_value("name", "marta") + self.assertEqual(il.get_output_value("name"), ["mart"]) def test_input_processor_inheritance(self): class ChildItemLoader(CustomItemLoader): url_in = MapCompose(lambda v: v.lower()) il = ChildItemLoader() - il.add_value('url', 'HTTP://scrapy.ORG') - self.assertEqual(il.get_output_value('url'), ['http://scrapy.org']) - il.add_value('name', 'marta') - self.assertEqual(il.get_output_value('name'), ['Marta']) + il.add_value("url", "HTTP://scrapy.ORG") + self.assertEqual(il.get_output_value("url"), ["http://scrapy.org"]) + il.add_value("name", "marta") + self.assertEqual(il.get_output_value("name"), ["Marta"]) class ChildChildItemLoader(ChildItemLoader): url_in = MapCompose(lambda v: v.upper()) summary_in = MapCompose(lambda v: v) il = ChildChildItemLoader() - il.add_value('url', 'http://scrapy.org') - self.assertEqual(il.get_output_value('url'), ['HTTP://SCRAPY.ORG']) - il.add_value('name', 'marta') - self.assertEqual(il.get_output_value('name'), ['Marta']) + il.add_value("url", "http://scrapy.org") + self.assertEqual(il.get_output_value("url"), ["HTTP://SCRAPY.ORG"]) + il.add_value("name", "marta") + self.assertEqual(il.get_output_value("name"), ["Marta"]) def test_empty_map_compose(self): class IdentityDefaultedItemLoader(DefaultedItemLoader): name_in = MapCompose() il = IdentityDefaultedItemLoader() - il.add_value('name', 'marta') - self.assertEqual(il.get_output_value('name'), ['marta']) + il.add_value("name", "marta") + self.assertEqual(il.get_output_value("name"), ["marta"]) def test_identity_input_processor(self): class IdentityDefaultedItemLoader(DefaultedItemLoader): name_in = Identity() il = IdentityDefaultedItemLoader() - il.add_value('name', 'marta') - self.assertEqual(il.get_output_value('name'), ['marta']) + il.add_value("name", "marta") + self.assertEqual(il.get_output_value("name"), ["marta"]) def test_extend_custom_input_processors(self): class ChildItemLoader(CustomItemLoader): name_in = MapCompose(CustomItemLoader.name_in, str.swapcase) il = ChildItemLoader() - il.add_value('name', 'marta') - self.assertEqual(il.get_output_value('name'), ['mARTA']) + il.add_value("name", "marta") + self.assertEqual(il.get_output_value("name"), ["mARTA"]) def test_extend_default_input_processors(self): class ChildDefaultedItemLoader(DefaultedItemLoader): - name_in = MapCompose(DefaultedItemLoader.default_input_processor, str.swapcase) + name_in = MapCompose( + DefaultedItemLoader.default_input_processor, str.swapcase + ) il = ChildDefaultedItemLoader() - il.add_value('name', 'marta') - self.assertEqual(il.get_output_value('name'), ['MART']) + il.add_value("name", "marta") + self.assertEqual(il.get_output_value("name"), ["MART"]) def test_output_processor_using_function(self): il = CustomItemLoader() - il.add_value('name', ['mar', 'ta']) - self.assertEqual(il.get_output_value('name'), ['Mar', 'Ta']) + il.add_value("name", ["mar", "ta"]) + self.assertEqual(il.get_output_value("name"), ["Mar", "Ta"]) class TakeFirstItemLoader(CustomItemLoader): - name_out = u" ".join + name_out = " ".join il = TakeFirstItemLoader() - il.add_value('name', ['mar', 'ta']) - self.assertEqual(il.get_output_value('name'), 'Mar Ta') + il.add_value("name", ["mar", "ta"]) + self.assertEqual(il.get_output_value("name"), "Mar Ta") def test_output_processor_error(self): class CustomItemLoader(ItemLoader): name_out = MapCompose(float) il = CustomItemLoader() - il.add_value('name', ['$10']) + il.add_value("name", ["$10"]) try: - float('$10') + float("$10") except Exception as e: expected_exc_str = str(e) @@ -279,86 +290,86 @@ class CustomItemLoader(ItemLoader): exc = e assert isinstance(exc, ValueError) s = str(exc) - assert 'name' in s, s - assert '$10' in s, s - assert 'ValueError' in s, s + assert "name" in s, s + assert "$10" in s, s + assert "ValueError" in s, s assert expected_exc_str in s, s def test_output_processor_using_classes(self): il = CustomItemLoader() - il.add_value('name', ['mar', 'ta']) - self.assertEqual(il.get_output_value('name'), ['Mar', 'Ta']) + il.add_value("name", ["mar", "ta"]) + self.assertEqual(il.get_output_value("name"), ["Mar", "Ta"]) class TakeFirstItemLoader(CustomItemLoader): name_out = Join() il = TakeFirstItemLoader() - il.add_value('name', ['mar', 'ta']) - self.assertEqual(il.get_output_value('name'), 'Mar Ta') + il.add_value("name", ["mar", "ta"]) + self.assertEqual(il.get_output_value("name"), "Mar Ta") class TakeFirstItemLoader(CustomItemLoader): name_out = Join("
") il = TakeFirstItemLoader() - il.add_value('name', ['mar', 'ta']) - self.assertEqual(il.get_output_value('name'), 'Mar
Ta') + il.add_value("name", ["mar", "ta"]) + self.assertEqual(il.get_output_value("name"), "Mar
Ta") def test_default_output_processor(self): il = CustomItemLoader() - il.add_value('name', ['mar', 'ta']) - self.assertEqual(il.get_output_value('name'), ['Mar', 'Ta']) + il.add_value("name", ["mar", "ta"]) + self.assertEqual(il.get_output_value("name"), ["Mar", "Ta"]) class LalaItemLoader(CustomItemLoader): default_output_processor = Identity() il = LalaItemLoader() - il.add_value('name', ['mar', 'ta']) - self.assertEqual(il.get_output_value('name'), ['Mar', 'Ta']) + il.add_value("name", ["mar", "ta"]) + self.assertEqual(il.get_output_value("name"), ["Mar", "Ta"]) def test_loader_context_on_declaration(self): class ChildItemLoader(CustomItemLoader): - url_in = MapCompose(processor_with_args, key='val') + url_in = MapCompose(processor_with_args, key="val") il = ChildItemLoader() - il.add_value('url', 'text') - self.assertEqual(il.get_output_value('url'), ['val']) - il.replace_value('url', 'text2') - self.assertEqual(il.get_output_value('url'), ['val']) + il.add_value("url", "text") + self.assertEqual(il.get_output_value("url"), ["val"]) + il.replace_value("url", "text2") + self.assertEqual(il.get_output_value("url"), ["val"]) def test_loader_context_on_instantiation(self): class ChildItemLoader(CustomItemLoader): url_in = MapCompose(processor_with_args) - il = ChildItemLoader(key='val') - il.add_value('url', 'text') - self.assertEqual(il.get_output_value('url'), ['val']) - il.replace_value('url', 'text2') - self.assertEqual(il.get_output_value('url'), ['val']) + il = ChildItemLoader(key="val") + il.add_value("url", "text") + self.assertEqual(il.get_output_value("url"), ["val"]) + il.replace_value("url", "text2") + self.assertEqual(il.get_output_value("url"), ["val"]) def test_loader_context_on_assign(self): class ChildItemLoader(CustomItemLoader): url_in = MapCompose(processor_with_args) il = ChildItemLoader() - il.context['key'] = 'val' - il.add_value('url', 'text') - self.assertEqual(il.get_output_value('url'), ['val']) - il.replace_value('url', 'text2') - self.assertEqual(il.get_output_value('url'), ['val']) + il.context["key"] = "val" + il.add_value("url", "text") + self.assertEqual(il.get_output_value("url"), ["val"]) + il.replace_value("url", "text2") + self.assertEqual(il.get_output_value("url"), ["val"]) def test_item_passed_to_input_processor_functions(self): def processor(value, loader_context): - return loader_context['item']['name'] + return loader_context["item"]["name"] class ChildItemLoader(CustomItemLoader): url_in = MapCompose(processor) - it = dict(name='marta') + it = {"name": "marta"} il = ChildItemLoader(item=it) - il.add_value('url', 'text') - self.assertEqual(il.get_output_value('url'), ['marta']) - il.replace_value('url', 'text2') - self.assertEqual(il.get_output_value('url'), ['marta']) + il.add_value("url", "text") + self.assertEqual(il.get_output_value("url"), ["marta"]) + il.replace_value("url", "text2") + self.assertEqual(il.get_output_value("url"), ["marta"]) # def test_add_value_on_unknown_field(self): # il = CustomItemLoader() @@ -369,60 +380,60 @@ class CustomItemLoader(ItemLoader): name_out = Compose(lambda v: v[0], lambda v: v.title(), lambda v: v[:-1]) il = CustomItemLoader() - il.add_value('name', ['marta', 'other']) - self.assertEqual(il.get_output_value('name'), 'Mart') + il.add_value("name", ["marta", "other"]) + self.assertEqual(il.get_output_value("name"), "Mart") item = il.load_item() - self.assertEqual(item['name'], 'Mart') + self.assertEqual(item["name"], "Mart") def test_partial_processor(self): def join(values, sep=None, loader_context=None, ignored=None): if sep is not None: return sep.join(values) - elif loader_context and 'sep' in loader_context: - return loader_context['sep'].join(values) + elif loader_context and "sep" in loader_context: + return loader_context["sep"].join(values) else: - return ''.join(values) + return "".join(values) class CustomItemLoader(ItemLoader): - name_out = Compose(partial(join, sep='+')) - url_out = Compose(partial(join, loader_context={'sep': '.'})) - summary_out = Compose(partial(join, ignored='foo')) + name_out = Compose(partial(join, sep="+")) + url_out = Compose(partial(join, loader_context={"sep": "."})) + summary_out = Compose(partial(join, ignored="foo")) il = CustomItemLoader() - il.add_value('name', ['rabbit', 'hole']) - il.add_value('url', ['rabbit', 'hole']) - il.add_value('summary', ['rabbit', 'hole']) + il.add_value("name", ["rabbit", "hole"]) + il.add_value("url", ["rabbit", "hole"]) + il.add_value("summary", ["rabbit", "hole"]) item = il.load_item() - self.assertEqual(item['name'], 'rabbit+hole') - self.assertEqual(item['url'], 'rabbit.hole') - self.assertEqual(item['summary'], 'rabbithole') + self.assertEqual(item["name"], "rabbit+hole") + self.assertEqual(item["url"], "rabbit.hole") + self.assertEqual(item["summary"], "rabbithole") def test_error_input_processor(self): class CustomItemLoader(ItemLoader): name_in = MapCompose(float) il = CustomItemLoader() - self.assertRaises(ValueError, il.add_value, 'name', - ['marta', 'other']) + self.assertRaises(ValueError, il.add_value, "name", ["marta", "other"]) def test_error_output_processor(self): class CustomItemLoader(ItemLoader): name_out = Compose(Join(), float) il = CustomItemLoader() - il.add_value('name', 'marta') + il.add_value("name", "marta") with self.assertRaises(ValueError): il.load_item() def test_error_processor_as_argument(self): il = CustomItemLoader() - self.assertRaises(ValueError, il.add_value, 'name', - ['marta', 'other'], Compose(float)) + self.assertRaises( + ValueError, il.add_value, "name", ["marta", "other"], Compose(float) + ) def test_get_unset_value(self): loader = ItemLoader() self.assertEqual(loader.load_item(), {}) - self.assertEqual(loader.get_output_value('foo'), []) + self.assertEqual(loader.get_output_value("foo"), []) self.assertEqual(loader.load_item(), {}) @@ -439,28 +450,37 @@ class NoInputReprocessingFromDictTest(unittest.TestCase): """ Loaders initialized from loaded items must not reprocess fields (dict instances) """ + def test_avoid_reprocessing_with_initial_values_single(self): - il = NoInputReprocessingDictLoader(item=dict(title='foo')) + il = NoInputReprocessingDictLoader(item={"title": "foo"}) il_loaded = il.load_item() - self.assertEqual(il_loaded, dict(title='foo')) - self.assertEqual(NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title='foo')) + self.assertEqual(il_loaded, {"title": "foo"}) + self.assertEqual( + NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "foo"} + ) def test_avoid_reprocessing_with_initial_values_list(self): - il = NoInputReprocessingDictLoader(item=dict(title=['foo', 'bar'])) + il = NoInputReprocessingDictLoader(item={"title": ["foo", "bar"]}) il_loaded = il.load_item() - self.assertEqual(il_loaded, dict(title='foo')) - self.assertEqual(NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title='foo')) + self.assertEqual(il_loaded, {"title": "foo"}) + self.assertEqual( + NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "foo"} + ) def test_avoid_reprocessing_without_initial_values_single(self): il = NoInputReprocessingDictLoader() - il.add_value('title', 'foo') + il.add_value("title", "foo") il_loaded = il.load_item() - self.assertEqual(il_loaded, dict(title='FOO')) - self.assertEqual(NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title='FOO')) + self.assertEqual(il_loaded, {"title": "FOO"}) + self.assertEqual( + NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "FOO"} + ) def test_avoid_reprocessing_without_initial_values_list(self): il = NoInputReprocessingDictLoader() - il.add_value('title', ['foo', 'bar']) + il.add_value("title", ["foo", "bar"]) il_loaded = il.load_item() - self.assertEqual(il_loaded, dict(title='FOO')) - self.assertEqual(NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title='FOO')) + self.assertEqual(il_loaded, {"title": "FOO"}) + self.assertEqual( + NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "FOO"} + ) diff --git a/tests/test_loader_initialization.py b/tests/test_loader_initialization.py index 7e8d51a..0f63253 100644 --- a/tests/test_loader_initialization.py +++ b/tests/test_loader_initialization.py @@ -4,90 +4,89 @@ class InitializationTestMixin: - item_class = None def test_keep_single_value(self): """Loaded item should contain values from the initial item""" - input_item = self.item_class(name='foo') + input_item = self.item_class(name="foo") il = ItemLoader(item=input_item) loaded_item = il.load_item() self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(dict(loaded_item), {'name': ['foo']}) + self.assertEqual(dict(loaded_item), {"name": ["foo"]}) def test_keep_list(self): """Loaded item should contain values from the initial item""" - input_item = self.item_class(name=['foo', 'bar']) + input_item = self.item_class(name=["foo", "bar"]) il = ItemLoader(item=input_item) loaded_item = il.load_item() self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(dict(loaded_item), {'name': ['foo', 'bar']}) + self.assertEqual(dict(loaded_item), {"name": ["foo", "bar"]}) def test_add_value_singlevalue_singlevalue(self): """Values added after initialization should be appended""" - input_item = self.item_class(name='foo') + input_item = self.item_class(name="foo") il = ItemLoader(item=input_item) - il.add_value('name', 'bar') + il.add_value("name", "bar") loaded_item = il.load_item() self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(dict(loaded_item), {'name': ['foo', 'bar']}) + self.assertEqual(dict(loaded_item), {"name": ["foo", "bar"]}) def test_add_value_singlevalue_list(self): """Values added after initialization should be appended""" - input_item = self.item_class(name='foo') + input_item = self.item_class(name="foo") il = ItemLoader(item=input_item) - il.add_value('name', ['item', 'loader']) + il.add_value("name", ["item", "loader"]) loaded_item = il.load_item() self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(dict(loaded_item), {'name': ['foo', 'item', 'loader']}) + self.assertEqual(dict(loaded_item), {"name": ["foo", "item", "loader"]}) def test_add_value_list_singlevalue(self): """Values added after initialization should be appended""" - input_item = self.item_class(name=['foo', 'bar']) + input_item = self.item_class(name=["foo", "bar"]) il = ItemLoader(item=input_item) - il.add_value('name', 'qwerty') + il.add_value("name", "qwerty") loaded_item = il.load_item() self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(dict(loaded_item), {'name': ['foo', 'bar', 'qwerty']}) + self.assertEqual(dict(loaded_item), {"name": ["foo", "bar", "qwerty"]}) def test_add_value_list_list(self): """Values added after initialization should be appended""" - input_item = self.item_class(name=['foo', 'bar']) + input_item = self.item_class(name=["foo", "bar"]) il = ItemLoader(item=input_item) - il.add_value('name', ['item', 'loader']) + il.add_value("name", ["item", "loader"]) loaded_item = il.load_item() self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(dict(loaded_item), {'name': ['foo', 'bar', 'item', 'loader']}) + self.assertEqual(dict(loaded_item), {"name": ["foo", "bar", "item", "loader"]}) def test_get_output_value_singlevalue(self): """Getting output value must not remove value from item""" - input_item = self.item_class(name='foo') + input_item = self.item_class(name="foo") il = ItemLoader(item=input_item) - self.assertEqual(il.get_output_value('name'), ['foo']) + self.assertEqual(il.get_output_value("name"), ["foo"]) loaded_item = il.load_item() self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(loaded_item, dict({'name': ['foo']})) + self.assertEqual(loaded_item, {"name": ["foo"]}) def test_get_output_value_list(self): """Getting output value must not remove value from item""" - input_item = self.item_class(name=['foo', 'bar']) + input_item = self.item_class(name=["foo", "bar"]) il = ItemLoader(item=input_item) - self.assertEqual(il.get_output_value('name'), ['foo', 'bar']) + self.assertEqual(il.get_output_value("name"), ["foo", "bar"]) loaded_item = il.load_item() self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(loaded_item, dict({'name': ['foo', 'bar']})) + self.assertEqual(loaded_item, {"name": ["foo", "bar"]}) def test_values_single(self): """Values from initial item must be added to loader._values""" - input_item = self.item_class(name='foo') + input_item = self.item_class(name="foo") il = ItemLoader(item=input_item) - self.assertEqual(il._values.get('name'), ['foo']) + self.assertEqual(il._values.get("name"), ["foo"]) def test_values_list(self): """Values from initial item must be added to loader._values""" - input_item = self.item_class(name=['foo', 'bar']) + input_item = self.item_class(name=["foo", "bar"]) il = ItemLoader(item=input_item) - self.assertEqual(il._values.get('name'), ['foo', 'bar']) + self.assertEqual(il._values.get("name"), ["foo", "bar"]) class InitializationFromDictTest(InitializationTestMixin, unittest.TestCase): diff --git a/tests/test_nested_items.py b/tests/test_nested_items.py index 0bdfbf2..444431a 100644 --- a/tests/test_nested_items.py +++ b/tests/test_nested_items.py @@ -8,8 +8,8 @@ class NestedItemTest(unittest.TestCase): def _test_item(self, item): il = ItemLoader() - il.add_value('item_list', item) - self.assertEqual(il.load_item(), {'item_list': [item]}) + il.add_value("item_list", item) + self.assertEqual(il.load_item(), {"item_list": [item]}) def test_attrs(self): try: @@ -21,7 +21,7 @@ def test_attrs(self): class TestItem: foo = attr.ib() - self._test_item(TestItem(foo='bar')) + self._test_item(TestItem(foo="bar")) def test_dataclass(self): try: @@ -33,10 +33,10 @@ def test_dataclass(self): class TestItem: foo: str - self._test_item(TestItem(foo='bar')) + self._test_item(TestItem(foo="bar")) def test_dict(self): - self._test_item({'foo': 'bar'}) + self._test_item({"foo": "bar"}) def test_scrapy_item(self): try: @@ -47,4 +47,4 @@ def test_scrapy_item(self): class TestItem(Item): foo = Field() - self._test_item(TestItem(foo='bar')) + self._test_item(TestItem(foo="bar")) diff --git a/tests/test_nested_loader.py b/tests/test_nested_loader.py index 1e193d3..58b9bec 100644 --- a/tests/test_nested_loader.py +++ b/tests/test_nested_loader.py @@ -6,7 +6,8 @@ class SubselectorLoaderTest(unittest.TestCase): - selector = Selector(text=""" + selector = Selector( + text="""
@@ -19,75 +20,91 @@ class SubselectorLoaderTest(unittest.TestCase): - """) + """ + ) def test_nested_xpath(self): loader = ItemLoader(selector=self.selector) nl = loader.nested_xpath("//header") - nl.add_xpath('name', 'div/text()') - nl.add_css('name_div', '#id') - nl.add_value('name_value', nl.selector.xpath('div[@id = "id"]/text()').getall()) - - self.assertEqual(loader.get_output_value('name'), ['marta']) - self.assertEqual(loader.get_output_value('name_div'), ['
marta
']) - self.assertEqual(loader.get_output_value('name_value'), ['marta']) - - self.assertEqual(loader.get_output_value('name'), nl.get_output_value('name')) - self.assertEqual(loader.get_output_value('name_div'), nl.get_output_value('name_div')) - self.assertEqual(loader.get_output_value('name_value'), nl.get_output_value('name_value')) + nl.add_xpath("name", "div/text()") + nl.add_css("name_div", "#id") + nl.add_value("name_value", nl.selector.xpath('div[@id = "id"]/text()').getall()) + + self.assertEqual(loader.get_output_value("name"), ["marta"]) + self.assertEqual( + loader.get_output_value("name_div"), ['
marta
'] + ) + self.assertEqual(loader.get_output_value("name_value"), ["marta"]) + + self.assertEqual(loader.get_output_value("name"), nl.get_output_value("name")) + self.assertEqual( + loader.get_output_value("name_div"), nl.get_output_value("name_div") + ) + self.assertEqual( + loader.get_output_value("name_value"), nl.get_output_value("name_value") + ) def test_nested_css(self): loader = ItemLoader(selector=self.selector) nl = loader.nested_css("header") - nl.add_xpath('name', 'div/text()') - nl.add_css('name_div', '#id') - nl.add_value('name_value', nl.selector.xpath('div[@id = "id"]/text()').getall()) - - self.assertEqual(loader.get_output_value('name'), ['marta']) - self.assertEqual(loader.get_output_value('name_div'), ['
marta
']) - self.assertEqual(loader.get_output_value('name_value'), ['marta']) - - self.assertEqual(loader.get_output_value('name'), nl.get_output_value('name')) - self.assertEqual(loader.get_output_value('name_div'), nl.get_output_value('name_div')) - self.assertEqual(loader.get_output_value('name_value'), nl.get_output_value('name_value')) + nl.add_xpath("name", "div/text()") + nl.add_css("name_div", "#id") + nl.add_value("name_value", nl.selector.xpath('div[@id = "id"]/text()').getall()) + + self.assertEqual(loader.get_output_value("name"), ["marta"]) + self.assertEqual( + loader.get_output_value("name_div"), ['
marta
'] + ) + self.assertEqual(loader.get_output_value("name_value"), ["marta"]) + + self.assertEqual(loader.get_output_value("name"), nl.get_output_value("name")) + self.assertEqual( + loader.get_output_value("name_div"), nl.get_output_value("name_div") + ) + self.assertEqual( + loader.get_output_value("name_value"), nl.get_output_value("name_value") + ) def test_nested_replace(self): loader = ItemLoader(selector=self.selector) - nl1 = loader.nested_xpath('//footer') - nl2 = nl1.nested_xpath('a') + nl1 = loader.nested_xpath("//footer") + nl2 = nl1.nested_xpath("a") - loader.add_xpath('url', '//footer/a/@href') - self.assertEqual(loader.get_output_value('url'), ['http://www.scrapy.org']) - nl1.replace_xpath('url', 'img/@src') - self.assertEqual(loader.get_output_value('url'), ['/images/logo.png']) - nl2.replace_xpath('url', '@href') - self.assertEqual(loader.get_output_value('url'), ['http://www.scrapy.org']) + loader.add_xpath("url", "//footer/a/@href") + self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"]) + nl1.replace_xpath("url", "img/@src") + self.assertEqual(loader.get_output_value("url"), ["/images/logo.png"]) + nl2.replace_xpath("url", "@href") + self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"]) def test_nested_ordering(self): loader = ItemLoader(selector=self.selector) - nl1 = loader.nested_xpath('//footer') - nl2 = nl1.nested_xpath('a') - - nl1.add_xpath('url', 'img/@src') - loader.add_xpath('url', '//footer/a/@href') - nl2.add_xpath('url', 'text()') - loader.add_xpath('url', '//footer/a/@href') - - self.assertEqual(loader.get_output_value('url'), [ - '/images/logo.png', - 'http://www.scrapy.org', - 'homepage', - 'http://www.scrapy.org', - ]) + nl1 = loader.nested_xpath("//footer") + nl2 = nl1.nested_xpath("a") + + nl1.add_xpath("url", "img/@src") + loader.add_xpath("url", "//footer/a/@href") + nl2.add_xpath("url", "text()") + loader.add_xpath("url", "//footer/a/@href") + + self.assertEqual( + loader.get_output_value("url"), + [ + "/images/logo.png", + "http://www.scrapy.org", + "homepage", + "http://www.scrapy.org", + ], + ) def test_nested_load_item(self): loader = ItemLoader(selector=self.selector) - nl1 = loader.nested_xpath('//footer') - nl2 = nl1.nested_xpath('img') + nl1 = loader.nested_xpath("//footer") + nl2 = nl1.nested_xpath("img") - loader.add_xpath('name', '//header/div/text()') - nl1.add_xpath('url', 'a/@href') - nl2.add_xpath('image', '@src') + loader.add_xpath("name", "//header/div/text()") + nl1.add_xpath("url", "a/@href") + nl2.add_xpath("image", "@src") item = loader.load_item() @@ -95,6 +112,6 @@ def test_nested_load_item(self): assert item is nl1.item assert item is nl2.item - self.assertEqual(item['name'], ['marta']) - self.assertEqual(item['url'], ['http://www.scrapy.org']) - self.assertEqual(item['image'], ['/images/logo.png']) + self.assertEqual(item["name"], ["marta"]) + self.assertEqual(item["url"], ["http://www.scrapy.org"]) + self.assertEqual(item["image"], ["/images/logo.png"]) diff --git a/tests/test_output_processor.py b/tests/test_output_processor.py index 54bb1fe..f4aa387 100644 --- a/tests/test_output_processor.py +++ b/tests/test_output_processor.py @@ -1,16 +1,15 @@ import unittest from itemloaders import ItemLoader -from itemloaders.processors import Identity, Compose, TakeFirst +from itemloaders.processors import Compose, Identity, TakeFirst class TestOutputProcessorDict(unittest.TestCase): def test_output_processor(self): - class TempDict(dict): def __init__(self, *args, **kwargs): super(TempDict, self).__init__(self, *args, **kwargs) - self.setdefault('temp', 0.3) + self.setdefault("temp", 0.3) class TempLoader(ItemLoader): default_item_class = TempDict @@ -20,7 +19,7 @@ class TempLoader(ItemLoader): loader = TempLoader() item = loader.load_item() self.assertIsInstance(item, TempDict) - self.assertEqual(dict(item), {'temp': 0.3}) + self.assertEqual(dict(item), {"temp": 0.3}) class TestOutputProcessorItem(unittest.TestCase): @@ -29,9 +28,9 @@ class TempLoader(ItemLoader): default_input_processor = Identity() default_output_processor = Compose(TakeFirst()) - item = dict() - item.setdefault('temp', 0.3) + item = {} + item.setdefault("temp", 0.3) loader = TempLoader(item=item) item = loader.load_item() self.assertIsInstance(item, dict) - self.assertEqual(dict(item), {'temp': 0.3}) + self.assertEqual(dict(item), {"temp": 0.3}) diff --git a/tests/test_processors.py b/tests/test_processors.py index 769597d..55a0c9e 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -1,47 +1,49 @@ import unittest -from itemloaders.processors import (Compose, Identity, Join, - MapCompose, TakeFirst) +from itemloaders.processors import Compose, Identity, Join, MapCompose, TakeFirst class ProcessorsTest(unittest.TestCase): - def test_take_first(self): proc = TakeFirst() - self.assertEqual(proc([None, '', 'hello', 'world']), 'hello') - self.assertEqual(proc([None, '', 0, 'hello', 'world']), 0) + self.assertEqual(proc([None, "", "hello", "world"]), "hello") + self.assertEqual(proc([None, "", 0, "hello", "world"]), 0) def test_identity(self): proc = Identity() - self.assertEqual(proc([None, '', 'hello', 'world']), - [None, '', 'hello', 'world']) + self.assertEqual( + proc([None, "", "hello", "world"]), [None, "", "hello", "world"] + ) def test_join(self): proc = Join() - self.assertRaises(TypeError, proc, [None, '', 'hello', 'world']) - self.assertEqual(proc(['', 'hello', 'world']), u' hello world') - self.assertEqual(proc(['hello', 'world']), u'hello world') - self.assertIsInstance(proc(['hello', 'world']), str) + self.assertRaises(TypeError, proc, [None, "", "hello", "world"]) + self.assertEqual(proc(["", "hello", "world"]), " hello world") + self.assertEqual(proc(["hello", "world"]), "hello world") + self.assertIsInstance(proc(["hello", "world"]), str) def test_compose(self): proc = Compose(lambda v: v[0], str.upper) - self.assertEqual(proc(['hello', 'world']), 'HELLO') + self.assertEqual(proc(["hello", "world"]), "HELLO") proc = Compose(str.upper) self.assertEqual(proc(None), None) proc = Compose(str.upper, stop_on_none=False) self.assertRaises(ValueError, proc, None) proc = Compose(str.upper, lambda x: x + 1) - self.assertRaises(ValueError, proc, 'hello') + self.assertRaises(ValueError, proc, "hello") def test_mapcompose(self): def filter_world(x): - return None if x == 'world' else x + return None if x == "world" else x + proc = MapCompose(filter_world, str.upper) - self.assertEqual(proc([u'hello', u'world', u'this', u'is', u'scrapy']), - [u'HELLO', u'THIS', u'IS', u'SCRAPY']) + self.assertEqual( + proc(["hello", "world", "this", "is", "scrapy"]), + ["HELLO", "THIS", "IS", "SCRAPY"], + ) proc = MapCompose(filter_world, str.upper) self.assertEqual(proc(None), []) proc = MapCompose(filter_world, str.upper) self.assertRaises(ValueError, proc, [1]) proc = MapCompose(filter_world, lambda x: x + 1) - self.assertRaises(ValueError, proc, 'hello') + self.assertRaises(ValueError, proc, "hello") diff --git a/tests/test_select_jmes.py b/tests/test_select_jmes.py index d3c8cc7..1754863 100644 --- a/tests/test_select_jmes.py +++ b/tests/test_select_jmes.py @@ -5,24 +5,22 @@ class SelectJmesTestCase(unittest.TestCase): test_list_equals = { - 'simple': ('foo.bar', {"foo": {"bar": "baz"}}, "baz"), - 'invalid': ('foo.bar.baz', {"foo": {"bar": "baz"}}, None), - 'top_level': ('foo', {"foo": {"bar": "baz"}}, {"bar": "baz"}), - 'double_vs_single_quote_string': ('foo.bar', {"foo": {"bar": "baz"}}, "baz"), - 'dict': ( - 'foo.bar[*].name', + "simple": ("foo.bar", {"foo": {"bar": "baz"}}, "baz"), + "invalid": ("foo.bar.baz", {"foo": {"bar": "baz"}}, None), + "top_level": ("foo", {"foo": {"bar": "baz"}}, {"bar": "baz"}), + "double_vs_single_quote_string": ("foo.bar", {"foo": {"bar": "baz"}}, "baz"), + "dict": ( + "foo.bar[*].name", {"foo": {"bar": [{"name": "one"}, {"name": "two"}]}}, - ['one', 'two'] + ["one", "two"], ), - 'list': ('[1]', [1, 2], 2) + "list": ("[1]", [1, 2], 2), } def test_output(self): - for l in self.test_list_equals: - expr, test_list, expected = self.test_list_equals[l] + for key in self.test_list_equals: + expr, test_list, expected = self.test_list_equals[key] test = SelectJmes(expr)(test_list) self.assertEqual( - test, - expected, - msg='test "{}" got {} expected {}'.format(l, test, expected) + test, expected, msg=f"test {key!r} got {test} expected {expected}" ) diff --git a/tests/test_selector_loader.py b/tests/test_selector_loader.py index 972de71..484c239 100644 --- a/tests/test_selector_loader.py +++ b/tests/test_selector_loader.py @@ -13,7 +13,8 @@ class CustomItemLoader(ItemLoader): class SelectortemLoaderTest(unittest.TestCase): - selector = Selector(text=""" + selector = Selector( + text="""
marta
@@ -22,9 +23,11 @@ class SelectortemLoaderTest(unittest.TestCase): Scrapy - """) + """ + ) - jmes_selector = Selector(text=""" + jmes_selector = Selector( + text=""" { "name": "marta", "description": "paragraph", @@ -34,7 +37,8 @@ class SelectortemLoaderTest(unittest.TestCase): }, "logo": "/images/logo.png" } - """) + """ + ) def test_init_method(self): loader = CustomItemLoader() @@ -42,149 +46,157 @@ def test_init_method(self): def test_init_method_errors(self): loader = CustomItemLoader() - self.assertRaises(RuntimeError, loader.add_xpath, 'url', '//a/@href') - self.assertRaises(RuntimeError, loader.replace_xpath, 'url', '//a/@href') - self.assertRaises(RuntimeError, loader.get_xpath, '//a/@href') - self.assertRaises(RuntimeError, loader.add_css, 'name', '#name::text') - self.assertRaises(RuntimeError, loader.replace_css, 'name', '#name::text') - self.assertRaises(RuntimeError, loader.get_css, '#name::text') + self.assertRaises(RuntimeError, loader.add_xpath, "url", "//a/@href") + self.assertRaises(RuntimeError, loader.replace_xpath, "url", "//a/@href") + self.assertRaises(RuntimeError, loader.get_xpath, "//a/@href") + self.assertRaises(RuntimeError, loader.add_css, "name", "#name::text") + self.assertRaises(RuntimeError, loader.replace_css, "name", "#name::text") + self.assertRaises(RuntimeError, loader.get_css, "#name::text") def test_init_method_with_selector(self): loader = CustomItemLoader(selector=self.selector) self.assertTrue(loader.selector) - loader.add_xpath('name', '//div/text()') - self.assertEqual(loader.get_output_value('name'), ['Marta']) + loader.add_xpath("name", "//div/text()") + self.assertEqual(loader.get_output_value("name"), ["Marta"]) def test_init_method_with_selector_css(self): loader = CustomItemLoader(selector=self.selector) self.assertTrue(loader.selector) - loader.add_css('name', 'div::text') - self.assertEqual(loader.get_output_value('name'), [u'Marta']) + loader.add_css("name", "div::text") + self.assertEqual(loader.get_output_value("name"), ["Marta"]) - loader.add_css('url', 'a::attr(href)') - self.assertEqual(loader.get_output_value('url'), [u'http://www.scrapy.org']) + loader.add_css("url", "a::attr(href)") + self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"]) # combining/accumulating CSS selectors and XPath expressions - loader.add_xpath('name', '//div/text()') - self.assertEqual(loader.get_output_value('name'), [u'Marta', u'Marta']) + loader.add_xpath("name", "//div/text()") + self.assertEqual(loader.get_output_value("name"), ["Marta", "Marta"]) - loader.add_xpath('url', '//img/@src') - self.assertEqual(loader.get_output_value('url'), [u'http://www.scrapy.org', u'/images/logo.png']) + loader.add_xpath("url", "//img/@src") + self.assertEqual( + loader.get_output_value("url"), + ["http://www.scrapy.org", "/images/logo.png"], + ) def test_add_xpath_re(self): loader = CustomItemLoader(selector=self.selector) - loader.add_xpath('name', '//div/text()', re='ma') - self.assertEqual(loader.get_output_value('name'), ['Ma']) + loader.add_xpath("name", "//div/text()", re="ma") + self.assertEqual(loader.get_output_value("name"), ["Ma"]) loader = CustomItemLoader(selector=self.selector) - loader.add_xpath('name', '//div/text()', re=re.compile('ma')) - self.assertEqual(loader.get_output_value('name'), ['Ma']) - + loader.add_xpath("name", "//div/text()", re=re.compile("ma")) + self.assertEqual(loader.get_output_value("name"), ["Ma"]) def test_add_xpath_variables(self): loader = CustomItemLoader(selector=self.selector) - loader.add_xpath('name', 'id($id)/text()', id="id") - self.assertEqual(loader.get_output_value('name'), ['Marta']) + loader.add_xpath("name", "id($id)/text()", id="id") + self.assertEqual(loader.get_output_value("name"), ["Marta"]) loader = CustomItemLoader(selector=self.selector) - loader.add_xpath('name', 'id($id)/text()', id="id2") - self.assertEqual(loader.get_output_value('name'), []) + loader.add_xpath("name", "id($id)/text()", id="id2") + self.assertEqual(loader.get_output_value("name"), []) def test_replace_xpath(self): loader = CustomItemLoader(selector=self.selector) self.assertTrue(loader.selector) - loader.add_xpath('name', '//div/text()') - self.assertEqual(loader.get_output_value('name'), ['Marta']) - loader.replace_xpath('name', '//p/text()') - self.assertEqual(loader.get_output_value('name'), ['Paragraph']) + loader.add_xpath("name", "//div/text()") + self.assertEqual(loader.get_output_value("name"), ["Marta"]) + loader.replace_xpath("name", "//p/text()") + self.assertEqual(loader.get_output_value("name"), ["Paragraph"]) - loader.replace_xpath('name', ['//p/text()', '//div/text()']) - self.assertEqual(loader.get_output_value('name'), ['Paragraph', 'Marta']) + loader.replace_xpath("name", ["//p/text()", "//div/text()"]) + self.assertEqual(loader.get_output_value("name"), ["Paragraph", "Marta"]) def test_get_xpath(self): loader = CustomItemLoader(selector=self.selector) - self.assertEqual(loader.get_xpath('//p/text()'), ['paragraph']) - self.assertEqual(loader.get_xpath('//p/text()', TakeFirst()), 'paragraph') - self.assertEqual(loader.get_xpath('//p/text()', TakeFirst(), re='pa'), 'pa') + self.assertEqual(loader.get_xpath("//p/text()"), ["paragraph"]) + self.assertEqual(loader.get_xpath("//p/text()", TakeFirst()), "paragraph") + self.assertEqual(loader.get_xpath("//p/text()", TakeFirst(), re="pa"), "pa") - self.assertEqual(loader.get_xpath(['//p/text()', '//div/text()']), ['paragraph', 'marta']) + self.assertEqual( + loader.get_xpath(["//p/text()", "//div/text()"]), ["paragraph", "marta"] + ) def test_replace_xpath_multi_fields(self): loader = CustomItemLoader(selector=self.selector) - loader.add_xpath(None, '//div/text()', TakeFirst(), lambda x: {'name': x}) - self.assertEqual(loader.get_output_value('name'), ['Marta']) - loader.replace_xpath(None, '//p/text()', TakeFirst(), lambda x: {'name': x}) - self.assertEqual(loader.get_output_value('name'), ['Paragraph']) + loader.add_xpath(None, "//div/text()", TakeFirst(), lambda x: {"name": x}) + self.assertEqual(loader.get_output_value("name"), ["Marta"]) + loader.replace_xpath(None, "//p/text()", TakeFirst(), lambda x: {"name": x}) + self.assertEqual(loader.get_output_value("name"), ["Paragraph"]) def test_replace_xpath_re(self): loader = CustomItemLoader(selector=self.selector) self.assertTrue(loader.selector) - loader.add_xpath('name', '//div/text()') - self.assertEqual(loader.get_output_value('name'), ['Marta']) - loader.replace_xpath('name', '//div/text()', re='ma') - self.assertEqual(loader.get_output_value('name'), ['Ma']) + loader.add_xpath("name", "//div/text()") + self.assertEqual(loader.get_output_value("name"), ["Marta"]) + loader.replace_xpath("name", "//div/text()", re="ma") + self.assertEqual(loader.get_output_value("name"), ["Ma"]) def test_add_css_re(self): loader = CustomItemLoader(selector=self.selector) - loader.add_css('name', 'div::text', re='ma') - self.assertEqual(loader.get_output_value('name'), ['Ma']) + loader.add_css("name", "div::text", re="ma") + self.assertEqual(loader.get_output_value("name"), ["Ma"]) - loader.add_css('url', 'a::attr(href)', re='http://(.+)') - self.assertEqual(loader.get_output_value('url'), ['www.scrapy.org']) + loader.add_css("url", "a::attr(href)", re="http://(.+)") + self.assertEqual(loader.get_output_value("url"), ["www.scrapy.org"]) loader = CustomItemLoader(selector=self.selector) - loader.add_css('name', 'div::text', re=re.compile('ma')) - self.assertEqual(loader.get_output_value('name'), ['Ma']) + loader.add_css("name", "div::text", re=re.compile("ma")) + self.assertEqual(loader.get_output_value("name"), ["Ma"]) - loader.add_css('url', 'a::attr(href)', re=re.compile('http://(.+)')) - self.assertEqual(loader.get_output_value('url'), ['www.scrapy.org']) + loader.add_css("url", "a::attr(href)", re=re.compile("http://(.+)")) + self.assertEqual(loader.get_output_value("url"), ["www.scrapy.org"]) def test_replace_css(self): loader = CustomItemLoader(selector=self.selector) self.assertTrue(loader.selector) - loader.add_css('name', 'div::text') - self.assertEqual(loader.get_output_value('name'), ['Marta']) - loader.replace_css('name', 'p::text') - self.assertEqual(loader.get_output_value('name'), ['Paragraph']) + loader.add_css("name", "div::text") + self.assertEqual(loader.get_output_value("name"), ["Marta"]) + loader.replace_css("name", "p::text") + self.assertEqual(loader.get_output_value("name"), ["Paragraph"]) - loader.replace_css('name', ['p::text', 'div::text']) - self.assertEqual(loader.get_output_value('name'), ['Paragraph', 'Marta']) + loader.replace_css("name", ["p::text", "div::text"]) + self.assertEqual(loader.get_output_value("name"), ["Paragraph", "Marta"]) - loader.add_css('url', 'a::attr(href)', re='http://(.+)') - self.assertEqual(loader.get_output_value('url'), ['www.scrapy.org']) - loader.replace_css('url', 'img::attr(src)') - self.assertEqual(loader.get_output_value('url'), ['/images/logo.png']) + loader.add_css("url", "a::attr(href)", re="http://(.+)") + self.assertEqual(loader.get_output_value("url"), ["www.scrapy.org"]) + loader.replace_css("url", "img::attr(src)") + self.assertEqual(loader.get_output_value("url"), ["/images/logo.png"]) def test_get_css(self): loader = CustomItemLoader(selector=self.selector) - self.assertEqual(loader.get_css('p::text'), [u'paragraph']) - self.assertEqual(loader.get_css('p::text', TakeFirst()), 'paragraph') - self.assertEqual(loader.get_css('p::text', TakeFirst(), re='pa'), u'pa') + self.assertEqual(loader.get_css("p::text"), ["paragraph"]) + self.assertEqual(loader.get_css("p::text", TakeFirst()), "paragraph") + self.assertEqual(loader.get_css("p::text", TakeFirst(), re="pa"), "pa") - self.assertEqual(loader.get_css(['p::text', 'div::text']), ['paragraph', 'marta']) - self.assertEqual(loader.get_css(['a::attr(href)', 'img::attr(src)']), - [u'http://www.scrapy.org', '/images/logo.png']) + self.assertEqual( + loader.get_css(["p::text", "div::text"]), ["paragraph", "marta"] + ) + self.assertEqual( + loader.get_css(["a::attr(href)", "img::attr(src)"]), + ["http://www.scrapy.org", "/images/logo.png"], + ) def test_replace_css_multi_fields(self): loader = CustomItemLoader(selector=self.selector) - loader.add_css(None, 'div::text', TakeFirst(), lambda x: {'name': x}) - self.assertEqual(loader.get_output_value('name'), ['Marta']) - loader.replace_css(None, 'p::text', TakeFirst(), lambda x: {'name': x}) - self.assertEqual(loader.get_output_value('name'), ['Paragraph']) + loader.add_css(None, "div::text", TakeFirst(), lambda x: {"name": x}) + self.assertEqual(loader.get_output_value("name"), ["Marta"]) + loader.replace_css(None, "p::text", TakeFirst(), lambda x: {"name": x}) + self.assertEqual(loader.get_output_value("name"), ["Paragraph"]) - loader.add_css(None, 'a::attr(href)', TakeFirst(), lambda x: {'url': x}) - self.assertEqual(loader.get_output_value('url'), ['http://www.scrapy.org']) - loader.replace_css(None, 'img::attr(src)', TakeFirst(), lambda x: {'url': x}) - self.assertEqual(loader.get_output_value('url'), ['/images/logo.png']) + loader.add_css(None, "a::attr(href)", TakeFirst(), lambda x: {"url": x}) + self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"]) + loader.replace_css(None, "img::attr(src)", TakeFirst(), lambda x: {"url": x}) + self.assertEqual(loader.get_output_value("url"), ["/images/logo.png"]) def test_replace_css_re(self): loader = CustomItemLoader(selector=self.selector) self.assertTrue(loader.selector) - loader.add_css('url', 'a::attr(href)') - self.assertEqual(loader.get_output_value('url'), ['http://www.scrapy.org']) - loader.replace_css('url', 'a::attr(href)', re=r'http://www\.(.+)') - self.assertEqual(loader.get_output_value('url'), ['scrapy.org']) + loader.add_css("url", "a::attr(href)") + self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"]) + loader.replace_css("url", "a::attr(href)", re=r"http://www\.(.+)") + self.assertEqual(loader.get_output_value("url"), ["scrapy.org"]) def test_jmes_not_installed(self): selector = MagicMock(spec=Selector) @@ -193,7 +205,9 @@ def test_jmes_not_installed(self): with self.assertRaises(AttributeError) as err: loader.add_jmes("name", "name", re="ma") - self.assertEqual(str(err.exception), "Please install parsel >= 1.8.1 to get jmespath support") + self.assertEqual( + str(err.exception), "Please install parsel >= 1.8.1 to get jmespath support" + ) def test_add_jmes_re(self): loader = CustomItemLoader(selector=self.jmes_selector) @@ -242,20 +256,20 @@ def test_replace_jmes(self): def test_replace_jmes_multi_fields(self): loader = CustomItemLoader(selector=self.jmes_selector) - loader.add_jmes(None, 'name', TakeFirst(), lambda x: {'name': x}) - self.assertEqual(loader.get_output_value('name'), ['Marta']) - loader.replace_jmes(None, 'description', TakeFirst(), lambda x: {'name': x}) - self.assertEqual(loader.get_output_value('name'), ['Paragraph']) + loader.add_jmes(None, "name", TakeFirst(), lambda x: {"name": x}) + self.assertEqual(loader.get_output_value("name"), ["Marta"]) + loader.replace_jmes(None, "description", TakeFirst(), lambda x: {"name": x}) + self.assertEqual(loader.get_output_value("name"), ["Paragraph"]) - loader.add_jmes(None, 'website.url', TakeFirst(), lambda x: {'url': x}) - self.assertEqual(loader.get_output_value('url'), ['http://www.scrapy.org']) - loader.replace_jmes(None, 'logo', TakeFirst(), lambda x: {'url': x}) - self.assertEqual(loader.get_output_value('url'), ['/images/logo.png']) + loader.add_jmes(None, "website.url", TakeFirst(), lambda x: {"url": x}) + self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"]) + loader.replace_jmes(None, "logo", TakeFirst(), lambda x: {"url": x}) + self.assertEqual(loader.get_output_value("url"), ["/images/logo.png"]) def test_replace_jmes_re(self): loader = CustomItemLoader(selector=self.jmes_selector) self.assertTrue(loader.selector) - loader.add_jmes('url', 'website.url') - self.assertEqual(loader.get_output_value('url'), ['http://www.scrapy.org']) - loader.replace_jmes('url', 'website.url', re=r'http://www\.(.+)') - self.assertEqual(loader.get_output_value('url'), ['scrapy.org']) + loader.add_jmes("url", "website.url") + self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"]) + loader.replace_jmes("url", "website.url", re=r"http://www\.(.+)") + self.assertEqual(loader.get_output_value("url"), ["scrapy.org"]) diff --git a/tests/test_utils_misc.py b/tests/test_utils_misc.py index 36f7c80..6c9e9b6 100644 --- a/tests/test_utils_misc.py +++ b/tests/test_utils_misc.py @@ -4,20 +4,19 @@ class UtilsMiscTestCase(unittest.TestCase): - def test_arg_to_iter(self): - assert hasattr(arg_to_iter(None), '__iter__') - assert hasattr(arg_to_iter(100), '__iter__') - assert hasattr(arg_to_iter('lala'), '__iter__') - assert hasattr(arg_to_iter([1, 2, 3]), '__iter__') - assert hasattr(arg_to_iter(l for l in 'abcd'), '__iter__') + assert hasattr(arg_to_iter(None), "__iter__") + assert hasattr(arg_to_iter(100), "__iter__") + assert hasattr(arg_to_iter("lala"), "__iter__") + assert hasattr(arg_to_iter([1, 2, 3]), "__iter__") + assert hasattr(arg_to_iter(letter for letter in "abcd"), "__iter__") self.assertEqual(list(arg_to_iter(None)), []) - self.assertEqual(list(arg_to_iter('lala')), ['lala']) + self.assertEqual(list(arg_to_iter("lala")), ["lala"]) self.assertEqual(list(arg_to_iter(100)), [100]) - self.assertEqual(list(arg_to_iter(l for l in 'abc')), ['a', 'b', 'c']) + self.assertEqual(list(arg_to_iter(letter for letter in "abc")), ["a", "b", "c"]) self.assertEqual(list(arg_to_iter([1, 2, 3])), [1, 2, 3]) - self.assertEqual(list(arg_to_iter({'a': 1})), [{'a': 1}]) + self.assertEqual(list(arg_to_iter({"a": 1})), [{"a": 1}]) if __name__ == "__main__": diff --git a/tests/test_utils_python.py b/tests/test_utils_python.py index 7d4a1cd..cd7cf85 100644 --- a/tests/test_utils_python.py +++ b/tests/test_utils_python.py @@ -2,13 +2,11 @@ import operator import platform import unittest -from datetime import datetime from itemloaders.utils import get_func_args class UtilsPythonTestCase(unittest.TestCase): - def test_get_func_args(self): def f1(a, b, c): pass @@ -27,7 +25,6 @@ def method(self, a, b, c): pass class Callable: - def __call__(self, a, b, c): pass