diff --git a/src/tribler/core/components/libtorrent/download_manager/download_manager.py b/src/tribler/core/components/libtorrent/download_manager/download_manager.py index feb99710704..a86710f84ee 100644 --- a/src/tribler/core/components/libtorrent/download_manager/download_manager.py +++ b/src/tribler/core/components/libtorrent/download_manager/download_manager.py @@ -11,7 +11,7 @@ from binascii import unhexlify from copy import deepcopy from shutil import rmtree -from typing import Callable, Dict, List, Optional +from typing import Callable, Dict, List, Optional, Union from ipv8.taskmanager import TaskManager @@ -489,7 +489,7 @@ def update_ip_filter(self, lt_session, ip_addresses): lt_session.set_ip_filter(ip_filter) async def get_metainfo(self, infohash: bytes, timeout: float = 30, hops: Optional[int] = None, - url: Optional[str] = None, raise_errors: bool = False) -> Optional[Dict]: + url: Optional[Union[str, bytes]] = None, raise_errors: bool = False) -> Optional[Dict]: """ Lookup metainfo for a given infohash. The mechanism works by joining the swarm for the infohash connecting to a few peers, and downloading the metadata for the torrent. @@ -583,14 +583,12 @@ async def start_download_from_uri(self, uri, config=None): self._logger.info('Magnet scheme detected') name, infohash, _ = parse_magnetlink(uri) self._logger.info(f'Name: {name}. 
Infohash: {infohash}') - if infohash is None: - raise RuntimeError("Missing infohash") if infohash in self.metainfo_cache: self._logger.info('Metainfo found in cache') tdef = TorrentDef.load_from_dict(self.metainfo_cache[infohash]['meta_info']) else: self._logger.info('Metainfo not found in cache') - tdef = TorrentDefNoMetainfo(infohash, "Unknown name" if name is None else name, url=uri) + tdef = TorrentDefNoMetainfo(infohash, "Unknown name" if not name else name, url=uri) return await self.start_download(tdef=tdef, config=config) if scheme == FILE_SCHEME: self._logger.info('File scheme detected') diff --git a/src/tribler/core/components/libtorrent/restapi/tests/test_torrentinfo_endpoint.py b/src/tribler/core/components/libtorrent/restapi/tests/test_torrentinfo_endpoint.py index f3434054e98..35b2a43d1fd 100644 --- a/src/tribler/core/components/libtorrent/restapi/tests/test_torrentinfo_endpoint.py +++ b/src/tribler/core/components/libtorrent/restapi/tests/test_torrentinfo_endpoint.py @@ -7,17 +7,17 @@ from urllib.parse import quote_plus, unquote_plus import pytest -from aiohttp import ServerConnectionError, ClientResponseError, ClientConnectorError +from aiohttp import ClientConnectorError, ClientResponseError, ServerConnectionError from ipv8.util import succeed from tribler.core import notifications +from tribler.core.components.database.db.orm_bindings.torrent_metadata import tdef_to_metadata_dict from tribler.core.components.libtorrent.download_manager.download_manager import DownloadManager from tribler.core.components.libtorrent.restapi.torrentinfo_endpoint import TorrentInfoEndpoint from tribler.core.components.libtorrent.settings import DownloadDefaultsSettings, LibtorrentSettings from tribler.core.components.libtorrent.torrentdef import TorrentDef -from tribler.core.components.database.db.orm_bindings.torrent_metadata import tdef_to_metadata_dict from tribler.core.components.restapi.rest.base_api_test import do_request -from 
tribler.core.components.restapi.rest.rest_endpoint import HTTP_INTERNAL_SERVER_ERROR +from tribler.core.components.restapi.rest.rest_endpoint import HTTP_BAD_REQUEST, HTTP_INTERNAL_SERVER_ERROR from tribler.core.tests.tools.common import TESTS_DATA_DIR, TESTS_DIR, TORRENT_UBUNTU_FILE, UBUNTU_1504_INFOHASH from tribler.core.utilities.rest_utils import path_to_url from tribler.core.utilities.unicode import hexlify @@ -77,17 +77,18 @@ def verify_valid_dict(json_data): assert 'info' in metainfo_dict url = 'torrentinfo' - await do_request(rest_api, url, expected_code=400) - await do_request(rest_api, url, params={'uri': 'def'}, expected_code=400) + await do_request(rest_api, url, expected_code=HTTP_BAD_REQUEST) + await do_request(rest_api, url, params={'uri': 'def'}, expected_code=HTTP_BAD_REQUEST) response = await do_request(rest_api, url, params={'uri': _path('bak_single.torrent')}, expected_code=200) verify_valid_dict(response) # Corrupt file - await do_request(rest_api, url, params={'uri': _path('test_rss.xml')}, expected_code=500) + await do_request(rest_api, url, params={'uri': _path('test_rss.xml')}, expected_code=HTTP_INTERNAL_SERVER_ERROR) # Non-existing file - await do_request(rest_api, url, params={'uri': _path('non_existing.torrent')}, expected_code=500) + await do_request(rest_api, url, params={'uri': _path('non_existing.torrent')}, + expected_code=HTTP_INTERNAL_SERVER_ERROR) path = "http://localhost:1234/ubuntu.torrent" @@ -119,11 +120,11 @@ async def get_metainfo(infohash, timeout=20, hops=None, url=None): # pylint: di verify_valid_dict(await do_request(rest_api, f'torrentinfo?uri={path}', expected_code=200)) path = 'magnet:?xt=urn:ed2k:354B15E68FB8F36D7CD88FF94116CDC1' # No infohash - await do_request(rest_api, f'torrentinfo?uri={path}', expected_code=400) + await do_request(rest_api, f'torrentinfo?uri={path}', expected_code=HTTP_BAD_REQUEST) path = quote_plus(f"magnet:?xt=urn:btih:{'a' * 40}&dn=test torrent") download_manager.get_metainfo = lambda *_, 
**__: succeed(None) - await do_request(rest_api, f'torrentinfo?uri={path}', expected_code=500) + await do_request(rest_api, f'torrentinfo?uri={path}', expected_code=HTTP_INTERNAL_SERVER_ERROR) # Ensure that correct torrent metadata was sent through notifier (to MetadataStore) download_manager.notifier[notifications.torrent_metadata_added].assert_called_with(metainfo_dict) @@ -134,10 +135,10 @@ async def get_metainfo(infohash, timeout=20, hops=None, url=None): # pylint: di await do_request(rest_api, f'torrentinfo?uri={path}&hops=0', expected_code=200) assert [0] == hops_list - await do_request(rest_api, f'torrentinfo?uri={path}&hops=foo', expected_code=400) + await do_request(rest_api, f'torrentinfo?uri={path}&hops=foo', expected_code=HTTP_BAD_REQUEST) path = 'http://fdsafksdlafdslkdksdlfjs9fsafasdf7lkdzz32.n38/324.torrent' - await do_request(rest_api, f'torrentinfo?uri={path}', expected_code=500) + await do_request(rest_api, f'torrentinfo?uri={path}', expected_code=HTTP_INTERNAL_SERVER_ERROR) mock_download = MagicMock( stop=AsyncMock(), @@ -162,23 +163,35 @@ async def get_metainfo(infohash, timeout=20, hops=None, url=None): # pylint: di assert result["download_exists"] +async def test_get_torrentinfo_invalid_magnet(rest_api): + # Test that invalid magnet link causes an error + mocked_query_http_uri = AsyncMock(return_value=b'magnet:?xt=urn:ed2k:' + b"any hash") + params = {'uri': 'http://any.uri'} + + with patch('tribler.core.components.libtorrent.restapi.torrentinfo_endpoint.query_http_uri', mocked_query_http_uri): + result = await do_request(rest_api, 'torrentinfo', params=params, expected_code=HTTP_INTERNAL_SERVER_ERROR) + + assert 'error' in result + + async def test_on_got_invalid_metainfo(rest_api): """ Test whether the right operations happen when we receive an invalid metainfo object """ path = f"magnet:?xt=urn:btih:{hexlify(UBUNTU_1504_INFOHASH)}&dn={quote_plus('test torrent')}" - res = await do_request(rest_api, f'torrentinfo?uri={path}',
expected_code=500) + res = await do_request(rest_api, f'torrentinfo?uri={path}', expected_code=HTTP_INTERNAL_SERVER_ERROR) assert "error" in res + # These are the exceptions that are handled by torrent info endpoint when querying an HTTP URI. caught_exceptions = [ - ServerConnectionError(), - ClientResponseError(Mock(), Mock()), - SSLError(), - ClientConnectorError(Mock(), Mock()), - AsyncTimeoutError() - ] + ServerConnectionError(), + ClientResponseError(Mock(), Mock()), + SSLError(), + ClientConnectorError(Mock(), Mock()), + AsyncTimeoutError() +] @patch("tribler.core.components.libtorrent.restapi.torrentinfo_endpoint.query_http_uri") diff --git a/src/tribler/core/components/libtorrent/restapi/torrentinfo_endpoint.py b/src/tribler/core/components/libtorrent/restapi/torrentinfo_endpoint.py index 0465ec41172..343a19d2ed2 100644 --- a/src/tribler/core/components/libtorrent/restapi/torrentinfo_endpoint.py +++ b/src/tribler/core/components/libtorrent/restapi/torrentinfo_endpoint.py @@ -10,10 +10,10 @@ from marshmallow.fields import String from tribler.core import notifications +from tribler.core.components.database.db.orm_bindings.torrent_metadata import tdef_to_metadata_dict from tribler.core.components.libtorrent.download_manager.download_manager import DownloadManager from tribler.core.components.libtorrent.torrentdef import TorrentDef from tribler.core.components.libtorrent.utils.libtorrent_helper import libtorrent as lt -from tribler.core.components.database.db.orm_bindings.torrent_metadata import tdef_to_metadata_dict from tribler.core.components.restapi.rest.rest_endpoint import ( HTTP_BAD_REQUEST, HTTP_INTERNAL_SERVER_ERROR, @@ -106,15 +106,28 @@ async def get_torrent_info(self, request): return RESTResponse({"error": str(e)}, status=HTTP_INTERNAL_SERVER_ERROR) if response.startswith(b'magnet'): - _, infohash, _ = parse_magnetlink(response) - if infohash: - metainfo = await self.download_manager.get_metainfo(infohash, timeout=60, hops=hops, url=response) + 
try: + _, infohash, _ = parse_magnetlink(response) + except RuntimeError as e: + return RESTResponse( + {"error": f'Error while getting an ingo hash from magnet: {e.__class__.__name__}: {e}'}, + status=HTTP_INTERNAL_SERVER_ERROR + ) + + metainfo = await self.download_manager.get_metainfo(infohash, timeout=60, hops=hops, url=response) else: metainfo = bdecode_compat(response) elif scheme == MAGNET_SCHEME: - infohash = parse_magnetlink(uri)[1] - if infohash is None: - return RESTResponse({"error": "missing infohash"}, status=HTTP_BAD_REQUEST) + self._logger.info(f'{MAGNET_SCHEME} scheme detected') + + try: + _, infohash, _ = parse_magnetlink(uri) + except RuntimeError as e: + return RESTResponse( + {"error": f'Error while getting an ingo hash from magnet: {e.__class__.__name__}: {e}'}, + status=HTTP_BAD_REQUEST + ) + metainfo = await self.download_manager.get_metainfo(infohash, timeout=60, hops=hops, url=uri) else: return RESTResponse({"error": "invalid uri"}, status=HTTP_BAD_REQUEST) diff --git a/src/tribler/core/components/libtorrent/tests/test_download_manager.py b/src/tribler/core/components/libtorrent/tests/test_download_manager.py index 3bf37bcbf3a..ee0c2d57522 100644 --- a/src/tribler/core/components/libtorrent/tests/test_download_manager.py +++ b/src/tribler/core/components/libtorrent/tests/test_download_manager.py @@ -2,8 +2,7 @@ import functools import itertools from asyncio import Future -from unittest.mock import MagicMock, Mock -from unittest.mock import AsyncMock +from unittest.mock import AsyncMock, MagicMock, Mock import pytest from ipv8.util import succeed @@ -18,6 +17,8 @@ from tribler.core.utilities.unicode import hexlify +# pylint: disable=redefined-outer-name + def create_fake_download_and_state(): """ Create a fake download and state which can be passed to the global download callback. 
@@ -503,6 +504,13 @@ async def test_check_for_dht_ready(fake_dlmgr): await fake_dlmgr._check_dht_ready() +async def test_start_download_from_magnet_no_name(fake_dlmgr: DownloadManager): + # Test whether a download is started with `Unknown name` name when the magnet has no name + magnet = f'magnet:?xt=urn:btih:{"A" * 40}' + download = await fake_dlmgr.start_download_from_uri(magnet) + assert download.tdef.get_name() == 'Unknown name' + + def test_update_trackers(fake_dlmgr) -> None: fake_download, _ = create_fake_download_and_state() fake_dlmgr.downloads[fake_download.infohash] = fake_download diff --git a/src/tribler/core/utilities/tests/test_utilities.py b/src/tribler/core/utilities/tests/test_utilities.py index f57a7b83c42..c1378a7d117 100644 --- a/src/tribler/core/utilities/tests/test_utilities.py +++ b/src/tribler/core/utilities/tests/test_utilities.py @@ -1,4 +1,3 @@ -import binascii import logging from unittest.mock import MagicMock, Mock, patch @@ -54,6 +53,12 @@ def test_parse_magnetlink_lowercase(): assert hashed == b"\x03\xc58\x16\xcdu\xa8\x1b\xe5\xc8\x182`'A\x07\x8b/&\x82" +def test_parse_magnetlink_wrong_hash(): + url = 'magnet:?xt=urn:sha1:apctqfwnowubxzoidazgaj2ba6fs6juc&xt=urn:ed2khash:apctqfwnowubxzoidazgaj2ba6fs6ju1' + with pytest.raises(RuntimeError): + parse_magnetlink(url) + + def test_parse_magnetlink_uppercase(): """ Test if an uppercase magnet link can be parsed @@ -62,24 +67,29 @@ def test_parse_magnetlink_uppercase(): assert hashed == b"\x03\xc58\x16\xcdu\xa8\x1b\xe5\xc8\x182`'A\x07\x8b/&\x82" +def test_parse_magnetlink_bytes(): + """ + Test if a bytes magnet link can be parsed + """ + _, hashed, _ = parse_magnetlink(b'magnet:?xt=urn:btih:APCTQFWNOWUBXZOIDAZGAJ2BA6FS6JUC') + + assert hashed == b"\x03\xc58\x16\xcdu\xa8\x1b\xe5\xc8\x182`'A\x07\x8b/&\x82" + def test_parse_invalid_magnetlink_short(): """ Test if a magnet link with invalid and short infohash (v1) can be parsed """ - _, hashed, _ =
parse_magnetlink('magnet:?xt=urn:btih:APCTQFWNOWUBXZOIDA') - - assert hashed is None + with pytest.raises(RuntimeError): + parse_magnetlink('magnet:?xt=urn:btih:APCTQFWNOWUBXZOIDA') def test_parse_invalid_magnetlink_long(): """ Test if a magnet link with invalid and long infohash (v1) can be parsed """ - _, hashed, _ = parse_magnetlink( - 'magnet:?xt=urn:btih:APCTQFWNOWUBXZOIDAZGAJ2BA6FS6JUCAPCTQFWNOWUBXZOIDAZGAJ2BA6FS6JUC') - - assert hashed is None + with pytest.raises(RuntimeError): + parse_magnetlink('magnet:?xt=urn:btih:APCTQFWNOWUBXZOIDAZGAJ2BA6FS6JUCAPCTQFWNOWUBXZOIDAZGAJ2BA6FS6JUC') def test_valid_url(): @@ -283,8 +293,8 @@ def test_parse_magnetlink_valid(): def test_parse_magnetlink_nomagnet(): - result = parse_magnetlink("http://") - assert result == (None, None, []) + with pytest.raises(RuntimeError): + parse_magnetlink("http://") def test_add_url_param_some_present(): @@ -295,22 +305,6 @@ def test_add_url_param_some_present(): assert "answers=false" in result -@patch('tribler.core.utilities.utilities.b32decode', new=Mock(side_effect=binascii.Error)) -def test_parse_magnetlink_binascii_error_32(caplog): - # Test that binascii.Error exceptions are logged for 32 symbol hash - infohash_32 = 'A' * 32 - parse_magnetlink(f'magnet:?xt=urn:btih:{infohash_32}') - assert f'Invalid infohash: {infohash_32}' in caplog.text - - -@patch('binascii.unhexlify', new=Mock(side_effect=binascii.Error)) -def test_parse_magnetlink_binascii_error_40(caplog): - # Test that binascii.Error exceptions are logged for 40 symbol hash - infohash_40 = 'B' * 40 - parse_magnetlink(f'magnet:?xt=urn:btih:{infohash_40}') - assert f'Invalid infohash: {infohash_40}' in caplog.text - - def test_add_url_param_clean(): url = 'http://stackoverflow.com/test' new_params = {'data': ['some', 'values']} diff --git a/src/tribler/core/utilities/utilities.py b/src/tribler/core/utilities/utilities.py index fa41bfaaa0c..b5efb667255 100644 --- a/src/tribler/core/utilities/utilities.py +++ 
b/src/tribler/core/utilities/utilities.py @@ -3,7 +3,6 @@ provides a method for HTTP GET requests as well as a function to translate peers into health. Author(s): Jie Yang """ -import binascii import itertools import logging import os @@ -12,12 +11,11 @@ import re import sys import threading -from base64 import b32decode from contextlib import contextmanager from dataclasses import dataclass, field from functools import wraps -from typing import Dict, Optional, Set, Tuple -from urllib.parse import parse_qsl, urlsplit +from typing import Dict, List, Optional, Set, Tuple, Union +from urllib.parse import urlsplit from tribler.core.components.libtorrent.utils.libtorrent_helper import libtorrent as lt from tribler.core.utilities.sentinels import sentinel @@ -88,60 +86,19 @@ def is_valid_url(url): return not (split_url[0] == '' or split_url[1] == '') -def parse_magnetlink(url): +def parse_magnetlink(url: Union[str, bytes]) -> Tuple[str, bytes, List[str]]: """ Parses the magnet link provided by the given URL. The output of this file consists of: - - dn: The display name of the magnet link - - xt: The URI containing the file hash of the magnet link - - trs: The list of Tracker URLs - :param url: the URL at which the magnet link can be found - :return: (dn, xt, trs) tuple, which will be left (None, None, []) if the - given URL does not lead to a magnet link + - name: The display name of the magnet link + - infohash: The URI containing the file hash of the magnet link + - trackers: The list of Tracker URLs + + The RuntimeError is raised when the magnet link is invalid. """ - dn = None - xt = None - trs = [] - - logger.debug("parse_magnetlink() %s", url) - - schema, netloc, path, query, fragment = urlsplit(url) - if schema == "magnet": - # magnet url's do not conform to regular url syntax (they - # do not have a netloc.) This causes path to contain the - # query part. - if "?" 
in path: - pre, post = path.split("?", 1) - if query: - query = "&".join((post, query)) - else: - query = post - - for key, value in parse_qsl(query): - if key == "dn": - # convert to Unicode - dn = value.decode('utf-8') if not isinstance(value, str) else value - - elif key == "xt" and value.startswith("urn:btih:"): - # vliegendhart: Adding support for base32 in magnet links (BEP 0009) - encoded_infohash = value[9:] - try: - if len(encoded_infohash) == 32: - xt = b32decode(encoded_infohash.upper()) - elif len(encoded_infohash) == 40: - xt = binascii.unhexlify(encoded_infohash) - except binascii.Error as codec_error: - logger.warning("Invalid infohash: %s; Error: %s", encoded_infohash, codec_error) - - elif key == "tr": - trs.append(value) - - logger.debug("parse_magnetlink() NAME: %s", dn) - logger.debug("parse_magnetlink() HASH: %s", xt) - logger.debug("parse_magnetlink() TRACS: %s", trs) - - return dn, xt, trs + params = lt.parse_magnet_uri(url) + return params.name, params.info_hash.to_bytes(), params.trackers def is_simple_match_query(query):