diff --git a/src/app_download.py b/src/app_download.py index 280304d..47f27bb 100644 --- a/src/app_download.py +++ b/src/app_download.py @@ -226,8 +226,13 @@ def _fetch_task_items(self, tag_str: str) -> None: trace(f'Total {self.total_count:d} item(s) found across {self._num_pages():d} page(s)') if 0 < self._get_max_search_depth() < self.total_count: - trace('\nFATAL: too many possible matches, won\'t be able to fetch html for all the pages!\nTry adding an ID filter.') + if self._has_native_id_filter(): + trace('\nFATAL: too many possible matches, won\'t be able to fetch html for all the pages!\nTry adding an ID filter.') return + elif 0 < self._get_max_search_depth() == self.total_count: + pages_depth = (self._get_max_search_depth() + self._get_items_per_page() - 1) // self._get_items_per_page() + trace(f'\nWarning (W3): too many possible matches, can only fetch html for {pages_depth:d} pages!\n') + thread_sleep(4.0) self.total_pages = self._num_pages() diff --git a/src/app_download_base.py b/src/app_download_base.py index 6dafd7a..aa316fd 100644 --- a/src/app_download_base.py +++ b/src/app_download_base.py @@ -89,6 +89,10 @@ def _is_fav_search_conversion_required(self) -> bool: def _is_fav_search_single_step(self) -> bool: ... + @abstractmethod + def _has_native_id_filter(self) -> bool: + ... + @abstractmethod def _get_module_abbr(self) -> str: ... diff --git a/src/app_download_rn.py b/src/app_download_rn.py index 7b0cfc4..684de4d 100644 --- a/src/app_download_rn.py +++ b/src/app_download_rn.py @@ -50,6 +50,9 @@ def _is_fav_search_conversion_required(self) -> bool: def _is_fav_search_single_step(self) -> bool: return False + def _has_native_id_filter(self) -> bool: + return True + def _get_sitename(self) -> str: return SITENAME diff --git a/src/app_download_rs.py b/src/app_download_rs.py index 9043014..4385877 100644 --- a/src/app_download_rs.py +++ b/src/app_download_rs.py @@ -47,6 +47,9 @@ def _is_fav_search_conversion_required(self) -> bool: def _is_fav_search_single_step(self) -> bool: return False + def _has_native_id_filter(self) -> bool: + return True + def _get_sitename(self) -> str: return SITENAME diff --git a/src/app_download_rx.py b/src/app_download_rx.py index eb19ef3..f012041 100644 --- a/src/app_download_rx.py +++ b/src/app_download_rx.py @@ -51,6 +51,9 @@ def _is_fav_search_conversion_required(self) -> bool: def _is_fav_search_single_step(self) -> bool: return False + def _has_native_id_filter(self) -> bool: + return True + def _get_sitename(self) -> str: return SITENAME.replace('api.', '') if self.favorites_search_user or self.pool_search_id else SITENAME diff --git a/src/app_download_rz.py b/src/app_download_rz.py index 1af7131..d91146f 100644 --- a/src/app_download_rz.py +++ b/src/app_download_rz.py @@ -3,6 +3,7 @@ Author: trickerer (https://github.com/trickerer, https://github.com/trickerer01) """ ######################################### +# TODO: fix sources extraction in normal search (not just fav) # # @@ -35,7 +36,7 @@ SITENAME = b64decode(SITENAME_B_RZ).decode() ITEMS_PER_PAGE = ITEMS_PER_PAGE_RZ -MAX_SEARCH_DEPTH = 0 +MAX_SEARCH_DEPTH = 12000 # 200 pages item_info_fields = {'likes': 'score', 'comments': 'comments_'} @@ -66,6 +67,9 @@ def _is_fav_search_conversion_required(self) -> bool: def _is_fav_search_single_step(self) -> bool: return True + def _has_native_id_filter(self) -> bool: + return False + def _get_sitename(self) -> str: return SITENAME @@ -93,7 +97,7 @@ def _form_page_num_address(self, n: int) -> str: return f'{self.url}&Skip={n * self._get_items_per_page():d}' def _get_all_post_tags(self, raw_html_page: BeautifulSoup) -> list: - return loads(raw_html_page.text)['items'] + return self.parse_json(raw_html_page.text)['items'] def _local_addr_from_string(self, h: str) -> str: return h @@ -140,7 +144,7 @@ def _get_items_query_size_or_html(self, url: str, tries: int = None) -> int: if raw_html is None: thread_exit('ERROR: GetItemsQueSize: unable to retreive html', code=-444) - return int(loads(raw_html.text)['totalCount']) + return int(self.parse_json(raw_html.text)['totalCount']) def _get_image_address(self, h: str) -> Tuple[str, str]: item_json = self.parse_json(h) @@ -227,6 +231,7 @@ def _consume_custom_module_tags(self, tags: str) -> str: idx: int for idx in reversed(range(len(taglist))): ctag = taglist[idx] + # ctag = ctag.replace('%2b', '+') if ctag.startswith('-'): if len(ctag) > 1: self.negative_tags.append(ctag[1:]) @@ -327,10 +332,14 @@ def _form_tags_search_address(self, tags: str, maxlim: int = None) -> str: @staticmethod def parse_json(raw: str) -> dict: item_json_base = ( - raw.replace('"', '\"') + raw.replace('{"', '{\'').replace('"}', '\'}').replace('["', '[\'').replace('"]', '\']') + .replace('": ', '\': ').replace(': "', ': \'').replace('", ', '\', ').replace(', "', ', \'') + .replace('":', '\':').replace(':"', ':\'').replace('",', '\',').replace(',"', ',\'') + .replace('\\', '/').replace('"', '\'') .replace('{\'', '{"').replace('\'}', '"}').replace('[\'', '["').replace('\']', '"]') .replace('\': ', '": ').replace(': \'', ': "').replace('\', ', '", ').replace(', \'', ', "') - .replace(': None,', ': "None",').replace('\\', '/') + .replace('\':', '":').replace(':\'', ':"').replace('\',', '",').replace(',\'', ',"') + .replace(': None,', ': "None",').replace(', ":"', ', ":\'').replace(',":"', ',":\'') ) try: parsed_json = loads(item_json_base) @@ -369,8 +378,10 @@ def _expand_tags(self, pwtag: str, explode: bool) -> Iterable[str]: nvtag = no_validation_tag(pwtag) if not is_w else '' if nvtag: expanded_tags.add(nvtag) - elif pwtag in TAG_NUMS_DECODED_RZ: - expanded_tags.add(pwtag) + else: + pwntag = pwtag.replace('%2b', '+') + if pwntag in TAG_NUMS_DECODED_RZ: + expanded_tags.add(pwtag) else: trace(f'Expanding tags from wtag \'{pwtag}\'...') if pwtag in self.expand_cache: diff --git a/src/app_revision.py b/src/app_revision.py index 4707732..13219a3 100644 --- a/src/app_revision.py +++ b/src/app_revision.py @@ -9,7 +9,7 @@ APP_NAME = 'Ruxx' APP_VER_MAJOR = '1' APP_VER_SUB = '4' -APP_REVISION = '608' +APP_REVISION = '612' APP_IS_BETA = False APP_IS_BETA_TEXT = 'b' * APP_IS_BETA APP_REV_DATE = '30 Jul 2024'