Rev 612: Give a warning instead of an error if the total count of results is equal to the max search depth when the current module doesn't support an ID filter natively.

Add RZ max search depth (now known). Fix (most) RZ JSON parsing errors. Fix RZ validation of tags containing `+` in their names.
trickerer01 · Jul 30, 2024 · 3b71347
1 parent eea8c2c
commit 3b71347
Show file tree

Hide file tree

Showing 7 changed files with 38 additions and 9 deletions.
diff --git a/src/app_download.py b/src/app_download.py
@@ -226,8 +226,13 @@ def _fetch_task_items(self, tag_str: str) -> None:
             trace(f'Total {self.total_count:d} item(s) found across {self._num_pages():d} page(s)')
 
             if 0 < self._get_max_search_depth() < self.total_count:
-                trace('\nFATAL: too many possible matches, won\'t be able to fetch html for all the pages!\nTry adding an ID filter.')
+                if self._has_native_id_filter():
+                    trace('\nFATAL: too many possible matches, won\'t be able to fetch html for all the pages!\nTry adding an ID filter.')
                 return
+            elif 0 < self._get_max_search_depth() == self.total_count:
+                pages_depth = (self._get_max_search_depth() + self._get_items_per_page() - 1) // self._get_items_per_page()
+                trace(f'\nWarning (W3): too many possible matches, can only fetch html for {pages_depth:d} pages!\n')
+                thread_sleep(4.0)
 
             self.total_pages = self._num_pages()
 

diff --git a/src/app_download_base.py b/src/app_download_base.py
@@ -89,6 +89,10 @@ def _is_fav_search_conversion_required(self) -> bool:
     def _is_fav_search_single_step(self) -> bool:
         ...
 
+    @abstractmethod
+    def _has_native_id_filter(self) -> bool:
+        ...
+
     @abstractmethod
     def _get_module_abbr(self) -> str:
         ...

diff --git a/src/app_download_rn.py b/src/app_download_rn.py
@@ -50,6 +50,9 @@ def _is_fav_search_conversion_required(self) -> bool:
     def _is_fav_search_single_step(self) -> bool:
         return False
 
+    def _has_native_id_filter(self) -> bool:
+        return True
+
     def _get_sitename(self) -> str:
         return SITENAME
 

diff --git a/src/app_download_rs.py b/src/app_download_rs.py
@@ -47,6 +47,9 @@ def _is_fav_search_conversion_required(self) -> bool:
     def _is_fav_search_single_step(self) -> bool:
         return False
 
+    def _has_native_id_filter(self) -> bool:
+        return True
+
     def _get_sitename(self) -> str:
         return SITENAME
 

diff --git a/src/app_download_rx.py b/src/app_download_rx.py
@@ -51,6 +51,9 @@ def _is_fav_search_conversion_required(self) -> bool:
     def _is_fav_search_single_step(self) -> bool:
         return False
 
+    def _has_native_id_filter(self) -> bool:
+        return True
+
     def _get_sitename(self) -> str:
         return SITENAME.replace('api.', '') if self.favorites_search_user or self.pool_search_id else SITENAME
 

diff --git a/src/app_download_rz.py b/src/app_download_rz.py
@@ -3,6 +3,7 @@
 Author: trickerer (https://github.com/trickerer, https://github.com/trickerer01)
 """
 #########################################
+# TODO: fix sources extraction in normal search (not just fav)
 #
 #
 
@@ -35,7 +36,7 @@
 
 SITENAME = b64decode(SITENAME_B_RZ).decode()
 ITEMS_PER_PAGE = ITEMS_PER_PAGE_RZ
-MAX_SEARCH_DEPTH = 0
+MAX_SEARCH_DEPTH = 12000  # 200 pages
 
 item_info_fields = {'likes': 'score', 'comments': 'comments_'}
 
@@ -66,6 +67,9 @@ def _is_fav_search_conversion_required(self) -> bool:
     def _is_fav_search_single_step(self) -> bool:
         return True
 
+    def _has_native_id_filter(self) -> bool:
+        return False
+
     def _get_sitename(self) -> str:
         return SITENAME
 
@@ -93,7 +97,7 @@ def _form_page_num_address(self, n: int) -> str:
         return f'{self.url}&Skip={n * self._get_items_per_page():d}'
 
     def _get_all_post_tags(self, raw_html_page: BeautifulSoup) -> list:
-        return loads(raw_html_page.text)['items']
+        return self.parse_json(raw_html_page.text)['items']
 
     def _local_addr_from_string(self, h: str) -> str:
         return h
@@ -140,7 +144,7 @@ def _get_items_query_size_or_html(self, url: str, tries: int = None) -> int:
         if raw_html is None:
             thread_exit('ERROR: GetItemsQueSize: unable to retreive html', code=-444)
 
-        return int(loads(raw_html.text)['totalCount'])
+        return int(self.parse_json(raw_html.text)['totalCount'])
 
     def _get_image_address(self, h: str) -> Tuple[str, str]:
         item_json = self.parse_json(h)
@@ -227,6 +231,7 @@ def _consume_custom_module_tags(self, tags: str) -> str:
         idx: int
         for idx in reversed(range(len(taglist))):
             ctag = taglist[idx]
+            # ctag = ctag.replace('%2b', '+')
             if ctag.startswith('-'):
                 if len(ctag) > 1:
                     self.negative_tags.append(ctag[1:])
@@ -327,10 +332,14 @@ def _form_tags_search_address(self, tags: str, maxlim: int = None) -> str:
     @staticmethod
     def parse_json(raw: str) -> dict:
         item_json_base = (
-            raw.replace('"', '\"')
+            raw.replace('{"', '{\'').replace('"}', '\'}').replace('["', '[\'').replace('"]', '\']')
+               .replace('": ', '\': ').replace(': "', ': \'').replace('", ', '\', ').replace(', "', ', \'')
+               .replace('":', '\':').replace(':"', ':\'').replace('",', '\',').replace(',"', ',\'')
+               .replace('\\', '/').replace('"', '\'')
                .replace('{\'', '{"').replace('\'}', '"}').replace('[\'', '["').replace('\']', '"]')
                .replace('\': ', '": ').replace(': \'', ': "').replace('\', ', '", ').replace(', \'', ', "')
-               .replace(': None,', ': "None",').replace('\\', '/')
+               .replace('\':', '":').replace(':\'', ':"').replace('\',', '",').replace(',\'', ',"')
+               .replace(': None,', ': "None",').replace(', ":"', ', ":\'').replace(',":"', ',":\'')
         )
         try:
             parsed_json = loads(item_json_base)
@@ -369,8 +378,10 @@ def _expand_tags(self, pwtag: str, explode: bool) -> Iterable[str]:
             nvtag = no_validation_tag(pwtag) if not is_w else ''
             if nvtag:
                 expanded_tags.add(nvtag)
-            elif pwtag in TAG_NUMS_DECODED_RZ:
-                expanded_tags.add(pwtag)
+            else:
+                pwntag = pwtag.replace('%2b', '+')
+                if pwntag in TAG_NUMS_DECODED_RZ:
+                    expanded_tags.add(pwtag)
         else:
             trace(f'Expanding tags from wtag \'{pwtag}\'...')
             if pwtag in self.expand_cache:

diff --git a/src/app_revision.py b/src/app_revision.py
@@ -9,7 +9,7 @@
 APP_NAME = 'Ruxx'
 APP_VER_MAJOR = '1'
 APP_VER_SUB = '4'
-APP_REVISION = '608'
+APP_REVISION = '612'
 APP_IS_BETA = False
 APP_IS_BETA_TEXT = 'b' * APP_IS_BETA
 APP_REV_DATE = '30 Jul 2024'