Fix comment reaction extraction, run black

kevinzg · Feb 17, 2022 · 8397f43
1 parent 6ac53d7
commit 8397f43
Show file tree

Hide file tree

Showing 3 changed files with 13 additions and 12 deletions.
diff --git a/facebook_scraper/extractors.py b/facebook_scraper/extractors.py
@@ -719,7 +719,7 @@ def extract_sharers(self):
             else:
                 share_url = None
 
-    def extract_reactions(self, post_id=None) -> PartialPost:
+    def extract_reactions(self, post_id=None, force_parse_HTML=False) -> PartialPost:
         """Fetch share and reactions information with a existing post obtained by `get_posts`.
         Return a merged post that has some new fields including `reactions`, `w3_fb_url`,
         `fetched_time`, and reactions fields `LIKE`, `ANGER`, `SORRY`, `WOW`, `LOVE`, `HAHA` if
@@ -756,8 +756,9 @@ def extract_reactions(self, post_id=None) -> PartialPost:
             reaction_url = f'https://m.facebook.com/ufi/reaction/profile/browser/?ft_ent_identifier={post_id}'
             logger.debug(f"Fetching {reaction_url}")
             response = self.request(reaction_url)
-
-            if not reactions:
+            if not reactions or force_parse_HTML:
+                reactions = {}
+                reaction_count = 0
                 for sigil in response.html.find("span[data-sigil='reaction_profile_sigil']"):
                     k = str(demjson.decode(sigil.attrs.get("data-store"))["reactionType"])
                     v = sigil.find(
@@ -769,6 +770,8 @@ def extract_reactions(self, post_id=None) -> PartialPost:
                     elif k in reaction_lookup:
                         name = reaction_lookup[k]["display_name"].lower()
                         reactions[name] = v
+                if not reaction_count:
+                    reaction_count = sum(reactions.values())
             reactors = self.extract_reactors(response, reaction_lookup)
 
         if reactions:
@@ -1015,7 +1018,7 @@ def parse_comment(self, comment):
                 'a[href^="/ufi/reaction/profile/browser/?ft_ent_identifier="] i', first=True
             )
             if reactors:
-                reactions = self.extract_reactions(comment_id)
+                reactions = self.extract_reactions(comment_id, force_parse_HTML=True)
                 if comment_reactors_opt != "generator":
                     reactions["reactors"] = utils.safe_consume(reactions.get("reactors", []))
 

diff --git a/facebook_scraper/facebook_scraper.py b/facebook_scraper/facebook_scraper.py
@@ -697,15 +697,11 @@ def get_group_info(self, group, **kwargs) -> Profile:
                 "/groups/members/search",
                 respAdmins.find(
                     "div:nth-child(1)>div:nth-child(1) a:not(.touchable)", first=True
-                ).attrs.get('href')
+                ).attrs.get('href'),
             ):
-                admins = respAdmins.find(
-                    "div:nth-of-type(2)>div.touchable a:not(.touchable)"
-                )
+                admins = respAdmins.find("div:nth-of-type(2)>div.touchable a:not(.touchable)")
             else:
-                admins = respAdmins.find(
-                    "div:first-child>div.touchable a:not(.touchable)"
-                )
+                admins = respAdmins.find("div:first-child>div.touchable a:not(.touchable)")
             result["admins"] = [
                 {
                     "name": e.text,

diff --git a/facebook_scraper/page_iterators.py b/facebook_scraper/page_iterators.py
@@ -148,7 +148,9 @@ def __init__(self, response: Response):
 
     def get_page(self) -> Page:
         # Select only elements that have the data-ft attribute
-        return self._get_page('[data-ft*="top_level_post_id"]:not([data-sigil="m-see-translate-link"])', 'article')
+        return self._get_page(
+            '[data-ft*="top_level_post_id"]:not([data-sigil="m-see-translate-link"])', 'article'
+        )
 
     def get_raw_page(self) -> RawPage:
         return self.html