diff --git a/facebook_scraper/extractors.py b/facebook_scraper/extractors.py index e2828264..31a59d15 100644 --- a/facebook_scraper/extractors.py +++ b/facebook_scraper/extractors.py @@ -719,7 +719,7 @@ def extract_sharers(self): else: share_url = None - def extract_reactions(self, post_id=None) -> PartialPost: + def extract_reactions(self, post_id=None, force_parse_HTML=False) -> PartialPost: """Fetch share and reactions information with a existing post obtained by `get_posts`. Return a merged post that has some new fields including `reactions`, `w3_fb_url`, `fetched_time`, and reactions fields `LIKE`, `ANGER`, `SORRY`, `WOW`, `LOVE`, `HAHA` if @@ -756,8 +756,9 @@ def extract_reactions(self, post_id=None) -> PartialPost: reaction_url = f'https://m.facebook.com/ufi/reaction/profile/browser/?ft_ent_identifier={post_id}' logger.debug(f"Fetching {reaction_url}") response = self.request(reaction_url) - - if not reactions: + if not reactions or force_parse_HTML: + reactions = {} + reaction_count = 0 for sigil in response.html.find("span[data-sigil='reaction_profile_sigil']"): k = str(demjson.decode(sigil.attrs.get("data-store"))["reactionType"]) v = sigil.find( @@ -769,6 +770,8 @@ def extract_reactions(self, post_id=None) -> PartialPost: elif k in reaction_lookup: name = reaction_lookup[k]["display_name"].lower() reactions[name] = v + if not reaction_count: + reaction_count = sum(reactions.values()) reactors = self.extract_reactors(response, reaction_lookup) if reactions: @@ -1015,7 +1018,7 @@ def parse_comment(self, comment): 'a[href^="/ufi/reaction/profile/browser/?ft_ent_identifier="] i', first=True ) if reactors: - reactions = self.extract_reactions(comment_id) + reactions = self.extract_reactions(comment_id, force_parse_HTML=True) if comment_reactors_opt != "generator": reactions["reactors"] = utils.safe_consume(reactions.get("reactors", [])) diff --git a/facebook_scraper/facebook_scraper.py b/facebook_scraper/facebook_scraper.py index 2b8b30db..5fcb9033 100755 --- a/facebook_scraper/facebook_scraper.py +++ b/facebook_scraper/facebook_scraper.py @@ -697,15 +697,11 @@ def get_group_info(self, group, **kwargs) -> Profile: "/groups/members/search", respAdmins.find( "div:nth-child(1)>div:nth-child(1) a:not(.touchable)", first=True - ).attrs.get('href') + ).attrs.get('href'), ): - admins = respAdmins.find( - "div:nth-of-type(2)>div.touchable a:not(.touchable)" - ) + admins = respAdmins.find("div:nth-of-type(2)>div.touchable a:not(.touchable)") else: - admins = respAdmins.find( - "div:first-child>div.touchable a:not(.touchable)" - ) + admins = respAdmins.find("div:first-child>div.touchable a:not(.touchable)") result["admins"] = [ { "name": e.text, diff --git a/facebook_scraper/page_iterators.py b/facebook_scraper/page_iterators.py index 0ab3909c..07458697 100644 --- a/facebook_scraper/page_iterators.py +++ b/facebook_scraper/page_iterators.py @@ -148,7 +148,9 @@ def __init__(self, response: Response): def get_page(self) -> Page: # Select only elements that have the data-ft attribute - return self._get_page('[data-ft*="top_level_post_id"]:not([data-sigil="m-see-translate-link"])', 'article') + return self._get_page( + '[data-ft*="top_level_post_id"]:not([data-sigil="m-see-translate-link"])', 'article' + ) def get_raw_page(self) -> RawPage: return self.html