Skip to content

Commit

Permalink
Fix comment reaction extraction, run black
Browse files Browse the repository at this point in the history
  • Loading branch information
neon-ninja committed Feb 17, 2022
1 parent 6ac53d7 commit 8397f43
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 12 deletions.
11 changes: 7 additions & 4 deletions facebook_scraper/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,7 +719,7 @@ def extract_sharers(self):
else:
share_url = None

def extract_reactions(self, post_id=None) -> PartialPost:
def extract_reactions(self, post_id=None, force_parse_HTML=False) -> PartialPost:
"""Fetch share and reactions information with an existing post obtained by `get_posts`.
Return a merged post that has some new fields including `reactions`, `w3_fb_url`,
`fetched_time`, and reactions fields `LIKE`, `ANGER`, `SORRY`, `WOW`, `LOVE`, `HAHA` if
Expand Down Expand Up @@ -756,8 +756,9 @@ def extract_reactions(self, post_id=None) -> PartialPost:
reaction_url = f'https://m.facebook.com/ufi/reaction/profile/browser/?ft_ent_identifier={post_id}'
logger.debug(f"Fetching {reaction_url}")
response = self.request(reaction_url)

if not reactions:
if not reactions or force_parse_HTML:
reactions = {}
reaction_count = 0
for sigil in response.html.find("span[data-sigil='reaction_profile_sigil']"):
k = str(demjson.decode(sigil.attrs.get("data-store"))["reactionType"])
v = sigil.find(
Expand All @@ -769,6 +770,8 @@ def extract_reactions(self, post_id=None) -> PartialPost:
elif k in reaction_lookup:
name = reaction_lookup[k]["display_name"].lower()
reactions[name] = v
if not reaction_count:
reaction_count = sum(reactions.values())
reactors = self.extract_reactors(response, reaction_lookup)

if reactions:
Expand Down Expand Up @@ -1015,7 +1018,7 @@ def parse_comment(self, comment):
'a[href^="/ufi/reaction/profile/browser/?ft_ent_identifier="] i', first=True
)
if reactors:
reactions = self.extract_reactions(comment_id)
reactions = self.extract_reactions(comment_id, force_parse_HTML=True)
if comment_reactors_opt != "generator":
reactions["reactors"] = utils.safe_consume(reactions.get("reactors", []))

Expand Down
10 changes: 3 additions & 7 deletions facebook_scraper/facebook_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -697,15 +697,11 @@ def get_group_info(self, group, **kwargs) -> Profile:
"/groups/members/search",
respAdmins.find(
"div:nth-child(1)>div:nth-child(1) a:not(.touchable)", first=True
).attrs.get('href')
).attrs.get('href'),
):
admins = respAdmins.find(
"div:nth-of-type(2)>div.touchable a:not(.touchable)"
)
admins = respAdmins.find("div:nth-of-type(2)>div.touchable a:not(.touchable)")
else:
admins = respAdmins.find(
"div:first-child>div.touchable a:not(.touchable)"
)
admins = respAdmins.find("div:first-child>div.touchable a:not(.touchable)")
result["admins"] = [
{
"name": e.text,
Expand Down
4 changes: 3 additions & 1 deletion facebook_scraper/page_iterators.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,9 @@ def __init__(self, response: Response):

def get_page(self) -> Page:
# Select only elements that have the data-ft attribute
return self._get_page('[data-ft*="top_level_post_id"]:not([data-sigil="m-see-translate-link"])', 'article')
return self._get_page(
'[data-ft*="top_level_post_id"]:not([data-sigil="m-see-translate-link"])', 'article'
)

def get_raw_page(self) -> RawPage:
return self.html
Expand Down

0 comments on commit 8397f43

Please sign in to comment.