Skip to content

Commit

Permalink
Merge branch 'release/1.3.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
wuyue92tree committed Dec 7, 2018
2 parents 850e030 + c7b1d36 commit de684ff
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 4 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Crwy

[![PyPI Version](https://img.shields.io/pypi/v/Crwy.svg)](https://pypi.python.org/pypi/Crwy)
[![Build Status](https://travis-ci.org/wuyue92tree/crwy.svg?branch=1.2.0)](https://travis-ci.org/wuyue92tree/crwy)
[![Build Status](https://travis-ci.org/wuyue92tree/crwy.svg?branch=1.3.0)](https://travis-ci.org/wuyue92tree/crwy)

# 简介

Expand Down Expand Up @@ -34,7 +34,7 @@ pip install crwy
```

or
前往下载: https://pypi.python.org/pypi/Crwy/1.2.0/
前往下载: https://pypi.python.org/pypi/Crwy/1.3.0/

# 使用手册

Expand Down
2 changes: 1 addition & 1 deletion crwy/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.2.0
1.3.0
35 changes: 34 additions & 1 deletion crwy/utils/scrapy_plugs/dupefilters.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def request_seen(self, request):
return False

if (datetime.datetime.utcfromtimestamp(now) -
datetime.datetime.utcfromtimestamp(last_time)).days > \
datetime.datetime.utcfromtimestamp(last_time)).days >= \
self.duperliter_delay_day:
z.zadd(now, dupefilter_key)
return False
Expand All @@ -127,3 +127,36 @@ def log(self, request, spider): # log that a request has been filtered
self.logdupes = False

spider.crawler.stats.inc_value('dupefilter/filtered', spider=spider)


class ReleaseDupefilterKey(object):
    """
    Remove a previously recorded dupefilter_key from redis.

    Meant to be used when handling a response, so that the key is no
    longer held by the dupefilter.
    """

    def call(self, spider, dupefilter_key):
        """
        Release ``dupefilter_key`` from the dupefilter collection.

        :param spider: object whose ``settings`` attribute is passed to
            ``RedisRFPDupeFilter.from_settings``
        :param dupefilter_key: key to remove; a falsy value makes this
            method return immediately without doing anything
        :return: None
        """
        # Nothing to release for an empty / missing key.
        if not dupefilter_key:
            return

        dupefilter = RedisRFPDupeFilter().from_settings(spider.settings)

        # The key is replaced by its sha1 hex digest when do_hash is set.
        if dupefilter.do_hash:
            dupefilter_key = hashlib.sha1(dupefilter_key).hexdigest()

        # SPIDER_NAME for dupefilter
        redis_key = '{bot_name}:{spider_name}'.format(
            bot_name=dupefilter.bot_name,
            spider_name=dupefilter.spider_name)

        # A delay of 0 days selects the plain set; any other value
        # selects the sorted set.
        if dupefilter.duperliter_delay_day == 0:
            RedisSet(redis_key, server=dupefilter.server).srem(
                dupefilter_key)
        else:
            RedisSortedSet(redis_key, server=dupefilter.server).zrem(
                dupefilter_key)

        message = 'dupefilter_key: {} released.'.format(dupefilter_key)
        dupefilter.logger.info(message)


# Module-level ReleaseDupefilterKey instance; use it via
# ``release_dupefilter_key.call(spider, dupefilter_key)``.
release_dupefilter_key = ReleaseDupefilterKey()

0 comments on commit de684ff

Please sign in to comment.