Skip to content

Commit

Permalink
Merge pull request #24 from wuyue92tree/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
wuyue92tree authored Sep 20, 2018
2 parents 1d44fc5 + dbb8e9c commit e7addf1
Show file tree
Hide file tree
Showing 10 changed files with 188 additions and 112 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Crwy

[![PyPI Version](https://img.shields.io/pypi/v/Crwy.svg)](https://pypi.python.org/pypi/Crwy)
[![Build Status](https://travis-ci.org/wuyue92tree/crwy.svg?branch=1.1.1)](https://travis-ci.org/wuyue92tree/crwy)
[![Build Status](https://travis-ci.org/wuyue92tree/crwy.svg?branch=1.1.2)](https://travis-ci.org/wuyue92tree/crwy)

# 简介

Expand Down Expand Up @@ -34,7 +34,7 @@ pip install crwy
```

or
前往下载: https://pypi.python.org/pypi/Crwy/1.1.1/
前往下载: https://pypi.python.org/pypi/Crwy/1.1.2/

# 使用手册

Expand Down
2 changes: 1 addition & 1 deletion crwy/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.1.1
1.1.2
7 changes: 5 additions & 2 deletions crwy/utils/data/RedisHash.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@
class RedisHash(object):
"""Simple Hash with Redis Backend"""

def __init__(self, name, **redis_kwargs):
def __init__(self, name, server=None, **redis_kwargs):
"""
The default connection parameters are:
host='localhost', port=6379, db=0
"""
self.__db = get_redis_client(**redis_kwargs)
if server:
self.__db = server
else:
self.__db = get_redis_client(**redis_kwargs)
self.key = name

def hget(self, item):
Expand Down
109 changes: 109 additions & 0 deletions crwy/utils/extend/xunma.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
@author: wuyue
@contact: wuyue92tree@163.com
@software: IntelliJ IDEA
@file: xunma.py
@create at: 2018-09-14 11:41
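Client for the XunMa (xapi.xunma.net) HTTP API: login, phone-number
acquisition, message retrieval, phone release and blacklisting.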
"""


from __future__ import print_function

from crwy.spider import Spider
from crwy.exceptions import CrwyExtendException


class XunMa(Spider):
    """
    Thin client for the XunMa HTTP API (http://xapi.xunma.net).

    Every request goes through ``self.html_downloader.download``, which is
    not defined in this class (presumably provided by ``Spider`` - confirm).
    Any exception raised while building or sending a request, or while
    parsing its response, is re-raised as ``CrwyExtendException``.
    """

    def __init__(self, username, password, item_id):
        """
        Remember the account credentials and the project id.
        :param username: XunMa account name
        :param password: XunMa account password
        :param item_id: XunMa project (item) id
        :raise CrwyExtendException: if any of the three arguments is falsy
        """
        super(XunMa, self).__init__()
        # Guard clause: all three values are required.
        if not (username and password and item_id):
            raise CrwyExtendException("[XunMa] params not valid.")
        self.username = username
        self.password = password
        self.item_id = item_id

    def login(self):
        """
        Log in to XunMa.
        :return: login token (the first '&'-separated field of the response)
        :raise CrwyExtendException: on any failure
        """
        try:
            template = "http://xapi.xunma.net/Login?uName={username}" \
                       "&pWord={password}&Code=UTF8"
            url = template.format(username=self.username,
                                  password=self.password)
            response = self.html_downloader.download(url)
            fields = response.content.strip().split("&")
            return fields[0]
        except Exception as exc:
            raise CrwyExtendException(exc)

    def get_phone(self, token, phone_type='', phone=''):
        """
        Request a phone number.
        :param token: login token
        :param phone_type: carrier: 1 [China Mobile] 2 [China Unicom]
            3 [China Telecom]
        :param phone: a specific number to request
        :return: phone number (the first ';'-separated field of the response)
        :raise CrwyExtendException: on any failure
        """
        try:
            template = "http://xapi.xunma.net/getPhone?ItemId=" \
                       "{item_id}&token={token}&" \
                       "PhoneType={phone_type}&Code=UTF8&" \
                       "Phone={phone}"
            url = template.format(token=token, item_id=self.item_id,
                                  phone_type=phone_type, phone=phone)
            response = self.html_downloader.download(url)
            # The response is not checked for a success marker; the first
            # field is returned as-is.
            fields = response.content.strip().split(';')
            return fields[0]
        except Exception as exc:
            raise CrwyExtendException(exc)

    def get_message(self, token, phone):
        """
        Fetch the SMS message for a phone number.
        :param token: login token
        :param phone: phone number
        :return: the last '&'-separated field of the response
        :raise CrwyExtendException: on any failure
        """
        try:
            # Request shape: http://xapi.xunma.net/getMessage?
            #   token=<login token>&itemId=<project id>&phone=<phone number>
            template = "http://xapi.xunma.net/getMessage?" \
                       "token={token}&itemId={item_id}&phone={phone}" \
                       "&Code=UTF8"
            url = template.format(token=token, item_id=self.item_id,
                                  phone=phone)
            response = self.html_downloader.download(url)
            fields = response.content.strip().split('&')
            return fields[-1]
        except Exception as exc:
            raise CrwyExtendException(exc)

    def release_phone(self, token, phone):
        """
        Release a phone number; the response is ignored.
        :param token: login token
        :param phone: value placed in the ``phoneList`` query parameter
        :raise CrwyExtendException: on any failure
        """
        try:
            # Request shape noted by the original author:
            #   http://xapi.xunma.net/releasePhone?token=<login token>
            #   &phoneList=phone-itemId;phone-itemId;
            # NOTE(review): only ``phone`` is interpolated here, so the
            # caller presumably passes "phone-itemId" itself - confirm.
            template = "http://xapi.xunma.net/releasePhone?" \
                       "token={token}&phoneList={phone};" \
                       "&Code=UTF8"
            url = template.format(token=token, phone=phone)
            self.html_downloader.download(url)
        except Exception as exc:
            raise CrwyExtendException(exc)

    def add_black(self, token, phone):
        """
        Add a phone number to the blacklist; the response is ignored.
        :param token: login token
        :param phone: value placed in the ``phoneList`` query parameter
        :raise CrwyExtendException: on any failure
        """
        try:
            template = "http://xapi.xunma.net/addBlack?" \
                       "token={token}&phoneList={phone};" \
                       "&Code=UTF8"
            url = template.format(token=token, phone=phone)
            self.html_downloader.download(url)
        except Exception as exc:
            raise CrwyExtendException(exc)
8 changes: 6 additions & 2 deletions crwy/utils/filter/RedisSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,16 @@
class RedisSet(object):
"""Simple Deduplicate with Redis Backend"""

def __init__(self, name, namespace='deduplicate', **redis_kwargs):
def __init__(self, name, namespace='deduplicate', server=None,
**redis_kwargs):
"""
The default connection parameters are:
host='localhost', port=6379, db=0
"""
self.__db = get_redis_client(**redis_kwargs)
if server:
self.__db = server
else:
self.__db = get_redis_client(**redis_kwargs)
self.key = '%s:%s' % (namespace, name)

def sadd(self, item):
Expand Down
8 changes: 6 additions & 2 deletions crwy/utils/filter/RedisSortedSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,16 @@
class RedisSortedSet(object):
"""Simple Sorted Deduplicate with Redis Backend"""

def __init__(self, name, namespace='deduplicate_sorted', **redis_kwargs):
def __init__(self, name, namespace='deduplicate_sorted', server=None,
**redis_kwargs):
"""
The default connection parameters are:
host='localhost', port=6379, db=0
"""
self.__db = get_redis_client(**redis_kwargs)
if server:
self.__db = server
else:
self.__db = get_redis_client(**redis_kwargs)
self.key = '%s:%s' % (namespace, name)

def zadd(self, score, item):
Expand Down
7 changes: 6 additions & 1 deletion crwy/utils/no_sql/redis_m.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,12 @@
@cls2singleton
class RedisDb(object):
    """
    Redis connection holder.

    Builds a ``redis.ConnectionPool`` from the given keyword arguments and
    exposes a ``redis.StrictRedis`` client bound to that pool as ``self.db``.
    The class is wrapped by ``cls2singleton`` (defined elsewhere).
    """

    def __init__(self, **kwargs):
        """
        :param kwargs: connection options. If ``url`` is present the pool is
            created with ``redis.ConnectionPool.from_url`` (``db`` defaults
            to 0 when not given); otherwise all options are passed straight
            to ``redis.ConnectionPool``.
        """
        if 'url' in kwargs:
            # ``url`` and ``db`` are passed explicitly below, so they must be
            # removed from kwargs first; forwarding them a second time via
            # **kwargs raises "TypeError: got multiple values for argument".
            url = kwargs.pop('url')
            db = kwargs.pop('db', 0)
            self.pool = redis.ConnectionPool.from_url(url, db=db, **kwargs)
        else:
            self.pool = redis.ConnectionPool(**kwargs)
        self.db = redis.StrictRedis(connection_pool=self.pool)


Expand Down
7 changes: 5 additions & 2 deletions crwy/utils/queue/RedisQueue.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@
class RedisQueue(object):
"""Simple Queue with Redis Backend"""

def __init__(self, name, namespace='queue', **redis_kwargs):
def __init__(self, name, namespace='queue', server=None, **redis_kwargs):
"""The default connection parameters are:
host='localhost', port=6379, db=0"""
self.__db = get_redis_client(**redis_kwargs)
if server:
self.__db = server
else:
self.__db = get_redis_client(**redis_kwargs)
self.key = '%s:%s' % (namespace, name)

def qsize(self):
Expand Down
47 changes: 14 additions & 33 deletions crwy/utils/scrapy_plugs/dupefilters.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,60 +18,50 @@
from crwy.utils.filter.RedisSortedSet import RedisSortedSet
from scrapy.dupefilters import BaseDupeFilter
from scrapy.exceptions import NotConfigured
from scrapy_redis.connection import get_redis_from_settings

logger = logging.getLogger(__name__)


class RedisRFPDupeFilter(BaseDupeFilter):
"""
dupefilter by redis
Redis-backed dupefilter; the Redis connection is obtained through scrapy-redis (get_redis_from_settings).
warning:
config SPIDER_NAME in settings before use
default:
DUPEFILTER_REDIS_HOST = '127.0.0.1'
DUPEFILTER_REDIS_PORT = 6379
DUPEFILTER_REDIS_DB = 0
DUPEFILTER_REDIS_PASSWORD = ''
DUPEFILTER_DEBUG = False
DUPEFILTER_DELAY_DAY = 0
"""
logger = logger

def __init__(self, debug=False,
redis_host=None,
redis_port=None,
redis_db=None,
redis_password=None,
server=None,
bot_name=None,
spider_name=None,
duperliter_delay_day=None,
do_hash=None):
self.debug = debug
self.logdupes = True
self.redis_host = redis_host
self.redis_port = redis_port
self.redis_db = redis_db
self.redis_password = redis_password
self.server = server
self.bot_name = bot_name
self.spider_name = spider_name
self.duperliter_delay_day = duperliter_delay_day
self.do_hash = do_hash
self.logger = logging.getLogger(__name__)

@classmethod
def from_settings(cls, settings):
server = get_redis_from_settings(settings)
debug = settings.getbool('DUPEFILTER_DEBUG')
redis_host = settings.get('DUPEFILTER_REDIS_HOST', '127.0.0.1')
redis_port = settings.get('DUPEFILTER_REDIS_PORT', 6379)
redis_db = settings.get('DUPEFILTER_REDIS_DB', 0)
redis_password = settings.get('DUPEFILTER_REDIS_PASSWORD', '')
bot_name = settings.get('BOT_NAME')
spider_name = settings.get('SPIDER_NAME')
duperliter_delay_day = settings.getint('DUPEFILTER_DELAY_DAY', 0)
do_hash = settings.getbool('DUPEFILTER_DO_HASH', True)
if not spider_name:
raise NotConfigured('%s - "SPIDER_NAME" is not found.' %
cls.__name__)
return cls(debug=debug, redis_host=redis_host, redis_port=redis_port,
redis_db=redis_db, redis_password=redis_password,
bot_name=bot_name, spider_name=spider_name,
return cls(debug=debug, server=server, bot_name=bot_name,
spider_name=spider_name,
duperliter_delay_day=duperliter_delay_day,
do_hash=do_hash)

Expand All @@ -93,23 +83,14 @@ def request_seen(self, request):
'duperliter_delay_day'))

if self.duperliter_delay_day == 0:
s = RedisSet(key,
host=self.redis_host,
port=self.redis_port,
db=self.redis_db,
password=self.redis_password)
s = RedisSet(key, server=self.server)
if s.sadd(dupefilter_key) is True:
return False
self.logger.info('Filtered dupefilter_key: %s' %
dupefilter_key)
return True
else:
z = RedisSortedSet(key,
host=self.redis_host,
port=self.redis_port,
db=self.redis_db,
password=self.redis_password)

z = RedisSortedSet(key, server=self.server)
now = time.time()
last_time = z.zscore(dupefilter_key)

Expand Down
Loading

0 comments on commit e7addf1

Please sign in to comment.