Skip to content

Commit

Permalink
Merge pull request #133 from creekorful/develop
Browse files Browse the repository at this point in the history
Release 0.11.0
  • Loading branch information
creekorful committed Jan 8, 2021
2 parents 4075dfc + b657123 commit 3cc47a0
Show file tree
Hide file tree
Showing 22 changed files with 696 additions and 248 deletions.
41 changes: 23 additions & 18 deletions deployments/docker/docker-compose.yml
Expand Up @@ -32,29 +32,33 @@ services:
image: creekorful/tdsh-crawler:latest
command: >
--log-level debug
--hub-uri amqp://guest:guest@rabbitmq:5672
--tor-uri torproxy:9050
--config-api-uri http://configapi:8080
--event-srv amqp://guest:guest@rabbitmq:5672
--tor-proxy torproxy:9050
--config-api http://configapi:8080
restart: always
depends_on:
- rabbitmq
- torproxy
- configapi
scheduler:
image: creekorful/tdsh-scheduler:latest
command: >
--log-level debug
--hub-uri amqp://guest:guest@rabbitmq:5672
--config-api-uri http://configapi:8080
--redis-uri redis:6379
--event-srv amqp://guest:guest@rabbitmq:5672
--event-prefetch 20
--config-api http://configapi:8080
--redis redis:6379
restart: always
depends_on:
- rabbitmq
- configapi
- redis
indexer-local:
image: creekorful/tdsh-indexer:latest
command: >
--log-level debug
--hub-uri amqp://guest:guest@rabbitmq:5672
--config-api-uri http://configapi:8080
--event-srv amqp://guest:guest@rabbitmq:5672
--config-api http://configapi:8080
--index-driver local
--index-dest /archive
restart: always
Expand All @@ -67,8 +71,9 @@ services:
image: creekorful/tdsh-indexer:latest
command: >
--log-level debug
--hub-uri amqp://guest:guest@rabbitmq:5672
--config-api-uri http://configapi:8080
--event-srv amqp://guest:guest@rabbitmq:5672
--event-prefetch 20
--config-api http://configapi:8080
--index-driver elastic
--index-dest http://elasticsearch:9200
restart: always
Expand All @@ -80,12 +85,12 @@ services:
image: creekorful/tdsh-configapi:latest
command: >
--log-level debug
--hub-uri amqp://guest:guest@rabbitmq:5672
--redis-uri redis:6379
--event-srv amqp://guest:guest@rabbitmq:5672
--redis redis:6379
--default-value forbidden-hostnames="[]"
--default-value allowed-mime-types="[{\"content-type\":\"text/\",\"extensions\":[\"html\",\"php\",\"aspx\", \"htm\"]}]"
--default-value refresh-delay="{\"delay\": -1}"
--default-value blacklist-threshold="{\"threshold\": 5}"
--default-value refresh-delay="{\"delay\": 0}"
--default-value blacklist-config="{\"threshold\": 5, \"ttl\": 1200}"
restart: always
depends_on:
- rabbitmq
Expand All @@ -96,10 +101,10 @@ services:
image: creekorful/tdsh-blacklister:latest
command: >
--log-level debug
--hub-uri amqp://guest:guest@rabbitmq:5672
--config-api-uri http://configapi:8080
--redis-uri redis:6379
--tor-uri torproxy:9050
--event-srv amqp://guest:guest@rabbitmq:5672
--config-api http://configapi:8080
--redis redis:6379
--tor-proxy torproxy:9050
restart: always
depends_on:
- rabbitmq
Expand Down
34 changes: 24 additions & 10 deletions internal/blacklister/blacklister.go
Expand Up @@ -27,9 +27,9 @@ func (state *State) Name() string {
return "blacklister"
}

// CommonFlags return process common flags
func (state *State) CommonFlags() []string {
return []string{process.HubURIFlag, process.ConfigAPIURIFlag, process.RedisURIFlag, process.UserAgentFlag, process.TorURIFlag}
// Features return the process features
func (state *State) Features() []process.Feature {
return []process.Feature{process.EventFeature, process.ConfigFeature, process.CacheFeature, process.CrawlingFeature}
}

// CustomFlags return process custom flags
Expand All @@ -45,7 +45,7 @@ func (state *State) Initialize(provider process.Provider) error {
}
state.hostnameCache = hostnameCache

configClient, err := provider.ConfigClient([]string{configapi.ForbiddenHostnamesKey, configapi.BlackListThresholdKey})
configClient, err := provider.ConfigClient([]string{configapi.ForbiddenHostnamesKey, configapi.BlackListConfigKey})
if err != nil {
return err
}
Expand Down Expand Up @@ -104,27 +104,41 @@ func (state *State) handleTimeoutURLEvent(subscriber event.Subscriber, msg event

// Check by ourselves if the hostname doesn't respond
_, err = state.httpClient.Get(fmt.Sprintf("%s://%s", u.Scheme, u.Host))
if err == nil || err != chttp.ErrTimeout {
if err != nil && err != chttp.ErrTimeout {
return err
}

cacheKey := u.Hostname()

if err == nil {
log.Debug().
Str("hostname", u.Hostname()).
Msg("Response received.")

// Host is not down, remove it from cache
if err := state.hostnameCache.Remove(cacheKey); err != nil {
return err
}

return nil
}

log.Debug().
Str("hostname", u.Hostname()).
Msg("Timeout confirmed")

threshold, err := state.configClient.GetBlackListThreshold()
blackListConfig, err := state.configClient.GetBlackListConfig()
if err != nil {
return err
}

cacheKey := u.Hostname()
count, err := state.hostnameCache.GetInt64(cacheKey)
if err != nil && err != cache.ErrNIL {
if err != nil {
return err
}
count++

if count >= threshold.Threshold {
if count >= blackListConfig.Threshold {
forbiddenHostnames, err := state.configClient.GetForbiddenHostnames()
if err != nil {
return err
Expand Down Expand Up @@ -155,7 +169,7 @@ func (state *State) handleTimeoutURLEvent(subscriber event.Subscriber, msg event
}

// Update count
if err := state.hostnameCache.SetInt64(cacheKey, count, cache.NoTTL); err != nil {
if err := state.hostnameCache.SetInt64(cacheKey, count, blackListConfig.TTL); err != nil {
return err
}

Expand Down
28 changes: 19 additions & 9 deletions internal/blacklister/blacklister_test.go
Expand Up @@ -2,7 +2,6 @@ package blacklister

import (
"errors"
"github.com/creekorful/trandoshan/internal/cache"
"github.com/creekorful/trandoshan/internal/cache_mock"
configapi "github.com/creekorful/trandoshan/internal/configapi/client"
"github.com/creekorful/trandoshan/internal/configapi/client_mock"
Expand All @@ -15,6 +14,7 @@ import (
"github.com/creekorful/trandoshan/internal/test"
"github.com/golang/mock/gomock"
"testing"
"time"
)

func TestState_Name(t *testing.T) {
Expand All @@ -24,9 +24,9 @@ func TestState_Name(t *testing.T) {
}
}

func TestState_CommonFlags(t *testing.T) {
func TestState_Features(t *testing.T) {
s := State{}
test.CheckProcessCommonFlags(t, &s, []string{process.HubURIFlag, process.ConfigAPIURIFlag, process.RedisURIFlag, process.UserAgentFlag, process.TorURIFlag})
test.CheckProcessFeatures(t, &s, []process.Feature{process.EventFeature, process.ConfigFeature, process.CacheFeature, process.CrawlingFeature})
}

func TestState_CustomFlags(t *testing.T) {
Expand All @@ -37,7 +37,7 @@ func TestState_CustomFlags(t *testing.T) {
func TestState_Initialize(t *testing.T) {
test.CheckInitialize(t, &State{}, func(p *process_mock.MockProviderMockRecorder) {
p.Cache("down-hostname")
p.ConfigClient([]string{configapi.ForbiddenHostnamesKey, configapi.BlackListThresholdKey})
p.ConfigClient([]string{configapi.ForbiddenHostnamesKey, configapi.BlackListConfigKey})
p.HTTPClient()
})
}
Expand Down Expand Up @@ -69,6 +69,8 @@ func TestHandleTimeoutURLEventNoTimeout(t *testing.T) {
httpClientMock.EXPECT().Get("https://down-example.onion:8080").Return(httpResponseMock, nil)
configClientMock.EXPECT().GetForbiddenHostnames().Return([]configapi.ForbiddenHostname{}, nil)

hostnameCacheMock.EXPECT().Remove("down-example.onion")

s := State{configClient: configClientMock, hostnameCache: hostnameCacheMock, httpClient: httpClientMock}
if err := s.handleTimeoutURLEvent(subscriberMock, msg); err != nil {
t.Fail()
Expand All @@ -94,10 +96,13 @@ func TestHandleTimeoutURLEventNoDispatch(t *testing.T) {

httpClientMock.EXPECT().Get("https://down-example.onion").Return(httpResponseMock, http.ErrTimeout)
configClientMock.EXPECT().GetForbiddenHostnames().Return([]configapi.ForbiddenHostname{}, nil)
configClientMock.EXPECT().GetBlackListThreshold().Return(configapi.BlackListThreshold{Threshold: 10}, nil)
configClientMock.EXPECT().GetBlackListConfig().Return(configapi.BlackListConfig{
Threshold: 10,
TTL: 5,
}, nil)

hostnameCacheMock.EXPECT().GetInt64("down-example.onion").Return(int64(0), cache.ErrNIL)
hostnameCacheMock.EXPECT().SetInt64("down-example.onion", int64(1), cache.NoTTL).Return(nil)
hostnameCacheMock.EXPECT().GetInt64("down-example.onion").Return(int64(0), nil)
hostnameCacheMock.EXPECT().SetInt64("down-example.onion", int64(1), time.Duration(5)).Return(nil)

s := State{configClient: configClientMock, hostnameCache: hostnameCacheMock, httpClient: httpClientMock}
if err := s.handleTimeoutURLEvent(subscriberMock, msg); err != nil {
Expand All @@ -124,7 +129,10 @@ func TestHandleTimeoutURLEvent(t *testing.T) {

httpClientMock.EXPECT().Get("https://down-example.onion").Return(httpResponseMock, http.ErrTimeout)
configClientMock.EXPECT().GetForbiddenHostnames().Return([]configapi.ForbiddenHostname{}, nil)
configClientMock.EXPECT().GetBlackListThreshold().Return(configapi.BlackListThreshold{Threshold: 10}, nil)
configClientMock.EXPECT().GetBlackListConfig().Return(configapi.BlackListConfig{
Threshold: 10,
TTL: 5,
}, nil)

hostnameCacheMock.EXPECT().GetInt64("down-example.onion").Return(int64(9), nil)

Expand All @@ -138,7 +146,9 @@ func TestHandleTimeoutURLEvent(t *testing.T) {
}).
Return(nil)

hostnameCacheMock.EXPECT().SetInt64("down-example.onion", int64(10), cache.NoTTL).Return(nil)
hostnameCacheMock.EXPECT().
SetInt64("down-example.onion", int64(10), time.Duration(5)).
Return(nil)

s := State{configClient: configClientMock, hostnameCache: hostnameCacheMock, httpClient: httpClientMock}
if err := s.handleTimeoutURLEvent(subscriberMock, msg); err != nil {
Expand Down
8 changes: 5 additions & 3 deletions internal/cache/cache.go
Expand Up @@ -3,15 +3,12 @@ package cache
//go:generate mockgen -destination=../cache_mock/cache_mock.go -package=cache_mock . Cache

import (
"errors"
"time"
)

var (
// NoTTL define an entry that lives forever
NoTTL = time.Duration(0)
// ErrNIL is returned when there's no value for given key
ErrNIL = errors.New("value is nil")
)

// Cache represent a KV database
Expand All @@ -21,4 +18,9 @@ type Cache interface {

GetInt64(key string) (int64, error)
SetInt64(key string, value int64, TTL time.Duration) error

GetManyInt64(keys []string) (map[string]int64, error)
SetManyInt64(values map[string]int64, TTL time.Duration) error

Remove(key string) error
}
59 changes: 53 additions & 6 deletions internal/cache/redis.go
Expand Up @@ -25,11 +25,11 @@ func NewRedisCache(URI string, keyPrefix string) (Cache, error) {

func (rc *redisCache) GetBytes(key string) ([]byte, error) {
val, err := rc.client.Get(context.Background(), rc.getKey(key)).Bytes()
if err == redis.Nil {
err = ErrNIL
if err != nil && err != redis.Nil {
return nil, err
}

return val, err
return val, nil
}

func (rc *redisCache) SetBytes(key string, value []byte, TTL time.Duration) error {
Expand All @@ -38,17 +38,64 @@ func (rc *redisCache) SetBytes(key string, value []byte, TTL time.Duration) erro

func (rc *redisCache) GetInt64(key string) (int64, error) {
val, err := rc.client.Get(context.Background(), rc.getKey(key)).Int64()
if err == redis.Nil {
err = ErrNIL
if err != nil && err != redis.Nil {
return 0, err
}

return val, err
return val, nil
}

// SetInt64 stores value under the prefixed form of key, passing TTL through
// as the expiration argument of the underlying Set call.
func (rc *redisCache) SetInt64(key string, value int64, TTL time.Duration) error {
	prefixed := rc.getKey(key)
	status := rc.client.Set(context.Background(), prefixed, value, TTL)
	return status.Err()
}

// GetManyInt64 looks up every key in keys using a single pipeline and returns
// the values that could be read as int64, indexed by the caller-supplied
// (un-prefixed) key.
//
// Keys whose lookup yields redis.Nil are simply absent from the returned map.
// Any other error, whether from executing the pipeline or from converting an
// individual reply, aborts the call and is returned as-is.
func (rc *redisCache) GetManyInt64(keys []string) (map[string]int64, error) {
	ctx := context.Background()
	pipe := rc.client.Pipeline()

	// Queue one GET per key and remember the pending command for each of them.
	pending := map[string]*redis.StringCmd{}
	for _, k := range keys {
		pending[k] = pipe.Get(ctx, rc.getKey(k))
	}

	// redis.Nil is tolerated here: it is inspected per command below.
	if _, execErr := pipe.Exec(ctx); execErr != nil && execErr != redis.Nil {
		return nil, execErr
	}

	// Collect the replies, skipping the keys that have no entry.
	found := map[string]int64{}
	for _, k := range keys {
		n, convErr := pending[k].Int64()
		if convErr == redis.Nil {
			continue
		}
		if convErr != nil {
			return nil, convErr
		}
		found[k] = n
	}

	return found, nil
}

// SetManyInt64 queues one Set per entry of values (each key prefixed, each
// with the same TTL) on a TxPipeline and executes them together.
// The error reported by Exec, if any, is returned unchanged.
func (rc *redisCache) SetManyInt64(values map[string]int64, TTL time.Duration) error {
	ctx := context.Background()
	tx := rc.client.TxPipeline()

	for k, v := range values {
		tx.Set(ctx, rc.getKey(k), v, TTL)
	}

	if _, err := tx.Exec(ctx); err != nil {
		return err
	}
	return nil
}

// Remove issues a Del for the prefixed form of key and returns the error
// reported by that command, if any.
func (rc *redisCache) Remove(key string) error {
	prefixed := rc.getKey(key)
	return rc.client.Del(context.Background(), prefixed).Err()
}

func (rc *redisCache) getKey(key string) string {
if rc.keyPrefix == "" {
return key
Expand Down

0 comments on commit 3cc47a0

Please sign in to comment.