Skip to content

Commit 29ffe67

Browse files
committed
FEAT: rebuild legacy rank and store
1 parent eaf9a31 commit 29ffe67

File tree

8 files changed

+191
-157
lines changed

8 files changed

+191
-157
lines changed

checks/checks.go

+4
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,13 @@ package checks
33
import (
44
"net/http"
55
"time"
6+
7+
"github.com/xray-web/web-check-api/checks/store/legacyrank"
68
)
79

810
type Checks struct {
911
Carbon *Carbon
12+
LegacyRank *LegacyRank
1013
Rank *Rank
1114
SocialTags *SocialTags
1215
Tls *Tls
@@ -18,6 +21,7 @@ func NewChecks() *Checks {
1821
}
1922
return &Checks{
2023
Carbon: NewCarbon(client),
24+
LegacyRank: NewLegacyRank(legacyrank.NewInMemoryStore()),
2125
Rank: NewRank(client),
2226
SocialTags: NewSocialTags(client),
2327
Tls: NewTls(client),

checks/legacy_rank.go

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package checks
2+
3+
import "github.com/xray-web/web-check-api/checks/store/legacyrank"
4+
5+
// DomainRank is the result returned by (*LegacyRank).LegacyRank: a domain
// together with the rank looked up for it. Both fields carry JSON tags.
type DomainRank struct {
6+
// Domain is the lookup key exactly as the caller supplied it.
Domain string `json:"domain"`
7+
// Rank is the value the underlying legacyrank.Getter returned for Domain.
Rank int `json:"rank"`
8+
}
9+
10+
// LegacyRank answers rank lookups by delegating to a legacyrank.Getter.
type LegacyRank struct {
11+
// data is the lookup backend, assigned by NewLegacyRank.
data legacyrank.Getter
12+
}
13+
14+
// NewLegacyRank returns a LegacyRank that resolves ranks through lrg.
// lrg is stored as given; it is not checked for nil here.
func NewLegacyRank(lrg legacyrank.Getter) *LegacyRank {
15+
return &LegacyRank{data: lrg}
16+
}
17+
18+
// LegacyRank looks up domain through the configured Getter.
//
// On success it returns a DomainRank holding domain and the rank the Getter
// reported. If the Getter fails, its error is returned unchanged and the
// result is nil.
func (lr *LegacyRank) LegacyRank(domain string) (*DomainRank, error) {
19+
rank, err := lr.data.GetLegacyRank(domain)
20+
if err != nil {
21+
return nil, err
22+
}
23+
return &DomainRank{
24+
Domain: domain,
25+
Rank: rank,
26+
}, nil
27+
}

checks/legacy_rank_test.go

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package checks
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
"github.com/xray-web/web-check-api/checks/store/legacyrank"
8+
)
9+
10+
// TestLegacyRank verifies that LegacyRank copies both the requested domain and
// the Getter's rank into the returned DomainRank.
func TestLegacyRank(t *testing.T) {
11+
t.Parallel()
12+
13+
t.Run("get rank", func(t *testing.T) {
14+
t.Parallel()
15+
// Stub Getter: reports rank 1 for every domain, so no real store is touched.
lr := NewLegacyRank(legacyrank.GetterFunc(func(domain string) (int, error) {
16+
return 1, nil
17+
}))
18+
dr, err := lr.LegacyRank("example.com")
19+
assert.NoError(t, err)
20+
assert.Equal(t, 1, dr.Rank)
21+
assert.Equal(t, "example.com", dr.Domain)
22+
})
23+
}
checks/store/legacyrank/ (file name missing from this capture)

+99
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
package legacyrank
2+
3+
import (
4+
"archive/zip"
5+
"bytes"
6+
"context"
7+
"encoding/csv"
8+
"errors"
9+
"io"
10+
"log"
11+
"net/http"
12+
"strconv"
13+
"sync"
14+
"time"
15+
)
16+
17+
// ErrNotFound is returned by InMemoryStore.GetLegacyRank when the requested
// key has no entry in the rank table.
var ErrNotFound = errors.New("domain not found")
18+
19+
// Getter is the lookup contract for rank sources: given a domain it returns
// that domain's rank, or an error.
type Getter interface {
20+
// GetLegacyRank returns the rank for domain, or a non-nil error.
GetLegacyRank(domain string) (int, error)
21+
}
22+
23+
// GetterFunc adapts a plain function to the Getter interface, so a closure can
// be used wherever a Getter is expected.
type GetterFunc func(domain string) (int, error)
24+
25+
// GetLegacyRank calls f(domain) and returns its results unchanged.
func (f GetterFunc) GetLegacyRank(domain string) (int, error) {
26+
return f(domain)
27+
}
28+
29+
type InMemoryStore struct{}
30+
31+
var once sync.Once
32+
var data map[string]int //map of domain to rank
33+
34+
func NewInMemoryStore() *InMemoryStore {
35+
return &InMemoryStore{}
36+
}
37+
38+
func (s *InMemoryStore) GetLegacyRank(url string) (int, error) {
39+
once.Do(func() {
40+
var err error
41+
data, err = load()
42+
if err != nil {
43+
log.Println(err)
44+
}
45+
})
46+
47+
rank, ok := data[url]
48+
if !ok {
49+
return -1, ErrNotFound
50+
}
51+
return rank, nil
52+
}
53+
54+
// load downloads the top-1m archive and returns its contents as a map from
// domain to rank.
//
// The whole zip is buffered in memory because zip.NewReader needs random
// access to the archive. The request is bounded by a 10 second timeout, which
// also covers reading the body. A non-200 response is reported as an error
// rather than being handed to the zip reader.
func load() (map[string]int, error) {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
	defer cancel()
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip", nil)
	if err != nil {
		return nil, err
	}
	client := &http.Client{
		Timeout: time.Second * 10,
	}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, errors.New("legacyrank: unexpected response status " + resp.Status)
	}
	b, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	zf, err := zip.NewReader(bytes.NewReader(b), int64(len(b)))
	if err != nil {
		return nil, err
	}
	f, err := zf.Open("top-1m.csv")
	if err != nil {
		return nil, err
	}
	defer f.Close()
	return parseRanks(f)
}

// parseRanks reads CSV rows of the form "rank,domain" from r and returns them
// as a map from domain to rank. It fails on a CSV syntax error, on a row with
// fewer than two fields, or on a rank that is not an integer.
func parseRanks(r io.Reader) (map[string]int, error) {
	cr := csv.NewReader(r)
	ranks := make(map[string]int)
	for {
		record, err := cr.Read()
		if errors.Is(err, io.EOF) {
			return ranks, nil
		}
		if err != nil {
			return nil, err
		}
		// The reader only enforces that rows match the first row's width, so a
		// file made of one-column rows would otherwise panic on record[1].
		if len(record) < 2 {
			return nil, errors.New("legacyrank: malformed row, want rank,domain")
		}
		rank, err := strconv.Atoi(record[0])
		if err != nil {
			return nil, err
		}
		ranks[record[1]] = rank
	}
}
checks/store/legacyrank/ (test file; name missing from this capture)

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package legacyrank_test
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
"github.com/xray-web/web-check-api/checks/store/legacyrank"
8+
)
9+
10+
// TestInMemoryStore checks that the real store returns no error for two
// well-known domains.
//
// NOTE(review): the first GetLegacyRank call triggers the store's load step,
// which fetches the rank list over HTTP, so this test needs network access.
// NOTE(review): dr is passed to assert.NoError as extra message arguments, so
// it presumably only appears in the failure output; the rank value itself is
// never asserted.
func TestInMemoryStore(t *testing.T) {
11+
t.Parallel()
12+
13+
t.Run("get google rank", func(t *testing.T) {
14+
t.Parallel()
15+
ims := legacyrank.NewInMemoryStore()
16+
dr, err := ims.GetLegacyRank("google.com")
17+
assert.NoError(t, err, dr)
18+
})
19+
20+
t.Run("get microsoft rank", func(t *testing.T) {
21+
t.Parallel()
22+
ims := legacyrank.NewInMemoryStore()
23+
dr, err := ims.GetLegacyRank("microsoft.com")
24+
assert.NoError(t, err, dr)
25+
})
26+
}

handlers/legacy_rank.go

+3-143
Original file line numberDiff line numberDiff line change
@@ -1,160 +1,20 @@
11
package handlers
22

33
import (
4-
"archive/zip"
5-
"encoding/csv"
6-
"fmt"
7-
"io"
84
"net/http"
9-
"net/url"
10-
"os"
11-
"path/filepath"
12-
"strings"
13-
)
145

15-
const (
16-
fileURL = "https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"
17-
tempFilePath = "/tmp/top-1m.csv"
6+
"github.com/xray-web/web-check-api/checks"
187
)
198

20-
type RankResponse struct {
21-
Domain string `json:"domain"`
22-
Rank string `json:"rank"`
23-
IsFound bool `json:"isFound"`
24-
}
25-
26-
func checkLegacyRank(urlStr string) (RankResponse, error) {
27-
var domain string
28-
var err error
29-
30-
// Parse the URL to extract the domain
31-
u, err := url.Parse(urlStr)
32-
if err != nil {
33-
return RankResponse{}, fmt.Errorf("invalid URL")
34-
}
35-
36-
// Extract the domain from the parsed URL
37-
if u.Host != "" {
38-
domain = u.Host
39-
} else {
40-
// If Host is empty, try to extract the domain from the Path
41-
parts := strings.Split(u.Path, "/")
42-
if len(parts) > 0 {
43-
domain = parts[0]
44-
} else {
45-
return RankResponse{}, fmt.Errorf("unable to extract domain from URL")
46-
}
47-
}
48-
49-
// Download and unzip the file if not in cache
50-
if _, err := os.Stat(tempFilePath); os.IsNotExist(err) {
51-
if err := downloadAndUnzip(fileURL); err != nil {
52-
return RankResponse{}, err
53-
}
54-
}
55-
56-
// Parse the CSV and find the rank
57-
file, err := os.Open(tempFilePath)
58-
if err != nil {
59-
return RankResponse{}, fmt.Errorf("error opening CSV file: %s", err)
60-
}
61-
defer file.Close()
62-
63-
reader := csv.NewReader(file)
64-
for {
65-
record, err := reader.Read()
66-
if err == io.EOF {
67-
break
68-
}
69-
if err != nil {
70-
return RankResponse{}, fmt.Errorf("error reading CSV record: %s", err)
71-
}
72-
73-
if record[1] == domain {
74-
return RankResponse{
75-
Domain: domain,
76-
Rank: record[0],
77-
IsFound: true,
78-
}, nil
79-
}
80-
}
81-
82-
return RankResponse{
83-
Domain: domain,
84-
IsFound: false,
85-
}, nil
86-
}
87-
88-
func downloadAndUnzip(url string) error {
89-
resp, err := http.Get(url)
90-
if err != nil {
91-
return fmt.Errorf("error downloading file: %s", err)
92-
}
93-
defer resp.Body.Close()
94-
95-
zipFile, err := os.Create(tempFilePath + ".zip")
96-
if err != nil {
97-
return fmt.Errorf("error creating zip file: %s", err)
98-
}
99-
defer zipFile.Close()
100-
101-
_, err = io.Copy(zipFile, resp.Body)
102-
if err != nil {
103-
return fmt.Errorf("error writing zip file: %s", err)
104-
}
105-
106-
err = unzip(tempFilePath+".zip", "/tmp")
107-
if err != nil {
108-
return fmt.Errorf("error unzipping file: %s", err)
109-
}
110-
111-
return nil
112-
}
113-
114-
func unzip(src, dest string) error {
115-
r, err := zip.OpenReader(src)
116-
if err != nil {
117-
return err
118-
}
119-
defer r.Close()
120-
121-
for _, f := range r.File {
122-
rc, err := f.Open()
123-
if err != nil {
124-
return err
125-
}
126-
defer rc.Close()
127-
128-
path := filepath.Join(dest, f.Name)
129-
if f.FileInfo().IsDir() {
130-
os.MkdirAll(path, f.Mode())
131-
} else {
132-
os.MkdirAll(filepath.Dir(path), os.ModePerm)
133-
f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
134-
if err != nil {
135-
return err
136-
}
137-
defer f.Close()
138-
139-
_, err = io.Copy(f, rc)
140-
if err != nil {
141-
return err
142-
}
143-
}
144-
}
145-
146-
return nil
147-
}
148-
149-
func HandleLegacyRank() http.Handler {
9+
func HandleLegacyRank(l *checks.LegacyRank) http.Handler {
15010
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
15111
rawURL, err := extractURL(r)
15212
if err != nil {
15313
JSONError(w, ErrMissingURLParameter, http.StatusBadRequest)
15414
return
15515
}
15616

157-
result, err := checkLegacyRank(rawURL.String())
17+
result, err := l.LegacyRank(rawURL.Hostname())
15818
if err != nil {
15919
JSONError(w, err, http.StatusInternalServerError)
16020
return

0 commit comments

Comments
 (0)