-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.go
125 lines (117 loc) · 3.43 KB
/
parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
package keralalottery
import (
"regexp"
"strconv"
"strings"
"github.com/ledongthuc/pdf"
)
// prizeString returns a regular-expression group that matches either the
// English ordinal for prizeCount ("1st", "2nd", "3rd", "4th", "11th", ...) or
// the literal word "Consolation", e.g. "(2nd|Consolation)".
func prizeString(prizeCount int) string {
	ordinal := strconv.Itoa(prizeCount)
	lastDigit := ordinal[len(ordinal)-1]
	// Numbers whose tens digit is 1 (11, 12, 13, 111, ...) always take "th",
	// even though they end in 1, 2 or 3.
	teen := len(ordinal) >= 2 && ordinal[len(ordinal)-2] == '1'

	suffix := "th"
	if !teen {
		switch lastDigit {
		case '1':
			suffix = "st"
		case '2':
			suffix = "nd"
		case '3':
			suffix = "rd"
		}
	}
	return `(` + ordinal + suffix + `|Consolation)`
}
// ParseLocalPDF opens the PDF file at path and returns the prizes that
// ParsePDFContents extracts from it. If the file cannot be opened, the error
// from pdf.Open is returned unchanged together with a nil slice.
func ParseLocalPDF(path string) ([]Prize, error) {
	f, r, openErr := pdf.Open(path)
	// The close is registered ahead of the error check, so it also runs on
	// the failure path; whatever Close returns is deliberately discarded.
	defer func() { _ = f.Close() }()

	if openErr == nil {
		return ParsePDFContents(r)
	}
	return nil, openErr
}
// Fixed token patterns used by ParsePDFContents. They are compiled once at
// package initialisation instead of once per word.
var (
	// seriesTicketRe matches two word characters, a space and digits
	// (for example "AB 123456").
	seriesTicketRe = regexp.MustCompile(`^(\w\w \d+)$`)
	// digitsOnlyRe matches a token that consists solely of digits.
	digitsOnlyRe = regexp.MustCompile(`^(\d+)$`)
	// parenthesizedWordRe matches a single word wrapped in parentheses.
	parenthesizedWordRe = regexp.MustCompile(`^\(\w+\)$`)
)

// prizeHeaderRegexp compiles the pattern for a prize heading: the ordinal of
// prizeCount or "Consolation", followed by " Prize- " and the prize amount.
// Submatch 1 is the ordinal/"Consolation" label, submatch 2 the amount.
func prizeHeaderRegexp(prizeCount int) *regexp.Regexp {
	return regexp.MustCompile(prizeString(prizeCount) + ` Prize- (.+)`)
}

// ParsePDFContents extracts lottery results from PDF contents.
//
// Every page is read row by row and word by word. Words are ignored until one
// matches the first prize heading. From then on each non-empty word is
// classified:
//
//   - a series ticket ("XX 123") or an all-digit token is recorded as a winner
//     of the current prize, or of its consolation prize if a consolation
//     heading was the most recent heading seen;
//   - a single parenthesized word is skipped;
//   - a heading for the next ordinal starts a new prize;
//   - a "Consolation" heading attaches a consolation prize to the current prize;
//   - the "FOR THE TICKETS ENDING WITH THE FOLLOWING NUMBERS" line is skipped;
//   - anything else ends parsing and the prizes collected so far are returned.
//
// An error from reading a page's text is returned with a nil slice.
func ParsePDFContents(reader *pdf.Reader) ([]Prize, error) {
	const ticketsEndingHeader = "FOR THE TICKETS ENDING WITH THE FOLLOWING NUMBERS"

	var (
		prizes        []Prize
		prizeCount    = 1
		started       bool // set once the first prize heading has been seen
		inConsolation bool // winners currently belong to the consolation prize
		// headerRe matches the first prize heading until parsing has started,
		// and the heading of the prize after the current one afterwards.
		headerRe = prizeHeaderRegexp(prizeCount)
	)

	// addWinner records ticket on the most recent prize. It is only called
	// after the first heading has appended a prize, so prizes is never empty.
	addWinner := func(ticket string) {
		current := &prizes[len(prizes)-1]
		if inConsolation {
			current.Consolation.Winners = append(current.Consolation.Winners, ticket)
			return
		}
		current.Winners = append(current.Winners, ticket)
	}

	totalPage := reader.NumPage()
	for pageIndex := 1; pageIndex <= totalPage; pageIndex++ {
		page := reader.Page(pageIndex)
		if page.V.IsNull() {
			continue
		}
		rows, err := page.GetTextByRow()
		if err != nil {
			// Do not silently drop a page: a partial result would look complete.
			return nil, err
		}
		for _, row := range rows {
			for _, word := range row.Content {
				token := strings.TrimSpace(word.S)
				if token == "" {
					continue
				}

				if !started {
					match := headerRe.FindStringSubmatch(token)
					if len(match) == 0 {
						continue
					}
					started = true
					prizes = append(prizes, Prize{PrizeAmount: match[2]})
					headerRe = prizeHeaderRegexp(prizeCount + 1)
					continue
				}

				switch {
				case seriesTicketRe.MatchString(token), digitsOnlyRe.MatchString(token):
					// Both patterns are anchored, so the full match is the token itself.
					addWinner(token)
				case parenthesizedWordRe.MatchString(token):
					// Parenthesized words carry no result data; skip them.
				default:
					// Any other token ends the run of consolation winners.
					inConsolation = false
					match := headerRe.FindStringSubmatch(token)
					switch {
					case len(match) == 0:
						if token != ticketsEndingHeader {
							// First unrecognised token: the results section is over.
							return prizes, nil
						}
					case match[1] == "Consolation":
						inConsolation = true
						current := &prizes[len(prizes)-1]
						current.ConsolationPresent = true
						current.Consolation.PrizeAmount = match[2]
					default:
						prizeCount++
						prizes = append(prizes, Prize{PrizeAmount: match[2]})
						headerRe = prizeHeaderRegexp(prizeCount + 1)
					}
				}
			}
		}
	}
	return prizes, nil
}