-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathwatch.ss
175 lines (152 loc) · 5.84 KB
/
watch.ss
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
;; -*- Gerbil -*-
;;;; Watching and restarting OS-spawned subprocesses
;;
;; TODO:
;; V1:
;; Store the PID in a watch file
;; Don't worry too much about atomicity, there will be a human operator watching
;;
;; V2:
;; Have a poor man's ORB store the communication end-points and PIDs in a SQLite DB.
;; Use two-phase commit so the new process only actually starts after its PID is registered.
;; When gracefully killing an old process, minimize the time during which neither is active.
;;
;; V3:
;; Go get Erlang documentation, do the same things that they do.
;; Make sure that spawning and registering actors in the "process tree" is atomic.
;; Have an ORB for services to find each other? Use the actual Erlang protocol? dbus or similar?
;;
;; V4:
;; Have a migration protocol so the old process can migrate state (snapshot + increments)
;; to the new one before the new one becomes master and it dies, ensuring no connection is lost.
;;
(export #t)
(import
:gerbil/gambit
:std/actor
:std/format
:std/logger
:std/misc/list
:std/misc/ports
:std/misc/process
(only-in :std/parser/ll1
ll1/port ll1-skip-space* ll1-uint ll1-separated ll1-eof)
(only-in :std/pregexp pregexp-match)
(only-in :std/srfi/13 string-every)
(only-in :std/sugar while try finally hash ignore-errors)
./base
./timestamp
./ffi
./files
./json
./list
./logger
./path-config)
;; Class Daemon-Status-Register
(defclass daemon-status-register ())
;; : <- Daemon-Status-Register Daemon-Status
(defmethod {write daemon-status-register} undefined)
;; : Daemon-Status <- Daemon-Status-Register
(defmethod {read daemon-status-register} undefined)
;; Store the status in a file.
;; Class Trivial-Daemon-Status-Register
(defclass (trivial-daemon-status-register daemon-status-register)
(path))
(defmethod {write trivial-daemon-status-register}
(λ (self status) (write-file-json (@ self path) status)))
(defmethod {read trivial-daemon-status-register}
(λ (self) (read-file-json (@ self path))))
;; Schema for status:
;; "previous" => previous process PID
;; "current" => current process PID
(defonce (daemon-watch-logger) (json-run-logger "daemon-watch"))
;; Kill a daemon of given pid.
;; But first check that the PID is still assigned to a relevant daemon process,
;; by calling the daemon-pid? predicate, so we don't kill a random process with same PID.
;; First kill with -1 (SIGHUP). Then Give the process a 5 second grace delay before killing with kill -9.
(def (kill-daemon name pid (daemon-pid? true))
(def (kill-it signal message)
((daemon-watch-logger) [message pid (command-line<-pid pid)])
(kill pid signal))
(cond
((not pid)
(void))
((not (daemon-pid? pid))
((daemon-watch-logger)
["Not killing" name pid (command-line<-pid pid)]))
(else
(kill-it SIGTERM "Daemon please die (SIGTERM)")
(spawn/name
['coup-de-grace pid]
(λ ()
(sleep (* 5 one-second))
(when (daemon-pid? pid)
(kill-it SIGKILL "Die vile daemon scum (SIGKILL)"))))))
(void))
(def (process-running? process)
(not (ignore-errors (process-status process))))
(def (daemon-watcher
name: name
launch-daemon: launch-daemon
daemon-status-register: daemon-status-register
grace-period: grace-period ;; time after startup that the process isn't watched
watch-period: watch-period ;; time after which to test again
daemon-pid?: daemon-pid? ;; does the PID still refer to the daemon?
daemon-healthy?: healthy?) ;; is the daemon process healthy?
;; TODO: have some locking mechanism
(def current-status (or (ignore-errors {read daemon-status-register}) (hash)))
(kill-daemon name (hash-get current-status "previous") daemon-pid?)
(kill-daemon name (hash-get current-status "current") daemon-pid?)
;; TODO: have a handshake so we only kill the present when the new one is ready
;; TODO: make the current one the previous one after you launch the new one
;; but before you complete the handshake.
;; TODO: make it an asynchronous actor that can immediately sense process death, etc.
(while #t
(let* ((process (launch-daemon))
(pid (process-pid process)))
(try
{write daemon-status-register (hash ("current" pid))}
((daemon-watch-logger) ["I summon thee daemon" name pid (command-line<-pid pid)])
(sleep grace-period)
(while (and (process-running? process) (healthy? process))
(sleep watch-period))
(kill-daemon name pid daemon-pid?)
(finally
(close-port process))))))
(def (read-null-delimited-string-list port)
(nest
(and port)
(with-list-builder (c))
(let loop ())
(let ((arg (read-line port (integer->char 0)))))
(unless (eof-object? arg)
(c arg)
(loop))))
(def (read-integer-list port)
(and port (ll1/port (ll1-separated ll1-uint ll1-skip-space* ll1-eof) port)))
;; TODO: make it portable beyond Linux. At least make it error out outside Linux.
;; TODO: is ignore-errors working? Should we use it?
(def (command-line<-pid pid)
(ignore-errors
(call-with-input-file [path: (format "/proc/~d/cmdline" pid)]
(λ (port)
(and port (read-null-delimited-string-list port))))))
;; TODO: find a home for this function
(def (string-all-digits? s)
(string-every char-numeric? s))
;; Linux specific!
(def (all-pids)
(map string->number (filter string-all-digits? (directory-files "/proc"))))
;; Linux specific!
(def (pid-statm pid)
(ignore-errors
(call-with-input-file [path: (format "/proc/~d/statm" pid)] read-integer-list)))
;; Linux specific!
(def (pid-fds pid)
(ignore-errors
(map string->number (filter string-all-digits? (directory-files (format "/proc/~d/fd" pid))))))
;; Linux specific!
(def (meminfo)
(map (λ (line) (match (pregexp-match "^([A-Za-z0-9_()]+): +([0-9]+)(:? kB)?$" line)
([_ name num unit] [name (string->number num)])))
(read-file-lines "/proc/meminfo")))