Skip to content

Commit d79bb5f

Browse files
committed
Add new options to encode paths and query values, add `__div` metamethod to append path segments
1 parent 120a9ae commit d79bb5f

File tree

3 files changed

+84
-41
lines changed

3 files changed

+84
-41
lines changed

lib/net/url.lua

+57-36
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,34 @@
55
-- @alias M
66

77
local M = {}
8-
M.version = "1.0.0"
8+
M.version = "1.1.0"
99

1010
--- url options
1111
-- - `separator` is set to `&` by default but could be anything like `&amp;` or `;`
1212
-- - `cumulative_parameters` is false by default. If true, query parameters with the same name will be stored in a table.
13+
-- - `legal_in_path` is a table of characters that will not be url encoded in path components
14+
-- - `legal_in_query` is a table of characters that will not be url encoded in query values. Query parameter names only support a smaller, fixed set of legal characters (-_.).
15+
-- - `query_plus_is_space` is true by default, so a plus sign in a query value will be converted to %20 (space), not %2B (plus)
1316
-- @todo Add option to limit the size of the argument table
1417
-- @todo Add option to limit the depth of the argument table
1518
-- @todo Add option to process dots in parameter names, ie. `param.filter=1`
1619
M.options = {
1720
separator = '&',
18-
cumulative_parameters = false
21+
cumulative_parameters = false,
22+
legal_in_path = {
23+
[":"] = true, ["-"] = true, ["_"] = true, ["."] = true,
24+
["!"] = true, ["~"] = true, ["*"] = true, ["'"] = true,
25+
["("] = true, [")"] = true, ["@"] = true, ["&"] = true,
26+
["="] = true, ["$"] = true, [","] = true,
27+
[";"] = true
28+
},
29+
legal_in_query = {
30+
[":"] = true, ["-"] = true, ["_"] = true, ["."] = true,
31+
[","] = true, ["!"] = true, ["~"] = true, ["*"] = true,
32+
["'"] = true, [";"] = true, ["("] = true, [")"] = true,
33+
["@"] = true, ["$"] = true,
34+
},
35+
query_plus_is_space = true
1936
}
2037

2138
--- list of known and common scheme ports
@@ -51,41 +68,42 @@ M.services = {
5168
videotex = 516
5269
}
5370

54-
local legal = {
55-
["-"] = true, ["_"] = true, ["."] = true, ["!"] = true,
56-
["~"] = true, ["*"] = true, ["'"] = true, ["("] = true,
57-
[")"] = true, [":"] = true, ["@"] = true, ["&"] = true,
58-
["="] = true, ["+"] = true, ["$"] = true, [","] = true,
59-
[";"] = true -- can be used for parameters in path
60-
}
61-
6271
local function decode(str)
63-
local str = str:gsub('+', ' ')
6472
return (str:gsub("%%(%x%x)", function(c)
65-
return string.char(tonumber(c, 16))
73+
return string.char(tonumber(c, 16))
6674
end))
6775
end
6876

69-
local function encode(str)
70-
return (str:gsub("([^A-Za-z0-9%_%.%-%~])", function(v)
71-
return string.upper(string.format("%%%02x", string.byte(v)))
77+
local function encode(str, legal)
78+
return (str:gsub("([^%w])", function(v)
79+
if legal[v] then
80+
return v
81+
end
82+
return string.upper(string.format("%%%02x", string.byte(v)))
7283
end))
7384
end
7485

75-
-- for query values, prefer + instead of %20 for spaces
76-
local function encodeValue(str)
77-
local str = encode(str)
78-
return str:gsub('%%20', '+')
86+
-- for query values, + can mean space if configured as such
87+
local function decodeValue(str)
88+
if M.options.query_plus_is_space then
89+
str = str:gsub('+', ' ')
90+
end
91+
return decode(str)
7992
end
8093

81-
local function encodeSegment(s)
82-
local legalEncode = function(c)
83-
if legal[c] then
84-
return c
85-
end
86-
return encode(c)
94+
local function concat(a, b)
95+
if type(a) == 'table' then
96+
return a:build() .. b
97+
else
98+
return a .. b:build()
8799
end
88-
return s:gsub('([^a-zA-Z0-9])', legalEncode)
100+
end
101+
102+
function M:addSegment(path)
103+
if type(path) == 'string' then
104+
self.path = self.path .. '/' .. encode(path:gsub("^/+", ""), M.options.legal_in_path)
105+
end
106+
return self
89107
end
90108

91109
--- builds the url
@@ -154,7 +172,7 @@ function M.buildQuery(tab, sep, key)
154172
end)
155173
for _,name in ipairs(keys) do
156174
local value = tab[name]
157-
name = encode(tostring(name))
175+
name = encode(tostring(name), {["-"] = true, ["_"] = true, ["."] = true})
158176
if key then
159177
if M.options.cumulative_parameters and string.find(name, '^%d+$') then
160178
name = tostring(key)
@@ -165,7 +183,7 @@ function M.buildQuery(tab, sep, key)
165183
if type(value) == 'table' then
166184
query[#query+1] = M.buildQuery(value, sep, name)
167185
else
168-
local value = encodeValue(decode(tostring(value)))
186+
local value = encode(tostring(value), M.options.legal_in_query)
169187
if value ~= "" then
170188
query[#query+1] = string.format('%s=%s', name, value)
171189
else
@@ -190,14 +208,14 @@ function M.parseQuery(str, sep)
190208

191209
local values = {}
192210
for key,val in str:gmatch(string.format('([^%q=]+)(=*[^%q=]*)', sep, sep)) do
193-
local key = decode(key)
211+
local key = decodeValue(key)
194212
local keys = {}
195213
key = key:gsub('%[([^%]]*)%]', function(v)
196214
-- extract keys between balanced brackets
197215
if string.find(v, "^-?%d+$") then
198216
v = tonumber(v)
199217
else
200-
v = decode(v)
218+
v = decodeValue(v)
201219
end
202220
table.insert(keys, v)
203221
return "="
@@ -212,11 +230,11 @@ function M.parseQuery(str, sep)
212230
if #keys > 0 and type(values[key]) ~= 'table' then
213231
values[key] = {}
214232
elseif #keys == 0 and type(values[key]) == 'table' then
215-
values[key] = decode(val)
233+
values[key] = decodeValue(val)
216234
elseif M.options.cumulative_parameters
217235
and type(values[key]) == 'string' then
218236
values[key] = { values[key] }
219-
table.insert(values[key], decode(val))
237+
table.insert(values[key], decodeValue(val))
220238
end
221239

222240
local t = values[key]
@@ -231,10 +249,11 @@ function M.parseQuery(str, sep)
231249
t[k] = {}
232250
end
233251
if i == #keys then
234-
t[k] = decode(val)
252+
t[k] = val
235253
end
236254
t = t[k]
237255
end
256+
238257
end
239258
setmetatable(values, { __tostring = M.buildQuery })
240259
return values
@@ -363,12 +382,14 @@ function M.parse(url)
363382
return ''
364383
end)
365384

366-
comp.path = url:gsub("([^/]+)", function (s) return encodeSegment(decode(s)) end)
385+
comp.path = url:gsub("([^/]+)", function (s) return encode(decode(s), M.options.legal_in_path) end)
367386

368387
setmetatable(comp, {
369388
__index = M,
370-
__tostring = M.build}
371-
)
389+
__tostring = M.build,
390+
__concat = concat,
391+
__div = M.addSegment
392+
})
372393
return comp
373394
end
374395

tests/query_test.lua

0 Bytes
Binary file not shown.

tests/url_test.lua

+27-5
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,15 @@ local url = require 'net.url'
66
local s
77
local q
88

9-
plan(127)
9+
plan(134)
1010

1111
local u = url.parse("http://www.example.com")
1212
u.query.net = "url"
1313
is("http://www.example.com/?net=url", tostring(u), "Test new query variable")
1414

1515
u.query.net = "url 2nd try"
16-
is("net=url+2nd+try", tostring(u.query), "Test build query")
17-
is("http://www.example.com/?net=url+2nd+try", tostring(u), "Test new query variable 2")
16+
is("net=url%202nd%20try", tostring(u.query), "Test build query")
17+
is("http://www.example.com/?net=url%202nd%20try", tostring(u), "Test new query variable 2")
1818

1919
u = url.parse("http://www.example.com/?last=mansion&first=bertrand&test=more")
2020
is("http://www.example.com/?first=bertrand&last=mansion&test=more", tostring(u), "Test sorted query")
@@ -134,7 +134,6 @@ local test2 = {
134134
["http://www.foo.com/%7ebar"] = "http://www.foo.com/~bar",
135135
["http://www.foo.com/%7Ebar"] = "http://www.foo.com/~bar",
136136
["http://www.foo.com/?p=529&cpage=1#comment-783"] = "http://www.foo.com/?p=529#038;cpage=1#comment-783",
137-
["http://www.foo.com/some +path/?args=foo%2Bbar"] = "http://www.foo.com/some%20%20path/?args=foo+bar",
138137
["/foo/bar/../../../baz"] = "/baz",
139138
["/foo/bar/../../../../baz"] = "/baz",
140139
["/./../foo"] = "/foo",
@@ -184,13 +183,36 @@ local test2 = {
184183
["http://example.com/A/./B"] = "http://example.com/A/./B", -- don't convert path case
185184
["/test"] = "/test", -- keep absolute paths
186185
["foo/bar"] = "foo/bar", -- keep relative paths
186+
-- encoding tests
187+
["https://google.com/Link with a space in it/"] = "https://google.com/Link%20with%20a%20space%20in%20it/",
187188
["https://google.com/Link%20with%20a%20space%20in%20it/"] = "https://google.com/Link%20with%20a%20space%20in%20it/",
188189
["https://google.com/a%2fb%2fc/"] = "https://google.com/a%2Fb%2Fc/",
189-
['//lua.org/path?query=1:2'] = "//lua.org/path?query=1%3A2",
190+
['//lua.org/path?query=1:2'] = "//lua.org/path?query=1:2",
191+
["http://www.foo.com/some +path/?args=foo%2Bbar"] = "http://www.foo.com/some%20%2Bpath/?args=foo%2Bbar",
192+
-- by default, a plus sign in a query value is decoded as a space and re-encoded as %20
193+
["http://www.foo.com/?args=foo+bar"] = "http://www.foo.com/?args=foo%20bar",
194+
-- by default, a space in a query value is encoded as %20
195+
["http://www.foo.com/?args=foo bar"] = "http://www.foo.com/?args=foo%20bar",
196+
["http://www.foo.com/some%20%20path/?args=foo%20bar"] = "http://www.foo.com/some%20%20path/?args=foo%20bar",
197+
["http://www.foo.com/some%2B%2Bpath/?args=foo%2Bbar"] = "http://www.foo.com/some%2B%2Bpath/?args=foo%2Bbar",
190198
}
191199

192200
for k,v in pairs(test2) do
193201
local u = url.parse(k)
194202
is(tostring(u), v, "Test rebuild and clean '".. k.."' => '"..v..' => '..tostring(u))
195203
end
196204

205+
206+
local test3 = {
207+
-- can also encode plus sign as %2B instead of space (option)
208+
["http://www.foo.com/?args=foo+bar"] = "http://www.foo.com/?args=foo%2Bbar",
209+
-- can also leave plus sign alone in path (option)
210+
["http://www.foo.com/some +path/?args=foo+bar"] = "http://www.foo.com/some%20+path/?args=foo%2Bbar",
211+
}
212+
213+
for k,v in pairs(test3) do
214+
url.options.legal_in_path["+"] = true;
215+
url.options.query_plus_is_space = false;
216+
local u = url.parse(k)
217+
is(tostring(u), v, "Test plus sign '".. k.."' => '"..v..' => '..tostring(u))
218+
end

0 commit comments

Comments
 (0)