mirror of
https://github.com/honojs/hono.git
synced 2024-11-21 10:08:58 +01:00
feat: decode percent-encoded path in getPath (#2714)
* feat: decode percent-encoded path in `getPath`
* refactor: Stop decoding URIs in the `param()` method, since they are already decoded in `getPath()`
* test: add tests for decoding URI in path
* chore: denoify
* Revert "refactor: Stop decoding URIs in the `param()` method, since they are already decoded in `getPath()`"
This reverts commit 7192497e69
.
* refactor: Replace "%25" before applying decodeURI() for avoid double decoding
Co-authored-by: Szymon Marczak <36894700+szmarczak@users.noreply.github.com>
* chore: denoify
* refactor(utils): check existence of "%25" before replacing it with "%2525"
Co-authored-by: Szymon Marczak <36894700+szmarczak@users.noreply.github.com>
* feat(utils/url): Changed URL decoding to skip invalid sequences and decode as much as possible
* chore: denoify
---------
Co-authored-by: Szymon Marczak <36894700+szmarczak@users.noreply.github.com>
This commit is contained in:
parent
8cb59e400f
commit
778ac10e84
@ -5,15 +5,61 @@ bench('noop', () => {})
|
||||
const request = new Request('http://localhost/about/me')
|
||||
|
||||
group('getPath', () => {
|
||||
bench('slice + indexOf : w/o decodeURI', () => {
|
||||
const url = request.url
|
||||
const queryIndex = url.indexOf('?', 8)
|
||||
return url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
|
||||
})
|
||||
|
||||
bench('regexp : w/o decodeURI', () => {
|
||||
const match = request.url.match(/^https?:\/\/[^/]+(\/[^?]*)/)
|
||||
return match ? match[1] : ''
|
||||
})
|
||||
|
||||
bench('slice + indexOf', () => {
|
||||
const url = request.url
|
||||
const queryIndex = url.indexOf('?', 8)
|
||||
url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
|
||||
const path = url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
|
||||
return path.includes('%') ? decodeURIComponent(path) : path
|
||||
})
|
||||
|
||||
bench('regexp', () => {
|
||||
const match = request.url.match(/^https?:\/\/[^/]+(\/[^?]*)/)
|
||||
match ? match[1] : ''
|
||||
bench('slice + for-loop + flag', () => {
|
||||
const url = request.url
|
||||
let start = url.indexOf('/', 8)
|
||||
let i = start
|
||||
let hasPercentEncoding = false
|
||||
for (; i < url.length; i++) {
|
||||
const charCode = url.charCodeAt(i)
|
||||
if (charCode === 37) {
|
||||
// '%'
|
||||
hasPercentEncoding = true
|
||||
} else if (charCode === 63) {
|
||||
// '?'
|
||||
break
|
||||
}
|
||||
}
|
||||
return hasPercentEncoding ? decodeURIComponent(url.slice(start, i)) : url.slice(start, i)
|
||||
})
|
||||
|
||||
bench('slice + for-loop + immediate return', () => {
|
||||
const url = request.url
|
||||
const start = url.indexOf('/', 8)
|
||||
let i = start
|
||||
for (; i < url.length; i++) {
|
||||
const charCode = url.charCodeAt(i)
|
||||
if (charCode === 37) {
|
||||
// '%'
|
||||
// If the path contains percent encoding, use `indexOf()` to find '?' and return the result immediately.
|
||||
// Although this is a performance disadvantage, it is acceptable since we prefer cases that do not include percent encoding.
|
||||
const queryIndex = url.indexOf('?', i)
|
||||
const path = url.slice(start, queryIndex === -1 ? undefined : queryIndex)
|
||||
return decodeURI(path.includes('%25') ? path.replace(/%25/g, '%2525') : path)
|
||||
} else if (charCode === 63) {
|
||||
// '?'
|
||||
break
|
||||
}
|
||||
}
|
||||
return url.slice(start, i)
|
||||
})
|
||||
})
|
||||
|
||||
|
@ -69,11 +69,48 @@ export const getPattern = (label: string): Pattern | null => {
|
||||
return null
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to apply decodeURI() to given string.
|
||||
* If it fails, skip invalid percent encoding or invalid UTF-8 sequences, and apply decodeURI() to the rest as much as possible.
|
||||
* @param str The string to decode.
|
||||
* @returns The decoded string that sometimes contains undecodable percent encoding.
|
||||
* @example
|
||||
* tryDecodeURI('Hello%20World') // 'Hello World'
|
||||
* tryDecodeURI('Hello%20World/%A4%A2') // 'Hello World/%A4%A2'
|
||||
*/
|
||||
const tryDecodeURI = (str: string): string => {
|
||||
try {
|
||||
return decodeURI(str)
|
||||
} catch {
|
||||
return str.replace(/(?:%[0-9A-Fa-f]{2})+/g, (match) => {
|
||||
try {
|
||||
return decodeURI(match)
|
||||
} catch {
|
||||
return match
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
export const getPath = (request: Request): string => {
|
||||
// Optimized: indexOf() + slice() is faster than RegExp
|
||||
const url = request.url
|
||||
const queryIndex = url.indexOf('?', 8)
|
||||
return url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
|
||||
const start = url.indexOf('/', 8)
|
||||
let i = start
|
||||
for (; i < url.length; i++) {
|
||||
const charCode = url.charCodeAt(i)
|
||||
if (charCode === 37) {
|
||||
// '%'
|
||||
// If the path contains percent encoding, use `indexOf()` to find '?' and return the result immediately.
|
||||
// Although this is a performance disadvantage, it is acceptable since we prefer cases that do not include percent encoding.
|
||||
const queryIndex = url.indexOf('?', i)
|
||||
const path = url.slice(start, queryIndex === -1 ? undefined : queryIndex)
|
||||
return tryDecodeURI(path.includes('%25') ? path.replace(/%25/g, '%2525') : path)
|
||||
} else if (charCode === 63) {
|
||||
// '?'
|
||||
break
|
||||
}
|
||||
}
|
||||
return url.slice(start, i)
|
||||
}
|
||||
|
||||
export const getQueryStrings = (url: string): string => {
|
||||
|
@ -732,6 +732,89 @@ describe('Routing', () => {
|
||||
expect(res.status).toBe(404)
|
||||
})
|
||||
})
|
||||
|
||||
describe('Encoded path', () => {
|
||||
let app: Hono
|
||||
beforeEach(() => {
|
||||
app = new Hono()
|
||||
})
|
||||
|
||||
it('should decode path parameter', async () => {
|
||||
app.get('/users/:id', (c) => c.text(`id is ${c.req.param('id')}`))
|
||||
|
||||
const res = await app.request('http://localhost/users/%C3%A7awa%20y%C3%AE%3F')
|
||||
expect(res.status).toBe(200)
|
||||
expect(await res.text()).toBe('id is çawa yî?')
|
||||
})
|
||||
|
||||
it('should decode "/"', async () => {
|
||||
app.get('/users/:id', (c) => c.text(`id is ${c.req.param('id')}`))
|
||||
|
||||
const res = await app.request('http://localhost/users/hono%2Fposts') // %2F is '/'
|
||||
expect(res.status).toBe(200)
|
||||
expect(await res.text()).toBe('id is hono/posts')
|
||||
})
|
||||
|
||||
it('should decode alphabets', async () => {
|
||||
app.get('/users/static', (c) => c.text('static'))
|
||||
|
||||
const res = await app.request('http://localhost/users/%73tatic') // %73 is 's'
|
||||
expect(res.status).toBe(200)
|
||||
expect(await res.text()).toBe('static')
|
||||
})
|
||||
|
||||
it('should decode alphabets with invalid UTF-8 sequence', async () => {
|
||||
app.get('/static/:path', (c) => {
|
||||
try {
|
||||
return c.text(`by c.req.param: ${c.req.param('path')}`) // this should throw an error
|
||||
} catch (e) {
|
||||
return c.text(`by c.req.url: ${c.req.url.replace(/.*\//, '')}`)
|
||||
}
|
||||
})
|
||||
|
||||
const res = await app.request('http://localhost/%73tatic/%A4%A2') // %73 is 's', %A4%A2 is invalid UTF-8 sequence
|
||||
expect(res.status).toBe(200)
|
||||
expect(await res.text()).toBe('by c.req.url: %A4%A2')
|
||||
})
|
||||
|
||||
it('should decode alphabets with invalid percent encoding', async () => {
|
||||
app.get('/static/:path', (c) => {
|
||||
try {
|
||||
return c.text(`by c.req.param: ${c.req.param('path')}`) // this should throw an error
|
||||
} catch (e) {
|
||||
return c.text(`by c.req.url: ${c.req.url.replace(/.*\//, '')}`)
|
||||
}
|
||||
})
|
||||
|
||||
const res = await app.request('http://localhost/%73tatic/%a') // %73 is 's', %a is invalid percent encoding
|
||||
expect(res.status).toBe(200)
|
||||
expect(await res.text()).toBe('by c.req.url: %a')
|
||||
})
|
||||
|
||||
it('should be able to catch URIError', async () => {
|
||||
app.onError((err, c) => {
|
||||
if (err instanceof URIError) {
|
||||
return c.text(err.message, 400)
|
||||
}
|
||||
throw err
|
||||
})
|
||||
app.get('/static/:path', (c) => {
|
||||
return c.text(`by c.req.param: ${c.req.param('path')}`) // this should throw an error
|
||||
})
|
||||
|
||||
const res = await app.request('http://localhost/%73tatic/%a') // %73 is 's', %a is invalid percent encoding
|
||||
expect(res.status).toBe(400)
|
||||
expect(await res.text()).toBe('URI malformed')
|
||||
})
|
||||
|
||||
it('should not double decode', async () => {
|
||||
app.get('/users/:id', (c) => c.text(`posts of ${c.req.param('id')}`))
|
||||
|
||||
const res = await app.request('http://localhost/users/%2525') // %25 is '%'
|
||||
expect(res.status).toBe(200)
|
||||
expect(await res.text()).toBe('posts of %25')
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('param and query', () => {
|
||||
|
@ -69,11 +69,48 @@ export const getPattern = (label: string): Pattern | null => {
|
||||
return null
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to apply decodeURI() to given string.
|
||||
* If it fails, skip invalid percent encoding or invalid UTF-8 sequences, and apply decodeURI() to the rest as much as possible.
|
||||
* @param str The string to decode.
|
||||
* @returns The decoded string that sometimes contains undecodable percent encoding.
|
||||
* @example
|
||||
* tryDecodeURI('Hello%20World') // 'Hello World'
|
||||
* tryDecodeURI('Hello%20World/%A4%A2') // 'Hello World/%A4%A2'
|
||||
*/
|
||||
const tryDecodeURI = (str: string): string => {
|
||||
try {
|
||||
return decodeURI(str)
|
||||
} catch {
|
||||
return str.replace(/(?:%[0-9A-Fa-f]{2})+/g, (match) => {
|
||||
try {
|
||||
return decodeURI(match)
|
||||
} catch {
|
||||
return match
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
export const getPath = (request: Request): string => {
|
||||
// Optimized: indexOf() + slice() is faster than RegExp
|
||||
const url = request.url
|
||||
const queryIndex = url.indexOf('?', 8)
|
||||
return url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
|
||||
const start = url.indexOf('/', 8)
|
||||
let i = start
|
||||
for (; i < url.length; i++) {
|
||||
const charCode = url.charCodeAt(i)
|
||||
if (charCode === 37) {
|
||||
// '%'
|
||||
// If the path contains percent encoding, use `indexOf()` to find '?' and return the result immediately.
|
||||
// Although this is a performance disadvantage, it is acceptable since we prefer cases that do not include percent encoding.
|
||||
const queryIndex = url.indexOf('?', i)
|
||||
const path = url.slice(start, queryIndex === -1 ? undefined : queryIndex)
|
||||
return tryDecodeURI(path.includes('%25') ? path.replace(/%25/g, '%2525') : path)
|
||||
} else if (charCode === 63) {
|
||||
// '?'
|
||||
break
|
||||
}
|
||||
}
|
||||
return url.slice(start, i)
|
||||
}
|
||||
|
||||
export const getQueryStrings = (url: string): string => {
|
||||
|
Loading…
Reference in New Issue
Block a user