0
0
mirror of https://github.com/honojs/hono.git synced 2024-11-21 10:08:58 +01:00

feat: decode percent-encoded path in getPath (#2714)

* feat: decode percent-encoded path in `getPath`

* refactor: Stop decoding URIs in the `param()` method, since they are already decoded in `getPath()`

* test: add tests for decoding URI in path

* chore: denoify

* Revert "refactor: Stop decoding URIs in the `param()` method, since they are already decoded in `getPath()`"

This reverts commit 7192497e69.

* refactor: Replace "%25" before applying decodeURI() for avoid double decoding

Co-authored-by: Szymon Marczak <36894700+szmarczak@users.noreply.github.com>

* chore: denoify

* refactor(utils): check existence of "%25" before replacing it with "%2525"

Co-authored-by: Szymon Marczak <36894700+szmarczak@users.noreply.github.com>

* feat(utils/url): Changed URL decoding to skip invalid sequences and decode as much as possible

* chore: denoify

---------

Co-authored-by: Szymon Marczak <36894700+szmarczak@users.noreply.github.com>
This commit is contained in:
Taku Amano 2024-05-23 05:44:00 +09:00 committed by GitHub
parent 8cb59e400f
commit 778ac10e84
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 213 additions and 10 deletions

View File

@ -5,15 +5,61 @@ bench('noop', () => {})
const request = new Request('http://localhost/about/me')
group('getPath', () => {
bench('slice + indexOf : w/o decodeURI', () => {
const url = request.url
const queryIndex = url.indexOf('?', 8)
return url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
})
bench('regexp : w/o decodeURI', () => {
const match = request.url.match(/^https?:\/\/[^/]+(\/[^?]*)/)
return match ? match[1] : ''
})
bench('slice + indexOf', () => {
const url = request.url
const queryIndex = url.indexOf('?', 8)
url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
const path = url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
return path.includes('%') ? decodeURIComponent(path) : path
})
bench('regexp', () => {
const match = request.url.match(/^https?:\/\/[^/]+(\/[^?]*)/)
match ? match[1] : ''
bench('slice + for-loop + flag', () => {
const url = request.url
let start = url.indexOf('/', 8)
let i = start
let hasPercentEncoding = false
for (; i < url.length; i++) {
const charCode = url.charCodeAt(i)
if (charCode === 37) {
// '%'
hasPercentEncoding = true
} else if (charCode === 63) {
// '?'
break
}
}
return hasPercentEncoding ? decodeURIComponent(url.slice(start, i)) : url.slice(start, i)
})
bench('slice + for-loop + immediate return', () => {
const url = request.url
const start = url.indexOf('/', 8)
let i = start
for (; i < url.length; i++) {
const charCode = url.charCodeAt(i)
if (charCode === 37) {
// '%'
// If the path contains percent encoding, use `indexOf()` to find '?' and return the result immediately.
// Although this is a performance disadvantage, it is acceptable since we prefer cases that do not include percent encoding.
const queryIndex = url.indexOf('?', i)
const path = url.slice(start, queryIndex === -1 ? undefined : queryIndex)
return decodeURI(path.includes('%25') ? path.replace(/%25/g, '%2525') : path)
} else if (charCode === 63) {
// '?'
break
}
}
return url.slice(start, i)
})
})

View File

@ -69,11 +69,48 @@ export const getPattern = (label: string): Pattern | null => {
return null
}
/**
* Try to apply decodeURI() to given string.
* If it fails, skip invalid percent encoding or invalid UTF-8 sequences, and apply decodeURI() to the rest as much as possible.
* @param str The string to decode.
* @returns The decoded string that sometimes contains undecodable percent encoding.
* @example
* tryDecodeURI('Hello%20World') // 'Hello World'
* tryDecodeURI('Hello%20World/%A4%A2') // 'Hello World/%A4%A2'
*/
const tryDecodeURI = (str: string): string => {
try {
return decodeURI(str)
} catch {
return str.replace(/(?:%[0-9A-Fa-f]{2})+/g, (match) => {
try {
return decodeURI(match)
} catch {
return match
}
})
}
}
export const getPath = (request: Request): string => {
// Optimized: indexOf() + slice() is faster than RegExp
const url = request.url
const queryIndex = url.indexOf('?', 8)
return url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
const start = url.indexOf('/', 8)
let i = start
for (; i < url.length; i++) {
const charCode = url.charCodeAt(i)
if (charCode === 37) {
// '%'
// If the path contains percent encoding, use `indexOf()` to find '?' and return the result immediately.
// Although this is a performance disadvantage, it is acceptable since we prefer cases that do not include percent encoding.
const queryIndex = url.indexOf('?', i)
const path = url.slice(start, queryIndex === -1 ? undefined : queryIndex)
return tryDecodeURI(path.includes('%25') ? path.replace(/%25/g, '%2525') : path)
} else if (charCode === 63) {
// '?'
break
}
}
return url.slice(start, i)
}
export const getQueryStrings = (url: string): string => {

View File

@ -732,6 +732,89 @@ describe('Routing', () => {
expect(res.status).toBe(404)
})
})
describe('Encoded path', () => {
let app: Hono
beforeEach(() => {
app = new Hono()
})
it('should decode path parameter', async () => {
app.get('/users/:id', (c) => c.text(`id is ${c.req.param('id')}`))
const res = await app.request('http://localhost/users/%C3%A7awa%20y%C3%AE%3F')
expect(res.status).toBe(200)
expect(await res.text()).toBe('id is çawa yî?')
})
it('should decode "/"', async () => {
app.get('/users/:id', (c) => c.text(`id is ${c.req.param('id')}`))
const res = await app.request('http://localhost/users/hono%2Fposts') // %2F is '/'
expect(res.status).toBe(200)
expect(await res.text()).toBe('id is hono/posts')
})
it('should decode alphabets', async () => {
app.get('/users/static', (c) => c.text('static'))
const res = await app.request('http://localhost/users/%73tatic') // %73 is 's'
expect(res.status).toBe(200)
expect(await res.text()).toBe('static')
})
it('should decode alphabets with invalid UTF-8 sequence', async () => {
app.get('/static/:path', (c) => {
try {
return c.text(`by c.req.param: ${c.req.param('path')}`) // this should throw an error
} catch (e) {
return c.text(`by c.req.url: ${c.req.url.replace(/.*\//, '')}`)
}
})
const res = await app.request('http://localhost/%73tatic/%A4%A2') // %73 is 's', %A4%A2 is invalid UTF-8 sequence
expect(res.status).toBe(200)
expect(await res.text()).toBe('by c.req.url: %A4%A2')
})
it('should decode alphabets with invalid percent encoding', async () => {
app.get('/static/:path', (c) => {
try {
return c.text(`by c.req.param: ${c.req.param('path')}`) // this should throw an error
} catch (e) {
return c.text(`by c.req.url: ${c.req.url.replace(/.*\//, '')}`)
}
})
const res = await app.request('http://localhost/%73tatic/%a') // %73 is 's', %a is invalid percent encoding
expect(res.status).toBe(200)
expect(await res.text()).toBe('by c.req.url: %a')
})
it('should be able to catch URIError', async () => {
app.onError((err, c) => {
if (err instanceof URIError) {
return c.text(err.message, 400)
}
throw err
})
app.get('/static/:path', (c) => {
return c.text(`by c.req.param: ${c.req.param('path')}`) // this should throw an error
})
const res = await app.request('http://localhost/%73tatic/%a') // %73 is 's', %a is invalid percent encoding
expect(res.status).toBe(400)
expect(await res.text()).toBe('URI malformed')
})
it('should not double decode', async () => {
app.get('/users/:id', (c) => c.text(`posts of ${c.req.param('id')}`))
const res = await app.request('http://localhost/users/%2525') // %25 is '%'
expect(res.status).toBe(200)
expect(await res.text()).toBe('posts of %25')
})
})
})
describe('param and query', () => {

View File

@ -69,11 +69,48 @@ export const getPattern = (label: string): Pattern | null => {
return null
}
/**
* Try to apply decodeURI() to given string.
* If it fails, skip invalid percent encoding or invalid UTF-8 sequences, and apply decodeURI() to the rest as much as possible.
* @param str The string to decode.
* @returns The decoded string that sometimes contains undecodable percent encoding.
* @example
* tryDecodeURI('Hello%20World') // 'Hello World'
* tryDecodeURI('Hello%20World/%A4%A2') // 'Hello World/%A4%A2'
*/
const tryDecodeURI = (str: string): string => {
try {
return decodeURI(str)
} catch {
return str.replace(/(?:%[0-9A-Fa-f]{2})+/g, (match) => {
try {
return decodeURI(match)
} catch {
return match
}
})
}
}
export const getPath = (request: Request): string => {
// Optimized: indexOf() + slice() is faster than RegExp
const url = request.url
const queryIndex = url.indexOf('?', 8)
return url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
const start = url.indexOf('/', 8)
let i = start
for (; i < url.length; i++) {
const charCode = url.charCodeAt(i)
if (charCode === 37) {
// '%'
// If the path contains percent encoding, use `indexOf()` to find '?' and return the result immediately.
// Although this is a performance disadvantage, it is acceptable since we prefer cases that do not include percent encoding.
const queryIndex = url.indexOf('?', i)
const path = url.slice(start, queryIndex === -1 ? undefined : queryIndex)
return tryDecodeURI(path.includes('%25') ? path.replace(/%25/g, '%2525') : path)
} else if (charCode === 63) {
// '?'
break
}
}
return url.slice(start, i)
}
export const getQueryStrings = (url: string): string => {