diff --git a/benchmarks/utils/src/get-path.ts b/benchmarks/utils/src/get-path.ts index f405338a..48eebe14 100644 --- a/benchmarks/utils/src/get-path.ts +++ b/benchmarks/utils/src/get-path.ts @@ -5,15 +5,61 @@ bench('noop', () => {}) const request = new Request('http://localhost/about/me') group('getPath', () => { + bench('slice + indexOf : w/o decodeURI', () => { + const url = request.url + const queryIndex = url.indexOf('?', 8) + return url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex) + }) + + bench('regexp : w/o decodeURI', () => { + const match = request.url.match(/^https?:\/\/[^/]+(\/[^?]*)/) + return match ? match[1] : '' + }) + bench('slice + indexOf', () => { const url = request.url const queryIndex = url.indexOf('?', 8) - url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex) + const path = url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex) + return path.includes('%') ? decodeURIComponent(path) : path }) - bench('regexp', () => { - const match = request.url.match(/^https?:\/\/[^/]+(\/[^?]*)/) - match ? match[1] : '' + bench('slice + for-loop + flag', () => { + const url = request.url + let start = url.indexOf('/', 8) + let i = start + let hasPercentEncoding = false + for (; i < url.length; i++) { + const charCode = url.charCodeAt(i) + if (charCode === 37) { + // '%' + hasPercentEncoding = true + } else if (charCode === 63) { + // '?' + break + } + } + return hasPercentEncoding ? decodeURIComponent(url.slice(start, i)) : url.slice(start, i) + }) + + bench('slice + for-loop + immediate return', () => { + const url = request.url + const start = url.indexOf('/', 8) + let i = start + for (; i < url.length; i++) { + const charCode = url.charCodeAt(i) + if (charCode === 37) { + // '%' + // If the path contains percent encoding, use `indexOf()` to find '?' and return the result immediately. + // Although this is a performance disadvantage, it is acceptable since we prefer cases that do not include percent encoding. + const queryIndex = url.indexOf('?', i) + const path = url.slice(start, queryIndex === -1 ? undefined : queryIndex) + return decodeURI(path.includes('%25') ? path.replace(/%25/g, '%2525') : path) + } else if (charCode === 63) { + // '?' + break + } + } + return url.slice(start, i) }) }) diff --git a/deno_dist/utils/url.ts b/deno_dist/utils/url.ts index b37a5539..58349064 100644 --- a/deno_dist/utils/url.ts +++ b/deno_dist/utils/url.ts @@ -69,11 +69,48 @@ export const getPattern = (label: string): Pattern | null => { return null } +/** + * Try to apply decodeURI() to given string. + * If it fails, skip invalid percent encoding or invalid UTF-8 sequences, and apply decodeURI() to the rest as much as possible. + * @param str The string to decode. + * @returns The decoded string that sometimes contains undecodable percent encoding. + * @example + * tryDecodeURI('Hello%20World') // 'Hello World' + * tryDecodeURI('Hello%20World/%A4%A2') // 'Hello World/%A4%A2' + */ +const tryDecodeURI = (str: string): string => { + try { + return decodeURI(str) + } catch { + return str.replace(/(?:%[0-9A-Fa-f]{2})+/g, (match) => { + try { + return decodeURI(match) + } catch { + return match + } + }) + } +} + export const getPath = (request: Request): string => { - // Optimized: indexOf() + slice() is faster than RegExp const url = request.url - const queryIndex = url.indexOf('?', 8) - return url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex) + const start = url.indexOf('/', 8) + let i = start + for (; i < url.length; i++) { + const charCode = url.charCodeAt(i) + if (charCode === 37) { + // '%' + // If the path contains percent encoding, use `indexOf()` to find '?' and return the result immediately. + // Although this is a performance disadvantage, it is acceptable since we prefer cases that do not include percent encoding. + const queryIndex = url.indexOf('?', i) + const path = url.slice(start, queryIndex === -1 ? undefined : queryIndex) + return tryDecodeURI(path.includes('%25') ? path.replace(/%25/g, '%2525') : path) + } else if (charCode === 63) { + // '?' + break + } + } + return url.slice(start, i) } export const getQueryStrings = (url: string): string => { diff --git a/src/hono.test.ts b/src/hono.test.ts index 3a79dac8..89607d2a 100644 --- a/src/hono.test.ts +++ b/src/hono.test.ts @@ -732,6 +732,89 @@ describe('Routing', () => { expect(res.status).toBe(404) }) }) + + describe('Encoded path', () => { + let app: Hono + beforeEach(() => { + app = new Hono() + }) + + it('should decode path parameter', async () => { + app.get('/users/:id', (c) => c.text(`id is ${c.req.param('id')}`)) + + const res = await app.request('http://localhost/users/%C3%A7awa%20y%C3%AE%3F') + expect(res.status).toBe(200) + expect(await res.text()).toBe('id is çawa yî?') + }) + + it('should decode "/"', async () => { + app.get('/users/:id', (c) => c.text(`id is ${c.req.param('id')}`)) + + const res = await app.request('http://localhost/users/hono%2Fposts') // %2F is '/' + expect(res.status).toBe(200) + expect(await res.text()).toBe('id is hono/posts') + }) + + it('should decode alphabets', async () => { + app.get('/users/static', (c) => c.text('static')) + + const res = await app.request('http://localhost/users/%73tatic') // %73 is 's' + expect(res.status).toBe(200) + expect(await res.text()).toBe('static') + }) + + it('should decode alphabets with invalid UTF-8 sequence', async () => { + app.get('/static/:path', (c) => { + try { + return c.text(`by c.req.param: ${c.req.param('path')}`) // this should throw an error + } catch (e) { + return c.text(`by c.req.url: ${c.req.url.replace(/.*\//, '')}`) + } + }) + + const res = await app.request('http://localhost/%73tatic/%A4%A2') // %73 is 's', %A4%A2 is invalid UTF-8 sequence + expect(res.status).toBe(200) + expect(await res.text()).toBe('by c.req.url: %A4%A2') + }) + + it('should decode alphabets with invalid percent encoding', async () => { + app.get('/static/:path', (c) => { + try { + return c.text(`by c.req.param: ${c.req.param('path')}`) // this should throw an error + } catch (e) { + return c.text(`by c.req.url: ${c.req.url.replace(/.*\//, '')}`) + } + }) + + const res = await app.request('http://localhost/%73tatic/%a') // %73 is 's', %a is invalid percent encoding + expect(res.status).toBe(200) + expect(await res.text()).toBe('by c.req.url: %a') + }) + + it('should be able to catch URIError', async () => { + app.onError((err, c) => { + if (err instanceof URIError) { + return c.text(err.message, 400) + } + throw err + }) + app.get('/static/:path', (c) => { + return c.text(`by c.req.param: ${c.req.param('path')}`) // this should throw an error + }) + + const res = await app.request('http://localhost/%73tatic/%a') // %73 is 's', %a is invalid percent encoding + expect(res.status).toBe(400) + expect(await res.text()).toBe('URI malformed') + }) + + it('should not double decode', async () => { + app.get('/users/:id', (c) => c.text(`posts of ${c.req.param('id')}`)) + + const res = await app.request('http://localhost/users/%2525') // %25 is '%' + expect(res.status).toBe(200) + expect(await res.text()).toBe('posts of %25') + }) + }) }) describe('param and query', () => { diff --git a/src/utils/url.ts b/src/utils/url.ts index b37a5539..58349064 100644 --- a/src/utils/url.ts +++ b/src/utils/url.ts @@ -69,11 +69,48 @@ export const getPattern = (label: string): Pattern | null => { return null } +/** + * Try to apply decodeURI() to given string. + * If it fails, skip invalid percent encoding or invalid UTF-8 sequences, and apply decodeURI() to the rest as much as possible. + * @param str The string to decode. + * @returns The decoded string that sometimes contains undecodable percent encoding. + * @example + * tryDecodeURI('Hello%20World') // 'Hello World' + * tryDecodeURI('Hello%20World/%A4%A2') // 'Hello World/%A4%A2' + */ +const tryDecodeURI = (str: string): string => { + try { + return decodeURI(str) + } catch { + return str.replace(/(?:%[0-9A-Fa-f]{2})+/g, (match) => { + try { + return decodeURI(match) + } catch { + return match + } + }) + } +} + export const getPath = (request: Request): string => { - // Optimized: indexOf() + slice() is faster than RegExp const url = request.url - const queryIndex = url.indexOf('?', 8) - return url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex) + const start = url.indexOf('/', 8) + let i = start + for (; i < url.length; i++) { + const charCode = url.charCodeAt(i) + if (charCode === 37) { + // '%' + // If the path contains percent encoding, use `indexOf()` to find '?' and return the result immediately. + // Although this is a performance disadvantage, it is acceptable since we prefer cases that do not include percent encoding. + const queryIndex = url.indexOf('?', i) + const path = url.slice(start, queryIndex === -1 ? undefined : queryIndex) + return tryDecodeURI(path.includes('%25') ? path.replace(/%25/g, '%2525') : path) + } else if (charCode === 63) { + // '?' + break + } + } + return url.slice(start, i) } export const getQueryStrings = (url: string): string => {