0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-28 18:26:15 +01:00

fix(historic-exports): $autocapture classnames parsing (#11626)

Re-ingesting historically exported autocaptured events currently fails: https://sentry.io/organizations/posthog2/issues/3552913543/?query=is%3Aunresolved+level%3Aerror

This happens because attr__class is sometimes present under attributes and sometimes not, so $elements.attr_class can be either a string or an array when it leaves PostHog in an export.

Rather than fixing this at the export, this change makes the parsing logic in ingestion more flexible.
This commit is contained in:
Karl-Aksel Puulmann 2022-09-02 16:28:15 +03:00 committed by GitHub
parent cd406a88ba
commit 02bae96be5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 70 additions and 3 deletions

View File

@ -102,15 +102,26 @@ export function chainToElements(chain: string, options: { throwOnError?: boolean
return elements
}
/**
 * Converts raw element records into `Element` objects.
 *
 * For each record the well-known keys (`$el_text`, `tag_name`, `attr__href`,
 * `attr__class`, `attr__id`, `nth_child`, `nth_of_type`) are mapped onto the
 * corresponding `Element` fields, and every key starting with `attr__` is
 * copied as-is into `attributes`.
 *
 * @param elements - Raw element records to convert.
 * @returns One `Element` per input record, in the same order.
 */
export function extractElements(elements: Array<Record<string, any>>): Element[] {
    return elements.map((el) => ({
        // Text is capped at 400 characters.
        text: el['$el_text']?.slice(0, 400),
        tag_name: el['tag_name'],
        // Hrefs are capped at 2048 characters.
        href: el['attr__href']?.slice(0, 2048),
        // `attr__class` may be a string or an array, so normalization is delegated
        // to the helper instead of calling `.split(' ')` on the value directly.
        attr_class: extractAttrClass(el),
        attr_id: el['attr__id'],
        nth_child: el['nth_child'],
        nth_of_type: el['nth_of_type'],
        // Keep every `attr__`-prefixed entry with its original value.
        attributes: Object.fromEntries(Object.entries(el).filter(([key]) => key.startsWith('attr__'))),
    }))
}
/**
 * Normalizes the `attr__class` value of a raw element record into a list of
 * class names.
 *
 * - An array is returned unchanged.
 * - A non-empty string is split on single spaces.
 * - Anything else (missing, empty string, or an unsupported type such as a
 *   number or plain object) yields `undefined` instead of throwing on `.split`.
 *
 * @param el - Raw element record that may carry an `attr__class` entry.
 * @returns The class names, or `undefined` when none can be extracted.
 */
function extractAttrClass(el: Record<string, any>): Element['attr_class'] {
    const rawClass: unknown = el['attr__class']
    if (Array.isArray(rawClass)) {
        return rawClass
    }
    // Only strings can be split; guarding here avoids a TypeError on other truthy values.
    if (typeof rawClass !== 'string' || rawClass === '') {
        return undefined
    }
    return rawClass.split(' ')
}

View File

@ -1,4 +1,4 @@
import { chainToElements, elementsToString } from '../../../src/utils/db/elements-chain'
import { chainToElements, elementsToString, extractElements } from '../../../src/utils/db/elements-chain'
describe('elementsToString and chainToElements', () => {
it('is reversible', () => {
@ -96,4 +96,60 @@ describe('elementsToString and chainToElements', () => {
})
)
})
// Round trip: an element with several class names is serialized to the expected
// chain string, and parsing that string yields an element containing the same fields.
it('handles multiple classNames', () => {
    const original = {
        attr_class: ['something', 'another'],
        attributes: {
            attr__class: 'something another',
        },
    }
    const expectedChain = '.another.something:attr__class="something another"nth-child="0"nth-of-type="0"'

    const serialized = elementsToString([original])
    expect(serialized).toEqual(expectedChain)

    const parsed = chainToElements(serialized)
    expect(parsed).toEqual([expect.objectContaining(original)])
})
})
// Tests for extractElements(): string-valued and array-valued `attr__class` inputs.
describe('extractElements()', () => {
    it('parses simple elements', () => {
        const rawAnchor = { tag_name: 'a', nth_child: 1, nth_of_type: 2, attr__class: 'btn btn-sm' }
        const rawDiv = { tag_name: 'div', nth_child: 1, nth_of_type: 2, $el_text: '💻' }

        // A space-separated class string is expected to come back as a list.
        const expectedAnchor = {
            text: undefined,
            tag_name: 'a',
            href: undefined,
            attr_class: ['btn', 'btn-sm'],
            attr_id: undefined,
            nth_child: 1,
            nth_of_type: 2,
            attributes: { attr__class: 'btn btn-sm' },
        }
        // Without any `attr__` keys, `attributes` is expected to be empty.
        const expectedDiv = {
            text: '💻',
            tag_name: 'div',
            href: undefined,
            attr_class: undefined,
            attr_id: undefined,
            nth_child: 1,
            nth_of_type: 2,
            attributes: {},
        }

        const extracted = extractElements([rawAnchor, rawDiv])

        expect(extracted).toEqual([expectedAnchor, expectedDiv])
    })

    it('handles arrays for attr__class', () => {
        const extracted = extractElements([{ attr__class: ['btn', 'btn-sm'] }])
        const expectedFields = expect.objectContaining({
            attr_class: ['btn', 'btn-sm'],
            attributes: { attr__class: ['btn', 'btn-sm'] },
        })

        expect(extracted[0]).toEqual(expectedFields)
    })
})