0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-24 00:47:50 +01:00

fix(err): include raw js frame on frame content in pg (#26326)

This commit is contained in:
Oliver Browne 2024-11-21 12:54:41 +02:00 committed by GitHub
parent b8440d2754
commit 6354e8718f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 4062 additions and 30 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,28 @@
[
{
"uuid": "01932ad3-bd2a-796c-9f4c-f65ab1ff27ac",
"event": "$exception",
"properties": "{\"$exception_list\":[{\"type\":\"SyntaxError\",\"value\":\"Unexpected non-whitespace character after JSON at position 3708 (line 2 column 1)\",\"stacktrace\":{\"frames\":[{\"platform\":\"javascript\",\"filename\":\"https://unpkg.com/eventsource@2.0.2/example/eventsource-polyfill.js\",\"function\":\"drainQueue\",\"in_app\":true,\"lineno\":219,\"colno\":42},{\"platform\":\"javascript\",\"filename\":\"https://unpkg.com/eventsource@2.0.2/example/eventsource-polyfill.js\",\"function\":\"Item.run\",\"in_app\":true,\"lineno\":249,\"colno\":14},{\"platform\":\"javascript\",\"filename\":\"https://unpkg.com/eventsource@2.0.2/example/eventsource-polyfill.js\",\"function\":\"afterTickTwo\",\"in_app\":true,\"lineno\":2392,\"colno\":10},{\"platform\":\"javascript\",\"filename\":\"https://unpkg.com/eventsource@2.0.2/example/eventsource-polyfill.js\",\"function\":\"resume_\",\"in_app\":true,\"lineno\":4950,\"colno\":3},{\"platform\":\"javascript\",\"filename\":\"https://unpkg.com/eventsource@2.0.2/example/eventsource-polyfill.js\",\"function\":\"flow\",\"in_app\":true,\"lineno\":4967,\"colno\":34},{\"platform\":\"javascript\",\"filename\":\"https://unpkg.com/eventsource@2.0.2/example/eventsource-polyfill.js\",\"function\":\"Readable.read\",\"in_app\":true,\"lineno\":4623,\"colno\":26},{\"platform\":\"javascript\",\"filename\":\"https://unpkg.com/eventsource@2.0.2/example/eventsource-polyfill.js\",\"function\":\"exports.IncomingMessage.emit\",\"in_app\":true,\"lineno\":3377,\"colno\":5},{\"platform\":\"javascript\",\"filename\":\"https://unpkg.com/eventsource@2.0.2/example/eventsource-polyfill.js\",\"function\":\"exports.IncomingMessage.<anonymous>\",\"in_app\":true,\"lineno\":6756,\"colno\":11},{\"platform\":\"javascript\",\"filename\":\"https://unpkg.com/eventsource@2.0.2/example/eventsource-polyfill.js\",\"function\":\"parseEventStreamLine\",\"in_app\":true,\"lineno\":6799,\"colno\":9},{\"platform\":\"javascript\",\"filename\":\"https://unpkg.com/eventsource@2.0.2/example/eventsource-polyfill.js\",\"function\":\"_emit\",\"in_app\":true,\"lineno\":6784,\"colno\":17},{\"platform\":\"javascript\",\"filename\":\"https://unpkg.com/eventsource@2.0.2/example/eventsource-polyfill.js\",\"function\":\"EventSource.emit\",\"in_app\":true,\"lineno\":3377,\"colno\":5},{\"platform\":\"javascript\",\"filename\":\"https://app-static-prod.posthog.com/static/chunk-B4MXHXII.js\",\"function\":\"a.onmessage\",\"in_app\":true,\"lineno\":290,\"colno\":97793},{\"platform\":\"javascript\",\"filename\":\"<anonymous>\",\"function\":\"JSON.parse\",\"in_app\":true}],\"type\":\"raw\"},\"mechanism\":{\"handled\":true,\"synthetic\":false}}],\"$exception_level\":\"error\"}",
"timestamp": "2024-11-14 13:19:00.117",
"team_id": 2,
"project_id": 2,
"distinct_id": "cu5skLyhm2En4xb5ERxtMkmMtC99pdbZDcqO3VDaDqt",
"elements_chain": "",
"created_at": "2024-11-14 13:20:30.828",
"person_created_at": "2024-07-02 13:11:05",
"person_mode": "full"
},
{
"uuid": "01933c1f-f831-7f29-8b18-2bee05ce0125",
"event": "$exception",
"properties": "{\"$exception_list\":[{\"type\":\"Error\",\"value\":\"Unexpected usage\\n\\nError: Unexpected usage\\n at n.loadForeignModule (https://app-static-prod.posthog.com/static/chunk-PGUQKT6S.js:64:15003)\\n at https://app-static-prod.posthog.com/static/chunk-PGUQKT6S.js:64:25112\",\"stacktrace\":{\"frames\":[{\"filename\":\"https://app-static-prod.posthog.com/static/chunk-C6S33U6V.js\",\"function\":\"r\",\"in_app\":true,\"lineno\":19,\"colno\":2044},{\"filename\":\"https://app-static-prod.posthog.com/static/chunk-PGUQKT6S.js\",\"function\":\"?\",\"in_app\":true,\"lineno\":3,\"colno\":12},{\"filename\":\"https://app-static-prod.posthog.com/static/chunk-PGUQKT6S.js\",\"function\":\"?\",\"in_app\":true,\"lineno\":64,\"colno\":25112},{\"filename\":\"https://app-static-prod.posthog.com/static/chunk-PGUQKT6S.js\",\"function\":\"n.loadForeignModule\",\"in_app\":true,\"lineno\":64,\"colno\":15003}],\"type\":\"raw\"},\"mechanism\":{\"type\":\"generic\",\"handled\":true}}],\"$sentry_event_id\":\"c2954a0d25c643e4ac6b9aef7c3b39e4\",\"$sentry_exception\":{},\"$sentry_exception_message\":\"Unexpected usage\\n\\nError: Unexpected usage\\n at n.loadForeignModule (https://app-static-prod.posthog.com/static/chunk-PGUQKT6S.js:64:15003)\\n at https://app-static-prod.posthog.com/static/chunk-PGUQKT6S.js:64:25112\"}",
"timestamp": "2024-11-17 21:55:48.475",
"team_id": 2,
"project_id": 2,
"distinct_id": "KCVn0RwlQkwrWhSWU5j2CzvN9QTbwsZPdqz6cfSPcXH",
"elements_chain": "",
"created_at": "2024-11-17 21:55:51.928",
"person_created_at": "2023-03-21 17:54:03",
"person_mode": "full"
}
]

View File

@ -0,0 +1,86 @@
use std::sync::Arc;
use cymbal::{
config::Config,
langs::js::RawJSFrame,
symbol_store::{
caching::{Caching, SymbolSetCache},
sourcemap::SourcemapProvider,
Catalog,
},
};
use serde_json::Value;
use tokio::sync::Mutex;
/**
Input data gathered by running the following, then converting to json:
SELECT
symbol_set.ref as filename,
contents::json->>'mangled_name' as "function",
(contents::json->>'in_app')::boolean as in_app,
CASE
WHEN contents::json->>'line' IS NOT NULL
THEN (contents::json->>'line')::int
END as lineno,
CASE
WHEN contents::json->>'column' IS NOT NULL
THEN (contents::json->>'column')::int
END as colno
FROM posthog_errortrackingstackframe frame
LEFT JOIN posthog_errortrackingsymbolset symbol_set
ON frame.symbol_set_id = symbol_set.id
WHERE (contents::json->>'resolved_name') is null
AND contents::json->>'lang' = 'javascript'
AND symbol_set.storage_ptr IS NOT NULL;
This doesn't actually work - we don't have the original line and column number, and
so can't repeat the original resolution. I couldn't find a way to reverse that mapping
with sourcemaps, so instead I'm going to temporarily add the raw frame to the resolve
Frame.
*/
const NAMELESS_FRAMES_IN_RAW_FMT: &str = include_str!("./nameless_frames_in_raw_format.json");
#[tokio::main]
async fn main() {
let config = Config::init_with_defaults().unwrap();
let provider = SourcemapProvider::new(&config);
let cache = Arc::new(Mutex::new(SymbolSetCache::new(1_000_000_000)));
let provider = Caching::new(provider, cache);
let catalog = Catalog::new(provider);
let frames: Vec<Value> = serde_json::from_str(NAMELESS_FRAMES_IN_RAW_FMT).unwrap();
// Deal with metabase giving me string-only values
let frames: Vec<RawJSFrame> = frames
.into_iter()
.map(|f| {
let mut f = f;
let in_app = f["in_app"].as_str().unwrap() == "true";
f["in_app"] = Value::Bool(in_app);
let lineno: u32 = f["lineno"]
.as_str()
.unwrap()
.replace(",", "")
.parse()
.unwrap();
let colno: u32 = f["colno"]
.as_str()
.unwrap()
.replace(",", "")
.parse()
.unwrap();
f["lineno"] = Value::Number(lineno.into());
f["colno"] = Value::Number(colno.into());
serde_json::from_value(f).unwrap()
})
.collect();
for frame in frames {
let res = frame.resolve(0, &catalog).await.unwrap();
if res.resolved_name.is_none() {
panic!("Frame name not resolved: {:?}", frame);
}
}
}

View File

@ -44,6 +44,7 @@ mod test {
resolved: true,
resolve_failure: None,
lang: "javascript".to_string(),
junk_drawer: None,
context: None,
},
Frame {
@ -57,6 +58,7 @@ mod test {
resolved: true,
resolve_failure: None,
lang: "javascript".to_string(),
junk_drawer: None,
context: None,
},
];
@ -72,6 +74,7 @@ mod test {
resolved: false,
resolve_failure: None,
lang: "javascript".to_string(),
junk_drawer: None,
context: None,
};
@ -116,6 +119,7 @@ mod test {
resolved: false,
resolve_failure: None,
lang: "javascript".to_string(),
junk_drawer: None,
context: None,
},
Frame {
@ -129,6 +133,7 @@ mod test {
resolved: false,
resolve_failure: None,
lang: "javascript".to_string(),
junk_drawer: None,
context: None,
},
Frame {
@ -142,6 +147,7 @@ mod test {
resolved: false,
resolve_failure: None,
lang: "javascript".to_string(),
junk_drawer: None,
context: None,
},
];
@ -180,6 +186,7 @@ mod test {
resolved: false,
resolve_failure: None,
lang: "javascript".to_string(),
junk_drawer: None,
context: None,
}];
@ -194,6 +201,7 @@ mod test {
resolved: false,
resolve_failure: None,
lang: "javascript".to_string(),
junk_drawer: None,
context: None,
};

View File

@ -1,4 +1,7 @@
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use sha2::{Digest, Sha512};
use crate::{
@ -54,6 +57,7 @@ impl RawFrame {
// We emit a single, unified representation of a frame, which is what we pass on to users.
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub struct Frame {
// Properties used in processing
pub raw_id: String, // The raw frame id this was resolved from
pub mangled_name: String, // Mangled name of the function
pub line: Option<u32>, // Line the function is define on, if known
@ -64,6 +68,11 @@ pub struct Frame {
pub lang: String, // The language of the frame. Always known (I guess?)
pub resolved: bool, // Did we manage to resolve the frame?
pub resolve_failure: Option<String>, // If we failed to resolve the frame, why?
// Random extra/internal data we want to tag onto frames, e.g. the raw input. For debugging
// purposes, all production code should assume this is None
#[serde(skip_serializing_if = "Option::is_none")]
pub junk_drawer: Option<HashMap<String, Value>>,
// The lines of code surrounding the frame ptr, if known. We skip serialising this because
// it should never go in clickhouse / be queried over, but we do store it in PG for
// use in the frontend
@ -110,6 +119,19 @@ impl Frame {
h.update(self.lang.as_bytes());
}
pub fn add_junk<T>(&mut self, key: impl ToString, val: T) -> Result<(), serde_json::Error>
where
T: Serialize,
{
let key = key.to_string();
let val = serde_json::to_value(val)?;
self.junk_drawer
.get_or_insert_with(HashMap::new)
.insert(key, val);
Ok(())
}
}
impl ContextLine {
@ -120,3 +142,69 @@ impl ContextLine {
}
}
}
impl std::fmt::Display for Frame {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
writeln!(f, "Frame {}:", self.raw_id)?;
// Function name and location
write!(
f,
" {} (from {}) ",
self.resolved_name.as_deref().unwrap_or("unknown"),
self.mangled_name
)?;
if let Some(source) = &self.source {
write!(f, "in {}", source)?;
match (self.line, self.column) {
(Some(line), Some(column)) => writeln!(f, ":{line}:{column}"),
(Some(line), None) => writeln!(f, ":{line}"),
(None, Some(column)) => writeln!(f, ":?:{column}"),
(None, None) => writeln!(f),
}?;
} else {
writeln!(f, "in unknown location")?;
}
// Metadata
writeln!(f, " in_app: {}", self.in_app)?;
writeln!(f, " lang: {}", self.lang)?;
writeln!(f, " resolved: {}", self.resolved)?;
writeln!(
f,
" resolve_failure: {}",
self.resolve_failure.as_deref().unwrap_or("no failure")
)?;
// Context
writeln!(f, " context:")?;
if let Some(context) = &self.context {
for line in &context.before {
writeln!(f, " {}: {}", line.number, line.line)?;
}
writeln!(f, " > {}: {}", context.line.number, context.line.line)?;
for line in &context.after {
writeln!(f, " {}: {}", line.number, line.line)?;
}
} else {
writeln!(f, " no context")?;
}
// Junk drawer
writeln!(f, " junk drawer:")?;
if let Some(junk) = &self.junk_drawer {
if junk.is_empty() {
writeln!(f, " no junk")?;
} else {
for (key, value) in junk {
writeln!(f, " {}: {}", key, value)?;
}
}
} else {
writeln!(f, " no junk")?;
}
Ok(())
}
}

View File

@ -15,10 +15,8 @@ use crate::{
// TODO - how do we know if this frame is minified? If it isn't, we can skip a lot of work, but I think we have to guess? Based on whether we can get a sourcemap for it?
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct RawJSFrame {
#[serde(rename = "lineno")]
pub line: u32,
#[serde(rename = "colno")]
pub column: u32,
#[serde(flatten)]
pub location: Option<FrameLocation>, // Sometimes we get frames with no location information. We treat these as already resolved, or unminified
#[serde(rename = "filename")]
pub source_url: Option<String>, // The url the the script the frame was in was fetched from
pub in_app: bool,
@ -26,6 +24,14 @@ pub struct RawJSFrame {
pub fn_name: String,
}
#[derive(Debug, Clone, Deserialize, Serialize, Eq, PartialEq)]
pub struct FrameLocation {
#[serde(rename = "lineno")]
pub line: u32,
#[serde(rename = "colno")]
pub column: u32,
}
impl RawJSFrame {
pub async fn resolve<C>(&self, team_id: i32, catalog: &C) -> Result<Frame, UnhandledError>
where
@ -46,11 +52,18 @@ impl RawJSFrame {
{
let url = self.source_url()?;
let Some(location) = &self.location else {
return Ok(Frame::from(self)); // We're probably an unminified frame
};
let sourcemap = catalog.lookup(team_id, url).await?;
let Some(token) = sourcemap.lookup_token(self.line, self.column) else {
return Err(
JsResolveErr::TokenNotFound(self.fn_name.clone(), self.line, self.column).into(),
);
let Some(token) = sourcemap.lookup_token(location.line, location.column) else {
return Err(JsResolveErr::TokenNotFound(
self.fn_name.clone(),
location.line,
location.column,
)
.into());
};
Ok(Frame::from((self, token)))
@ -60,7 +73,10 @@ impl RawJSFrame {
pub fn handle_resolution_error(&self, e: JsResolveErr) -> Frame {
// If we failed to resolve the frame, we mark it as "not resolved" and add the error message,
// then return a Frame anyway, because frame handling is a best-effort thing.
(self, e).into()
let Some(location) = &self.location else {
return self.into();
};
(self, e, location).into()
}
pub fn source_url(&self) -> Result<Url, JsResolveErr> {
@ -106,8 +122,10 @@ impl RawJSFrame {
pub fn frame_id(&self) -> String {
let mut hasher = Sha512::new();
hasher.update(self.fn_name.as_bytes());
hasher.update(self.line.to_string().as_bytes());
hasher.update(self.column.to_string().as_bytes());
if let Some(location) = &self.location {
hasher.update(location.line.to_string().as_bytes());
hasher.update(location.column.to_string().as_bytes());
}
hasher.update(
self.source_url
.as_ref()
@ -123,7 +141,7 @@ impl From<(&RawJSFrame, Token<'_>)> for Frame {
let (raw_frame, token) = src;
metrics::counter!(FRAME_RESOLVED, "lang" => "javascript").increment(1);
Self {
let mut res = Self {
raw_id: String::new(), // We use placeholders here, as they're overriden at the RawFrame level
mangled_name: raw_frame.fn_name.clone(),
line: Some(token.get_src_line()),
@ -134,30 +152,90 @@ impl From<(&RawJSFrame, Token<'_>)> for Frame {
lang: "javascript".to_string(),
resolved: true,
resolve_failure: None,
junk_drawer: None,
context: get_context(&token),
}
};
add_raw_to_junk(&mut res, raw_frame);
res
}
}
impl From<(&RawJSFrame, JsResolveErr)> for Frame {
fn from((raw_frame, err): (&RawJSFrame, JsResolveErr)) -> Self {
// If we failed to resolve the frame, but it's a frame that has location information,
// mark it as a failed resolve and emit an "unresolved" frame
impl From<(&RawJSFrame, JsResolveErr, &FrameLocation)> for Frame {
fn from((raw_frame, err, location): (&RawJSFrame, JsResolveErr, &FrameLocation)) -> Self {
metrics::counter!(FRAME_NOT_RESOLVED, "lang" => "javascript").increment(1);
Self {
// TODO - extremely rough
let was_minified = match err {
JsResolveErr::NoSourceUrl => false, // This frame's `source` was None
JsResolveErr::NoSourcemap(_) => false, // A total guess
_ => true,
};
let resolved_name = if was_minified {
None
} else {
Some(raw_frame.fn_name.clone())
};
let mut res = Self {
raw_id: String::new(),
mangled_name: raw_frame.fn_name.clone(),
line: Some(raw_frame.line),
column: Some(raw_frame.column),
line: Some(location.line),
column: Some(location.column),
source: raw_frame.source_url().map(|u| u.path().to_string()).ok(),
in_app: raw_frame.in_app,
resolved_name: None,
resolved_name,
lang: "javascript".to_string(),
resolved: false,
resolved: !was_minified,
// Regardless of whather we think this was a minified frame or not, we still put
// the error message in resolve_failure, so if a user comes along and want to know
// why we thought a frame wasn't minified, they can see the error message
resolve_failure: Some(err.to_string()),
junk_drawer: None,
context: None,
}
};
add_raw_to_junk(&mut res, raw_frame);
res
}
}
// Finally, if we have a frame that has NO location information, we treat it as not minified, since it's
// probably a native function or something else weird
impl From<&RawJSFrame> for Frame {
fn from(raw_frame: &RawJSFrame) -> Self {
metrics::counter!(FRAME_NOT_RESOLVED, "lang" => "javascript").increment(1);
let mut res = Self {
raw_id: String::new(),
mangled_name: raw_frame.fn_name.clone(),
line: None,
column: None,
source: raw_frame.source_url().map(|u| u.path().to_string()).ok(),
in_app: raw_frame.in_app,
resolved_name: Some(raw_frame.fn_name.clone()),
lang: "javascript".to_string(),
resolved: true, // Without location information, we're assuming this is not minified
resolve_failure: None,
junk_drawer: None,
context: None,
};
add_raw_to_junk(&mut res, raw_frame);
res
}
}
fn add_raw_to_junk(frame: &mut Frame, raw: &RawJSFrame) {
// UNWRAP: raw JS frames are definitely representable as json
frame.add_junk("raw_frame", raw.clone()).unwrap();
}
fn get_context(token: &Token) -> Option<Context> {
let sv = token.get_source_view()?;
@ -196,8 +274,7 @@ mod test {
#[test]
fn source_ref_generation() {
let frame = super::RawJSFrame {
line: 1,
column: 2,
location: None,
source_url: Some("http://example.com/path/to/file.js:1:2".to_string()),
in_app: true,
fn_name: "main".to_string(),
@ -209,8 +286,7 @@ mod test {
);
let frame = super::RawJSFrame {
line: 1,
column: 2,
location: None,
source_url: Some("http://example.com/path/to/file.js".to_string()),
in_app: true,
fn_name: "main".to_string(),

View File

@ -53,7 +53,11 @@ pub async fn handle_event(
props.exception_list = results;
let fingerprinted = props.to_fingerprinted(fingerprint.clone());
let output = resolve_issue(&context.pool, event.team_id, fingerprinted).await?;
let mut output = resolve_issue(&context.pool, event.team_id, fingerprinted).await?;
// TODO - I'm not sure we actually want to do this? Maybe junk drawer stuff should end up in clickhouse, and
// be directly queryable by users? Stripping it for now, so it only ends up in postgres
output.strip_frame_junk();
event.properties = Some(serde_json::to_string(&output).unwrap());

View File

@ -136,6 +136,30 @@ impl FingerprintedErrProps {
}
}
impl OutputErrProps {
pub fn add_error_message(&mut self, msg: impl ToString) {
let mut errors = match self.other.remove("$cymbal_errors") {
Some(serde_json::Value::Array(errors)) => errors,
_ => Vec::new(),
};
errors.push(serde_json::Value::String(msg.to_string()));
self.other.insert(
"$cymbal_errors".to_string(),
serde_json::Value::Array(errors),
);
}
pub fn strip_frame_junk(&mut self) {
self.exception_list.iter_mut().for_each(|exception| {
if let Some(Stacktrace::Resolved { frames }) = &mut exception.stack {
frames.iter_mut().for_each(|frame| frame.junk_drawer = None);
}
});
}
}
#[cfg(test)]
mod test {
use common_types::ClickHouseEvent;
@ -181,8 +205,8 @@ mod test {
);
assert_eq!(frame.fn_name, "?".to_string());
assert!(frame.in_app);
assert_eq!(frame.line, 64);
assert_eq!(frame.column, 25112);
assert_eq!(frame.location.as_ref().unwrap().line, 64);
assert_eq!(frame.location.as_ref().unwrap().column, 25112);
let RawFrame::JavaScript(frame) = &frames[1];
assert_eq!(
@ -191,8 +215,8 @@ mod test {
);
assert_eq!(frame.fn_name, "n.loadForeignModule".to_string());
assert!(frame.in_app);
assert_eq!(frame.line, 64);
assert_eq!(frame.column, 15003);
assert_eq!(frame.location.as_ref().unwrap().line, 64);
assert_eq!(frame.location.as_ref().unwrap().column, 15003);
}
#[test]