From 4045a1bac9cc889510c46c97aec666a64d6efed7 Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Thu, 7 Nov 2024 16:45:16 +0100 Subject: [PATCH] feat: the best ever use of ai (#26046) --- ee/surveys/summaries/summarize_surveys.py | 135 ++++++++++++++++++ frontend/src/lib/api.ts | 7 + frontend/src/lib/constants.tsx | 1 + frontend/src/scenes/surveys/surveyLogic.tsx | 5 + frontend/src/scenes/surveys/surveyViewViz.tsx | 119 +++++++++++++-- posthog/api/survey.py | 70 ++++++++- 6 files changed, 326 insertions(+), 11 deletions(-) create mode 100644 ee/surveys/summaries/summarize_surveys.py diff --git a/ee/surveys/summaries/summarize_surveys.py b/ee/surveys/summaries/summarize_surveys.py new file mode 100644 index 00000000000..1e4b088484f --- /dev/null +++ b/ee/surveys/summaries/summarize_surveys.py @@ -0,0 +1,135 @@ +import json + +import openai + +from datetime import datetime +from typing import Optional, cast + +from posthog.hogql import ast +from posthog.hogql.parser import parse_select +from posthog.hogql_queries.insights.paginators import HogQLHasMorePaginator +from posthog.schema import HogQLQueryResponse +from posthog.utils import get_instance_region + +from prometheus_client import Histogram + +from posthog.api.activity_log import ServerTimingsGathered +from posthog.models import Team, User + +import structlog + +logger = structlog.get_logger(__name__) + +TOKENS_IN_PROMPT_HISTOGRAM = Histogram( + "posthog_survey_summary_tokens_in_prompt_histogram", + "histogram of the number of tokens in the prompt used to generate a survey summary", + buckets=[ + 0, + 10, + 50, + 100, + 500, + 1000, + 2000, + 3000, + 4000, + 5000, + 6000, + 7000, + 8000, + 10000, + 20000, + 30000, + 40000, + 50000, + 100000, + 128000, + float("inf"), + ], +) + + +def prepare_data(query_response: HogQLQueryResponse) -> list[str]: + response_values = [] + properties_list: list[dict] = [json.loads(x[1]) for x in query_response.results] + for props in properties_list: + response_values.extend([value for key, value in props.items() if key.startswith("$survey_response") and value]) + return response_values + + +def summarize_survey_responses( + survey_id: str, question_index: Optional[int], survey_start: datetime, survey_end: datetime, team: Team, user: User +): + timer = ServerTimingsGathered() + + with timer("prepare_query"): + paginator = HogQLHasMorePaginator(limit=100, offset=0) + q = parse_select( + """ + SELECT distinct_id, properties + FROM events + WHERE event == 'survey sent' + AND properties.$survey_id = {survey_id} + -- e.g. 
`$survey_response` or `$survey_response_2` + AND trim(JSONExtractString(properties, {survey_response_property})) != '' + AND timestamp >= {start_date} + AND timestamp <= {end_date} + """, + { + "survey_id": ast.Constant(value=survey_id), + "survey_response_property": ast.Constant( + value=f"$survey_response_{question_index}" if question_index else "$survey_response" + ), + "start_date": ast.Constant(value=survey_start), + "end_date": ast.Constant(value=survey_end), + }, + ) + + with timer("run_query"): + query_response = paginator.execute_hogql_query( + team=team, + query_type="survey_response_list_query", + query=cast(ast.SelectQuery, q), + ) + + with timer("llm_api_prep"): + instance_region = get_instance_region() or "HOBBY" + prepared_data = prepare_data(query_response) + + with timer("openai_completion"): + result = openai.chat.completions.create( + model="gpt-4o-mini", # allows 128k tokens + temperature=0.7, + messages=[ + { + "role": "system", + "content": """ + You are a product manager's assistant. You summarise survey responses from users for the product manager. + You don't do any other tasks. + """, + }, + { + "role": "user", + "content": f"""the survey responses are {prepared_data}.""", + }, + { + "role": "user", + "content": """ + generate a one or two paragraph summary of the survey response. + only summarize, the goal is to identify real user pain points and needs +use bullet points to identify the themes, and highlights of quotes to bring them to life +we're trying to identify what to work on + use as concise and simple language as is possible. + generate no text other than the summary. + the aim is to let people see themes in the responses received. return the text in github flavoured markdown format""", + }, + ], + user=f"{instance_region}/{user.pk}", + ) + + usage = result.usage.prompt_tokens if result.usage else None + if usage: + TOKENS_IN_PROMPT_HISTOGRAM.observe(usage) + + content: str = result.choices[0].message.content or "" + return {"content": content, "timings": timer.get_all_timings()} diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts index 0d0ddd38d5e..d91ab7592de 100644 --- a/frontend/src/lib/api.ts +++ b/frontend/src/lib/api.ts @@ -2144,6 +2144,13 @@ const api = { async getResponsesCount(): Promise<{ [key: string]: number }> { return await new ApiRequest().surveysResponsesCount().get() }, + async summarize_responses(surveyId: Survey['id'], questionIndex: number | undefined): Promise { + let apiRequest = new ApiRequest().survey(surveyId).withAction('summarize_responses') + if (questionIndex !== undefined) { + apiRequest = apiRequest.withQueryString('questionIndex=' + questionIndex) + } + return await apiRequest.create() + }, }, dataWarehouseTables: { diff --git a/frontend/src/lib/constants.tsx b/frontend/src/lib/constants.tsx index 29873afc7d8..35ab2bcddc1 100644 --- a/frontend/src/lib/constants.tsx +++ b/frontend/src/lib/constants.tsx @@ -228,6 +228,7 @@ export const FEATURE_FLAGS = { DEAD_CLICKS_AUTOCAPTURE: 'dead-clicks-autocapture', // owner: @pauldambra #team-replay ONBOARDING_PRODUCT_MULTISELECT: 'onboarding-product-multiselect', // owner: @danielbachhuber #team-experiments EDIT_DWH_SOURCE_CONFIG: 'edit_dwh_source_config', // owner: @Gilbert09 #team-data-warehouse + AI_SURVEY_RESPONSE_SUMMARY: 'ai-survey-response-summary', // owner: @pauldambra } as const export type FeatureFlagKey = (typeof FEATURE_FLAGS)[keyof typeof FEATURE_FLAGS] diff --git a/frontend/src/scenes/surveys/surveyLogic.tsx b/frontend/src/scenes/surveys/surveyLogic.tsx index 
12b4b20e862..528aac6db6e 100644 --- a/frontend/src/scenes/surveys/surveyLogic.tsx +++ b/frontend/src/scenes/surveys/surveyLogic.tsx @@ -181,6 +181,11 @@ export const surveyLogic = kea([ setFlagPropertyErrors: (errors: any) => ({ errors }), }), loaders(({ props, actions, values }) => ({ + responseSummary: { + summarize: async ({ questionIndex }: { questionIndex?: number }) => { + return api.surveys.summarize_responses(props.id, questionIndex) + }, + }, survey: { loadSurvey: async () => { if (props.id && props.id !== 'new') { diff --git a/frontend/src/scenes/surveys/surveyViewViz.tsx b/frontend/src/scenes/surveys/surveyViewViz.tsx index 324c53958dc..a2ab8db7c32 100644 --- a/frontend/src/scenes/surveys/surveyViewViz.tsx +++ b/frontend/src/scenes/surveys/surveyViewViz.tsx @@ -1,10 +1,21 @@ -import { IconInfo } from '@posthog/icons' -import { LemonTable } from '@posthog/lemon-ui' +import { + IconInfo, + IconSparkles, + IconThumbsDown, + IconThumbsDownFilled, + IconThumbsUp, + IconThumbsUpFilled, +} from '@posthog/icons' +import { LemonButton, LemonTable, Spinner } from '@posthog/lemon-ui' import { BindLogic, useActions, useValues } from 'kea' +import { FlaggedFeature } from 'lib/components/FlaggedFeature' +import { FEATURE_FLAGS } from 'lib/constants' import { dayjs } from 'lib/dayjs' import { LemonDivider } from 'lib/lemon-ui/LemonDivider' +import { LemonMarkdown } from 'lib/lemon-ui/LemonMarkdown' import { Tooltip } from 'lib/lemon-ui/Tooltip' import { humanFriendlyNumber } from 'lib/utils' +import posthog from 'posthog-js' import { useEffect, useState } from 'react' import { insightLogic } from 'scenes/insights/insightLogic' import { LineGraph } from 'scenes/insights/views/LineGraph/LineGraph' @@ -577,15 +588,19 @@ export function OpenTextViz({ <> ) : ( <> - -
-                <div>
-                    <div>Open text</div>
-                    <div>random selection</div>
-                </div>
+                <div>
+                    <div>
+                        <div>Open text</div>
+                        <div>random selection</div>
+                    </div>
+                    <ResponseSummariesButton questionIndex={questionIndex} />
+                </div>
+                <div>{question.question}</div>
+                <ResponseSummariesDisplay />
{surveyOpenTextResults[questionIndex].events.map((event, i) => { const personProp = { @@ -617,3 +632,87 @@ export function OpenTextViz({
    )
}
+
+function ResponseSummariesButton({ questionIndex }: { questionIndex: number | undefined }): JSX.Element {
+    const { summarize } = useActions(surveyLogic)
+    const { responseSummary, responseSummaryLoading } = useValues(surveyLogic)
+
+    return (
+        <FlaggedFeature flag={FEATURE_FLAGS.AI_SURVEY_RESPONSE_SUMMARY}>
+            <LemonButton
+                onClick={() => summarize({ questionIndex })}
+                disabledReason={
+                    responseSummaryLoading ? 'Let me think...' : responseSummary ? 'already summarized' : undefined
+                }
+                icon={<IconSparkles />}
+            >
+                {responseSummaryLoading ? (
+                    <>
+                        Let me think...
+                        <Spinner />
+                    </>
+                ) : (
+                    <>Summarize responses</>
+                )}
+            </LemonButton>
+        </FlaggedFeature>
+    )
+}
+
+function ResponseSummariesDisplay(): JSX.Element {
+    const { survey, responseSummary } = useValues(surveyLogic)
+
+    return (
+        <div>
+            {responseSummary ? (
+                <>

+                    <h3>Responses summary</h3>
+                    <LemonMarkdown>{responseSummary.content}</LemonMarkdown>
+                    <ResponseSummaryFeedback surveyId={survey.id} />
+                </>
+            ) : null}
+        </div>
+    )
+}
+
+function ResponseSummaryFeedback({ surveyId }: { surveyId: string }): JSX.Element {
+    const [rating, setRating] = useState<'good' | 'bad' | null>(null)
+
+    function submitRating(newRating: 'good' | 'bad'): void {
+        if (rating) {
+            return // Already rated
+        }
+        setRating(newRating)
+        posthog.capture('chat rating', {
+            survey_id: surveyId,
+            answer_rating: newRating,
+        })
+    }
+
+    return (
+        <div>
+            {rating === null ? <>Summaries are generated by AI. What did you think?</> : null}
+            {rating !== 'bad' && (
+                <LemonButton
+                    icon={rating === 'good' ? <IconThumbsUpFilled /> : <IconThumbsUp />}
+                    type="tertiary"
+                    size="small"
+                    tooltip="Good summary"
+                    onClick={() => submitRating('good')}
+                />
+            )}
+            {rating !== 'good' && (
+                <LemonButton
+                    icon={rating === 'bad' ? <IconThumbsDownFilled /> : <IconThumbsDown />}
+                    type="tertiary"
+                    size="small"
+                    tooltip="Bad summary"
+                    onClick={() => submitRating('bad')}
+                />
+            )}
+        </div>
+ ) +} diff --git a/posthog/api/survey.py b/posthog/api/survey.py index 4864612c2b4..bfd2dd7d9f8 100644 --- a/posthog/api/survey.py +++ b/posthog/api/survey.py @@ -1,18 +1,24 @@ +import os from contextlib import contextmanager +from datetime import datetime, timedelta from typing import Any, cast from urllib.parse import urlparse import nh3 +import posthoganalytics +from django.conf import settings +from django.core.cache import cache from django.db.models import Min from django.http import HttpResponse, JsonResponse from django.utils.text import slugify from django.views.decorators.csrf import csrf_exempt from loginas.utils import is_impersonated_session from nanoid import generate -from rest_framework import request, serializers, status, viewsets +from rest_framework import request, serializers, status, viewsets, exceptions from rest_framework.request import Request from rest_framework.response import Response +from ee.surveys.summaries.summarize_surveys import summarize_survey_responses from posthog.api.action import ActionSerializer from posthog.api.feature_flag import ( BEHAVIOURAL_COHORT_FOUND_ERROR_CODE, @@ -23,6 +29,7 @@ from posthog.api.routing import TeamAndOrgViewSetMixin from posthog.api.shared import UserBasicSerializer from posthog.api.utils import action, get_token from posthog.client import sync_execute +from posthog.cloud_utils import is_cloud from posthog.constants import AvailableFeature from posthog.event_usage import report_user_action from posthog.exceptions import generate_exception_response @@ -646,6 +653,67 @@ class SurveyViewSet(TeamAndOrgViewSetMixin, viewsets.ModelViewSet): ) return activity_page_response(activity_page, limit, page, request) + @action(methods=["POST"], detail=True, required_scopes=["survey:read"]) + def summarize_responses(self, request: request.Request, **kwargs): + if not request.user.is_authenticated: + raise exceptions.NotAuthenticated() + + user = cast(User, request.user) + + survey_id = kwargs["pk"] + + if not Survey.objects.filter(id=survey_id, team_id=self.team_id).exists(): + return Response(status=status.HTTP_404_NOT_FOUND) + + survey = self.get_object() + + cache_key = f'summarize_survey_responses_{self.team.pk}_{self.kwargs["pk"]}' + # Check if the response is cached + cached_response = cache.get(cache_key) + if cached_response is not None: + return Response(cached_response) + + environment_is_allowed = settings.DEBUG or is_cloud() + has_openai_api_key = bool(os.environ.get("OPENAI_API_KEY")) + if not environment_is_allowed or not has_openai_api_key: + raise exceptions.ValidationError("session summary is only supported in PostHog Cloud") + + if not posthoganalytics.feature_enabled("ai-survey-response-summary", str(user.distinct_id)): + raise exceptions.ValidationError("survey response summary is not enabled for this user") + + end_date: datetime = (survey.end_date or datetime.now()).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + timedelta(days=1) + + try: + question_index_param = request.query_params.get("question_index", None) + question_index = int(question_index_param) if question_index_param else None + except (ValueError, TypeError): + question_index = None + + summary = summarize_survey_responses( + survey_id=survey_id, + question_index=question_index, + survey_start=(survey.start_date or survey.created_at).replace(hour=0, minute=0, second=0, microsecond=0), + survey_end=end_date, + team=self.team, + user=user, + ) + timings = summary.pop("timings", None) + cache.set(cache_key, summary, timeout=30) + + 
posthoganalytics.capture( + event="survey response summarized", distinct_id=str(user.distinct_id), properties=summary + ) + + # let the browser cache for half the time we cache on the server + r = Response(summary, headers={"Cache-Control": "max-age=15"}) + if timings: + r.headers["Server-Timing"] = ", ".join( + f"{key};dur={round(duration, ndigits=2)}" for key, duration in timings.items() + ) + return r + class SurveyConfigSerializer(serializers.ModelSerializer): class Meta: