0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-21 13:39:22 +01:00

feat: the best ever use of ai (#26046)

This commit is contained in:
Paul D'Ambra 2024-11-07 16:45:16 +01:00 committed by GitHub
parent 4186edfb49
commit 4045a1bac9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 326 additions and 11 deletions

View File

@ -0,0 +1,135 @@
import json
import openai
from datetime import datetime
from typing import Optional, cast
from posthog.hogql import ast
from posthog.hogql.parser import parse_select
from posthog.hogql_queries.insights.paginators import HogQLHasMorePaginator
from posthog.schema import HogQLQueryResponse
from posthog.utils import get_instance_region
from prometheus_client import Histogram
from posthog.api.activity_log import ServerTimingsGathered
from posthog.models import Team, User
import structlog
logger = structlog.get_logger(__name__)
# Prometheus histogram of how many prompt tokens each survey-summary request used.
# The bucket bounds run up to 128000, the context size noted next to the model
# name where the completion is requested, followed by an explicit +Inf bucket.
TOKENS_IN_PROMPT_HISTOGRAM = Histogram(
    "posthog_survey_summary_tokens_in_prompt_histogram",
    "histogram of the number of tokens in the prompt used to generate a survey summary",
    buckets=[
        0, 10, 50, 100, 500,
        1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000,
        10000, 20000, 30000, 40000, 50000,
        100000, 128000,
        float("inf"),
    ],
)
def prepare_data(query_response: HogQLQueryResponse) -> list[str]:
    """Collect the non-empty survey answers found in a HogQL query response.

    The second column of every result row is parsed as a JSON object of event
    properties. Every truthy value whose key starts with ``$survey_response``
    is returned, in row order and then in property order within each row.
    """
    # Parse every row up front so a malformed row fails before anything is collected.
    parsed_rows: list[dict] = [json.loads(row[1]) for row in query_response.results]
    return [
        answer
        for properties in parsed_rows
        for name, answer in properties.items()
        if name.startswith("$survey_response") and answer
    ]
# Summarise the responses to a survey with an OpenAI chat completion.
#
# Queries 'survey sent' events for `survey_id` between `survey_start` and `survey_end`
# on behalf of `team`, flattens the survey answers out of the event properties, and asks
# the model for a markdown summary of them.
#
# Returns a dict with two keys: "content" (the model's text, or "" when the completion
# carried no content) and "timings" (whatever `timer.get_all_timings()` reports for the
# timed steps).
def summarize_survey_responses(
survey_id: str, question_index: Optional[int], survey_start: datetime, survey_end: datetime, team: Team, user: User
):
timer = ServerTimingsGathered()
with timer("prepare_query"):
# the paginator is configured with limit=100, so only a bounded sample of events is read
paginator = HogQLHasMorePaginator(limit=100, offset=0)
q = parse_select(
"""
SELECT distinct_id, properties
FROM events
WHERE event == 'survey sent'
AND properties.$survey_id = {survey_id}
-- e.g. `$survey_response` or `$survey_response_2`
AND trim(JSONExtractString(properties, {survey_response_property})) != ''
AND timestamp >= {start_date}
AND timestamp <= {end_date}
""",
{
"survey_id": ast.Constant(value=survey_id),
# a falsy question_index (None or 0) selects the un-suffixed `$survey_response` property
"survey_response_property": ast.Constant(
value=f"$survey_response_{question_index}" if question_index else "$survey_response"
),
"start_date": ast.Constant(value=survey_start),
"end_date": ast.Constant(value=survey_end),
},
)
with timer("run_query"):
query_response = paginator.execute_hogql_query(
team=team,
query_type="survey_response_list_query",
query=cast(ast.SelectQuery, q),
)
with timer("llm_api_prep"):
# region falls back to "HOBBY" when get_instance_region() returns nothing
instance_region = get_instance_region() or "HOBBY"
# NOTE(review): prepare_data keeps every `$survey_response*` property of the matched events,
# not only the property filtered on above — confirm that is intended for multi-question surveys
prepared_data = prepare_data(query_response)
with timer("openai_completion"):
result = openai.chat.completions.create(
model="gpt-4o-mini", # allows 128k tokens
temperature=0.7,
messages=[
{
"role": "system",
"content": """
You are a product manager's assistant. You summarise survey responses from users for the product manager.
You don't do any other tasks.
""",
},
{
# the collected answers are interpolated as the repr of a Python list
"role": "user",
"content": f"""the survey responses are {prepared_data}.""",
},
{
"role": "user",
"content": """
generate a one or two paragraph summary of the survey response.
only summarize, the goal is to identify real user pain points and needs
use bullet points to identify the themes, and highlights of quotes to bring them to life
we're trying to identify what to work on
use as concise and simple language as is possible.
generate no text other than the summary.
the aim is to let people see themes in the responses received. return the text in github flavoured markdown format""",
},
],
# end-user identifier sent to OpenAI: "<instance region>/<user primary key>"
user=f"{instance_region}/{user.pk}",
)
# record prompt size in the histogram; skipped when usage is missing or reports 0 tokens
usage = result.usage.prompt_tokens if result.usage else None
if usage:
TOKENS_IN_PROMPT_HISTOGRAM.observe(usage)
# only the first choice is used; a missing message body becomes an empty string
content: str = result.choices[0].message.content or ""
return {"content": content, "timings": timer.get_all_timings()}

View File

@ -2144,6 +2144,13 @@ const api = {
/**
 * Fetches response counts via the `surveysResponsesCount` endpoint.
 * Resolves to a string-keyed map of counts (keys are presumably survey ids — confirm against the endpoint).
 */
async getResponsesCount(): Promise<{ [key: string]: number }> {
return await new ApiRequest().surveysResponsesCount().get()
},
/**
 * Asks the backend to summarize the responses of a survey.
 *
 * @param surveyId - id of the survey whose responses should be summarized
 * @param questionIndex - index of the question to summarize; when `undefined` no
 *   question index is sent and the backend falls back to its default
 * @returns the JSON body of the `summarize_responses` action
 */
async summarize_responses(surveyId: Survey['id'], questionIndex: number | undefined): Promise<any> {
    let apiRequest = new ApiRequest().survey(surveyId).withAction('summarize_responses')
    if (questionIndex !== undefined) {
        // The backend view reads the snake_case `question_index` query parameter;
        // sending `questionIndex` meant the selected question was silently ignored.
        apiRequest = apiRequest.withQueryString('question_index=' + questionIndex)
    }
    return await apiRequest.create()
},
},
dataWarehouseTables: {

View File

@ -228,6 +228,7 @@ export const FEATURE_FLAGS = {
DEAD_CLICKS_AUTOCAPTURE: 'dead-clicks-autocapture', // owner: @pauldambra #team-replay
ONBOARDING_PRODUCT_MULTISELECT: 'onboarding-product-multiselect', // owner: @danielbachhuber #team-experiments
EDIT_DWH_SOURCE_CONFIG: 'edit_dwh_source_config', // owner: @Gilbert09 #team-data-warehouse
AI_SURVEY_RESPONSE_SUMMARY: 'ai-survey-response-summary', // owner: @pauldambra
} as const
/** Union of the flag key strings that appear as values of `FEATURE_FLAGS`. */
export type FeatureFlagKey = (typeof FEATURE_FLAGS)[keyof typeof FEATURE_FLAGS]

View File

@ -181,6 +181,11 @@ export const surveyLogic = kea<surveyLogicType>([
setFlagPropertyErrors: (errors: any) => ({ errors }),
}),
loaders(({ props, actions, values }) => ({
// Loader for the summary of this survey's responses.
responseSummary: {
// Requests a summary for the survey identified by `props.id`; `questionIndex` is
// forwarded as-is and may be undefined.
summarize: async ({ questionIndex }: { questionIndex?: number }) => {
return api.surveys.summarize_responses(props.id, questionIndex)
},
},
survey: {
loadSurvey: async () => {
if (props.id && props.id !== 'new') {

View File

@ -1,10 +1,21 @@
import { IconInfo } from '@posthog/icons'
import { LemonTable } from '@posthog/lemon-ui'
import {
IconInfo,
IconSparkles,
IconThumbsDown,
IconThumbsDownFilled,
IconThumbsUp,
IconThumbsUpFilled,
} from '@posthog/icons'
import { LemonButton, LemonTable, Spinner } from '@posthog/lemon-ui'
import { BindLogic, useActions, useValues } from 'kea'
import { FlaggedFeature } from 'lib/components/FlaggedFeature'
import { FEATURE_FLAGS } from 'lib/constants'
import { dayjs } from 'lib/dayjs'
import { LemonDivider } from 'lib/lemon-ui/LemonDivider'
import { LemonMarkdown } from 'lib/lemon-ui/LemonMarkdown'
import { Tooltip } from 'lib/lemon-ui/Tooltip'
import { humanFriendlyNumber } from 'lib/utils'
import posthog from 'posthog-js'
import { useEffect, useState } from 'react'
import { insightLogic } from 'scenes/insights/insightLogic'
import { LineGraph } from 'scenes/insights/views/LineGraph/LineGraph'
@ -577,15 +588,19 @@ export function OpenTextViz({
<></>
) : (
<>
<Tooltip title="See all Open Text responses in the Events table at the bottom.">
<div className="inline-flex gap-1">
<div className="font-semibold text-muted-alt">Open text</div>
<LemonDivider vertical className="my-1 mx-1" />
<div className="font-semibold text-muted-alt">random selection</div>
<IconInfo className="text-lg text-muted-alt shrink-0 ml-0.5 mt-0.5" />
</div>
</Tooltip>
<div className="flex flex-row justify-between items-center">
<Tooltip title="See all Open Text responses in the Events table at the bottom.">
<div className="inline-flex gap-1">
<div className="font-semibold text-muted-alt">Open text</div>
<LemonDivider vertical className="my-1 mx-1" />
<div className="font-semibold text-muted-alt">random selection</div>
<IconInfo className="text-lg text-muted-alt shrink-0 ml-0.5 mt-0.5" />
</div>
</Tooltip>
<ResponseSummariesButton questionIndex={questionIndex} />
</div>
<div className="text-xl font-bold mb-4">{question.question}</div>
<ResponseSummariesDisplay />
<div className="mt-4 mb-8 masonry-container">
{surveyOpenTextResults[questionIndex].events.map((event, i) => {
const personProp = {
@ -617,3 +632,87 @@ export function OpenTextViz({
</div>
)
}
/**
 * Button that triggers summarization of the responses to one survey question.
 * Rendered only when the AI survey response summary feature flag matches.
 * It is disabled while a summary is loading and once a summary exists.
 */
function ResponseSummariesButton({ questionIndex }: { questionIndex: number | undefined }): JSX.Element {
    const { summarize } = useActions(surveyLogic)
    const { responseSummary, responseSummaryLoading } = useValues(surveyLogic)

    // Loading takes precedence over "already summarized"; otherwise the button stays enabled.
    let disabledReason: string | undefined = undefined
    if (responseSummaryLoading) {
        disabledReason = 'Let me think...'
    } else if (responseSummary) {
        disabledReason = 'already summarized'
    }

    const buttonLabel = responseSummaryLoading ? (
        <>
            Let me think...
            <Spinner />
        </>
    ) : (
        <>Summarize responses</>
    )

    const handleClick = (): void => {
        summarize({ questionIndex })
    }

    return (
        <FlaggedFeature flag={FEATURE_FLAGS.AI_SURVEY_RESPONSE_SUMMARY} match={true}>
            <LemonButton
                type="secondary"
                data-attr="summarize-survey"
                onClick={handleClick}
                disabledReason={disabledReason}
                icon={<IconSparkles />}
            >
                {buttonLabel}
            </LemonButton>
        </FlaggedFeature>
    )
}
/**
 * Shows the loaded response summary as markdown, followed by the feedback controls.
 * Renders nothing inside the feature-flag wrapper until a summary is available.
 */
function ResponseSummariesDisplay(): JSX.Element {
    const { survey, responseSummary } = useValues(surveyLogic)

    const summarySection = responseSummary ? (
        <>
            <h1>Responses summary</h1>
            <LemonMarkdown>{responseSummary.content}</LemonMarkdown>
            <LemonDivider dashed />
            <ResponseSummaryFeedback surveyId={survey.id} />
        </>
    ) : null

    return (
        <FlaggedFeature flag={FEATURE_FLAGS.AI_SURVEY_RESPONSE_SUMMARY} match>
            {summarySection}
        </FlaggedFeature>
    )
}
/**
 * Thumbs up / thumbs down feedback for a generated summary.
 *
 * The first click is stored locally and reported as a `chat rating` event; later
 * clicks are ignored. After rating, only the chosen thumb stays visible, in its
 * filled variant.
 *
 * @param surveyId - id of the survey the summary belongs to, sent with the event
 */
function ResponseSummaryFeedback({ surveyId }: { surveyId: string }): JSX.Element {
    const [rating, setRating] = useState<'good' | 'bad' | null>(null)

    function submitRating(newRating: 'good' | 'bad'): void {
        if (rating) {
            return // Already rated
        }
        setRating(newRating)
        posthog.capture('chat rating', {
            survey_id: surveyId,
            // `rating` is still the pre-click state (null) inside this closure, because
            // setRating only takes effect on the next render — report the value just chosen.
            answer_rating: newRating,
        })
    }

    return (
        <div className="flex items-center justify-end">
            {rating === null ? <>Summaries are generated by AI. What did you think?</> : null}
            {rating !== 'bad' && (
                <LemonButton
                    icon={rating === 'good' ? <IconThumbsUpFilled /> : <IconThumbsUp />}
                    type="tertiary"
                    size="small"
                    tooltip="Good summary"
                    onClick={() => submitRating('good')}
                />
            )}
            {rating !== 'good' && (
                <LemonButton
                    icon={rating === 'bad' ? <IconThumbsDownFilled /> : <IconThumbsDown />}
                    type="tertiary"
                    size="small"
                    tooltip="Bad summary"
                    onClick={() => submitRating('bad')}
                />
            )}
        </div>
    )
}

View File

@ -1,18 +1,24 @@
import os
from contextlib import contextmanager
from datetime import datetime, timedelta
from typing import Any, cast
from urllib.parse import urlparse
import nh3
import posthoganalytics
from django.conf import settings
from django.core.cache import cache
from django.db.models import Min
from django.http import HttpResponse, JsonResponse
from django.utils.text import slugify
from django.views.decorators.csrf import csrf_exempt
from loginas.utils import is_impersonated_session
from nanoid import generate
from rest_framework import request, serializers, status, viewsets
from rest_framework import request, serializers, status, viewsets, exceptions
from rest_framework.request import Request
from rest_framework.response import Response
from ee.surveys.summaries.summarize_surveys import summarize_survey_responses
from posthog.api.action import ActionSerializer
from posthog.api.feature_flag import (
BEHAVIOURAL_COHORT_FOUND_ERROR_CODE,
@ -23,6 +29,7 @@ from posthog.api.routing import TeamAndOrgViewSetMixin
from posthog.api.shared import UserBasicSerializer
from posthog.api.utils import action, get_token
from posthog.client import sync_execute
from posthog.cloud_utils import is_cloud
from posthog.constants import AvailableFeature
from posthog.event_usage import report_user_action
from posthog.exceptions import generate_exception_response
@ -646,6 +653,67 @@ class SurveyViewSet(TeamAndOrgViewSetMixin, viewsets.ModelViewSet):
)
return activity_page_response(activity_page, limit, page, request)
@action(methods=["POST"], detail=True, required_scopes=["survey:read"])
def summarize_responses(self, request: request.Request, **kwargs):
    """Return an AI-generated summary of the responses to one survey.

    The optional ``question_index`` query parameter selects which question to
    summarise; a missing or non-integer value is treated as "no index".
    Summaries are cached for 30 seconds per team, survey and question index.

    Raises:
        NotAuthenticated: when the request has no authenticated user.
        ValidationError: when the instance is neither in DEBUG nor on Cloud, when no
            OPENAI_API_KEY is configured, or when the feature flag is off for the user.

    Returns a 404 response when the survey does not belong to the current team,
    otherwise a response containing the summary, with a ``Server-Timing`` header
    when timings were reported.
    """
    if not request.user.is_authenticated:
        raise exceptions.NotAuthenticated()

    user = cast(User, request.user)

    survey_id = kwargs["pk"]

    if not Survey.objects.filter(id=survey_id, team_id=self.team_id).exists():
        return Response(status=status.HTTP_404_NOT_FOUND)

    survey = self.get_object()

    # Resolve the question index before touching the cache so it can be part of the key.
    # The camelCase spelling is accepted as well because clients have been sending
    # `questionIndex` rather than `question_index`.
    try:
        question_index_param = request.query_params.get("question_index", None) or request.query_params.get(
            "questionIndex", None
        )
        question_index = int(question_index_param) if question_index_param else None
    except (ValueError, TypeError):
        question_index = None

    # Include the question index in the key: without it, a summary cached for one
    # question was returned for every other question of the same survey.
    cache_key = f'summarize_survey_responses_{self.team.pk}_{self.kwargs["pk"]}_{question_index}'
    # Check if the response is cached
    cached_response = cache.get(cache_key)
    if cached_response is not None:
        return Response(cached_response)

    environment_is_allowed = settings.DEBUG or is_cloud()
    has_openai_api_key = bool(os.environ.get("OPENAI_API_KEY"))
    if not environment_is_allowed or not has_openai_api_key:
        raise exceptions.ValidationError("survey response summary is only supported in PostHog Cloud")

    if not posthoganalytics.feature_enabled("ai-survey-response-summary", str(user.distinct_id)):
        raise exceptions.ValidationError("survey response summary is not enabled for this user")

    # The window runs from midnight of the start day to midnight after the end day
    # (or after today when the survey has no end date).
    end_date: datetime = (survey.end_date or datetime.now()).replace(
        hour=0, minute=0, second=0, microsecond=0
    ) + timedelta(days=1)

    summary = summarize_survey_responses(
        survey_id=survey_id,
        question_index=question_index,
        survey_start=(survey.start_date or survey.created_at).replace(hour=0, minute=0, second=0, microsecond=0),
        survey_end=end_date,
        team=self.team,
        user=user,
    )

    # Timings go into a response header only; they are neither cached nor sent to analytics.
    timings = summary.pop("timings", None)
    cache.set(cache_key, summary, timeout=30)

    posthoganalytics.capture(
        event="survey response summarized", distinct_id=str(user.distinct_id), properties=summary
    )

    # let the browser cache for half the time we cache on the server
    r = Response(summary, headers={"Cache-Control": "max-age=15"})
    if timings:
        r.headers["Server-Timing"] = ", ".join(
            f"{key};dur={round(duration, ndigits=2)}" for key, duration in timings.items()
        )
    return r
class SurveyConfigSerializer(serializers.ModelSerializer):
class Meta: