0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-12-01 12:21:02 +01:00
posthog/.run/Celery.run.xml
David Newell 4f6d9c8673
feat: generate recording text embeddings (#20046)
* make migration

* general flow

* abstract shared methods

* generate input

* remove postgres migration

* generate embedding strings

* remove random file

* Update query snapshots

* Update query snapshots

* feat: create periodic replay embedding

* first sketch of table

* batch and flush embeddings

* add default to timestamp generation

* fetch recordings query

* save first embeddings to CH

* dump session metadata into tokens

* fix lint

* brain dump to help the future traveller

* prom timing instead

* fix input concatenation

* add an e :/

* obey mypy

* some time limits to reduce what we query

* a little fiddling to get it to run locally

* paging and counting

* Update query snapshots

* Update query snapshots

* move the AI stuff to EE for now

* Update query snapshots

* kick off the task with interval from settings

* push embedding generation onto its own queue

* on a different queue

* EE to the max

* doh

* fix

* fangling

* Remove clashes so we can merge this into the other PR

* Remove clashes so we can merge this into the other PR

* start wiring up Celery task

* hmmm

* it's a chord

* wire up celery simple version

* rename

* why is worker failing

* Update .run/Celery.run.xml

* update embedding input to remove duplicates

* ttl on the table

* Revert "update embedding input to remove duplicates"

This reverts commit 9a09d9c9f0.

---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Paul D'Ambra <paul@posthog.com>
2024-02-14 12:50:42 +00:00

31 lines
1.7 KiB
XML

<component name="ProjectRunConfigurationManager">
  <!-- Run configuration "Celery": launches the celery script from the project's env/ directory as a Python run target. -->
  <configuration default="false"
                 name="Celery"
                 type="PythonConfigurationType"
                 factoryName="Python">
    <module name="posthog" />

    <!-- Environment: an env file from bin/, the inherited parent environment (PARENT_ENVS), and the explicit overrides below. -->
    <option name="ENV_FILES" value="$PROJECT_DIR$/bin/celery-queues.env" />
    <option name="INTERPRETER_OPTIONS" value="" />
    <option name="PARENT_ENVS" value="true" />
    <envs>
      <!-- Entries are sorted by name; Postgres and Kafka both point at localhost. -->
      <env name="CLICKHOUSE_SECURE" value="False" />
      <env name="DATABASE_URL" value="postgres://posthog:posthog@localhost:5432/posthog" />
      <env name="DEBUG" value="1" />
      <env name="KAFKA_HOSTS" value="localhost" />
      <env name="PYTHONUNBUFFERED" value="1" />
      <!-- Presumably a comma-separated list of team ids allowed to generate replay embeddings; verify against the settings that read it. -->
      <env name="REPLAY_EMBEDDINGS_ALLOWED_TEAM" value="1,2,3" />
      <env name="SKIP_SERVICE_VERSION_REQUIREMENTS" value="1" />
    </envs>

    <!-- Interpreter and paths: an explicit interpreter under env/ is used instead of the module SDK. -->
    <option name="SDK_HOME" value="$PROJECT_DIR$/env/bin/python" />
    <!-- NOTE(review): the working directory is env/bin, not the project root; the posthog package is presumably found through the content/source roots enabled below. Confirm. -->
    <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/env/bin" />
    <option name="IS_MODULE_SDK" value="false" />
    <option name="ADD_CONTENT_ROOTS" value="true" />
    <option name="ADD_SOURCE_ROOTS" value="true" />
    <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />

    <!-- Command: the celery script (MODULE_MODE is false) started as a worker for the posthog app, with embedded beat (-B) using the RedBeat scheduler, heartbeat and mingle turned off, and DEBUG log level. -->
    <option name="SCRIPT_NAME" value="$PROJECT_DIR$/env/bin/celery" />
    <option name="PARAMETERS" value="-A posthog worker -B --scheduler redbeat.RedBeatScheduler --without-heartbeat --without-mingle --loglevel=DEBUG" />

    <!-- Console behaviour. -->
    <option name="SHOW_COMMAND_LINE" value="false" />
    <option name="EMULATE_TERMINAL" value="false" />
    <option name="MODULE_MODE" value="false" />
    <option name="REDIRECT_INPUT" value="false" />
    <option name="INPUT_FILE" value="" />
    <method v="2" />
  </configuration>
</component>