Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,15 @@ test_profile:
# Run pytest with the --only-extended flag against the path(s) given in
# TEST_FILE (TEST_FILE is defined outside this section of the Makefile).
extended_tests:
python -m pytest --only-extended $(TEST_FILE)

######################
# MONITORING AND EXECUTION
######################

# Start the Arize Phoenix server (`phoenix.server.main serve`) using the
# interpreter from the project-local .venv, so the virtualenv must already exist.
phoenix:
.venv/bin/python3 -m phoenix.server.main serve

# Run test_agent.py with the project-local .venv interpreter, with <repo>/src
# appended to PYTHONPATH so packages under src/ are importable.
#
# Notes on the recipe:
#   - `$$` passes a literal `$` to the shell; a bare `$PYTHONPATH` would be
#     expanded by Make as `$(P)YTHONPATH`, i.e. the literal text "YTHONPATH".
#   - `$(CURDIR)` is Make's absolute working directory; `$(pwd)` would be an
#     (unset) Make variable, not the shell's `pwd` command.
#   - `$${PYTHONPATH:+$$PYTHONPATH:}` only emits the "existing:" prefix when
#     PYTHONPATH is already set and non-empty, avoiding an empty path entry.
.PHONY: run
run:
	export PYTHONPATH="$${PYTHONPATH:+$$PYTHONPATH:}$(CURDIR)/src" && .venv/bin/python3 test_agent.py

######################
# LINTING AND FORMATTING
Expand Down Expand Up @@ -64,4 +73,6 @@ help:
@echo 'tests - run unit tests'
@echo 'test TEST_FILE=<test_file> - run all tests in file'
@echo 'test_watch - run unit tests in watch mode'
@echo 'phoenix - start Arize Phoenix dashboard'
@echo 'run - run the agent with monitoring enabled'

20 changes: 20 additions & 0 deletions bm25s_index/corpus.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{"id":0,"text":"E001 Required python package is missing in the execution environment. ModuleNotFoundError: No module named 'pandas' ValueError: Please install the 'db-dtypes' package to use this function"}
{"id":1,"text":"E010 User does not have sufficient permissions to access the resource (BigQuery, Drive, etc.). google.api_core.exceptions.Forbidden: 403 Access Denied: BigQuery: Permission denied while getting Drive credentials."}
{"id":2,"text":"E012 The query result schema does not match the destination table schema. BadRequest: 400 Provided schema does not match. Field 'amount' has changed type from INT64 to FLOAT64"}
{"id":3,"text":"E013 Failed to automatically infer BigQuery schema from DataFrame. pyarrow.lib.ArrowTypeError: Could not convert FLOAT64 to INT64"}
{"id":4,"text":"E014 Data delivery operation exceeded the time limit. AirflowSensorTimeout: Timeout reached DELIVERY_TIMEOUT: Execution time has exceeded the time limit"}
{"id":5,"text":"E018 The specified source file or destination table does not exist. NotFound: 404 Not found: Table project.dataset.table FileNotFoundError: [Errno 2] No such file or directory: '/tmp/data.csv'"}
{"id":6,"text":"E020 The process was terminated because it exceeded the available memory. MemoryError: Unable to allocate 2.4 GiB for an array with shape Container terminated with exit code 137 (OOMKilled)"}
{"id":7,"text":"E021 API call frequency or data volume exceeded the cloud provider's quota. google.api_core.exceptions.Forbidden: 403 Quota exceeded: Your project exceeded quota for free query bytes per day"}
{"id":8,"text":"E022 Could not connect to the database server. Server might be down or blocked by firewall. psycopg2.OperationalError: could not connect to server: Connection refused"}
{"id":9,"text":"E023 The local disk or temporary directory is full. OSError: [Errno 28] No space left on device: '/var/lib/airflow/tmp'"}
{"id":10,"text":"E024 The generated SQL query contains syntax errors. google.api_core.exceptions.BadRequest: 400 Syntax error: Unexpected keyword SELECT at [5:1]"}
{"id":11,"text":"E025 SSL handshake failed while connecting to an external API/DB. [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate"}
{"id":12,"text":"E026 A Spark executor was terminated, often due to OOM or spot instance preemption. org.apache.spark.SparkException: Job aborted due to stage failure: ExecutorLostFailure (executor 5 exited caused by external signal)"}
{"id":13,"text":"E027 Attempted to insert NULL into a column that has a NOT NULL constraint. sqlalchemy.exc.IntegrityError: (psycopg2.errors.NotNullViolation) column 'user_id' contains null values"}
{"id":14,"text":"E028 The authentication token or credentials used for the service have expired. google.auth.exceptions.RefreshError: The credentials have expired and could not be refreshed"}
{"id":15,"text":"E029 The Kubernetes pod running the task was preempted by a higher priority pod or node maintenance. AirflowException: Kubernetes pod was deleted before it could complete"}
{"id":16,"text":"E030 A database transaction was rolled back because it was deadlocked with another process. MySQLdb.OperationalError: (1213, 'Deadlock found when trying to get lock; try restarting transaction')"}
{"id":17,"text":"E031 A required environment variable for the DAG/Task is missing. KeyError: 'DB_PASSWORD' during os.environ retrieval"}
{"id":18,"text":"E032 The source data file is corrupted or does not follow the expected format. pandas.errors.ParserError: Error tokenizing data. C error: Expected 5 fields in line 10, saw 7"}
{"id":19,"text":"E033 Conflict during file upload/download in Google Cloud Storage due to version mismatch. google.api_core.exceptions.PreconditionFailed: 412 Precondition Failed (Generation match failed)"}
1 change: 1 addition & 0 deletions bm25s_index/corpus.mmindex.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[0,206,437,631,779,950,1161,1369,1578,1763,1901,2065,2234,2470,2669,2861,3051,3265,3402,3595]
Binary file added bm25s_index/data.csc.index.npy
Binary file not shown.
Binary file added bm25s_index/indices.csc.index.npy
Binary file not shown.
Binary file added bm25s_index/indptr.csc.index.npy
Binary file not shown.
102 changes: 102 additions & 0 deletions bm25s_index/metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
[
{
"error_id": "E001",
"category": "DEPENDENCY_ISSUES",
"resolution": "Add the missing package to your requirements.txt or rebuild the Docker image."
},
{
"error_id": "E010",
"category": "PERMISSION",
"resolution": "Check the IAM roles for the service account and ensure it has access to both BigQuery and the underlying source (e.g., Google Drive)."
},
{
"error_id": "E012",
"category": "SCHEMA_ISSUES",
"resolution": "Align the column names and types between the query and the target table."
},
{
"error_id": "E013",
"category": "SCHEMA_ISSUES",
"resolution": "Explicitly define the schema instead of relying on automatic inference."
},
{
"error_id": "E014",
"category": "DELIVERY_ERRORS",
"resolution": "Check network latency or increase the timeout setting in the operator."
},
{
"error_id": "E018",
"category": "LOGIC_ERRORS",
"resolution": "Verify the path or table name and ensure it was created in previous steps."
},
{
"error_id": "E020",
"category": "RESOURCE_LIMITS",
"resolution": "Increase the memory limit of the container or optimize the code to handle data in smaller chunks."
},
{
"error_id": "E021",
"category": "QUOTA_ISSUES",
"resolution": "Implement exponential backoff or request a quota increase from the cloud provider console."
},
{
"error_id": "E022",
"category": "NETWORK_ISSUES",
"resolution": "Check if the DB server is running and verify firewall/security group rules for the worker IP."
},
{
"error_id": "E023",
"category": "RESOURCE_LIMITS",
"resolution": "Clean up the /tmp directory or increase the disk size of the worker/PVC."
},
{
"error_id": "E024",
"category": "LOGIC_ERRORS",
"resolution": "Check the SQL string for missing commas, incorrect keywords, or unclosed quotes."
},
{
"error_id": "E025",
"category": "NETWORK_ISSUES",
"resolution": "Update the CA certificates in the environment or check if the target's certificate is expired."
},
{
"error_id": "E026",
"category": "SPARK_ERRORS",
"resolution": "Check executor memory settings or increase the number of retries for Spark tasks."
},
{
"error_id": "E027",
"category": "DATA_QUALITY",
"resolution": "Inspect the source data for unexpected NULL values or update the target table schema."
},
{
"error_id": "E028",
"category": "PERMISSION",
"resolution": "Refresh the service account token or update the connection credentials in Airflow."
},
{
"error_id": "E029",
"category": "INFRASTRUCTURE",
"resolution": "The task should be retried automatically. If it happens frequently, check pod priority settings."
},
{
"error_id": "E030",
"category": "DATABASE",
"resolution": "Reduce parallelism for this task or check for long-running transactions locking the same tables."
},
{
"error_id": "E031",
"category": "CONFIGURATION",
"resolution": "Define the missing environment variable in the Airflow deployment or Dockerfile."
},
{
"error_id": "E032",
"category": "DATA_QUALITY",
"resolution": "Verify the source file integrity and check for unexpected delimiters or header changes."
},
{
"error_id": "E033",
"category": "STORAGE_ISSUES",
"resolution": "Check if multiple processes are trying to write to the same GCS object simultaneously."
}
]
12 changes: 12 additions & 0 deletions bm25s_index/params.index.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"k1": 1.5,
"b": 0.75,
"delta": 0.5,
"method": "lucene",
"idf_method": "lucene",
"dtype": "float32",
"int_dtype": "int32",
"num_docs": 20,
"version": "0.3.2.post1",
"backend": "numpy"
}
1 change: 1 addition & 0 deletions bm25s_index/vocab.index.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"variabl":0,"deni":1,"sql":2,"requir":3,"alloc":4,"api":5,"exceed":6,"404":7,"error":8,"data":9,"code":10,"memoryerror":11,"e024":12,"sign":13,"task":14,"preconditionfail":15,"expir":16,"could":17,"gib":18,"local":19,"storag":20,"panda":21,"e031":22,"self":23,"oper":24,"mainten":25,"e010":26,"upload":27,"shape":28,"filenotfounderror":29,"spark":30,"roll":31,"oomkil":32,"etc":33,"follow":34,"specifi":35,"forbidden":36,"failur":37,"org":38,"directori":39,"run":40,"instal":41,"null":42,"var":43,"day":44,"mismatch":45,"miss":46,"lib":47,"drive":48,"caus":49,"call":50,"quota":51,"precondit":52,"ssl":53,"python":54,"queri":55,"project":56,"abort":57,"certificate_verify_fail":58,"job":59,"e027":60,"termin":61,"reach":62,"doe":63,"keyerror":64,"has":65,"suffici":66,"server":67,"generat":68,"token":69,"automat":70,"user":71,"e030":72,"googl":73,"csv":74,"disk":75,"auth":76,"credenti":77,"your":78,"parsererror":79,"e023":80,"db_password":81,"spot":82,"saw":83,"timeout":84,"apach":85,"format":86,"higher":87,"pod":88,"bigqueri":89,"sqlalchemi":90,"valu":91,"notfound":92,"executorlostfailur":93,"from":94,"when":95,"permiss":96,"array":97,"e032":98,"resourc":99,"unabl":100,"might":101,"file":102,"prioriti":103,"syntax":104,"corrupt":105,"float64":106,"sparkexcept":107,"os":108,"verifi":109,"1213":110,"volum":111,"e029":112,"full":113,"mysqldb":114,"field":115,"function":116,"temporari":117,"space":118,"infer":119,"found":120,"airflow":121,"match":122,"schema":123,"e021":124,"exc":125,"complet":126,"destin":127,"exit":128,"28":129,"pyarrow":130,"memori":131,"retriev":132,"becaus":133,"403":134,"airflowsensortimeout":135,"412":136,"e033":137,"dure":138,"contain":139,"tabl":140,"per":141,"connect":142,"notnullviol":143,"limit":144,"environ":145,"unexpect":146,"kubernet":147,"line":148,"type":149,"e001":150,"oom":151,"constraint":152,"keyword":153,"dag":154,"e026":155,"dtype":156,"errno":157,"e013":158,"free":159,"e012":160,"int64":161,"download":162,"authent":163,"e018":164,"restar
t":165,"preemption":166,"version":167,"access":168,"delet":169,"avail":170,"get":171,"tri":172,"while":173,"badrequest":174,"anoth":175,"delivery_timeout":176,"10":177,"result":178,"preempt":179,"instanc":180,"insert":181,"137":182,"db":183,"airflowexcept":184,"operationalerror":185,"executor":186,"left":187,"signal":188,"user_id":189,"api_cor":190,"back":191,"name":192,"datafram":193,"refresherror":194,"execut":195,"cloud":196,"valueerror":197,"expect":198,"time":199,"down":200,"400":201,"convert":202,"deliveri":203,"modul":204,"stage":205,"column":206,"deadlock":207,"transact":208,"e014":209,"integrityerror":210,"process":211,"arrowtypeerror":212,"attempt":213,"node":214,"refresh":215,"befor":216,"psycopg2":217,"packag":218,"byte":219,"dataset":220,"e022":221,"refus":222,"e020":223,"certif":224,"handshak":225,"have":226,"sourc":227,"provid":228,"tmp":229,"fail":230,"frequenc":231,"pleas":232,"select":233,"conflict":234,"oserror":235,"extern":236,"use":237,"e028":238,"servic":239,"lock":240,"firewal":241,"chang":242,"often":243,"databas":244,"amount":245,"exist":246,"block":247,"devic":248,"due":249,"e025":250,"modulenotfounderror":251,"except":252,"":253}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Empty file.
Binary file added chroma_db/chroma.sqlite3
Binary file not shown.
Loading