buerokratt · erangi-ar · Nov 1, 2025 · Nov 10, 2025 · Nov 10, 2025 · Nov 10, 2025
diff --git a/.env b/.env
@@ -1,7 +1,36 @@
-AWS_ACCESS_KEY_ID=your_aws_access_key_id
-AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
+API_CORS_ORIGIN=*
+API_DOCUMENTATION_ENABLED=true
+S3_REGION=eu-west-1
+S3_ENDPOINT_URL=http://minio:9000
+S3_ENDPOINT_NAME=minio:9000
+S3_DATA_BUCKET_PATH=resources
+S3_DATA_BUCKET_NAME=global-classifier
+FS_DATA_DIRECTORY_PATH=/app
+S3_SECRET_ACCESS_KEY=minioadmin
+S3_ACCESS_KEY_ID=minioadmin
+S3_HEALTH_ENDPOINT=http://minio:9000/minio/health/live
+MINIO_BROWSER_REDIRECT_URL=http://localhost:9001/minio-console
+GF_SECURITY_ADMIN_USER=admin
+GF_SECURITY_ADMIN_PASSWORD=admin123
+GF_USERS_ALLOW_SIGN_UP=false
+GF_SERVER_ROOT_URL=https://dev-gloclf.buerokratt.ee/grafana
+GF_SERVER_SERVE_FROM_SUB_PATH=true
+PORT=3000
+AWS_BEDROCK_ACCESS_KEY_ID=your_aws_access_key_id
+AWS_BEDROCK_SECRET_ACCESS_KEY=your_aws_secret_access_key
 BEDROCK_AWS_REGION=eu-west-1
 AZURE_OPENAI_API_KEY=your_openai_api_key
 AZURE_OPENAI_ENDPOINT=your_openai_endpoint
 AZURE_OPENAI_DEPLOYMENT_NAME=gpt-4o-mini
-PROVIDER_NAME=azure-openai
+PROVIDER_NAME=azure-openai
+MLFLOW_TRACKING_USERNAME=mlflowadmin
+MLFLOW_TRACKING_PASSWORD=value
+MLFLOW_HOST_PORT=5000
+MLFLOW_CONT_PORT=5000
+MLFLOW_HOST=0.0.0.0
+MLFLOW_PORT=${MLFLOW_CONT_PORT}
+MLFLOW_BACKEND_STORE_URI=sqlite:////mlflow/mlflow_data/mlflow.db
+MLFLOW_DEFAULT_ARTIFACT_ROOT=file:///mlflow/mlflow_artifacts
+MLFLOW_HOST_CONFIG_PATH=./mlflow/config
+MLFLOW_CONT_CONFIG_PATH=/mlflow/config
+MLFLOW_FLASK_SERVER_SECRET_KEY=byk-mlflow-secret
diff --git a/DSL/CronManager/DSL/callback_formatter.yml b/DSL/CronManager/DSL/callback_formatter.yml
@@ -2,4 +2,4 @@ callback_format:
   trigger: off
   type: exec
   command: "../app/scripts/callback_format.sh"
-  allowedEnvs: ['filePath', 'results', 'taskId']
+  allowedEnvs: ['filePath', 'results', 'taskId', 'metricsFile']
diff --git a/DSL/CronManager/script/callback_format.sh b/DSL/CronManager/script/callback_format.sh
@@ -3,8 +3,8 @@
 echo "Started Shell Script for Dataset Generation Callback Processing"
 
 # Check if environment variables are set
-if [ -z "$filePath" ] || [ -z "$results" ] || [ -z "$taskId" ]; then
-  echo "Please set the filePath, results, and taskId environment variables."
+if [ -z "$filePath" ] || [ -z "$results" ] || [ -z "$taskId" ] || [ -z "$metricsFile" ]; then
+  echo "Please set the filePath, results, taskId, and metricsFile environment variables."
   exit 1
 fi
 
@@ -62,6 +62,7 @@ python3 "$CALLBACK_SCRIPT" \
   --encoded-results "$results" \
   --output-json "$temp_response" \
   --session-id "$taskId" \
+  --metrics-file "$metricsFile" \
   > /tmp/callback_stdout.log 2> /tmp/callback_stderr.log
 exit_code=$?
 

diff --git a/DSL/CronManager/script/dataset_pipeline_s3.sh b/DSL/CronManager/script/dataset_pipeline_s3.sh
@@ -379,17 +379,58 @@ EOF
     else
         log "S3 download failed - success status: $success_status"
         log "Response: $response_body"
+
+        # Update progress status to indicate failure
+        progress_update_payload=$(cat <<EOF
+{
+  "sessionId": "$sessionId",
+  "generationStatus": "Fail",
+  "generationMessage": "Generation Failed",
+  "progressPercentage": 100,
+  "processComplete": true
+}
+EOF
+)
+
+        progress_update_response=$(curl -s -X POST "$PROGRESS_UPDATE_URL" \
+        -H "Content-Type: application/json" \
+        -d "$progress_update_payload")
+        log "Progress status updated to failed: $progress_update_response"
+
         send_failure_status_update "S3 download and extraction failed" "$CURRENT_DATASET_ID" "$response_body" "extraction_failure"
         rm -f /tmp/download_response.json
         exit 1
     fi
 
 else
     log "Python script execution failed with exit code: $exit_code"
+
+    # Update progress status to indicate failure
+    progress_update_payload=$(cat <<EOF
+{
+  "sessionId": "$sessionId",
+  "generationStatus": "Fail",
+  "generationMessage": "Generation Failed",
+  "progressPercentage": 100,
+  "processComplete": true
+}
+EOF
+)
+
+    progress_update_response=$(curl -s -X POST "$PROGRESS_UPDATE_URL" \
+    -H "Content-Type: application/json" \
+    -d "$progress_update_payload")
+    log "Progress status updated to failed: $progress_update_response"
+
     if [ -f "$temp_response" ]; then
         log "Error response: $(cat $temp_response)"
-        rm -f /tmp/download_response.json
+        response_body=$(cat "$temp_response")
+        send_failure_status_update "Python script execution failed" "$CURRENT_DATASET_ID" "$response_body" "extraction_failure"
+    else
+        send_failure_status_update "Python script execution failed - no response data" "$CURRENT_DATASET_ID" "" "extraction_failure"
     fi
+
+    rm -f /tmp/download_response.json
     exit 1
 fi
 

diff --git a/DSL/CronManager/script/train_script_starter.sh b/DSL/CronManager/script/train_script_starter.sh
@@ -7,6 +7,48 @@ GET_FIRST_COME_TRAINING_JOB_SQL="http://resql:8082/global-classifier/get-queued-
 GET_DATA_MODEL_BY_MODEL_ID_SQL="http://resql:8082/global-classifier/get-data-model-info-by-given-model-id"
 UPDATE_JOB_STATUS="http://resql:8082/global-classifier/update-training-job-status"
 
+# Centralized error handling: log the reason, mark the job, model and progress
+# session as failed where their IDs are known, then exit 1. $1 = failure reason.
+# Reads globals job_id, model_id, session_id and UPDATE_JOB_STATUS.
+handle_training_failure() {
+    local error_message="$1"
+    echo "[FAILED] $error_message"
+
+    # Gate each update on its own ID: an empty session_id must not block the job/model updates.
+    if [ -n "$job_id" ]; then
+        echo "[UPDATE] Updating job status to training-failed..."
+        response_update_job_status=$(curl -s -X POST "$UPDATE_JOB_STATUS" \
+            -H "Content-Type: application/json" \
+            -d "{\"jobId\": $job_id, \"jobStatus\": \"training-failed\"}")
+    fi
+
+    if [ -n "$model_id" ]; then
+        echo "[MODEL] Updating model training status to failed..."
+        UPDATE_MODEL_TRAINING_STATUS_FAILED="http://resql:8082/global-classifier/update-training_status-failed"
+        response_update_model_status=$(curl -s -X POST "$UPDATE_MODEL_TRAINING_STATUS_FAILED" \
+            -H "Content-Type: application/json" \
+            -d "{\"model_id\": $model_id}")
+    fi
+
+    if [ -n "$session_id" ]; then
+        echo "[PROGRESS] Updating progress session to show training failure..."
+        UPDATE_PROGRESS_SESSION_ENDPOINT="http://ruuter-public:8086/global-classifier/datamodels/progress/update"
+        response_update_progress_failure=$(curl -s -X POST "$UPDATE_PROGRESS_SESSION_ENDPOINT" \
+            -H "Content-Type: application/json" \
+            -d "{\"sessionId\": $session_id, \"trainingStatus\": \"Training Failed\", \"trainingMessage\": \"Training Failed\", \"progressPercentage\": 100, \"processComplete\": false}")
+        if [ -z "$response_update_progress_failure" ]; then
+            echo "[WARNING] Failed to update progress session with failure status"
+        else
+            echo "[PROGRESS] Progress session updated with failure status successfully"
+        fi
+    fi
+    if [ -z "$job_id" ] || [ -z "$model_id" ] || [ -z "$session_id" ]; then
+        echo "[WARNING] Some status updates were skipped - missing job_id, model_id, or session_id"
+    fi
+
+    exit 1
+}
+
 echo "[START] Training script starter"
 
 # Check if training is in progress
@@ -102,8 +144,7 @@ echo "[DEBUG] Create session response: '$response_create_session'"
 
 # Extract session ID from response
 if [ -z "$response_create_session" ]; then
-    echo "[ERROR] Failed to create training progress session - empty response"
-    exit 1
+    handle_training_failure "Failed to create training progress session - empty response"
 fi
 
 # Check if session creation was successful
@@ -113,14 +154,14 @@ if echo "$response_create_session" | grep -q '"operationSuccessful":true'; then
     if [ -z "$session_id" ] || [ "$session_id" = "$response_create_session" ]; then
         echo "[ERROR] Failed to extract session ID from response"
         echo "[DEBUG] Raw response: '$response_create_session'"
-        exit 1
+        handle_training_failure "Failed to extract session ID from response"
     fi
 
     echo "[SESSION] Training progress session created successfully with ID: $session_id"
 else
     echo "[ERROR] Training progress session creation failed"
     echo "[DEBUG] Raw response: '$response_create_session'"
-    exit 1
+    handle_training_failure "Training progress session creation failed"
 fi
 
 # Update initial training progress
@@ -154,16 +195,15 @@ echo "[DEBUG] Dataset ID response: '$response_get_dataset_id'"
 
 # Handle empty response
 if [ -z "$response_get_dataset_id" ] || [ "$response_get_dataset_id" = "[]" ]; then
-    echo "[ERROR] No dataset information found for model ID: $model_id"
-    exit 1
+    handle_training_failure "No dataset information found for model ID: $model_id"
 fi
 
 dataset_id=$(echo "$response_get_dataset_id" | sed -E 's/.*"connectedDsId":([0-9]+).*/\1/')
 
 if [ -z "$dataset_id" ] || [ "$dataset_id" = "$response_get_dataset_id" ]; then
     echo "[ERROR] Connected Dataset ID not found in response"
     echo "[DEBUG] Raw response: '$response_get_dataset_id'"
-    exit 1
+    handle_training_failure "Connected Dataset ID not found in response"
 fi
 
 echo "[DATASET] Dataset ID: $dataset_id"
@@ -177,12 +217,12 @@ else
     echo "[ERROR] Failed to extract base models from response"
     echo "[ERROR] Raw response: $response_get_dataset_id"
     echo "[ERROR] Extracted base_models: $base_models_json"
-    exit 1
+    handle_training_failure "Failed to extract base models from response"
 fi
 
 # Activate existing virtualenv
 echo "[INFO] Activating existing virtualenv at /app/python_virtual_env"
-source /app/python_virtual_env/bin/activate || { echo "[ERROR] Failed to activate virtualenv"; exit 1; }
+source /app/python_virtual_env/bin/activate || { echo "[ERROR] Failed to activate virtualenv"; handle_training_failure "Failed to activate Python virtual environment"; }
 export PYTHONPATH="/app:/app/src:/app/src/training:/app/src/s3_dataset_processor:$PYTHONPATH"
 echo "[DEBUG] PYTHONPATH set to: $PYTHONPATH"
 # Add these debug commands
@@ -224,41 +264,41 @@ if [ ${#missing_pkgs[@]} -ne 0 ]; then
         # Create installation directory
         mkdir -p "$UV_INSTALL_DIR" || {
             echo "[ERROR] Failed to create UV installation directory"
-            exit 1
+            handle_training_failure "Failed to create UV installation directory"
         }
 
         # Use unmanaged installation to avoid root directory modifications
         curl -LsSf https://astral.sh/uv/install.sh | env UV_UNMANAGED_INSTALL="$UV_INSTALL_DIR" sh || {
             echo "[ERROR] Failed to install uv"
-            exit 1
+            handle_training_failure "Failed to install UV package manager"
         }
 
         # Verify installation
         if [ ! -x "$UV_BIN" ]; then
             echo "[ERROR] UV installation failed or not executable"
-            exit 1
+            handle_training_failure "UV installation failed or not executable"
         fi
 
         # Verify functionality
         "$UV_BIN" --version || {
             echo "[ERROR] UV installation corrupted"
-            exit 1
+            handle_training_failure "UV installation corrupted"
         }
 
         echo "[UV] Successfully installed uv (unmanaged) to $UV_INSTALL_DIR"
     fi
 
     if [ ! -f /app/src/training/requirements-gpu.txt ]; then
         echo "/app/src/training/requirements-gpu.txt not found!"
-        exit 1
+        handle_training_failure "Training requirements file not found"
     fi
 
     echo "[INSTALL] Installing from /app/src/training/requirements-gpu.txt using secure uv..."
     "$UV_BIN" pip install --python "$VIRTUAL_ENV/bin/python3" -r /app/src/training/requirements-gpu.txt || {
         echo "[WARNING] uv install failed — trying pip as fallback..."
         pip install -r /app/src/training/requirements-gpu.txt || {
             echo "[ERROR] Both uv and pip install failed inside virtualenv"
-            exit 1
+            handle_training_failure "Failed to install required Python packages"
         }
     }
 
@@ -321,41 +361,7 @@ if [ $training_exit_code -eq 0 ]; then
 
     echo "[DEBUG] Update job status to trained response: '$response_update_job_status_trained'"
 else
-    echo "[FAILED] Training failed with exit code: $training_exit_code"
-
-    echo "[UPDATE] Updating job status to training-failed..."
-    response_update_job_status=$(curl -s -X POST "$UPDATE_JOB_STATUS" \
-    -H "Content-Type: application/json" \
-    -d "{\"jobId\": $job_id, \"jobStatus\": \"training-failed\"}")
-
-    echo "[MODEL] Updating model training status to failed..."
-    UPDATE_MODEL_TRAINING_STATUS_FAILED="http://resql:8082/global-classifier/update-training_status-failed"
-    response_update_model_status=$(curl -s -X POST "$UPDATE_MODEL_TRAINING_STATUS_FAILED" \
-    -H "Content-Type: application/json" \
-    -d "{\"model_id\": $model_id}")
-
-    echo "[DEBUG] Update model training status response: '$response_update_model_status'"
-
-    echo "[PROGRESS] Updating progress session to show training failure..."
-    response_update_progress_failure=$(curl -s -X POST "$UPDATE_PROGRESS_SESSION_ENDPOINT" \
-    -H "Content-Type: application/json" \
-    -d "{
-        \"sessionId\": $session_id,
-        \"trainingStatus\": \"Training Failed\",
-        \"trainingMessage\": \"Model training has failed\",
-        \"progressPercentage\": 100,
-        \"processComplete\": false
-    }")
-
-    echo "[DEBUG] Update progress failure response: '$response_update_progress_failure'"
-
-    if [ -z "$response_update_progress_failure" ]; then
-        echo "[WARNING] Failed to update progress session with failure status"
-    else
-        echo "[PROGRESS] Progress session updated with failure status successfully"
-    fi
-
-    exit 1
+    handle_training_failure "Model training script failed with exit code: $training_exit_code"
 fi
 
 echo "[DONE] Training script starter completed"
diff --git a/DSL/Resql/global-classifier/POST/get-agency-centops.sql b/DSL/Resql/global-classifier/POST/get-agency-centops.sql
@@ -0,0 +1,4 @@
+-- Look up an agency in mock_centops by :agencyId; returns its agency_id, or no rows if absent.
+SELECT agency_id
+FROM public.mock_centops
+WHERE agency_id = :agencyId;
diff --git a/DSL/Resql/global-classifier/POST/get-all-dataset-versions.sql b/DSL/Resql/global-classifier/POST/get-all-dataset-versions.sql
@@ -1,3 +1,4 @@
 SELECT id, major, minor
 FROM public.dataset_versions
+WHERE generation_status = 'Generation_Success'
 ORDER BY id;
diff --git a/DSL/Resql/global-classifier/POST/get-datasets.sql b/DSL/Resql/global-classifier/POST/get-datasets.sql
@@ -1,27 +1,29 @@
 SELECT 
-    id,
-    major,
-    minor,
-    created_at,
-    generation_status,
-    last_model_trained,
-    last_trained,
+    dv.id,
+    dv.major,
+    dv.minor,
+    dv.created_at,
+    dv.generation_status,
+    COALESCE(dm.model_name, dv.last_model_trained) AS last_model_trained,
+    dv.last_trained,
     CEIL(COUNT(*) OVER() / :page_size::DECIMAL) AS total_pages
 FROM 
-    dataset_versions
+    dataset_versions dv
+LEFT JOIN 
+    data_models dm ON dv.last_model_trained = dm.model_id::text
 WHERE
-    (:generation_status = 'all' OR generation_status ILIKE '%' || :generation_status || '%')
+    (:generation_status = 'all' OR dv.generation_status ILIKE '%' || :generation_status || '%')
     AND (:dataset_name = 'all' 
-         OR POSITION(LOWER(:dataset_name) IN LOWER(CONCAT('v', major, '.', minor))) > 0
-         OR POSITION(LOWER(:dataset_name) IN LOWER(CONCAT(major, '.', minor))) > 0
-         OR POSITION(LOWER(:dataset_name) IN LOWER(major::text)) > 0
-         OR POSITION(LOWER(:dataset_name) IN LOWER(minor::text)) > 0)
+         OR POSITION(LOWER(:dataset_name) IN LOWER(CONCAT('v', dv.major, '.', dv.minor))) > 0
+         OR POSITION(LOWER(:dataset_name) IN LOWER(CONCAT(dv.major, '.', dv.minor))) > 0
+         OR POSITION(LOWER(:dataset_name) IN LOWER(dv.major::text)) > 0
+         OR POSITION(LOWER(:dataset_name) IN LOWER(dv.minor::text)) > 0)
 ORDER BY
-    CASE WHEN :sort_by = 'created_at' AND :sort_type = 'asc' THEN created_at END ASC,
-    CASE WHEN :sort_by = 'created_at' AND :sort_type = 'desc' THEN created_at END DESC,
-    -- CASE WHEN :sort_by = 'major' AND :sort_type = 'asc' THEN major END ASC,
-    -- CASE WHEN :sort_by = 'major' AND :sort_type = 'desc' THEN major END DESC,
-    -- CASE WHEN :sort_by = 'minor' AND :sort_type = 'asc' THEN minor END ASC,
-    -- CASE WHEN :sort_by = 'minor' AND :sort_type = 'desc' THEN minor END DESC,
-    CASE WHEN :sort_by IS NULL OR :sort_by = '' THEN created_at END DESC
+    CASE WHEN :sort_by = 'created_at' AND :sort_type = 'asc' THEN dv.created_at END ASC,
+    CASE WHEN :sort_by = 'created_at' AND :sort_type = 'desc' THEN dv.created_at END DESC,
+    -- CASE WHEN :sort_by = 'major' AND :sort_type = 'asc' THEN dv.major END ASC,
+    -- CASE WHEN :sort_by = 'major' AND :sort_type = 'desc' THEN dv.major END DESC,
+    -- CASE WHEN :sort_by = 'minor' AND :sort_type = 'asc' THEN dv.minor END ASC,
+    -- CASE WHEN :sort_by = 'minor' AND :sort_type = 'desc' THEN dv.minor END DESC,
+    CASE WHEN :sort_by IS NULL OR :sort_by = '' THEN dv.created_at END DESC
 OFFSET ((GREATEST(:page, 1) - 1) * :page_size) LIMIT :page_size;
diff --git a/DSL/Resql/global-classifier/POST/insert-agency-centops.sql b/DSL/Resql/global-classifier/POST/insert-agency-centops.sql
@@ -0,0 +1,3 @@
+-- Insert a new agency (:agencyId, :agencyName) into mock_centops, stamping created_at with NOW().
+INSERT INTO public.mock_centops (agency_id, agency_name, created_at)
+VALUES (:agencyId, :agencyName, NOW());