Improve health schemas and readiness checks

This commit is contained in:
2025-11-27 14:21:25 +01:00
parent 7acce1da02
commit 3a7208d28d
4 changed files with 246 additions and 100 deletions

View File

@@ -1,12 +1,14 @@
"""Health endpoints."""
"""Health endpoints for FastAPI application health checks (Liveness, Readiness, Detailed Status)."""
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import PlainTextResponse
# Dependencies for application configuration and schema definitions
from core.config import Settings, get_settings
from operations.health.schemas import HealthStatus
from operations.health.schemas import HealthStatus, HealthStatusEnum
from operations.health.service import get_detailed_health, readiness_check
# Initialize the API router for health endpoints, grouping them under the "/health" prefix
router = APIRouter(prefix="/health", tags=["health"])
@@ -25,8 +27,9 @@ def liveness() -> str:
performing no deep checks on external dependencies.
**Success Response:** HTTP 200 OK with "live" body.
**Failure Response:** Endpoint timeout (no response).
**Failure Response:** None. If the process is hung or down, the request times out or the connection is refused, and the orchestrator treats the missing response as a failed probe.
"""
# Simply returning a string confirms the Python process and FastAPI are functional.
return "live"
@@ -40,14 +43,18 @@ async def readiness() -> str:
"""
**Readiness Probe:** Determines if the application can accept user traffic.
This endpoint is used by load balancers to route traffic. It performs deep checks
on all critical dependencies (e.g., database, message queue).
This endpoint is used by load balancers or service meshes to decide whether
to route traffic to this specific instance. It performs deep checks
on all critical dependencies (e.g., database connection, external services).
**Success Response:** HTTP 200 OK with "ready" body.
**Failure Response:** HTTP 503 Service Unavailable if any critical dependency fails.
"""
# Call the service layer function that runs all critical checks concurrently
ok = await readiness_check()
if not ok:
# If any check fails, signal 'Service Unavailable' so traffic is diverted
raise HTTPException(status_code=503, detail="not ready")
return "ready"
@@ -57,18 +64,21 @@ async def readiness() -> str:
"",
summary="Detailed Health Status Page",
response_model=HealthStatus,
status_code=200,
)
async def detailed_health(settings: Settings = Depends(get_settings)) -> HealthStatus:
"""
**Detailed Status Page:** Provides granular health information for human operators.
**Detailed Status Page:** Provides granular health information for human operators/monitoring tools.
This endpoint runs all readiness checks and returns a structured JSON object.
The top-level HTTP status code reflects the overall application health (200 OK or 503 Service Unavailable).
This endpoint runs all readiness checks and returns a structured JSON object
containing the status of each individual component.
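The top-level HTTP status code reflects the overall application health: 200 OK when the overall status is 'pass', 503 Service Unavailable for any other status.
Note that a 503 response carries only the error detail "not ready", not the per-component breakdown.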
"""
# Retrieve the comprehensive health status model
detailed_health = await get_detailed_health(settings)
if detailed_health.status != "pass":
if detailed_health.status != HealthStatusEnum.passed:
# Align the HTTP status code with the overall health status for easy monitoring
raise HTTPException(status_code=503, detail="not ready")
# Overall status is 'pass': respond with the route's declared 200 and the full HealthStatus body
return detailed_health