Improve health schemas and readiness checks
This commit is contained in:
@@ -1,12 +1,14 @@
|
||||
"""Health endpoints."""
|
||||
"""Health endpoints for FastAPI application health checks (Liveness, Readiness, Detailed Status)."""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import PlainTextResponse
|
||||
|
||||
# Application settings, health response schemas, and the service-layer health check functions
|
||||
from core.config import Settings, get_settings
|
||||
from operations.health.schemas import HealthStatus
|
||||
from operations.health.schemas import HealthStatus, HealthStatusEnum
|
||||
from operations.health.service import get_detailed_health, readiness_check
|
||||
|
||||
# Initialize the API router for health endpoints, grouping them under the "/health" prefix
|
||||
router = APIRouter(prefix="/health", tags=["health"])
|
||||
|
||||
|
||||
@@ -25,8 +27,9 @@ def liveness() -> str:
|
||||
performing no deep checks on external dependencies.
|
||||
|
||||
**Success Response:** HTTP 200 OK with "live" body.
|
||||
**Failure Response:** Endpoint timeout (no response).
|
||||
**Failure Response:** None — an unresponsive process sends no reply, and the orchestrator interprets the resulting *TCP connection timeout* as a failure.
|
||||
"""
|
||||
# Simply returning a string confirms the Python process and FastAPI are functional.
|
||||
return "live"
|
||||
|
||||
|
||||
@@ -40,14 +43,18 @@ async def readiness() -> str:
|
||||
"""
|
||||
**Readiness Probe:** Determines if the application can accept user traffic.
|
||||
|
||||
This endpoint is used by load balancers to route traffic. It performs deep checks
|
||||
on all critical dependencies (e.g., database, message queue).
|
||||
This endpoint is used by load balancers or service meshes to decide whether
|
||||
to route traffic to this specific instance. It performs deep checks
|
||||
on all critical dependencies (e.g., database connection, external services).
|
||||
|
||||
**Success Response:** HTTP 200 OK with "ready" body.
|
||||
**Failure Response:** HTTP 503 Service Unavailable if any critical dependency fails.
|
||||
"""
|
||||
# Call the service layer function that runs all critical checks concurrently
|
||||
ok = await readiness_check()
|
||||
|
||||
if not ok:
|
||||
# If any check fails, signal 'Service Unavailable' so traffic is diverted
|
||||
raise HTTPException(status_code=503, detail="not ready")
|
||||
|
||||
return "ready"
|
||||
@@ -57,18 +64,21 @@ async def readiness() -> str:
|
||||
"",
|
||||
summary="Detailed Health Status Page",
|
||||
response_model=HealthStatus,
|
||||
status_code=200,
|
||||
)
|
||||
async def detailed_health(settings: Settings = Depends(get_settings)) -> HealthStatus:
|
||||
"""
|
||||
**Detailed Status Page:** Provides granular health information for human operators.
|
||||
**Detailed Status Page:** Provides granular health information for human operators/monitoring tools.
|
||||
|
||||
This endpoint runs all readiness checks and returns a structured JSON object.
|
||||
The top-level HTTP status code reflects the overall application health (200 OK or 503 Service Unavailable).
|
||||
This endpoint runs all readiness checks and returns a structured JSON object
|
||||
containing the status of each individual component.
|
||||
The top-level HTTP status code reflects the overall application health (200 OK when the overall status is 'pass', 503 Service Unavailable for any other status).
|
||||
"""
|
||||
# Retrieve the comprehensive health status model
|
||||
detailed_health = await get_detailed_health(settings)
|
||||
|
||||
if detailed_health.status != "pass":
|
||||
if detailed_health.status != HealthStatusEnum.passed:
|
||||
# Align the HTTP status code with the overall health status for easy monitoring
|
||||
raise HTTPException(status_code=503, detail="not ready")
|
||||
|
||||
# Overall status is 'pass': return the full report with the route's default HTTP 200
|
||||
return detailed_health
|
||||
|
||||
Reference in New Issue
Block a user