| | | 1 | | using System.Text.Json; |
| | | 2 | | using Serilog.Events; |
| | | 3 | | |
| | | 4 | | namespace Kestrun.Health; |
| | | 5 | | |
| | | 6 | | /// <summary> |
| | | 7 | | /// A health probe that performs an HTTP GET request to a specified URL and interprets the JSON |
| | | 8 | | /// response according to the health probe contract. |
| | | 9 | | /// </summary> |
| | | 10 | | /// <remarks> |
| | | 11 | | /// Initializes a new instance of the <see cref="HttpProbe"/> class. |
| | | 12 | | /// </remarks> |
| | | 13 | | /// <param name="name">The name of the probe.</param> |
| | | 14 | | /// <param name="tags">The tags associated with the probe.</param> |
| | | 15 | | /// <param name="http">The HTTP client to use.</param> |
| | | 16 | | /// <param name="url">The URL to probe.</param> |
| | | 17 | | /// <param name="timeout">The timeout for the probe.</param> |
| | | 18 | | /// <param name="logger">Optional logger; if null a contextual logger is created.</param> |
| | 5 | 19 | | public sealed class HttpProbe(string name, string[] tags, HttpClient http, string url, TimeSpan? timeout = null, Serilog |
| | | 20 | | { |
/// <summary>
/// The name of the probe.
/// </summary>
public string Name { get; } = name;

/// <summary>
/// The tags associated with the probe.
/// </summary>
public string[] Tags { get; } = tags;

/// <summary>
/// Logger used for diagnostics. When no logger is supplied, a Serilog context logger
/// carrying the probe name in the <c>HealthProbe</c> property is used instead.
/// </summary>
public Serilog.ILogger Logger { get; init; } = logger ?? Serilog.Log.ForContext("HealthProbe", name);

/// <summary>
/// The HTTP client used to send the probe request.
/// </summary>
/// <remarks>
/// Validated eagerly: a missing client is a wiring error and should fail at construction
/// rather than surface as a caught <see cref="NullReferenceException"/> on every check.
/// </remarks>
private readonly HttpClient _http = http ?? throw new ArgumentNullException(nameof(http));

/// <summary>
/// The URL requested by the probe.
/// </summary>
private readonly string _url = url;

/// <summary>
/// Maximum duration of a single check; five seconds when no timeout is supplied.
/// </summary>
/// <remarks>
/// Negative values other than <see cref="System.Threading.Timeout.InfiniteTimeSpan"/> are rejected here
/// because <see cref="CancellationTokenSource.CancelAfter(TimeSpan)"/> would otherwise throw on every check.
/// </remarks>
private readonly TimeSpan _timeout = timeout switch
{
    null => TimeSpan.FromSeconds(5),
    { } t when t < TimeSpan.Zero && t != System.Threading.Timeout.InfiniteTimeSpan =>
        throw new ArgumentOutOfRangeException(
            nameof(timeout), t, "Timeout must be non-negative or Timeout.InfiniteTimeSpan."),
    { } t => t,
};
| | | 39 | | |
/// <summary>
/// Executes the HTTP GET request and interprets the response according to the health probe contract.
/// </summary>
/// <param name="ct">The cancellation token.</param>
/// <returns>A task representing the asynchronous operation, with a <see cref="ProbeResult"/> as the result.</returns>
/// <exception cref="OperationCanceledException">
/// Thrown when <paramref name="ct"/> is cancelled; the probe's own timeout is reported as a result instead.
/// </exception>
public async Task<ProbeResult> CheckAsync(CancellationToken ct = default)
{
    // Linked source: cancels on caller cancellation or when the probe timeout elapses.
    using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
    cts.CancelAfter(_timeout);

    try
    {
        var (response, body) = await ExecuteHttpRequestAsync(cts.Token).ConfigureAwait(false);

        // The body is already buffered into a string, so the response can be released
        // as soon as it has been mapped to a result.
        using (response)
        {
            return ParseHealthResponse(response, body);
        }
    }
    catch (OperationCanceledException) when (ct.IsCancellationRequested)
    {
        // Upstream/request cancellation -> propagate so the runner can handle overall request abort semantics.
        throw;
    }
    catch (OperationCanceledException) when (cts.IsCancellationRequested)
    {
        // Only the internal timer can have fired here. TaskCanceledException derives from
        // OperationCanceledException, so a single handler covers both.
        return HandleTimeout();
    }
    catch (Exception ex)
    {
        Logger.Error(ex, "HttpProbe {Probe} failed", Name);
        return new ProbeResult(ProbeStatus.Unhealthy, $"Exception: {ex.Message}");
    }
}
| | | 74 | | |
| | | 75 | | /// <summary> |
| | | 76 | | /// Executes the HTTP GET request and returns the response and body. |
| | | 77 | | /// </summary> |
| | | 78 | | /// <param name="cancellationToken">The cancellation token.</param> |
| | | 79 | | /// <returns>A tuple containing the HTTP response and response body.</returns> |
| | | 80 | | private async Task<(HttpResponseMessage Response, string? Body)> ExecuteHttpRequestAsync(CancellationToken cancellat |
| | | 81 | | { |
| | 5 | 82 | | if (Logger.IsEnabled(LogEventLevel.Debug)) |
| | | 83 | | { |
| | 5 | 84 | | Logger.Debug("HttpProbe {Probe} sending GET {Url} (timeout={Timeout})", Name, _url, _timeout); |
| | | 85 | | } |
| | | 86 | | |
| | 5 | 87 | | var response = await _http.GetAsync(_url, cancellationToken).ConfigureAwait(false); |
| | 3 | 88 | | var body = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false); |
| | | 89 | | |
| | 3 | 90 | | if (Logger.IsEnabled(LogEventLevel.Debug)) |
| | | 91 | | { |
| | 3 | 92 | | Logger.Debug("HttpProbe {Probe} received {StatusCode} length={Length}", Name, (int)response.StatusCode, body |
| | | 93 | | } |
| | | 94 | | |
| | 3 | 95 | | return (response, body); |
| | 3 | 96 | | } |
| | | 97 | | |
/// <summary>
/// Maps an HTTP response to a probe result, preferring a health contract found in the body
/// and falling back to the HTTP status code when there is none.
/// </summary>
/// <param name="response">The HTTP response.</param>
/// <param name="body">The response body.</param>
/// <returns>The probe result.</returns>
private ProbeResult ParseHealthResponse(HttpResponseMessage response, string? body)
{
    if (TryParseHealthContract(body) is not { } parsed)
    {
        // No usable contract in the body: derive the result from the status code.
        return HandleNonContractResponse(response);
    }

    if (Logger.IsEnabled(LogEventLevel.Debug))
    {
        Logger.Debug("HttpProbe {Probe} parsed contract status={Status}", Name, parsed.Status);
    }

    return parsed;
}
| | | 118 | | |
/// <summary>
/// Attempts to parse the response body as a health contract JSON.
/// </summary>
/// <remarks>
/// The body is treated as a contract when it is a JSON object with a <c>status</c> property
/// that is a string or JSON null. Unrecognised or null status values map to
/// <see cref="ProbeStatus.Unhealthy"/>. An optional string <c>description</c> is carried over;
/// a description of any other JSON kind is ignored rather than invalidating the contract.
/// </remarks>
/// <param name="body">The response body.</param>
/// <returns>The probe result if parsing succeeds, null otherwise.</returns>
private ProbeResult? TryParseHealthContract(string? body)
{
    // An empty body is a routine non-contract response; skip the parser instead of
    // relying on it to throw.
    if (string.IsNullOrWhiteSpace(body))
    {
        return NotContract();
    }

    try
    {
        // JsonDocument rents pooled buffers and must be disposed.
        using var doc = JsonDocument.Parse(body);
        var root = doc.RootElement;

        if (root.ValueKind != JsonValueKind.Object
            || !root.TryGetProperty("status", out var statusElement)
            || statusElement.ValueKind is not (JsonValueKind.String or JsonValueKind.Null))
        {
            return NotContract();
        }

        var status = statusElement.GetString()?.ToLowerInvariant() switch
        {
            ProbeStatusLabels.STATUS_HEALTHY => ProbeStatus.Healthy,
            ProbeStatusLabels.STATUS_DEGRADED => ProbeStatus.Degraded,
            ProbeStatusLabels.STATUS_UNHEALTHY => ProbeStatus.Unhealthy,
            _ => ProbeStatus.Unhealthy
        };

        var desc = root.TryGetProperty("description", out var d) && d.ValueKind == JsonValueKind.String
            ? d.GetString()
            : null;

        return new ProbeResult(status, desc, null);
    }
    catch (Exception ex) when (ex is JsonException or InvalidOperationException)
    {
        // Malformed JSON, or string content that cannot be decoded: not a contract.
        return NotContract();
    }

    // Logs the rejection at debug level and yields the "no contract" sentinel.
    ProbeResult? NotContract()
    {
        if (Logger.IsEnabled(LogEventLevel.Debug))
        {
            Logger.Debug("HttpProbe {Probe} response body is not valid contract JSON", Name);
        }

        return null;
    }
}
| | | 149 | | |
/// <summary>
/// Derives a probe result from the HTTP status code alone, for responses whose body
/// does not carry a health contract.
/// </summary>
/// <param name="response">The HTTP response.</param>
/// <returns>
/// <see cref="ProbeStatus.Degraded"/> for a success status code, otherwise
/// <see cref="ProbeStatus.Unhealthy"/> with the numeric status code as description.
/// </returns>
private ProbeResult HandleNonContractResponse(HttpResponseMessage response)
{
    ProbeResult mapped;
    if (response.IsSuccessStatusCode)
    {
        // Endpoint answered but did not follow the contract.
        mapped = new ProbeResult(ProbeStatus.Degraded, "No contract JSON");
    }
    else
    {
        var code = (int)response.StatusCode;
        mapped = new ProbeResult(ProbeStatus.Unhealthy, $"HTTP {code}");
    }

    if (Logger.IsEnabled(LogEventLevel.Debug))
    {
        Logger.Debug("HttpProbe {Probe} non-contract response mapped to {Status}", Name, mapped.Status);
    }

    return mapped;
}
| | | 168 | | |
/// <summary>
/// Builds the result reported when the probe's own timeout elapses.
/// </summary>
/// <returns>A <see cref="ProbeStatus.Degraded"/> result whose description names the timeout.</returns>
private ProbeResult HandleTimeout()
{
    var debugEnabled = Logger.IsEnabled(LogEventLevel.Debug);
    if (debugEnabled)
    {
        Logger.Debug("HttpProbe {Probe} timed out after {Timeout}", Name, _timeout);
    }

    var description = $"Timeout after {_timeout}";
    return new ProbeResult(ProbeStatus.Degraded, description);
}
| | | 181 | | } |