Grafana Dashboards
Building comprehensive Grafana dashboards for monitoring Excalibur applications with Prometheus metrics.
Before You Start
- .NET 8.0+ (or .NET 9/10 for latest features)
- A Grafana instance with Prometheus data source
- Familiarity with metrics reference and production observability
Overview
Grafana provides powerful visualization and alerting for:
- Application Metrics: Request rates, error rates, latency
- Business Metrics: Commands processed, events published, aggregates loaded
- Infrastructure Metrics: CPU, memory, database connections
- Custom Metrics: Domain-specific KPIs
Prerequisites
Install Prometheus
# docker-compose.yml
version: '3.8'
services:
prometheus:
image: prom/prometheus:latest
container_name: prometheus
ports:
- "9090:9090"
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml
- prometheus-data:/prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--web.console.libraries=/usr/share/prometheus/console_libraries'
- '--web.console.templates=/usr/share/prometheus/consoles'
grafana:
image: grafana/grafana:latest
container_name: grafana
ports:
- "3000:3000"
environment:
- GF_SECURITY_ADMIN_PASSWORD=admin
- GF_USERS_ALLOW_SIGN_UP=false
volumes:
- grafana-data:/var/lib/grafana
- ./grafana/provisioning:/etc/grafana/provisioning
depends_on:
- prometheus
volumes:
prometheus-data:
grafana-data:
Prometheus Configuration
# prometheus.yml
global:
scrape_interval: 15s
evaluation_interval: 15s
scrape_configs:
- job_name: 'dispatch-api'
static_configs:
- targets: ['host.docker.internal:8080']
labels:
app: 'dispatch-api'
environment: 'production'
- job_name: 'dispatch-worker'
static_configs:
- targets: ['host.docker.internal:8081']
labels:
app: 'dispatch-worker'
environment: 'production'
Add Prometheus Metrics to ASP.NET Core
Install Package
dotnet add package prometheus-net.AspNetCore
Configure Metrics
// Program.cs
using Prometheus;
var builder = WebApplication.CreateBuilder(args);
// Add services
builder.Services.AddControllers();
var app = builder.Build();
// Enable Prometheus metrics
app.UseMetricServer(); // /metrics endpoint
app.UseHttpMetrics(); // HTTP request metrics
app.MapControllers();
app.Run();
Metrics endpoint: http://localhost:8080/metrics
Custom Dispatch Metrics
Command Processing Metrics
using Prometheus;
public class MetricsMiddleware : IDispatchMiddleware
{
private static readonly Counter MessagesProcessed = Metrics
.CreateCounter(
"dispatch_messages_total",
"Total messages processed",
new CounterConfiguration
{
LabelNames = new[] { "message_type", "status" }
});
private static readonly Histogram MessageDuration = Metrics
.CreateHistogram(
"dispatch_message_duration_seconds",
"Message processing duration in seconds",
new HistogramConfiguration
{
LabelNames = new[] { "message_type" },
Buckets = Histogram.ExponentialBuckets(0.001, 2, 10)
});
private static readonly Gauge ActiveMessages = Metrics
.CreateGauge(
"dispatch_messages_active",
"Number of messages currently being processed",
new GaugeConfiguration
{
LabelNames = new[] { "message_type" }
});
public DispatchMiddlewareStage? Stage => DispatchMiddlewareStage.Processing;
public async ValueTask<IMessageResult> InvokeAsync(
IDispatchMessage message,
IMessageContext context,
DispatchRequestDelegate nextDelegate,
CancellationToken cancellationToken)
{
var messageType = message.GetType().Name;
using (MessageDuration.WithLabels(messageType).NewTimer())
using (ActiveMessages.WithLabels(messageType).TrackInProgress())
{
try
{
var result = await nextDelegate(message, context, cancellationToken);
MessagesProcessed
.WithLabels(messageType, result.Succeeded ? "success" : "failure")
.Inc();
return result;
}
catch
{
MessagesProcessed
.WithLabels(messageType, "failure")
.Inc();
throw;
}
}
}
}
// Register middleware
builder.Services.AddDispatch(dispatch =>
{
dispatch.UseMiddleware<MetricsMiddleware>();
});
Event Store Metrics
public class MetricsEventStore : IEventStore
{
private readonly IEventStore _inner;
private static readonly Counter EventsAppended = Metrics
.CreateCounter(
"eventstore_events_appended_total",
"Total events appended to event store",
new CounterConfiguration
{
LabelNames = new[] { "aggregate_type", "event_type" }
});
private static readonly Histogram AppendDuration = Metrics
.CreateHistogram(
"eventstore_append_duration_seconds",
"Duration of event store append operations",
new HistogramConfiguration
{
LabelNames = new[] { "aggregate_type" },
Buckets = Histogram.ExponentialBuckets(0.001, 2, 10)
});
private static readonly Counter ConcurrencyConflicts = Metrics
.CreateCounter(
"eventstore_concurrency_conflicts_total",
"Total concurrency conflicts encountered",
new CounterConfiguration
{
LabelNames = new[] { "aggregate_type" }
});
public MetricsEventStore(IEventStore inner)
{
_inner = inner;
}
public async ValueTask<AppendResult> AppendAsync(
string aggregateId,
string aggregateType,
IEnumerable<IDomainEvent> events,
long expectedVersion,
CancellationToken cancellationToken)
{
using (AppendDuration.WithLabels(aggregateType).NewTimer())
{
try
{
var result = await _inner.AppendAsync(
aggregateId,
aggregateType,
events,
expectedVersion,
cancellationToken);
foreach (var evt in events)
{
EventsAppended
.WithLabels(aggregateType, evt.EventType)
.Inc();
}
return result;
}
catch (ConcurrencyException)
{
ConcurrencyConflicts
.WithLabels(aggregateType)
.Inc();
throw;
}
}
}
// Implement other IEventStore methods (LoadAsync, etc.)...
}
// Register decorator
builder.Services.Decorate<IEventStore, MetricsEventStore>();
Outbox Metrics
public class MetricsOutboxPublisher : IOutboxPublisher
{
private readonly IOutboxPublisher _inner;
private static readonly Counter MessagesPublished = Metrics
.CreateCounter(
"outbox_messages_published_total",
"Total messages published from outbox",
new CounterConfiguration
{
LabelNames = new[] { "message_type", "status" }
});
private static readonly Histogram PublishDuration = Metrics
.CreateHistogram(
"outbox_publish_duration_seconds",
"Duration of outbox publish operations");
private static readonly Gauge PendingMessages = Metrics
.CreateGauge(
"outbox_pending_messages",
"Number of messages pending in outbox");
public MetricsOutboxPublisher(IOutboxPublisher inner)
{
_inner = inner;
}
public async Task PublishAsync(
OutboxMessage message,
CancellationToken cancellationToken)
{
using (PublishDuration.NewTimer())
{
try
{
await _inner.PublishAsync(message, cancellationToken);
MessagesPublished
.WithLabels(message.MessageType, "success")
.Inc();
}
catch
{
MessagesPublished
.WithLabels(message.MessageType, "failure")
.Inc();
throw;
}
}
}
}
Grafana Dashboard JSON
Command Processing Dashboard
Save as grafana/dashboards/dispatch-commands.json:
{
"dashboard": {
"title": "Excalibur - Command Processing",
"tags": ["dispatch", "commands"],
"timezone": "browser",
"panels": [
{
"id": 1,
"title": "Command Rate",
"type": "graph",
"gridPos": { "x": 0, "y": 0, "w": 12, "h": 8 },
"targets": [
{
"expr": "sum(rate(dispatch_commands_total[5m])) by (command_type)",
"legendFormat": "{{command_type}}"
}
],
"yaxes": [
{ "label": "Commands/sec", "format": "short" }
]
},
{
"id": 2,
"title": "Command Success Rate",
"type": "graph",
"gridPos": { "x": 12, "y": 0, "w": 12, "h": 8 },
"targets": [
{
"expr": "sum(rate(dispatch_commands_total{status=\"success\"}[5m])) / sum(rate(dispatch_commands_total[5m]))",
"legendFormat": "Success Rate"
}
],
"yaxes": [
{ "label": "Success Rate", "format": "percentunit" }
]
},
{
"id": 3,
"title": "Command Duration (p50, p95, p99)",
"type": "graph",
"gridPos": { "x": 0, "y": 8, "w": 12, "h": 8 },
"targets": [
{
"expr": "histogram_quantile(0.50, sum(rate(dispatch_command_duration_seconds_bucket[5m])) by (le, command_type))",
"legendFormat": "p50 - {{command_type}}"
},
{
"expr": "histogram_quantile(0.95, sum(rate(dispatch_command_duration_seconds_bucket[5m])) by (le, command_type))",
"legendFormat": "p95 - {{command_type}}"
},
{
"expr": "histogram_quantile(0.99, sum(rate(dispatch_command_duration_seconds_bucket[5m])) by (le, command_type))",
"legendFormat": "p99 - {{command_type}}"
}
],
"yaxes": [
{ "label": "Duration", "format": "s" }
]
},
{
"id": 4,
"title": "Active Commands",
"type": "graph",
"gridPos": { "x": 12, "y": 8, "w": 12, "h": 8 },
"targets": [
{
"expr": "sum(dispatch_commands_active) by (command_type)",
"legendFormat": "{{command_type}}"
}
],
"yaxes": [
{ "label": "Active", "format": "short" }
]
}
]
}
}
Event Store Dashboard
Save as grafana/dashboards/dispatch-eventstore.json:
{
"dashboard": {
"title": "Excalibur - Event Store",
"tags": ["dispatch", "eventstore"],
"timezone": "browser",
"panels": [
{
"id": 1,
"title": "Events Appended Rate",
"type": "graph",
"gridPos": { "x": 0, "y": 0, "w": 12, "h": 8 },
"targets": [
{
"expr": "sum(rate(eventstore_events_appended_total[5m])) by (aggregate_type)",
"legendFormat": "{{aggregate_type}}"
}
],
"yaxes": [
{ "label": "Events/sec", "format": "short" }
]
},
{
"id": 2,
"title": "Append Duration",
"type": "graph",
"gridPos": { "x": 12, "y": 0, "w": 12, "h": 8 },
"targets": [
{
"expr": "histogram_quantile(0.95, sum(rate(eventstore_append_duration_seconds_bucket[5m])) by (le, aggregate_type))",
"legendFormat": "p95 - {{aggregate_type}}"
}
],
"yaxes": [
{ "label": "Duration", "format": "s" }
]
},
{
"id": 3,
"title": "Concurrency Conflicts",
"type": "graph",
"gridPos": { "x": 0, "y": 8, "w": 12, "h": 8 },
"targets": [
{
"expr": "sum(rate(eventstore_concurrency_conflicts_total[5m])) by (aggregate_type)",
"legendFormat": "{{aggregate_type}}"
}
],
"yaxes": [
{ "label": "Conflicts/sec", "format": "short" }
]
},
{
"id": 4,
"title": "Event Types Distribution",
"type": "piechart",
"gridPos": { "x": 12, "y": 8, "w": 12, "h": 8 },
"targets": [
{
"expr": "sum(increase(eventstore_events_appended_total[1h])) by (event_type)",
"legendFormat": "{{event_type}}"
}
]
}
]
}
}
Outbox Dashboard
{
"dashboard": {
"title": "Excalibur - Outbox",
"tags": ["dispatch", "outbox"],
"timezone": "browser",
"panels": [
{
"id": 1,
"title": "Outbox Publish Rate",
"type": "graph",
"gridPos": { "x": 0, "y": 0, "w": 12, "h": 8 },
"targets": [
{
"expr": "sum(rate(outbox_messages_published_total{status=\"success\"}[5m]))",
"legendFormat": "Success"
},
{
"expr": "sum(rate(outbox_messages_published_total{status=\"failure\"}[5m]))",
"legendFormat": "Failure"
}
],
"yaxes": [
{ "label": "Messages/sec", "format": "short" }
]
},
{
"id": 2,
"title": "Pending Messages",
"type": "gauge",
"gridPos": { "x": 12, "y": 0, "w": 12, "h": 8 },
"targets": [
{
"expr": "outbox_pending_messages"
}
],
"options": {
"orientation": "auto",
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{ "value": 0, "color": "green" },
{ "value": 100, "color": "yellow" },
{ "value": 500, "color": "red" }
]
}
}
}
},
{
"id": 3,
"title": "Publish Duration",
"type": "graph",
"gridPos": { "x": 0, "y": 8, "w": 24, "h": 8 },
"targets": [
{
"expr": "histogram_quantile(0.50, rate(outbox_publish_duration_seconds_bucket[5m]))",
"legendFormat": "p50"
},
{
"expr": "histogram_quantile(0.95, rate(outbox_publish_duration_seconds_bucket[5m]))",
"legendFormat": "p95"
},
{
"expr": "histogram_quantile(0.99, rate(outbox_publish_duration_seconds_bucket[5m]))",
"legendFormat": "p99"
}
],
"yaxes": [
{ "label": "Duration", "format": "s" }
]
}
]
}
}
Provisioning Dashboards
Dashboard Provisioning
# grafana/provisioning/dashboards/dashboard.yml
apiVersion: 1
providers:
- name: 'Dispatch Dashboards'
orgId: 1
folder: 'Excalibur.Dispatch'
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /etc/grafana/provisioning/dashboards
Datasource Provisioning
# grafana/provisioning/datasources/prometheus.yml
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
access: proxy
url: http://prometheus:9090
isDefault: true
editable: true
Application Insights Integration
For Azure-hosted applications, integrate Application Insights with Grafana:
Install Plugin
grafana-cli plugins install grafana-azure-monitor-datasource
Configure Datasource
# grafana/provisioning/datasources/azure-monitor.yml
apiVersion: 1
datasources:
- name: Azure Monitor
type: grafana-azure-monitor-datasource
access: proxy
jsonData:
subscriptionId: <your-subscription-id>
tenantId: <your-tenant-id>
clientId: <your-client-id>
cloudName: azuremonitor
secureJsonData:
clientSecret: <your-client-secret>
Query Application Insights
customMetrics
| where name == "dispatch_commands_total"
| summarize sum(value) by bin(timestamp, 5m), tostring(customDimensions.command_type)
CloudWatch Integration
For AWS-hosted applications:
Install Plugin
grafana-cli plugins install cloudwatch
Configure Datasource
# grafana/provisioning/datasources/cloudwatch.yml
apiVersion: 1
datasources:
- name: CloudWatch
type: cloudwatch
jsonData:
authType: keys
defaultRegion: us-east-1
secureJsonData:
accessKey: <your-access-key>
secretKey: <your-secret-key>
Alerting
Configure Alert Rules
# grafana/provisioning/alerting/alerts.yml
apiVersion: 1
groups:
- name: dispatch-alerts
interval: 1m
rules:
- alert: HighCommandFailureRate
expr: |
sum(rate(dispatch_commands_total{status="failure"}[5m]))
/
sum(rate(dispatch_commands_total[5m]))
> 0.05
for: 5m
labels:
severity: warning
annotations:
summary: "High command failure rate"
description: "Command failure rate is {{ $value | humanizePercentage }}"
- alert: SlowCommandProcessing
expr: |
histogram_quantile(0.95,
sum(rate(dispatch_command_duration_seconds_bucket[5m]))
by (le, command_type)
) > 5
for: 5m
labels:
severity: warning
annotations:
summary: "Slow command processing"
description: "p95 latency for {{ $labels.command_type }} is {{ $value }}s"
- alert: HighConcurrencyConflicts
expr: |
sum(rate(eventstore_concurrency_conflicts_total[5m]))
by (aggregate_type)
> 0.1
for: 5m
labels:
severity: warning
annotations:
summary: "High concurrency conflicts"
description: "{{ $labels.aggregate_type }} has {{ $value }} conflicts/sec"
- alert: OutboxBacklog
expr: outbox_pending_messages > 1000
for: 10m
labels:
severity: critical
annotations:
summary: "Outbox backlog"
description: "{{ $value }} messages pending in outbox"
Notification Channels
# grafana/provisioning/notifiers/slack.yml
apiVersion: 1
notifiers:
- name: Slack Alerts
type: slack
uid: slack-alerts
org_id: 1
is_default: true
send_reminder: false
settings:
url: https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK
recipient: '#alerts'
username: Grafana
Custom Variables
Add dashboard variables for filtering:
{
"templating": {
"list": [
{
"name": "environment",
"type": "query",
"query": "label_values(dispatch_commands_total, environment)",
"current": {
"text": "production",
"value": "production"
}
},
{
"name": "command_type",
"type": "query",
"query": "label_values(dispatch_commands_total{environment=\"$environment\"}, command_type)",
"multi": true,
"includeAll": true
}
]
}
}
Usage in queries:
sum(rate(dispatch_commands_total{environment="$environment", command_type=~"$command_type"}[5m]))
Kubernetes Dashboards
Monitor Kubernetes-deployed Dispatch apps:
{
"dashboard": {
"title": "Excalibur - Kubernetes",
"panels": [
{
"title": "Pod CPU Usage",
"targets": [
{
"expr": "sum(rate(container_cpu_usage_seconds_total{pod=~\"dispatch-.*\"}[5m])) by (pod)"
}
]
},
{
"title": "Pod Memory Usage",
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{pod=~\"dispatch-.*\"}) by (pod)"
}
]
},
{
"title": "Pod Restarts",
"targets": [
{
"expr": "sum(kube_pod_container_status_restarts_total{pod=~\"dispatch-.*\"}) by (pod)"
}
]
}
]
}
}
Business Metrics Dashboard
Track domain-specific KPIs:
// Custom business metrics
public class OrderMetrics
{
private static readonly Counter OrdersCreated = Metrics
.CreateCounter(
"orders_created_total",
"Total orders created");
private static readonly Counter OrdersCancelled = Metrics
.CreateCounter(
"orders_cancelled_total",
"Total orders cancelled");
private static readonly Histogram OrderValue = Metrics
.CreateHistogram(
"order_value_dollars",
"Order value in dollars",
new HistogramConfiguration
{
Buckets = new[] { 10, 50, 100, 250, 500, 1000, 5000 }
});
public static void RecordOrderCreated(decimal value)
{
OrdersCreated.Inc();
OrderValue.Observe((double)value);
}
public static void RecordOrderCancelled()
{
OrdersCancelled.Inc();
}
}
Dashboard:
{
"dashboard": {
"title": "Business Metrics - Orders",
"panels": [
{
"title": "Orders Created (Hourly)",
"targets": [
{
"expr": "sum(increase(orders_created_total[1h]))"
}
]
},
{
"title": "Cancellation Rate",
"targets": [
{
"expr": "sum(rate(orders_cancelled_total[5m])) / sum(rate(orders_created_total[5m]))"
}
]
},
{
"title": "Order Value Distribution",
"targets": [
{
"expr": "histogram_quantile(0.50, rate(order_value_dollars_bucket[1h]))",
"legendFormat": "Median"
},
{
"expr": "histogram_quantile(0.95, rate(order_value_dollars_bucket[1h]))",
"legendFormat": "p95"
}
]
}
]
}
}
Best Practices
1. Use Consistent Label Names
// GOOD - Consistent labeling
private static readonly Counter CommandsProcessed = Metrics
.CreateCounter(
"dispatch_commands_total",
"Total commands processed",
new CounterConfiguration
{
LabelNames = new[] { "command_type", "status", "environment" }
});
// BAD - Inconsistent labeling
private static readonly Counter CommandsProcessed = Metrics
.CreateCounter(
"commands_count", // Inconsistent naming
"Commands",
new CounterConfiguration
{
LabelNames = new[] { "type", "result" } // Different label names
});
2. Choose Appropriate Metric Types
- Counter: Monotonically increasing values (requests, errors)
- Gauge: Values that can go up or down (active connections, queue depth)
- Histogram: Distributions (latency, request size)
- Summary: Like histogram, but calculated client-side
3. Use Histogram Buckets Wisely
// Exponential buckets for latency
Buckets = Histogram.ExponentialBuckets(0.001, 2, 10)
// [0.001, 0.002, 0.004, 0.008, 0.016, 0.032, 0.064, 0.128, 0.256, 0.512]
// Linear buckets for sizes
Buckets = Histogram.LinearBuckets(0, 100, 10)
// [0, 100, 200, 300, 400, 500, 600, 700, 800, 900]
4. Limit Cardinality
// BAD - High cardinality (user_id can have millions of values)
CommandsProcessed.WithLabels(commandType, userId).Inc();
// GOOD - Low cardinality
CommandsProcessed.WithLabels(commandType, "success").Inc();
5. Dashboard Organization
- Overview Dashboard: High-level health metrics
- Component Dashboards: Dispatcher, Event Store, Outbox
- Business Dashboards: Domain-specific KPIs
- Infrastructure Dashboards: CPU, memory, network
Testing Dashboards Locally
# Start Prometheus + Grafana
docker-compose up -d
# Run your application
dotnet run
# Generate test traffic
curl http://localhost:8080/api/orders -X POST -d '{"productId":"123"}'
# View metrics
curl http://localhost:8080/metrics
# Access Grafana
open http://localhost:3000
# Login: admin / admin
Next Steps
- Add Prometheus metrics to your Dispatch application
- Deploy Prometheus and Grafana (Docker Compose or Kubernetes)
- Import dashboard JSON files
- Configure alerting rules
- Set up notification channels (Slack, PagerDuty, email)
- Create custom business metrics dashboards
- Integrate with other monitoring platforms:
See Also
- Observability Overview - Monitor Dispatch applications with OpenTelemetry, health checks, and integrations
- Metrics Reference - Complete catalog of 100+ available metrics
- Datadog Integration - Datadog APM and custom metrics integration