日志链路:Nginx → 日志文件 → Promtail/Logstash → Loki/Prometheus → Grafana
指标链路:Nginx → Nginx Exporter → Prometheus → Grafana
1. 安装Nginx Prometheus Exporter
# Option 1: download the release tarball directly
wget https://github.com/nginxinc/nginx-prometheus-exporter/releases/download/v1.0.0/nginx-prometheus-exporter_1.0.0_linux_amd64.tar.gz
tar -xzf nginx-prometheus-exporter_*.tar.gz
# Option 2: run the exporter with Docker
# The image is pinned to the same release as the tarball above. Exporter
# releases from 1.0.0 on expect double-dash flags; the legacy single-dash
# form (-nginx.scrape-uri) is only understood by 0.x releases.
docker run -d -p 9113:9113 nginx/nginx-prometheus-exporter:1.0.0 \
  --nginx.scrape-uri=http://nginx:8080/stub_status
2. 配置Nginx启用状态模块
在nginx.conf中添加:
# Dedicated status server scraped by the Prometheus exporter.
server {
    listen 8080;
    server_name localhost;

    location /stub_status {
        stub_status on;
        # Keep the periodic scrapes out of the access log.
        access_log off;
        # Exporter running on the same host.
        allow 127.0.0.1;
        # Exporter running in a container: it reaches this endpoint as
        # http://nginx:8080/stub_status from a private Docker network address,
        # which the loopback-only rule would reject with 403.
        allow 172.16.0.0/12;
        deny all;
    }
}
3. Prometheus配置
# prometheus.yml
# Scrape job for the Nginx exporter; the interval applies to this job only.
scrape_configs:
  - job_name: 'nginx'
    scrape_interval: 15s
    static_configs:
      - targets: ['nginx-exporter:9113']
4. Grafana仪表板
导入官方仪表板 ID:12708 或自定义:
{
  "panels": [
    {
      "title": "Nginx请求率",
      "targets": [
        {
          "expr": "rate(nginx_http_requests_total[5m])",
          "legendFormat": "{{instance}}"
        }
      ]
    },
    {
      "title": "活跃连接数",
      "targets": [
        {
          "expr": "nginx_connections_active",
          "legendFormat": "活跃连接"
        }
      ]
    }
  ]
}
1. 安装Loki和Promtail
# docker-compose.yml
# Loki (log store) plus Promtail (log shipper) reading the host's Nginx logs.
version: '3'
services:
  loki:
    image: grafana/loki:latest
    ports:
      - "3100:3100"
  promtail:
    image: grafana/promtail:latest
    volumes:
      # Promtail only tails the logs and reads its config, so both mounts
      # are read-only.
      - /var/log/nginx:/var/log/nginx:ro
      - ./promtail-config.yml:/etc/promtail/config.yml:ro
2. Promtail配置
# promtail-config.yml
server:
  http_listen_port: 9080
  # 0 asks for a random free port for the gRPC listener.
  grpc_listen_port: 0

# File in which Promtail records how far it has read each log file.
positions:
  filename: /tmp/positions.yaml

clients:
  - url: http://loki:3100/loki/api/v1/push

scrape_configs:
  - job_name: nginx
    static_configs:
      - targets:
          - localhost
        labels:
          job: nginx
          __path__: /var/log/nginx/*.log
    pipeline_stages:
      # Parse the leading, combined-format part of each access-log line into
      # named captures; any extra fields after the user agent are ignored.
      - regex:
          expression: '(?P<ip>\S+)\s+\S+\s+\S+\s+\[(?P<timestamp>[^\]]+)\]\s+"(?P<method>\S+)\s+(?P<path>\S+)\s+\S+"\s+(?P<status>\d+)\s+(?P<size>\d+)\s+"(?P<referer>[^"]*)"\s+"(?P<agent>[^"]*)"'
      # Promote selected captures to stream labels (an empty value reuses the
      # capture of the same name).
      # NOTE(review): `path` can take many distinct values and may create a
      # large number of streams in Loki - confirm this is acceptable.
      - labels:
          status:
          method:
          path:
      # Use the time Nginx logged rather than the time Promtail read the line.
      - timestamp:
          format: '02/Jan/2006:15:04:05 -0700'
          source: timestamp
3. Nginx日志格式优化
http {
    # Combined-style access log extended with request and upstream timings
    # (rt, uct, uht, urt).
    log_format main
        '$remote_addr - $remote_user [$time_local] '
        '"$request" $status $body_bytes_sent '
        '"$http_referer" "$http_user_agent" '
        'rt=$request_time uct="$upstream_connect_time" '
        'uht="$upstream_header_time" urt="$upstream_response_time"';

    access_log /var/log/nginx/access.log main;
}
# 在Grafana中添加数据源
# 1. Prometheus数据源:http://prometheus:9090
# 2. Loki数据源:http://loki:3100
{
  "title": "Nginx综合监控",
  "panels": [
    {
      "title": "QPS & 响应时间",
      "type": "stat",
      "targets": [
        {
          "expr": "rate(nginx_http_requests_total[1m])",
          "format": "time_series"
        }
      ]
    },
    {
      "title": "HTTP状态码分布",
      "type": "piechart",
      "datasource": "Loki",
      "targets": [
        {
          "expr": "sum by (status) (count_over_time({job=\"nginx\"}[5m]))"
        }
      ]
    },
    {
      "title": "访问日志实时流",
      "type": "logs",
      "datasource": "Loki",
      "targets": [
        {
          "expr": "{job=\"nginx\"} |= ``",
          "refId": "A"
        }
      ]
    },
    {
      "title": "错误请求统计",
      "type": "table",
      "datasource": "Loki",
      "targets": [
        {
          "expr": "sum by (status, path) (count_over_time({job=\"nginx\", status=~\"[45]..\"}[5m]))"
        }
      ]
    }
  ]
}
Prometheus指标:
# Request rate (per-second average over the last 5 minutes)
rate(nginx_http_requests_total[5m])
# Error rate: share of requests answered with a 5xx status
# NOTE(review): this relies on a `status` label on nginx_http_requests_total;
# the stub_status-based exporter does not appear to expose one - confirm the
# metric source before relying on this query.
sum(rate(nginx_http_requests_total{status=~"5.."}[5m]))
/
sum(rate(nginx_http_requests_total[5m]))
# Upstream response time
# NOTE(review): presumably provided by a different exporter or module than the
# stub_status one configured in this guide - verify the metric exists.
nginx_upstream_response_time_seconds
# Connection counts
nginx_connections_active
nginx_connections_reading
nginx_connections_writing
Loki查询:
# Count log lines per status code (status is a stream label set by Promtail)
sum by(status) (count_over_time({job="nginx"} [5m]))
# Find a specific error. The access log is plain text, not JSON, so select on
# the status stream label instead of piping through the json parser.
{job="nginx", status="500"}
# Slow requests: extract the rt=<seconds> field written by the "main"
# log_format, then filter on it numerically.
{job="nginx"} | regexp `rt=(?P<request_time>[0-9.]+)` | request_time > 1
# Most requested endpoints over the last hour
topk(10, sum by(path) (count_over_time({job="nginx"} [1h])))
# alert_rules.yml
groups:
  - name: nginx_alerts
    rules:
      # Fires when more than 5% of requests return 5xx for 2 minutes.
      # NOTE(review): requires a `status` label on nginx_http_requests_total;
      # the stub_status-based exporter does not appear to provide one, in
      # which case this rule never fires - confirm the metric source.
      - alert: NginxHighErrorRate
        expr: |
          sum(rate(nginx_http_requests_total{status=~"5.."}[5m]))
          /
          sum(rate(nginx_http_requests_total[5m])) > 0.05
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Nginx错误率过高"
      # `up` only reports whether Prometheus can reach the exporter; `nginx_up`
      # reports whether the exporter's last scrape of Nginx itself succeeded.
      # Checking both catches an Nginx outage while the exporter keeps running.
      - alert: NginxDown
        expr: up{job="nginx"} == 0 or nginx_up{job="nginx"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Nginx不可用"
在仪表板面板上直接设置:
在Nginx配置中添加自定义日志变量:
# Derive $is_error from the response status: 1 when it starts with 4 or 5,
# otherwise 0.
map $status $is_error {
    default  0;
    ~^[45]   1;
}

# Access-log format that records the error flag and the request duration.
log_format custom
    '$remote_addr - $remote_user [$time_local] '
    '"$request" $status $body_bytes_sent '
    'error=$is_error request_time=$request_time';
# Log buffering: write the access log (format "main") through a 32k buffer
# with a 1-minute flush setting.
# NOTE(review): buffered lines presumably reach the log file, and therefore any
# log shipper tailing it, with up to that delay - confirm this is acceptable.
access_log /var/log/nginx/access.log main buffer=32k flush=1m;
# Log rotation
# /etc/logrotate.d/nginx
/var/log/nginx/*.log {
    daily
    rotate 30
    compress
    delaycompress
    missingok
    notifempty
    create 644 nginx adm
    # The glob matches several files; run postrotate once for the whole set
    # instead of reloading Nginx once per rotated file.
    sharedscripts
    postrotate
        /usr/bin/systemctl reload nginx
    endscript
}
生产环境推荐:
安全建议:
# Status endpoint protected by a source-address allow-list and HTTP basic auth.
location /stub_status {
    stub_status on;

    auth_basic           "Restricted";
    auth_basic_user_file /etc/nginx/.htpasswd;

    # Internal (private) address range only.
    allow 172.16.0.0/12;
    deny  all;
}
这个方案提供了从基础监控到日志分析的全套解决方案,可以根据实际需求选择部分或全部组件部署。