libreoffice-online/kubernetes/helm/collabora-online/templates/prometheus-rules.yaml
genofire cc8a0c7ea3 fix: improve prometheus-rules
Signed-off-by: genofire <geno+dev@fireorbit.de>
Change-Id: I5076c74c3beeada0978d5b29dcc4e531cfce9241
2023-11-20 12:40:30 +05:30

162 lines
6.5 KiB
YAML

{{- /*
  PrometheusRule manifest for Collabora Online.
  Rendered only when .Values.prometheus.rules.enabled is truthy and the cluster
  advertises the monitoring.coreos.com/v1 API; the matching end is the last
  line of this template.
  Values read here:
    prometheus.rules.namespace        - optional namespace override (quoted)
    prometheus.rules.additionalLabels - optional extra labels merged into metadata.labels
*/}}
{{- if and .Values.prometheus.rules.enabled (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1") }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ include "collabora-online.fullname" . }}
{{- with .Values.prometheus.rules.namespace }}
  namespace: {{ . | quote }}
{{- end }}
  labels:
    {{- include "collabora-online.labels" . | nindent 4 }}
{{- /* Guarded with "with": toYaml of an unset or empty value renders "null" or an empty flow map, which is not a valid entry inside this block mapping. */}}
{{- with .Values.prometheus.rules.additionalLabels }}
    {{- toYaml . | nindent 4 }}
{{- end }}
spec:
  groups:
{{- /* Built-in alert group; rendered only when prometheus.rules.defaults.enabled is truthy. */}}
{{- if .Values.prometheus.rules.defaults.enabled }}
  - name: {{ include "collabora-online.name" . }}-Defaults
    rules:
{{- /* Critical when coolwsd_count has been below 1 for one minute. */}}
    - alert: Collabora NoProcess
      expr: 'coolwsd_count < 1'
      for: 1m
      labels:
        severity: critical
{{- /* Raw string literal: Helm copies it verbatim, so the $labels placeholders stay in the rendered rule. */}}
{{- `
      annotations:
        summary: "no coolwsd process running: in namespace {{ $labels.namespace }} on pod {{ $labels.pod }}."` }}
{{- /* One alert per (severity, threshold) entry of prometheus.rules.defaults.docs.pod: kit_assigned_count above the threshold for one minute. */}}
{{- range $severity, $threshold := .Values.prometheus.rules.defaults.docs.pod }}
    - alert: Collabora Open Docs by Pod
      expr: 'kit_assigned_count > {{ $threshold }}'
      for: 1m
      labels:
        severity: "{{ $severity }}"
{{- /* Raw string literal: $value and $labels below are emitted verbatim and are not the Helm loop variables. */}}
{{- `
      annotations:
        summary: "too many docs ({{ $value }}) are open in namespace {{ $labels.namespace }} on pod {{ $labels.pod }}."` }}
{{- end }}
{{- /* One alert per (severity, threshold) entry of prometheus.rules.defaults.docs.sum: kit_assigned_count summed without the instance and pod labels, above the threshold for one minute. */}}
{{- range $severity, $threshold := .Values.prometheus.rules.defaults.docs.sum }}
    - alert: Collabora Open Docs by Namespace
      expr: 'sum(kit_assigned_count) without (instance, pod) > {{ $threshold }}'
      for: 1m
      labels:
        severity: "{{ $severity }}"
{{- /* Raw string literal: $value and $labels below are emitted verbatim and are not the Helm loop variables. */}}
{{- `
      annotations:
        summary: "too many docs ({{ $value }}) are open in namespace {{ $labels.namespace }}."` }}
{{- end }}
{{- /* One alert per (severity, threshold) entry of prometheus.rules.defaults.viewers.pod: document_active_views_active_count_total above the threshold for one minute. */}}
{{- range $severity, $threshold := .Values.prometheus.rules.defaults.viewers.pod }}
    - alert: Collabora Viewers by Pod
      expr: 'document_active_views_active_count_total > {{ $threshold }}'
      for: 1m
      labels:
        severity: "{{ $severity }}"
{{- /* Raw string literal: $value and $labels below are emitted verbatim and are not the Helm loop variables. */}}
{{- `
      annotations:
        summary: "too many viewers ({{ $value }}) in namespace {{ $labels.namespace }} on pod {{ $labels.pod }}."` }}
{{- end }}
{{- /* One alert per (severity, threshold) entry of prometheus.rules.defaults.viewers.doc: doc_views_active above the threshold for one minute. */}}
{{- range $severity, $threshold := .Values.prometheus.rules.defaults.viewers.doc }}
    - alert: Collabora Viewers by Document
      expr: 'doc_views_active > {{ $threshold }}'
      for: 1m
      labels:
        severity: "{{ $severity }}"
{{- /* Raw string literal: $value and $labels below are emitted verbatim and are not the Helm loop variables. */}}
{{- `
      annotations:
        summary: "too many viewers ({{ $value }}) on document {{ $labels.key }} in namespace {{ $labels.namespace }}."` }}
{{- end }}
{{- /* One alert per (severity, threshold) entry of prometheus.rules.defaults.viewers.sum: document_active_views_active_count_total summed without the instance and pod labels, above the threshold for one minute. */}}
{{- range $severity, $threshold := .Values.prometheus.rules.defaults.viewers.sum }}
    - alert: Collabora Viewers by Namespace
      expr: 'sum(document_active_views_active_count_total) without (instance, pod) > {{ $threshold }}'
      for: 1m
      labels:
        severity: "{{ $severity }}"
{{- /* Raw string literal: $value and $labels below are emitted verbatim and are not the Helm loop variables. */}}
{{- `
      annotations:
        summary: "too many viewers ({{ $value }}) in namespace {{ $labels.namespace }}."` }}
{{- end }}
{{- /* Critical when, per namespace, the count of (key, namespace) groups holding more than one doc_info series, divided by the count of doc_info series and multiplied by 100, exceeds prometheus.rules.defaults.docs.duplicated. */}}
    - alert: Collabora DocumentsOpenSimultaneously
      expr: 'count(count (doc_info) by (key, namespace) > 1) by (namespace) / count (doc_info) by (namespace) * 100 > {{ .Values.prometheus.rules.defaults.docs.duplicated }}'
      labels:
        severity: critical
{{- /* Raw string literal: the printf, $value and $labels placeholders are emitted verbatim rather than evaluated by Helm. */}}
{{- `
      annotations:
        summary: '{{ printf "%.0f" $value }}% of all documents are opened simultaneously on different pods in namespace {{ $labels.namespace }}. Viewers can not see each others.'` }}
{{- /* Warning for every (key, namespace) group that holds more than one doc_info series. */}}
    - alert: Collabora DocumentsOpenSimultaneously
      expr: 'count(doc_info) by (key, namespace) > 1'
      labels:
        severity: warning
{{- /* Raw string literal: the $labels placeholders are emitted verbatim rather than evaluated by Helm. */}}
{{- `
      annotations:
        summary: "the document {{ $labels.key }} is opened simultaneously on different pods in namespace {{ $labels.namespace }}. Viewers can not see each others."` }}
{{- /* Warning when error_storage_space_low increased within the last minute. */}}
    - alert: Collabora Error StorageSpaceLow
      expr: 'increase(error_storage_space_low[1m]) > 0'
      labels:
        severity: warning
{{- /* Raw string literal: the $labels placeholders are emitted verbatim rather than evaluated by Helm. */}}
{{- `
      annotations:
        summary: "local storage space too low to operate in namespace {{ $labels.namespace }} on pod {{ $labels.pod }}."` }}
{{- /* One alert per (severity, threshold) entry of prometheus.rules.defaults.errorStorageConnections: one-minute increase of error_storage_connection above the threshold. */}}
{{- range $severity, $threshold := .Values.prometheus.rules.defaults.errorStorageConnections }}
    - alert: Collabora Error StorageConnection
      expr: 'increase(error_storage_connection[1m]) > {{ $threshold }}'
      labels:
        severity: "{{ $severity }}"
{{- /* Raw string literal: the $labels placeholders are emitted verbatim rather than evaluated by Helm. */}}
{{- `
      annotations:
        summary: "unable to connect to storage in namespace {{ $labels.namespace }} on pod {{ $labels.pod }}."` }}
{{- end }}
{{- /* Warning when error_bad_request increased within the last minute. */}}
    - alert: Collabora Error BadRequest
      expr: 'increase(error_bad_request[1m]) > 0'
      labels:
        severity: warning
{{- /* Raw string literal: the $labels placeholders are emitted verbatim rather than evaluated by Helm. */}}
{{- `
      annotations:
        summary: "we returned an HTTP bad request to a caller in namespace {{ $labels.namespace }} on pod {{ $labels.pod }}."` }}
{{- /* Warning when error_bad_argument increased within the last minute. */}}
    - alert: Collabora Error BadArgument
      expr: 'increase(error_bad_argument[1m]) > 0'
      labels:
        severity: warning
{{- /* Raw string literal: the $labels placeholders are emitted verbatim rather than evaluated by Helm. */}}
{{- `
      annotations:
        summary: "we returned an HTTP bad argument to a caller in namespace {{ $labels.namespace }} on pod {{ $labels.pod }}."` }}
{{- /* Warning when error_unauthorized_request increased within the last minute. */}}
    - alert: Collabora Error UnauthorizedRequest
      expr: 'increase(error_unauthorized_request[1m]) > 0'
      labels:
        severity: warning
{{- /* Raw string literal: the $labels placeholders are emitted verbatim rather than evaluated by Helm. */}}
{{- `
      annotations:
        summary: "an authorization exception usually on CheckFileInfo in namespace {{ $labels.namespace }} on pod {{ $labels.pod }}."` }}
{{- /* One alert per (severity, threshold) entry of prometheus.rules.defaults.errorServiceUnavailable: one-minute increase of error_service_unavailable above the threshold. */}}
{{- range $severity, $threshold := .Values.prometheus.rules.defaults.errorServiceUnavailable }}
    - alert: Collabora Error ServiceUnavailable
      expr: 'increase(error_service_unavailable[1m]) > {{ $threshold }}'
      labels:
        severity: "{{ $severity }}"
{{- /* Raw string literal: the $labels placeholders are emitted verbatim rather than evaluated by Helm. */}}
{{- `
      annotations:
        summary: "internal error, service is unavailable in namespace {{ $labels.namespace }} on pod {{ $labels.pod }}."` }}
{{- end }}
{{- /* Warning when error_parse_error increased within the last minute. */}}
    - alert: Collabora Error ParseError
      expr: 'increase(error_parse_error[1m]) > 0'
      labels:
        severity: warning
{{- /* Raw string literal: the $labels placeholders are emitted verbatim rather than evaluated by Helm. */}}
{{- `
      annotations:
        summary: "badly formed data provided for us to parse in namespace {{ $labels.namespace }} on pod {{ $labels.pod }}."` }}
{{- /* Closes the prometheus.rules.defaults.enabled condition. */}}
{{- end }}
{{- /* Optional second group: prometheus.rules.additionalRules is serialized as-is under "rules" when it is non-empty. */}}
{{- with .Values.prometheus.rules.additionalRules }}
  - name: {{ include "collabora-online.name" $ }}-Additional
    rules:
      {{- toYaml . | nindent 4 }}
{{- end }}
{{- /* Closes the outer condition that gates the whole PrometheusRule manifest. */}}
{{- end }}