4 # Copyright (c) 2013-2021 Wind River Systems, Inc.
6 # SPDX-License-Identifier: Apache-2.0
9 ############################################################################
11 # ALARM & CUSTOMER LOG DOCUMENTATION
13 ############################################################################
15 ############################################################################
17 # Record Format ... for documentation
20 # Type: < Alarm | Log >
21 # Description: < yaml string >
23 # [ < yaml string >, // list of yaml strings
26 # critical: < yaml string > // i.e. dictionary of yaml strings indexed by severity
27 # major: < yaml string >
28 # minor: < yaml string >
29 # warning: < yaml string >
30 # Entity_Instance_ID: < yaml string ... e.g. host=<hostname>.interface=<ifname> >
32 # [ < yaml string >, // list of yaml strings
34 # Severity: < critical | major | minor | warning >
36 # [ critical, major ] // list of severity values
37 # Proposed_Repair_Action: < yaml string > // NOTE ALARM ONLY FIELD
39 # critical: < yaml string > // i.e. dictionary of yaml strings indexed by severity
40 # major: < yaml string >
41 # minor: < yaml string >
42 # warning: < yaml string >
43 # Maintenance_Action: < yaml string > // NOTE ALARM ONLY FIELD
45 # critical: < yaml string > // i.e. dictionary of yaml strings indexed by severity
46 # major: < yaml string >
47 # minor: < yaml string >
48 # warning: < yaml string >
49 # Inhibit_Alarms: < True | False > // NOTE ALARM ONLY FIELD
50 # Alarm_Type: < operational-violation | ... >
51 # Probable_Cause: < timing-problem | ... >
53 # [ < timing-problem | ... >, // list of probable-causes
54 # < timing-problem | ... > ]
55 # Service_Affecting: < True | False >
56 # Suppression: < True | False > // NOTE ALARM ONLY FIELD
57 # Management_Affecting_Severity: < none | critical | major | minor | warning >
58 # // lowest alarm level of this type that will block forced upgrades & orchestration actions
59 # Degrade_Affecting_Severity: < none | critical | major | minor >
60 # // lowest alarm level of this type sets a host to 'degraded'
64 # - use general record format above
65 # - the only dictionaries allowed are ones indexed by severity
66 # - if there are multiple lists in a record,
67 # then they should all have the same # of items, and each set of corresponding list items represents one instance of the alarm
68 # - if you can't describe the alarm/log based on the above rules,
69 # then you can use a multi-line string format
70 # - DELETING alarms from events.yaml: alarms should only be deleted when going to a new Titanium Cloud release
71 # - if all possible alarm severities are mgmt affecting, the convention is to
72 # use 'warning' as the Management_Affecting_Severity, even if warning is not a possible severity for that alarm
75 # - Testing of events.yaml can be done by running regular make command
76 # and specifying fm-doc:
77 # nice -n 20 ionice -c Idle make -C build fm-doc.rebuild
78 # - When building, events.yaml will be parsed for correct format, and also
79 # to ensure that Alarm IDs defined in constants.py and fmAlarm.h are
80 # listed in events.yaml
82 ############################################################################
85 #---------------------------------------------------------------------------
86 # Monitored Resource Alarms
87 #---------------------------------------------------------------------------
93 Platform CPU threshold exceeded; threshold x%, actual y% .
96 Entity_Instance_ID: host=<hostname>
97 Severity: [critical, major]
98 Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
103 Alarm_Type: operational-violation
104 Probable_Cause: threshold-crossed
105 Service_Affecting: False
107 Management_Affecting_Severity: major
108 Degrade_Affecting_Severity: critical
113 VSwitch CPU threshold exceeded; threshold x%, actual y% .
117 Entity_Instance_ID: host=<hostname>
118 Severity: [critical, major, minor]
119 Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
124 Alarm_Type: operational-violation
125 Probable_Cause: threshold-crossed
126 Service_Affecting: False
128 Management_Affecting_Severity: none
129 Degrade_Affecting_Severity: none
134 Memory threshold exceeded; threshold x%, actual y% .
137 Entity_Instance_ID: |-
140 host=<hostname>.memory=total
142 host=<hostname>.memory=platform
144 host=<hostname>.numa=node<number>
145 Severity: [critical, major]
146 Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support; may require additional memory on Host."
151 Alarm_Type: operational-violation
152 Probable_Cause: threshold-crossed
153 Service_Affecting: False
155 Management_Affecting_Severity: none
156 Degrade_Affecting_Severity: critical
158 100.104: # NOTE This should really be split into two different Alarms.
161 host=<hostname>.filesystem=<mount-dir>
162 File System threshold exceeded; threshold x%, actual y% .
166 host=<hostname>.volumegroup=<volumegroup-name>
167 Monitor and if condition persists, consider adding additional physical volumes to the volume group.
168 Entity_Instance_ID: |-
169 host=<hostname>.filesystem=<mount-dir>
171 host=<hostname>.volumegroup=<volumegroup-name>
172 Severity: [critical, major]
173 Proposed_Repair_Action: "Reduce usage or resize filesystem."
178 Alarm_Type: operational-violation
179 Probable_Cause: threshold-crossed
180 Service_Affecting: False
182 Management_Affecting_Severity: critical
183 Degrade_Affecting_Severity: critical
188 Filesystem Alarm Condition:
189 <fs_name> filesystem is not added on both controllers and/or does not have the same size: <hostname>.
190 Entity_Instance_ID: fs_name=<image-conversion>
192 Proposed_Repair_Action: "Add image-conversion filesystem on both controllers.
193 Consult the System Administration Manual for more details.
194 If problem persists, contact next level of support."
195 Maintenance_Action: degrade
197 Alarm_Type: equipment
198 Probable_Cause: configuration-or-customization-error
199 Service_Affecting: True
201 Management_Affecting_Severity: major
202 Degrade_Affecting_Severity: none
205 # 100.105: Retired (with R2 release): previously monitored /etc/nova/instances
206 # NFS mount from controller to computes
211 Description: "'OAM' Port failed."
212 Entity_Instance_ID: host=<hostname>.port=<port-name>
214 Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
215 Maintenance_Action: degrade
217 Alarm_Type: operational-violation
218 Probable_Cause: unknown
219 Service_Affecting: True
221 Management_Affecting_Severity: warning
222 Degrade_Affecting_Severity: major
227 'OAM' Interface degraded.
229 'OAM' Interface failed.
230 Entity_Instance_ID: host=<hostname>.interface=<if-name>
231 Severity: [critical, major]
232 Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
237 Alarm_Type: operational-violation
238 Probable_Cause: unknown
239 Service_Affecting: True
241 Management_Affecting_Severity: warning
242 Degrade_Affecting_Severity: major
246 Description: "'MGMT' Port failed."
247 Entity_Instance_ID: host=<hostname>.port=<port-name>
249 Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
250 Maintenance_Action: degrade
252 Alarm_Type: operational-violation
253 Probable_Cause: unknown
254 Service_Affecting: True
256 Management_Affecting_Severity: warning
257 Degrade_Affecting_Severity: major
262 'MGMT' Interface degraded.
264 'MGMT' Interface failed.
265 Entity_Instance_ID: host=<hostname>.interface=<if-name>
266 Severity: [critical, major]
267 Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
272 Alarm_Type: operational-violation
273 Probable_Cause: unknown
274 Service_Affecting: True
276 Management_Affecting_Severity: warning
277 Degrade_Affecting_Severity: major
281 Description: "'CLUSTER-HOST' Port failed."
282 Entity_Instance_ID: host=<hostname>.port=<port-name>
284 Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
285 Maintenance_Action: degrade
287 Alarm_Type: operational-violation
288 Probable_Cause: unknown
289 Service_Affecting: True
291 Management_Affecting_Severity: warning
292 Degrade_Affecting_Severity: major
297 'CLUSTER-HOST' Interface degraded.
299 'CLUSTER-HOST' Interface failed.
300 Entity_Instance_ID: host=<hostname>.interface=<if-name>
301 Severity: [critical, major]
302 Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
307 Alarm_Type: operational-violation
308 Probable_Cause: unknown
309 Service_Affecting: True
311 Management_Affecting_Severity: warning
312 Degrade_Affecting_Severity: major
316 Description: "'DATA-VRS' Port down."
317 Entity_Instance_ID: host=<hostname>.port=<port-name>
319 Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
320 Maintenance_Action: degrade
322 Alarm_Type: operational-violation
323 Probable_Cause: unknown
324 Service_Affecting: True
326 Management_Affecting_Severity: none
327 Degrade_Affecting_Severity: major
332 'DATA-VRS' Interface degraded.
334 'DATA-VRS' Interface down.
335 Entity_Instance_ID: host=<hostname>.interface=<if-name>
336 Severity: [critical, major]
337 Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
341 Alarm_Type: operational-violation
342 Probable_Cause: unknown
343 Service_Affecting: True
345 Management_Affecting_Severity: none
346 Degrade_Affecting_Severity: major
351 major: "NTP configuration does not contain any valid or reachable NTP servers."
352 minor: "NTP address <IP address> is not a valid or a reachable NTP server."
354 major: host=<hostname>.ntp
355 minor: host=<hostname>.ntp=<IP address>
356 Severity: [major, minor]
357 Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
358 Maintenance_Action: none
360 Alarm_Type: communication
361 Probable_Cause: unknown
362 Service_Affecting: False
364 Management_Affecting_Severity: none
365 Degrade_Affecting_Severity: none
369 Description: "VSwitch Memory Usage, processor <processor> threshold exceeded; threshold x%, actual y% ."
370 Entity_Instance_ID: host=<hostname>.processor=<processor>
371 Severity: [critical, major, minor]
372 Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
377 Alarm_Type: operational-violation
378 Probable_Cause: threshold-crossed
379 Service_Affecting: False
381 Management_Affecting_Severity: none
382 Degrade_Affecting_Severity: critical
386 Description: "Cinder LVM Thinpool Usage threshold exceeded; threshold x%, actual y% ."
387 Entity_Instance_ID: host=<hostname>
388 Severity: [critical, major, minor]
389 Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
394 Alarm_Type: operational-violation
395 Probable_Cause: threshold-crossed
396 Service_Affecting: False
398 Management_Affecting_Severity: none
399 Degrade_Affecting_Severity: critical
403 Description: "Nova LVM Thinpool Usage threshold exceeded; threshold x%, actual y% ."
404 Entity_Instance_ID: host=<hostname>
405 Severity: [critical, major, minor]
406 Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
411 Alarm_Type: operational-violation
412 Probable_Cause: threshold-crossed
413 Service_Affecting: False
415 Management_Affecting_Severity: major
416 Degrade_Affecting_Severity: critical
420 Description: Controller cannot establish connection with remote logging server.
421 Entity_Instance_ID: host=<hostname>
423 Proposed_Repair_Action: "Ensure Remote Log Server IP is reachable from Controller through OAM interface; otherwise contact next level of support."
424 Maintenance_Action: none
425 Inhibit_Alarms: False
426 Alarm_Type: communication
427 Probable_Cause: communication-subsystem-failure
428 Service_Affecting: False
430 Management_Affecting_Severity: none
431 Degrade_Affecting_Severity: none
436 <hostname> does not support the provisioned PTP mode
438 <hostname> PTP clocking is out-of-tolerance
440 <hostname> is not locked to remote PTP Grand Master
442 <hostname> GNSS signal loss state:<state>
444 <hostname> 1PPS signal loss state:<state>
445 Entity_Instance_ID: |-
448 host=<hostname>.ptp=no-lock
450 host=<hostname>.ptp=<interface>.unsupported=hardware-timestamping
452 host=<hostname>.ptp=<interface>.unsupported=software-timestamping
454 host=<hostname>.ptp=<interface>.unsupported=legacy-timestamping
456 host=<hostname>.ptp=out-of-tolerance
458 host=<hostname>.instance=<instance>.ptp=out-of-tolerance
460 host=<hostname>.interface=<interface>.ptp=signal-loss
461 Severity: [major, minor]
462 Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
463 Maintenance_Action: none
465 Alarm_Type: communication
466 Probable_Cause: unknown
467 Service_Affecting: False
469 Management_Affecting_Severity: none
470 Degrade_Affecting_Severity: none
475 critical: "service open file descriptor has reached its limit"
476 major: "service open file descriptor is approaching its limit"
477 Entity_Instance_ID: |-
478 host=<hostname>.resource_type=file-descriptor.service_name=<service-name>
479 Severity: [critical, major]
480 Proposed_Repair_Action: "swact to the other controller if it is available"
481 Maintenance_Action: none
483 Alarm_Type: operational-violation
484 Probable_Cause: threshold-crossed
485 Service_Affecting: True
487 Management_Affecting_Severity: critical
488 Degrade_Affecting_Severity: critical
490 #---------------------------------------------------------------------------
492 #---------------------------------------------------------------------------
497 Description: <hostname> was administratively locked to take it out-of-service.
498 Entity_Instance_ID: host=<hostname>
500 Proposed_Repair_Action: Administratively unlock Host to bring it back in-service.
501 Maintenance_Action: none
503 Alarm_Type: operational-violation
504 Probable_Cause: out-of-service
505 Service_Affecting: True
507 Management_Affecting_Severity: warning
508 Degrade_Affecting_Severity: none
513 <hostname> experienced a service-affecting failure.
514 Host is being auto recovered by Reboot.
515 Entity_Instance_ID: host=<hostname>
517 Proposed_Repair_Action: If auto-recovery is consistently unable to recover host to the unlocked-enabled state contact next level of support or lock and replace failing host.
518 Maintenance_Action: auto-recover
519 Inhibit_Alarms: False
520 Alarm_Type: operational-violation
521 Probable_Cause: application-subsystem-failure
522 Service_Affecting: True
524 Management_Affecting_Severity: warning
525 Degrade_Affecting_Severity: none
529 Description: <hostname> experienced a configuration failure during initialization. Host is being re-configured by Reboot.
530 Entity_Instance_ID: host=<hostname>
532 Proposed_Repair_Action: If auto-recovery is consistently unable to recover host to the unlocked-enabled state contact next level of support or lock and replace failing host.
533 Maintenance_Action: auto-recover
534 Inhibit_Alarms: False
535 Alarm_Type: operational-violation
536 Probable_Cause: configuration-or-customization-error
537 Service_Affecting: True
539 Management_Affecting_Severity: warning
540 Degrade_Affecting_Severity: none
544 Description: <hostname> access to board management module has failed.
545 Entity_Instance_ID: host=<hostname>
547 Proposed_Repair_Action: Check Host's board management configuration and connectivity.
548 Maintenance_Action: auto-recover
549 Inhibit_Alarms: False
550 Alarm_Type: operational-violation
551 Probable_Cause: communication-subsystem-failure
552 Service_Affecting: False
554 Management_Affecting_Severity: none
555 Degrade_Affecting_Severity: none
559 Description: <hostname> controller function has in-service failure while compute services remain healthy.
560 Entity_Instance_ID: host=<hostname>
562 Proposed_Repair_Action: Lock and then Unlock host to recover. Avoid using 'Force Lock' action as that will impact compute services running on this host. If lock action fails then contact next level of support to investigate and recover.
563 Maintenance_Action: "degrade - requires manual action"
564 Inhibit_Alarms: False
565 Alarm_Type: operational-violation
566 Probable_Cause: communication-subsystem-failure
567 Service_Affecting: True
569 Management_Affecting_Severity: warning
570 Degrade_Affecting_Severity: major
574 Description: <hostname> compute service of the only available controller is not operational. Auto-recovery is disabled. Degrading host instead.
575 Entity_Instance_ID: host=<hostname>
577 Proposed_Repair_Action: Enable second controller and Switch Activity (Swact) over to it as soon as possible. Then Lock and Unlock host to recover its local compute service.
578 Maintenance_Action: "degrade - requires manual action"
579 Inhibit_Alarms: False
580 Alarm_Type: operational-violation
581 Probable_Cause: communication-subsystem-failure
582 Service_Affecting: True
584 Management_Affecting_Severity: warning
585 Degrade_Affecting_Severity: major
591 <hostname> is experiencing intermittent 'Management Network' communication failures that have exceeded its lower alarming threshold.
594 <hostname> is experiencing a persistent critical 'Management Network' communication failure.
595 Entity_Instance_ID: host=<hostname>
596 Severity: [critical, major]
597 Proposed_Repair_Action: "Check 'Management Network' connectivity and support for multicast messaging. If problem consistently occurs after that and Host is reset, then contact next level of support or lock and replace failing host."
598 Maintenance_Action: auto-recover
599 Inhibit_Alarms: False
600 Alarm_Type: communication
601 Probable_Cause: unknown
602 Service_Affecting: True
604 Management_Affecting_Severity: warning
605 Degrade_Affecting_Severity: none
611 <hostname> is experiencing intermittent 'Cluster-host Network' communication failures that have exceeded its lower alarming threshold.
614 <hostname> is experiencing a persistent critical 'Cluster-host Network' communication failure.
615 Entity_Instance_ID: host=<hostname>
616 Severity: [critical, major]
617 Proposed_Repair_Action: "Check 'Cluster-host Network' connectivity and support for multicast messaging. If problem consistently occurs after that and Host is reset, then contact next level of support or lock and replace failing host."
618 Maintenance_Action: auto-recover
619 Inhibit_Alarms: False
620 Alarm_Type: communication
621 Probable_Cause: unknown
622 Service_Affecting: True
624 Management_Affecting_Severity: warning
625 Degrade_Affecting_Severity: none
631 Main Process Monitor Daemon Failure (major):
632 <hostname> 'Process Monitor' (pmond) process is not running or functioning properly. The system is trying to recover this process.
634 Monitored Process Failure (critical/major/minor):
635 Critical: <hostname> critical '<processname>' process has failed and could not be auto-recovered gracefully.
636 Auto-recovery progression by host reboot is required and in progress.
637 Major: <hostname> is degraded due to the failure of its '<processname>' process. Auto recovery of this major process is in progress.
638 Minor: <hostname> '<processname>' process has failed. Auto recovery of this minor process is in progress.
640 <hostname> '<processname>' process has failed. Manual recovery is required.
641 Entity_Instance_ID: host=<hostname>.process=<processname>
642 Severity: [critical, major, minor]
643 Proposed_Repair_Action: |-
644 If this alarm does not automatically clear after some time and continues to be asserted after Host is locked and unlocked then contact next level of support for root cause analysis and recovery.
646 If problem consistently occurs after Host is locked and unlocked then contact next level of support for root cause analysis and recovery.
648 critical: auto-recover
651 Inhibit_Alarms: False
652 Alarm_Type: operational-violation
653 Probable_Cause: unknown
659 Management_Affecting_Severity: warning
660 Degrade_Affecting_Severity: major
662 # 200.006: // NOTE using duplicate ID of a completely analogous Alarm for this
665 # Main Process Monitor Daemon Failure (major)
666 # <hostname> 'Process Monitor' (pmond) process is not running or functioning properly.
667 # The system is trying to recover this process.
669 # Monitored Process Failure (critical/major/minor)
670 # critical: <hostname> critical '<processname>' process has failed and could not be auto-recovered gracefully.
671 # Auto-recovery progression by host reboot is required and in progress.
672 # major: <hostname> is degraded due to the failure of its '<processname>' process. Auto recovery of this major process is in progress.
673 # minor: <hostname> '<processname>' process has failed. Auto recovery of this minor process is in progress.
675 # <hostname> '<processname>' process has failed. Manual recovery is required.
676 # Entity_Instance_ID: host=<hostname>.process=<process-name>
679 # Probable_Cause: unspecified-reason
680 # Service_Affecting: True
686 critical: "Host is degraded due to a 'critical' out-of-tolerance reading from the '<sensorname>' sensor"
687 major: "Host is degraded due to a 'major' out-of-tolerance reading from the '<sensorname>' sensor"
688 minor: "Host is reporting a 'minor' out-of-tolerance reading from the '<sensorname>' sensor"
689 Entity_Instance_ID: host=<hostname>.sensor=<sensorname>
690 Severity: [critical, major, minor]
691 Proposed_Repair_Action: "If problem consistently occurs after Host is power cycled and/or reset, contact next level of support or lock and replace failing host."
695 minor: auto-recover (polling)
697 Alarm_Type: operational-violation
698 Probable_Cause: unspecified-reason
704 Management_Affecting_Severity: none
705 Degrade_Affecting_Severity: critical
709 Description: "The Hardware Monitor was unable to load, configure and monitor one or more hardware sensors."
710 Entity_Instance_ID: host=<hostname>
712 Proposed_Repair_Action: Check Board Management Controller provisioning. Try reprovisioning the BMC. If problem persists try power cycling the host and then the entire server including the BMC power. If problem persists then contact next level of support.
713 Maintenance_Action: none
714 Inhibit_Alarms: False
715 Alarm_Type: operational-violation
716 Probable_Cause: unknown
717 Service_Affecting: False
719 Management_Affecting_Severity: none
720 Degrade_Affecting_Severity: none
724 Description: Unable to read one or more sensor groups from this host's board management controller
725 Entity_Instance_ID: host=<hostname>
727 Proposed_Repair_Action: Check board management connectivity and try rebooting the board management controller. If problem persists contact next level of support or lock and replace failing host.
728 Maintenance_Action: none
729 Inhibit_Alarms: False
730 Alarm_Type: operational-violation
731 Probable_Cause: unknown
732 Service_Affecting: False
734 Management_Affecting_Severity: none
735 Degrade_Affecting_Severity: none
740 Description: ["<hostname> has been 'discovered' on the network",
741 "<hostname> has been 'added' to the system",
742 "<hostname> has 'entered' multi-node failure avoidance",
743 "<hostname> has 'exited' multi-node failure avoidance"]
744 Entity_Instance_ID: [host=<hostname>.event=discovered,
745 host=<hostname>.event=add,
746 host=<hostname>.event=mnfa_enter,
747 host=<hostname>.event=mnfa_exit]
750 Probable_Cause: unspecified-reason
751 Service_Affecting: True
756 Description: ["<hostname> board management controller has been 'provisioned'",
757 "<hostname> board management controller has been 're-provisioned'",
758 "<hostname> board management controller has been 'de-provisioned'",
759 "<hostname> manual 'unlock' request",
760 "<hostname> manual 'reboot' request",
761 "<hostname> manual 'reset' request",
762 "<hostname> manual 'power-off' request",
763 "<hostname> manual 'power-on' request",
764 "<hostname> manual 'reinstall' request",
765 "<hostname> manual 'force-lock' request",
766 "<hostname> manual 'delete' request",
767 "<hostname> manual 'controller switchover' request"]
768 Entity_Instance_ID: [host=<hostname>.command=provision,
769 host=<hostname>.command=reprovision,
770 host=<hostname>.command=deprovision,
771 host=<hostname>.command=unlock,
772 host=<hostname>.command=reboot,
773 host=<hostname>.command=reset,
774 host=<hostname>.command=power-off,
775 host=<hostname>.command=power-on,
776 host=<hostname>.command=reinstall,
777 host=<hostname>.command=force-lock,
778 host=<hostname>.command=delete,
779 host=<hostname>.command=swact]
782 Probable_Cause: unspecified-reason
783 Service_Affecting: False
788 Description: ["<hostname> is now 'disabled'",
789 "<hostname> is now 'enabled'",
790 "<hostname> is now 'online'",
791 "<hostname> is now 'offline'",
792 "<hostname> is 'disabled-failed' to the system",
793 "<hostname> reinstall failed",
794 "<hostname> reinstall completed successfully"]
795 Entity_Instance_ID: [host=<hostname>.state=disabled,
796 host=<hostname>.state=enabled,
797 host=<hostname>.status=online,
798 host=<hostname>.status=offline,
799 host=<hostname>.status=failed,
800 host=<hostname>.status=reinstall-failed,
801 host=<hostname>.status=reinstall-complete]
804 Probable_Cause: unspecified-reason
805 Service_Affecting: True
808 #---------------------------------------------------------------------------
810 #---------------------------------------------------------------------------
814 Description: System Backup in progress.
815 Entity_Instance_ID: host=controller
817 Proposed_Repair_Action: No action required.
820 Alarm_Type: operational-violation
821 Probable_Cause: unspecified-reason
822 Service_Affecting: False
824 Management_Affecting_Severity: warning
825 Degrade_Affecting_Severity: none
828 #---------------------------------------------------------------------------
829 # SYSTEM CONFIGURATION
830 #---------------------------------------------------------------------------
834 Description: <hostname> Configuration is out-of-date.
835 Entity_Instance_ID: host=<hostname>
837 Proposed_Repair_Action: Administratively lock and unlock <hostname> to update config.
840 Alarm_Type: operational-violation
841 Probable_Cause: unspecified-reason
842 Service_Affecting: True
844 Management_Affecting_Severity: warning
845 Degrade_Affecting_Severity: none
849 Description: <hostname> Ceph cache tiering configuration is out-of-date.
850 Entity_Instance_ID: cluster=<dist-fs-uuid>
852 Proposed_Repair_Action: Apply Ceph service parameter settings.
855 Alarm_Type: operational-violation
856 Probable_Cause: unspecified-reason
857 Service_Affecting: False
859 Management_Affecting_Severity: warning
860 Degrade_Affecting_Severity: none
864 Description: "Kubernetes certificates rotation failed on host[, reason = <reason_text>]"
865 Entity_Instance_ID: host=<hostname>
867 Proposed_Repair_Action: Lock and unlock the host to update services with new certificates (Manually renew kubernetes certificates first if renewal failed).
870 Alarm_Type: operational-violation
871 Probable_Cause: unspecified-reason
872 Service_Affecting: False
874 Management_Affecting_Severity: warning
875 Degrade_Affecting_Severity: none
877 #---------------------------------------------------------------------------
878 # Deployment Manager Monitor
879 #---------------------------------------------------------------------------
882 Description: "Deployment Manager resource not reconciled: <name>"
883 Entity_Instance_ID: resource=<crd-resource>,name=<resource-name>
885 Proposed_Repair_Action: Monitor and if condition persists, validate deployment configuration.
888 Alarm_Type: operational-violation
889 Probable_Cause: configuration-out-of-date
890 Service_Affecting: True
892 Management_Affecting_Severity: warning
893 Degrade_Affecting_Severity: none
895 #---------------------------------------------------------------------------
896 # VM Compute Services
897 #---------------------------------------------------------------------------
900 Description: "Host <host_name> compute services failure[, reason = <reason_text>]"
901 Entity_Instance_ID: host=<host_name>.services=compute
903 Proposed_Repair_Action: Wait for host services recovery to complete; if problem persists contact next level of support
906 Alarm_Type: processing-error
907 Probable_Cause: unspecified-reason
908 Service_Affecting: True
910 Management_Affecting_Severity: warning
911 Degrade_Affecting_Severity: none
915 Description: "Host <host_name> compute services failure[, reason = <reason_text>]"
916 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
918 Alarm_Type: equipment
919 Probable_Cause: unspecified-reason
920 Service_Affecting: False
924 Description: Host <host_name> compute services enabled
925 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
927 Alarm_Type: equipment
928 Probable_Cause: unspecified-reason
929 Service_Affecting: False
933 Description: Host <host_name> compute services disabled
934 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
936 Alarm_Type: equipment
937 Probable_Cause: unspecified-reason
938 Service_Affecting: False
943 Description: Host <host_name> hypervisor is now <administrative_state>-<operational_state>
944 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
946 Alarm_Type: equipment
947 Probable_Cause: unspecified-reason
948 Service_Affecting: False
951 #---------------------------------------------------------------------------
953 #---------------------------------------------------------------------------
957 Description: <subcloud> is offline
958 Entity_Instance_ID: subcloud=<subcloud>
960 Proposed_Repair_Action: Wait for subcloud to become online; if problem persists contact next level of support
963 Alarm_Type: communication
964 Probable_Cause: loss-of-signal
965 Service_Affecting: False
967 Management_Affecting_Severity: none
968 Degrade_Affecting_Severity: none
972 Description: <subcloud> <resource> sync_status is out-of-sync
973 Entity_Instance_ID: [subcloud=<subcloud>.resource=<compute | network | platform | volumev2>]
975 Proposed_Repair_Action: If problem persists contact next level of support
979 Probable_Cause: application-subsystem-failure
980 Service_Affecting: False
982 Management_Affecting_Severity: none
983 Degrade_Affecting_Severity: none
987 Description: Subcloud Backup Failure
988 Entity_Instance_ID: subcloud=<subcloud>
990 Proposed_Repair_Action: Retry subcloud backup after checking backup input file. If problem persists contact next level of support.
993 Alarm_Type: processing-error
994 Probable_Cause: unknown
995 Service_Affecting: False
997 Management_Affecting_Severity: none
998 Degrade_Affecting_Severity: none
1000 #---------------------------------------------------------------------------
1002 #---------------------------------------------------------------------------
1006 Description: "'Data' Port failed."
1007 Entity_Instance_ID: host=<hostname>.port=<port-uuid>
1009 Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1012 Alarm_Type: equipment
1013 Probable_Cause: loss-of-signal
1014 Service_Affecting: True
1016 Management_Affecting_Severity: warning
1017 Degrade_Affecting_Severity: none
1023 'Data' Interface degraded.
1025 'Data' Interface failed.
1026 Entity_Instance_ID: host=<hostname>.interface=<if-uuid>
1027 Severity: [critical, major]
1028 Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1031 Alarm_Type: equipment
1032 Probable_Cause: loss-of-signal
1033 Service_Affecting: True
1035 Management_Affecting_Severity: warning
1036 Degrade_Affecting_Severity: critical
1041 Description: Networking Agent not responding.
1042 Entity_Instance_ID: host=<hostname>.agent=<agent-uuid>
1044 Proposed_Repair_Action: "If condition persists, attempt to clear issue by administratively locking and unlocking the Host."
1047 Alarm_Type: operational-violation
1048 Probable_Cause: underlying-resource-unavailable
1049 Service_Affecting: True
1051 Management_Affecting_Severity: warning
1052 Degrade_Affecting_Severity: none
1057 Description: No enabled compute host with connectivity to provider network.
1058 Entity_Instance_ID: service=networking.providernet=<pnet-uuid>
1060 Proposed_Repair_Action: Enable compute hosts with required provider network connectivity.
1063 Alarm_Type: operational-violation
1064 Probable_Cause: underlying-resource-unavailable
1065 Service_Affecting: True
1067 Management_Affecting_Severity: warning
1068 Degrade_Affecting_Severity: none
1074 Communication failure detected over provider network x% for ranges y% on host z%.
1076 Communication failure detected over provider network x% on host z%.
1077 Entity_Instance_ID: host=<hostname>.service=networking.providernet=<pnet-uuid>
1079 Proposed_Repair_Action: Check neighbour switch port VLAN assignments.
1082 Alarm_Type: operational-violation
1083 Probable_Cause: underlying-resource-unavailable
1084 Service_Affecting: True
1086 Management_Affecting_Severity: warning
1087 Degrade_Affecting_Severity: none
1093 ML2 Driver Agent non-reachable
1095 ML2 Driver Agent reachable but non-responsive
1097 ML2 Driver Agent authentication failure
1099 ML2 Driver Agent is unable to sync Neutron database
1100 Entity_Instance_ID: host=<hostname>.ml2driver=<driver>
1102 Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
1105 Alarm_Type: processing-error
1106 Probable_Cause: underlying-resource-unavailable
1107 Service_Affecting: True
1109 Management_Affecting_Severity: warning
1110 Degrade_Affecting_Severity: none
1115 Description: "Openflow Controller connection failed."
1116 Entity_Instance_ID: host=<hostname>.openflow-controller=<uri>
1118 Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1121 Alarm_Type: equipment
1122 Probable_Cause: loss-of-signal
1123 Service_Affecting: True
1125 Management_Affecting_Severity: warning
1126 Degrade_Affecting_Severity: critical
1132 No active Openflow controller connections found for this network.
1134 One or more Openflow controller connections in disconnected state for this network.
1135 Entity_Instance_ID: host=<hostname>.openflow-network=<name>
1136 Severity: [critical, major]
1137 Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1140 Alarm_Type: equipment
1141 Probable_Cause: loss-of-signal
1142 Service_Affecting: True
1144 Management_Affecting_Severity: warning
1145 Degrade_Affecting_Severity: critical
1150 Description: "OVSDB Manager connection failed."
1151 Entity_Instance_ID: host=<hostname>.sdn-controller=<uuid>
1153 Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1156 Alarm_Type: equipment
1157 Probable_Cause: loss-of-signal
1158 Service_Affecting: True
1160 Management_Affecting_Severity: warning
1161 Degrade_Affecting_Severity: critical
1166 Description: "No active OVSDB connections found."
1167 Entity_Instance_ID: host=<hostname>
1169 Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1172 Alarm_Type: equipment
1173 Probable_Cause: loss-of-signal
1174 Service_Affecting: True
1176 Management_Affecting_Severity: warning
1177 Degrade_Affecting_Severity: critical
1181 Description: "Dynamic routing agent x% lost connectivity to peer y%."
1182 Entity_Instance_ID: host=<hostname>.agent=<agent-uuid>.bgp-peer=<bgp-peer>
1184 Proposed_Repair_Action: If condition persists, fix connectivity to peer.
1187 Alarm_Type: operational-violation
1188 Probable_Cause: loss-of-signal
1189 Service_Affecting: True
1191 Management_Affecting_Severity: warning
1192 Degrade_Affecting_Severity: none
1195 #---------------------------------------------------------------------------
1197 #---------------------------------------------------------------------------
1202 Service group failure; <list of affected services>.
1204 Service group degraded; <list of affected services>.
1206 Service group warning; <list of affected services>.
1207 Entity_Instance_ID: service_domain=<domain_name>.service_group=<group_name>.host=<hostname>
1208 Severity: [critical, major, minor]
1209 Proposed_Repair_Action: Contact next level of support.
1211 Inhibit_Alarms: False
1212 Alarm_Type: processing-error
1213 Probable_Cause: underlying-resource-unavailable
1214 Service_Affecting: True
1216 Management_Affecting_Severity: warning
1217 Degrade_Affecting_Severity: major
1223 Service group loss of redundancy; expected <num> standby member<s> but no standby members available.
1225 Service group loss of redundancy; expected <num> standby member<s> but only <num> standby member<s> available.
1227 Service group loss of redundancy; expected <num> active member<s> but no active members available.
1229 Service group loss of redundancy; expected <num> active member<s> but only <num> active member<s> available.
1230 Entity_Instance_ID: service_domain=<domain_name>.service_group=<group_name>
1232 Proposed_Repair_Action: "Bring a controller node back into service, otherwise contact next level of support."
1234 Inhibit_Alarms: False
1235 Alarm_Type: processing-error
1236 Probable_Cause: underlying-resource-unavailable
1237 Service_Affecting: True
1239 Management_Affecting_Severity: warning
1240 Degrade_Affecting_Severity: none
1246 License key is not installed; a valid license key is required for operation.
1248 License key has expired or is invalid; a valid license key is required for operation.
1250 Evaluation license key will expire on <date>; there are <num_days> days remaining in this evaluation.
1252 Evaluation license key will expire on <date>; there is only 1 day remaining in this evaluation.
1253 Entity_Instance_ID: host=<hostname>
1255 Proposed_Repair_Action: Contact next level of support to obtain a new license key.
1257 Inhibit_Alarms: False
1258 Alarm_Type: processing-error
1259 Probable_Cause: key-expired
1260 Service_Affecting: True
1262 Management_Affecting_Severity: critical
1263 Degrade_Affecting_Severity: none
1266 # 400.004: // NOTE Removed
1268 # Description: Service group software modification detected; <list of affected files>.
1269 # Entity_Instance_ID: host=<hostname>
1271 # Proposed_Repair_Action: Contact next level of support.
1272 # Maintenance_Action:
1273 # Inhibit_Alarms: False
1274 # Alarm_Type: processing-error
1275 # Probable_Cause: software-program-error
1276 # Service_Affecting: True
1277 # Suppression: False
1283 Communication failure detected with peer over port <linux-ifname>.
1285 Communication failure detected with peer over port <linux-ifname> within the last 30 seconds.
1286 Entity_Instance_ID: host=<hostname>.network=<mgmt | oam | cluster-host>
1288 Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1290 Inhibit_Alarms: False
1291 Alarm_Type: communication
1292 Probable_Cause: underlying-resource-unavailable
1293 Service_Affecting: True
1295 Management_Affecting_Severity: warning
1296 Degrade_Affecting_Severity: none
1299 #---------------------------------------------------------------------------
1301 #---------------------------------------------------------------------------
1305 Description: Service group <group> state change from <state> to <state> on host <host_name>
1306 Entity_Instance_ID: service_domain=<domain>.service_group=<group>.host=<host_name>
1308 Alarm_Type: processing-error
1309 Probable_Cause: unspecified-reason
1310 Service_Affecting: True
1315 Service group <group> loss of redundancy; expected <X> standby member but no standby members available
1317 Service group <group> loss of redundancy; expected <X> standby member but only <Y> standby member(s) available
1319 Service group <group> has no active members available; expected <X> active member(s)
1321 Service group <group> loss of redundancy; expected <X> active member(s) but only <Y> active member(s) available
1322 Entity_Instance_ID: service_domain=<domain>.service_group=<group>
1324 Alarm_Type: processing-error
1325 Probable_Cause: unspecified-reason
1326 Service_Affecting: True
1331 License key has expired or is invalid
1333 Evaluation license key will expire on <date>
1335 License key is valid
1336 Entity_Instance_ID: host=<host_name>
1338 Alarm_Type: processing-error
1339 Probable_Cause: unspecified-reason
1340 Service_Affecting: True
1345 Communication failure detected with peer over port <port> on host <host_name>
1347 Communication failure detected with peer over port <port> on host <host_name> within the last <X> seconds
1349 Communication established with peer over port <port> on host <host_name>
1350 Entity_Instance_ID: host=<host_name>.network=<network>
1352 Alarm_Type: processing-error
1353 Probable_Cause: unspecified-reason
1354 Service_Affecting: True
1358 Description: Swact or swact-force
1359 Entity_Instance_ID: host=<host_name>
1361 Alarm_Type: processing-error
1362 Probable_Cause: unspecified-reason
1363 Service_Affecting: True
1366 #---------------------------------------------------------------------------
1368 #---------------------------------------------------------------------------
1372 Description: TPM initialization failed on host.
1373 Entity_Instance_ID: host=<hostname>
1375 Proposed_Repair_Action: Reinstall HTTPS certificate; if problem persists contact next level of support.
1376 Maintenance_Action: degrade
1378 Alarm_Type: equipment
1379 Probable_Cause: procedural-error
1380 Service_Affecting: True
1382 Management_Affecting_Severity: none
1383 Degrade_Affecting_Severity: none
1387 Description: Developer patch certificate enabled.
1388 Entity_Instance_ID: host=controller
1390 Proposed_Repair_Action: Reinstall system to disable developer certificate and remove untrusted patches.
1393 Alarm_Type: operational-violation
1394 Probable_Cause: unspecified-reason
1395 Service_Affecting: False
1397 Management_Affecting_Severity: none
1398 Degrade_Affecting_Severity: none
1403 Certificate 'system certificate-show <uuid>' (mode=<ssl/ssl_ca/docker_registry/openstack/openstack_ca>) expiring soon on <date>.
1405 Certificate '<Namespace>/<Certificate/Secret>' expiring soon on <date>.
1407 Certificate '<k8sRootCA/EtcdCA>' expiring soon on <date>.
1408 Entity_Instance_ID: |-
1409 system.certificate.mode=<mode>.uuid=<uuid>
1411 namespace=<namespace-name>.certificate=<certificate-name>
1413 namespace=<namespace-name>.secret=<secret-name>
1415 system.certificate.k8sRootCA
1417 Proposed_Repair_Action: Check certificate expiration time. Renew certificate for the entity identified.
1420 Alarm_Type: operational-violation
1421 Probable_Cause: certificate-expiration
1422 Service_Affecting: False
1424 Management_Affecting_Severity: none
1425 Degrade_Affecting_Severity: none
1430 Certificate 'system certificate-show <uuid>' (mode=<ssl/ssl_ca/docker_registry/openstack/openstack_ca>) expired.
1432 Certificate '<Namespace>/<Certificate/Secret>' expired.
1434 Certificate '<k8sRootCA/EtcdRootCA>' expired.
1435 Entity_Instance_ID: |-
1436 system.certificate.mode=<mode>.uuid=<uuid>
1438 namespace=<namespace-name>.certificate=<certificate-name>
1440 namespace=<namespace-name>.secret=<secret-name>
1442 system.certificate.k8sRootCA
1444 Proposed_Repair_Action: Check certificate expiration time. Renew certificate for the entity identified.
1447 Alarm_Type: operational-violation
1448 Probable_Cause: certificate-expiration
1449 Service_Affecting: False
1451 Management_Affecting_Severity: none
1452 Degrade_Affecting_Severity: none
1456 Description: "Host <host_name> has IMA Appraisal failure for service <service> when executing <file>[, reason = <reason_text>]"
1457 Entity_Instance_ID: host=<hostname>.service=<service>
1459 Alarm_Type: integrity-violation
1460 Probable_Cause: information-modification-detected
1461 Service_Affecting: False
1464 #---------------------------------------------------------------------------
1466 #---------------------------------------------------------------------------
1471 Instance <instance_name> owned by <tenant_name> has failed on host <host_name>
1472 Instance <instance_name> owned by <tenant_name> has failed to schedule
1473 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1475 Proposed_Repair_Action: The system will attempt recovery; no repair action required
1478 Alarm_Type: processing-error
1479 Probable_Cause: software-error
1480 Service_Affecting: True
1482 Management_Affecting_Severity: warning
1483 Degrade_Affecting_Severity: none
1487 Description: Instance <instance_name> owned by <tenant_name> is paused on host <host_name>
1488 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1490 Proposed_Repair_Action: Unpause the instance
1493 Alarm_Type: processing-error
1494 Probable_Cause: procedural-error
1495 Service_Affecting: True
1497 Management_Affecting_Severity: warning
1498 Degrade_Affecting_Severity: none
1502 Description: Instance <instance_name> owned by <tenant_name> is suspended on host <host_name>
1503 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1505 Proposed_Repair_Action: Resume the instance
1508 Alarm_Type: processing-error
1509 Probable_Cause: procedural-error
1510 Service_Affecting: True
1512 Management_Affecting_Severity: warning
1513 Degrade_Affecting_Severity: none
1517 Description: Instance <instance_name> owned by <tenant_name> is stopped on host <host_name>
1518 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1520 Proposed_Repair_Action: Start the instance
1523 Alarm_Type: processing-error
1524 Probable_Cause: procedural-error
1525 Service_Affecting: True
1527 Management_Affecting_Severity: warning
1528 Degrade_Affecting_Severity: none
1532 Description: Instance <instance_name> owned by <tenant_name> is rebooting on host <host_name>
1533 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1535 Proposed_Repair_Action: Wait for reboot to complete; if problem persists contact next level of support
1538 Alarm_Type: processing-error
1539 Probable_Cause: unspecified-reason
1540 Service_Affecting: True
1542 Management_Affecting_Severity: warning
1543 Degrade_Affecting_Severity: none
1547 Description: Instance <instance_name> owned by <tenant_name> is rebuilding on host <host_name>
1548 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1550 Proposed_Repair_Action: Wait for rebuild to complete; if problem persists contact next level of support
1553 Alarm_Type: processing-error
1554 Probable_Cause: underlying-resource-unavailable
1555 Service_Affecting: True
1557 Management_Affecting_Severity: warning
1558 Degrade_Affecting_Severity: none
1562 Description: Instance <instance_name> owned by <tenant_name> is evacuating from host <host_name>
1563 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1565 Proposed_Repair_Action: Wait for evacuate to complete; if problem persists contact next level of support
1568 Alarm_Type: processing-error
1569 Probable_Cause: underlying-resource-unavailable
1570 Service_Affecting: True
1572 Management_Affecting_Severity: warning
1573 Degrade_Affecting_Severity: none
1577 Description: Instance <instance_name> owned by <tenant_name> is live migrating from host <host_name>
1578 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1580 Proposed_Repair_Action: Wait for live migration to complete; if problem persists contact next level of support
1583 Alarm_Type: processing-error
1584 Probable_Cause: unspecified-reason
1585 Service_Affecting: True
1587 Management_Affecting_Severity: warning
1588 Degrade_Affecting_Severity: none
1592 Description: Instance <instance_name> owned by <tenant_name> is cold migrating from host <host_name>
1593 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1595 Proposed_Repair_Action: Wait for cold migration to complete; if problem persists contact next level of support
1598 Alarm_Type: processing-error
1599 Probable_Cause: unspecified-reason
1600 Service_Affecting: True
1602 Management_Affecting_Severity: warning
1603 Degrade_Affecting_Severity: none
1607 Description: Instance <instance_name> owned by <tenant_name> has been cold-migrated to host <host_name> waiting for confirmation
1608 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1610 Proposed_Repair_Action: Confirm or revert cold-migrate of instance
1613 Alarm_Type: processing-error
1614 Probable_Cause: unspecified-reason
1615 Service_Affecting: True
1617 Management_Affecting_Severity: warning
1618 Degrade_Affecting_Severity: none
1622 Description: Instance <instance_name> owned by <tenant_name> is reverting cold migrate to host <host_name>
1623 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1625 Proposed_Repair_Action: "Wait for cold migration revert to complete; if problem persists contact next level of support"
1629 Probable_Cause: unspecified-reason
1630 Service_Affecting: True
1632 Management_Affecting_Severity: warning
1633 Degrade_Affecting_Severity: none
1637 Description: Instance <instance_name> owned by <tenant_name> is resizing on host <host_name>
1638 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1640 Proposed_Repair_Action: Wait for resize to complete; if problem persists contact next level of support
1643 Alarm_Type: processing-error
1644 Probable_Cause: unspecified-reason
1645 Service_Affecting: True
1647 Management_Affecting_Severity: warning
1648 Degrade_Affecting_Severity: none
1652 Description: Instance <instance_name> owned by <tenant_name> has been resized on host <host_name> waiting for confirmation
1653 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1655 Proposed_Repair_Action: Confirm or revert resize of instance
1658 Alarm_Type: processing-error
1659 Probable_Cause: unspecified-reason
1660 Service_Affecting: True
1662 Management_Affecting_Severity: warning
1663 Degrade_Affecting_Severity: none
1667 Description: Instance <instance_name> owned by <tenant_name> is reverting resize on host <host_name>
1668 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1670 Proposed_Repair_Action: "Wait for resize revert to complete; if problem persists contact next level of support"
1674 Probable_Cause: unspecified-reason
1675 Service_Affecting: True
1677 Management_Affecting_Severity: warning
1678 Degrade_Affecting_Severity: none
1682 Description: Guest Heartbeat not established for instance <instance_name> owned by <tenant_name> on host <host_name>
1683 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1685 Proposed_Repair_Action: "Verify that the instance is running the Guest-Client daemon, or disable Guest Heartbeat for the instance if no longer needed, otherwise contact next level of support"
1688 Alarm_Type: communication
1689 Probable_Cause: procedural-error
1690 Service_Affecting: True
1692 Management_Affecting_Severity: warning
1693 Degrade_Affecting_Severity: none
1697 Description: Multi-Node Recovery Mode
1698 Entity_Instance_ID: subsystem=vim
1700 Proposed_Repair_Action: "Wait for the system to exit out of this mode"
1703 Alarm_Type: equipment
1704 Probable_Cause: unspecified-reason
1705 Service_Affecting: True
1707 Management_Affecting_Severity: warning
1708 Degrade_Affecting_Severity: none
1712 Description: Server group <server_group_name> <policy> policy was not satisfied
1713 Entity_Instance_ID: server-group=<server-group-uuid>
1715 Proposed_Repair_Action: "Migrate instances in an attempt to satisfy the policy; if problem persists contact next level of support"
1718 Alarm_Type: processing-error
1719 Probable_Cause: procedural-error
1720 Service_Affecting: True
1722 Management_Affecting_Severity: none
1723 Degrade_Affecting_Severity: none
1728 Description: Instance <instance_name> is enabled on host <host_name>
1729 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1731 Alarm_Type: equipment
1732 Probable_Cause: unspecified-reason
1733 Service_Affecting: False
1737 Description: Instance <instance_name> owned by <tenant_name> has failed[, reason = <reason_text>]
1738 Instance <instance_name> owned by <tenant_name> has failed to schedule[, reason = <reason_text>]
1739 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1741 Alarm_Type: equipment
1742 Probable_Cause: unspecified-reason
1743 Service_Affecting: False
1747 Description: Create issued <by <tenant_name>|by the system> against <instance_name> owned by <tenant_name>
1748 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1750 Alarm_Type: equipment
1751 Probable_Cause: unspecified-reason
1752 Service_Affecting: False
1756 Description: Creating instance <instance_name> owned by <tenant_name>
1757 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1759 Alarm_Type: equipment
1760 Probable_Cause: unspecified-reason
1761 Service_Affecting: False
1765 Description: "Create rejected for instance <instance_name>[, reason = <reason_text>]"
1766 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1768 Alarm_Type: equipment
1769 Probable_Cause: unspecified-reason
1770 Service_Affecting: False
1774 Description: "Create cancelled for instance <instance_name>[, reason = <reason_text>]"
1775 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1777 Alarm_Type: equipment
1778 Probable_Cause: unspecified-reason
1779 Service_Affecting: False
1783 Description: "Create failed for instance <instance_name>[, reason = <reason_text>]"
1784 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1786 Alarm_Type: equipment
1787 Probable_Cause: unspecified-reason
1788 Service_Affecting: False
1792 Description: Instance <instance_name> owned by <tenant_name> has been created
1793 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1795 Alarm_Type: equipment
1796 Probable_Cause: unspecified-reason
1797 Service_Affecting: False
1801 Description: "Delete issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
1802 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1804 Alarm_Type: equipment
1805 Probable_Cause: unspecified-reason
1806 Service_Affecting: False
1810 Description: Deleting instance <instance_name> owned by <tenant_name>
1811 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1813 Alarm_Type: equipment
1814 Probable_Cause: unspecified-reason
1815 Service_Affecting: False
1819 Description: "Delete rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
1820 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1822 Alarm_Type: equipment
1823 Probable_Cause: unspecified-reason
1824 Service_Affecting: False
1828 Description: "Delete cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
1829 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1831 Alarm_Type: equipment
1832 Probable_Cause: unspecified-reason
1833 Service_Affecting: False
1837 Description: "Delete failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
1838 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1840 Alarm_Type: equipment
1841 Probable_Cause: unspecified-reason
1842 Service_Affecting: False
1846 Description: Deleted instance <instance_name> owned by <tenant_name>
1847 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1849 Alarm_Type: equipment
1850 Probable_Cause: unspecified-reason
1851 Service_Affecting: False
1855 Description: "Pause issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
1856 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1858 Alarm_Type: equipment
1859 Probable_Cause: unspecified-reason
1860 Service_Affecting: False
1864 Description: Pause inprogress for instance <instance_name> on host <host_name>
1865 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1867 Alarm_Type: equipment
1868 Probable_Cause: unspecified-reason
1869 Service_Affecting: False
1873 Description: "Pause rejected for instance <instance_name> enabled on host <host_name>[, reason = <reason_text>]"
1874 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1876 Alarm_Type: equipment
1877 Probable_Cause: unspecified-reason
1878 Service_Affecting: False
1882 Description: "Pause cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
1883 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1885 Alarm_Type: equipment
1886 Probable_Cause: unspecified-reason
1887 Service_Affecting: False
1891 Description: "Pause failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
1892 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1894 Alarm_Type: equipment
1895 Probable_Cause: unspecified-reason
1896 Service_Affecting: False
1900 Description: Pause complete for instance <instance_name> now paused on host <host_name>
1901 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1903 Alarm_Type: equipment
1904 Probable_Cause: unspecified-reason
1905 Service_Affecting: False
1909 Description: "Unpause issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
1910 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1912 Alarm_Type: equipment
1913 Probable_Cause: unspecified-reason
1914 Service_Affecting: False
1918 Description: Unpause inprogress for instance <instance_name> on host <host_name>
1919 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1921 Alarm_Type: equipment
1922 Probable_Cause: unspecified-reason
1923 Service_Affecting: False
1927 Description: "Unpause rejected for instance <instance_name> paused on host <host_name>[, reason = <reason_text>]"
1928 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1930 Alarm_Type: equipment
1931 Probable_Cause: unspecified-reason
1932 Service_Affecting: False
1936 Description: "Unpause cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
1937 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1939 Alarm_Type: equipment
1940 Probable_Cause: unspecified-reason
1941 Service_Affecting: False
1945 Description: "Unpause failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
1946 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1948 Alarm_Type: equipment
1949 Probable_Cause: unspecified-reason
1950 Service_Affecting: False
1954 Description: Unpause complete for instance <instance_name> now enabled on host <host_name>
1955 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1957 Alarm_Type: equipment
1958 Probable_Cause: unspecified-reason
1959 Service_Affecting: False
1963 Description: "Suspend issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
1964 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1966 Alarm_Type: equipment
1967 Probable_Cause: unspecified-reason
1968 Service_Affecting: False
1972 Description: Suspend inprogress for instance <instance_name> on host <host_name>
1973 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1975 Alarm_Type: equipment
1976 Probable_Cause: unspecified-reason
1977 Service_Affecting: False
1981 Description: "Suspend rejected for instance <instance_name> enabled on host <host_name>[, reason = <reason_text>]"
1982 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1984 Alarm_Type: equipment
1985 Probable_Cause: unspecified-reason
1986 Service_Affecting: False
1990 Description: "Suspend cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
1991 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1993 Alarm_Type: equipment
1994 Probable_Cause: unspecified-reason
1995 Service_Affecting: False
1999 Description: "Suspend failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2000 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2002 Alarm_Type: equipment
2003 Probable_Cause: unspecified-reason
2004 Service_Affecting: False
2008 Description: Suspend complete for instance <instance_name> now suspended on host <host_name>
2009 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2011 Alarm_Type: equipment
2012 Probable_Cause: unspecified-reason
2013 Service_Affecting: False
2017 Description: "Resume issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2018 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2020 Alarm_Type: equipment
2021 Probable_Cause: unspecified-reason
2022 Service_Affecting: False
2026 Description: Resume inprogress for instance <instance_name> on host <host_name>
2027 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2029 Alarm_Type: equipment
2030 Probable_Cause: unspecified-reason
2031 Service_Affecting: False
2035 Description: "Resume rejected for instance <instance_name> suspended on host <host_name>[, reason = <reason_text>]"
2036 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2038 Alarm_Type: equipment
2039 Probable_Cause: unspecified-reason
2040 Service_Affecting: False
2044 Description: "Resume cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2045 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2047 Alarm_Type: equipment
2048 Probable_Cause: unspecified-reason
2049 Service_Affecting: False
2053 Description: "Resume failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2054 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2056 Alarm_Type: equipment
2057 Probable_Cause: unspecified-reason
2058 Service_Affecting: False
2062 Description: Resume complete for instance <instance_name> now enabled on host <host_name>
2063 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2065 Alarm_Type: equipment
2066 Probable_Cause: unspecified-reason
2067 Service_Affecting: False
2071 Description: "Start issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2072 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2074 Alarm_Type: equipment
2075 Probable_Cause: unspecified-reason
2076 Service_Affecting: False
2080 Description: Start inprogress for instance <instance_name> on host <host_name>
2081 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2083 Alarm_Type: equipment
2084 Probable_Cause: unspecified-reason
2085 Service_Affecting: False
2089 Description: "Start rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2090 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2092 Alarm_Type: equipment
2093 Probable_Cause: unspecified-reason
2094 Service_Affecting: False
2098 Description: "Start cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2099 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2101 Alarm_Type: equipment
2102 Probable_Cause: unspecified-reason
2103 Service_Affecting: False
2107 Description: "Start failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2108 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2110 Alarm_Type: equipment
2111 Probable_Cause: unspecified-reason
2112 Service_Affecting: False
2116 Description: Start complete for instance <instance_name> now enabled on host <host_name>
2117 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2119 Alarm_Type: equipment
2120 Probable_Cause: unspecified-reason
2121 Service_Affecting: False
2125 Description: "Stop issued <by <tenant_name>|by the system|by the instance> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2126 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2128 Alarm_Type: equipment
2129 Probable_Cause: unspecified-reason
2130 Service_Affecting: False
2134 Description: Stop inprogress for instance <instance_name> on host <host_name>
2135 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2137 Alarm_Type: equipment
2138 Probable_Cause: unspecified-reason
2139 Service_Affecting: False
2143 Description: "Stop rejected for instance <instance_name> enabled on host <host_name>[, reason = <reason_text>]"
2144 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2146 Alarm_Type: equipment
2147 Probable_Cause: unspecified-reason
2148 Service_Affecting: False
2152 Description: "Stop cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2153 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2155 Alarm_Type: equipment
2156 Probable_Cause: unspecified-reason
2157 Service_Affecting: False
2161 Description: "Stop failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2162 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2164 Alarm_Type: equipment
2165 Probable_Cause: unspecified-reason
2166 Service_Affecting: False
2170 Description: Stop complete for instance <instance_name> now disabled on host <host_name>
2171 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2173 Alarm_Type: equipment
2174 Probable_Cause: unspecified-reason
2175 Service_Affecting: False
2179 Description: "Live-Migrate issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> from host <host_name>[, reason = <reason_text>]"
2180 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2182 Alarm_Type: equipment
2183 Probable_Cause: unspecified-reason
2184 Service_Affecting: False
2188 Description: Live-Migrate inprogress for instance <instance_name> from host <host_name>
2189 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2191 Alarm_Type: equipment
2192 Probable_Cause: unspecified-reason
2193 Service_Affecting: False
2197 Description: "Live-Migrate rejected for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2198 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2200 Alarm_Type: equipment
2201 Probable_Cause: unspecified-reason
2202 Service_Affecting: False
2206 Description: "Live-Migrate cancelled for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2207 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2209 Alarm_Type: equipment
2210 Probable_Cause: unspecified-reason
2211 Service_Affecting: False
2215 Description: "Live-Migrate failed for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2216 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2218 Alarm_Type: equipment
2219 Probable_Cause: unspecified-reason
2220 Service_Affecting: False
2224 Description: Live-Migrate complete for instance <instance_name> now enabled on host <host_name>
2225 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2227 Alarm_Type: equipment
2228 Probable_Cause: unspecified-reason
2229 Service_Affecting: False
2233 Description: "Cold-Migrate issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> from host <host_name>[, reason = <reason_text>]"
2234 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2236 Alarm_Type: equipment
2237 Probable_Cause: unspecified-reason
2238 Service_Affecting: False
2242 Description: Cold-Migrate inprogress for instance <instance_name> from host <host_name>
2243 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2245 Alarm_Type: equipment
2246 Probable_Cause: unspecified-reason
2247 Service_Affecting: False
2251 Description: "Cold-Migrate rejected for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2252 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2254 Alarm_Type: equipment
2255 Probable_Cause: unspecified-reason
2256 Service_Affecting: False
2260 Description: "Cold-Migrate cancelled for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2261 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2263 Alarm_Type: equipment
2264 Probable_Cause: unspecified-reason
2265 Service_Affecting: False
2269 Description: "Cold-Migrate failed for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2270 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2272 Alarm_Type: equipment
2273 Probable_Cause: unspecified-reason
2274 Service_Affecting: False
2278 Description: Cold-Migrate complete for instance <instance_name> now enabled on host <host_name>
2279 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2281 Alarm_Type: equipment
2282 Probable_Cause: unspecified-reason
2283 Service_Affecting: False
2287 Description: "Cold-Migrate-Confirm issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2288 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2290 Alarm_Type: equipment
2291 Probable_Cause: unspecified-reason
2292 Service_Affecting: False
2296 Description: Cold-Migrate-Confirm inprogress for instance <instance_name> on host <host_name>
2297 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2299 Alarm_Type: equipment
2300 Probable_Cause: unspecified-reason
2301 Service_Affecting: False
2305 Description: "Cold-Migrate-Confirm rejected for instance <instance_name> now enabled on host <host_name>[, reason = <reason_text>]"
2306 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2308 Alarm_Type: equipment
2309 Probable_Cause: unspecified-reason
2310 Service_Affecting: False
2314 Description: "Cold-Migrate-Confirm cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2315 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2317 Alarm_Type: equipment
2318 Probable_Cause: unspecified-reason
2319 Service_Affecting: False
2323 Description: "Cold-Migrate-Confirm failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2324 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2326 Alarm_Type: equipment
2327 Probable_Cause: unspecified-reason
2328 Service_Affecting: False
2332 Description: Cold-Migrate-Confirm complete for instance <instance_name> enabled on host <host_name>
2333 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2335 Alarm_Type: equipment
2336 Probable_Cause: unspecified-reason
2337 Service_Affecting: False
2341 Description: "Cold-Migrate-Revert issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2342 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2344 Alarm_Type: equipment
2345 Probable_Cause: unspecified-reason
2346 Service_Affecting: False
2350 Description: Cold-Migrate-Revert inprogress for instance <instance_name> from host <host_name>
2351 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2353 Alarm_Type: equipment
2354 Probable_Cause: unspecified-reason
2355 Service_Affecting: False
2359 Description: "Cold-Migrate-Revert rejected for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2360 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2362 Alarm_Type: equipment
2363 Probable_Cause: unspecified-reason
2364 Service_Affecting: False
2368 Description: "Cold-Migrate-Revert cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2369 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2371 Alarm_Type: equipment
2372 Probable_Cause: unspecified-reason
2373 Service_Affecting: False
2377 Description: "Cold-Migrate-Revert failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2378 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2380 Alarm_Type: equipment
2381 Probable_Cause: unspecified-reason
2382 Service_Affecting: False
2386 Description: Cold-Migrate-Revert complete for instance <instance_name> now enabled on host <host_name>
2387 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2389 Alarm_Type: equipment
2390 Probable_Cause: unspecified-reason
2391 Service_Affecting: False
2395 Description: "Evacuate issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2396 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2398 Alarm_Type: equipment
2399 Probable_Cause: unspecified-reason
2400 Service_Affecting: False
2404 Description: Evacuating instance <instance_name> owned by <tenant_name> from host <host_name>
2405 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2407 Alarm_Type: equipment
2408 Probable_Cause: unspecified-reason
2409 Service_Affecting: False
2413 Description: "Evacuate rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2414 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2416 Alarm_Type: equipment
2417 Probable_Cause: unspecified-reason
2418 Service_Affecting: False
2422 Description: "Evacuate cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2423 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2425 Alarm_Type: equipment
2426 Probable_Cause: unspecified-reason
2427 Service_Affecting: False
2431 Description: "Evacuate failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2432 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2434 Alarm_Type: equipment
2435 Probable_Cause: unspecified-reason
2436 Service_Affecting: False
2440 Description: Evacuate complete for instance <instance_name> now enabled on host <host_name>
2441 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2443 Alarm_Type: equipment
2444 Probable_Cause: unspecified-reason
2445 Service_Affecting: False
2449 Description: "Reboot <(soft-reboot)|(hard-reboot)> issued <by <tenant_name>|by the system|by the instance> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2450 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2452 Alarm_Type: equipment
2453 Probable_Cause: unspecified-reason
2454 Service_Affecting: False
2458 Description: Reboot inprogress for instance <instance_name> on host <host_name>
2459 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2461 Alarm_Type: equipment
2462 Probable_Cause: unspecified-reason
2463 Service_Affecting: False
2467 Description: "Reboot rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2468 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2470 Alarm_Type: equipment
2471 Probable_Cause: unspecified-reason
2472 Service_Affecting: False
2476 Description: "Reboot cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2477 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2479 Alarm_Type: equipment
2480 Probable_Cause: unspecified-reason
2481 Service_Affecting: False
2485 Description: "Reboot failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2486 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2488 Alarm_Type: equipment
2489 Probable_Cause: unspecified-reason
2490 Service_Affecting: False
2494 Description: Reboot complete for instance <instance_name> now enabled on host <host_name>
2495 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2497 Alarm_Type: equipment
2498 Probable_Cause: unspecified-reason
2499 Service_Affecting: False
2503 Description: "Rebuild issued <by <tenant_name>|by the system> against instance <instance_name> using image <image_name> on host <host_name>[, reason = <reason_text>]"
2504 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2506 Alarm_Type: equipment
2507 Probable_Cause: unspecified-reason
2508 Service_Affecting: False
2512 Description: Rebuild inprogress for instance <instance_name> on host <host_name>
2513 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2515 Alarm_Type: equipment
2516 Probable_Cause: unspecified-reason
2517 Service_Affecting: False
2521 Description: "Rebuild rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2522 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2524 Alarm_Type: equipment
2525 Probable_Cause: unspecified-reason
2526 Service_Affecting: False
2530 Description: "Rebuild cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2531 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2533 Alarm_Type: equipment
2534 Probable_Cause: unspecified-reason
2535 Service_Affecting: False
2539 Description: "Rebuild failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2540 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2542 Alarm_Type: equipment
2543 Probable_Cause: unspecified-reason
2544 Service_Affecting: False
2548 Description: Rebuild complete for instance <instance_name> now enabled on host <host_name>
2549 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2551 Alarm_Type: equipment
2552 Probable_Cause: unspecified-reason
2553 Service_Affecting: False
2557 Description: "Resize issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2558 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2560 Alarm_Type: equipment
2561 Probable_Cause: unspecified-reason
2562 Service_Affecting: False
2566 Description: Resize inprogress for instance <instance_name> on host <host_name>
2567 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2569 Alarm_Type: equipment
2570 Probable_Cause: unspecified-reason
2571 Service_Affecting: False
2575 Description: "Resize rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2576 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2578 Alarm_Type: equipment
2579 Probable_Cause: unspecified-reason
2580 Service_Affecting: False
2584 Description: "Resize cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2585 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2587 Alarm_Type: equipment
2588 Probable_Cause: unspecified-reason
2589 Service_Affecting: False
2593 Description: "Resize failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2594 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2596 Alarm_Type: equipment
2597 Probable_Cause: unspecified-reason
2598 Service_Affecting: False
2602 Description: Resize complete for instance <instance_name> enabled on host <host_name> waiting for confirmation
2603 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2605 Alarm_Type: equipment
2606 Probable_Cause: unspecified-reason
2607 Service_Affecting: False
2611 Description: "Resize-Confirm issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2612 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2614 Alarm_Type: equipment
2615 Probable_Cause: unspecified-reason
2616 Service_Affecting: False
2620 Description: Resize-Confirm inprogress for instance <instance_name> on host <host_name>
2621 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2623 Alarm_Type: equipment
2624 Probable_Cause: unspecified-reason
2625 Service_Affecting: False
2629 Description: "Resize-Confirm rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2630 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2632 Alarm_Type: equipment
2633 Probable_Cause: unspecified-reason
2634 Service_Affecting: False
2638 Description: "Resize-Confirm cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2639 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2641 Alarm_Type: equipment
2642 Probable_Cause: unspecified-reason
2643 Service_Affecting: False
2647 Description: "Resize-Confirm failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2648 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2650 Alarm_Type: equipment
2651 Probable_Cause: unspecified-reason
2652 Service_Affecting: False
2656 Description: Resize-Confirm complete for instance <instance_name> enabled on host <host_name>
2657 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2659 Alarm_Type: equipment
2660 Probable_Cause: unspecified-reason
2661 Service_Affecting: False
2665 Description: "Resize-Revert issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2666 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2668 Alarm_Type: equipment
2669 Probable_Cause: unspecified-reason
2670 Service_Affecting: False
2674 Description: Resize-Revert inprogress for instance <instance_name> on host <host_name>
2675 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2677 Alarm_Type: equipment
2678 Probable_Cause: unspecified-reason
2679 Service_Affecting: False
2683 Description: "Resize-Revert rejected for instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2684 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2686 Alarm_Type: equipment
2687 Probable_Cause: unspecified-reason
2688 Service_Affecting: False
2692 Description: "Resize-Revert cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2693 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2695 Alarm_Type: equipment
2696 Probable_Cause: unspecified-reason
2697 Service_Affecting: False
2701 Description: "Resize-Revert failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2702 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2704 Alarm_Type: equipment
2705 Probable_Cause: unspecified-reason
2706 Service_Affecting: False
2710 Description: Resize-Revert complete for instance <instance_name> enabled on host <host_name>
2711 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2713 Alarm_Type: equipment
2714 Probable_Cause: unspecified-reason
2715 Service_Affecting: False
2719 Description: Guest Heartbeat established for instance <instance_name> on host <host_name>
2720 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2722 Alarm_Type: equipment
2723 Probable_Cause: unspecified-reason
2724 Service_Affecting: False
2728 Description: Guest Heartbeat disconnected for instance <instance_name> on host <host_name>
2729 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2731 Alarm_Type: equipment
2732 Probable_Cause: unspecified-reason
2733 Service_Affecting: False
2737 Description: "Guest Heartbeat failed for instance <instance_name>[, reason = <reason_text>]"
2738 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2740 Alarm_Type: equipment
2741 Probable_Cause: unspecified-reason
2742 Service_Affecting: False
2746 Description: Instance <instance_name> has been renamed to <new_instance_name> owned by <tenant_name> on host <host_name>
2747 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2749 Alarm_Type: equipment
2750 Probable_Cause: unspecified-reason
2751 Service_Affecting: False
2755 Description: "Guest Health Check failed for instance <instance_name>[, reason = <reason_text>]"
2756 Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2758 Alarm_Type: equipment
2759 Probable_Cause: unspecified-reason
2760 Service_Affecting: False
2764 Description: "Entered Multi-Node Recovery Mode"
2765 Entity_Instance_ID: subsystem=vim
2767 Alarm_Type: equipment
2768 Probable_Cause: unspecified-reason
2769 Service_Affecting: False
2774 Description: "Exited Multi-Node Recovery Mode"
2775 Entity_Instance_ID: subsystem=vim
2777 Alarm_Type: equipment
2778 Probable_Cause: unspecified-reason
2779 Service_Affecting: False
2781 #---------------------------------------------------------------------------
2783 #---------------------------------------------------------------------------
2787 Description: "Application Upload Failure"
2788 Entity_Instance_ID: k8s_application=<appname>
2790 Proposed_Repair_Action: "Check system inventory log for cause."
2793 Alarm_Type: processing-error
2794 Probable_Cause: unknown
2795 Service_Affecting: False
2797 Management_Affecting_Severity: none
2798 Degrade_Affecting_Severity: none
2802 Description: "Application Apply Failure"
2803 Entity_Instance_ID: k8s_application=<appname>
2805 Proposed_Repair_Action: "Retry applying the application. Check application is managed by the system application framework.
2806 If the issue persists, please check system inventory log for cause."
2809 Alarm_Type: processing-error
2810 Probable_Cause: unknown
2811 Service_Affecting: True
2813 Management_Affecting_Severity: none
2814 Degrade_Affecting_Severity: none
2818 Description: "Application Remove Failure"
2819 Entity_Instance_ID: k8s_application=<appname>
2821 Proposed_Repair_Action: "Retry removing the application. If the issue persists, please check system inventory log for cause."
2824 Alarm_Type: processing-error
2825 Probable_Cause: unknown
2826 Service_Affecting: True
2828 Management_Affecting_Severity: none
2829 Degrade_Affecting_Severity: none
2833 Description: "Application Apply In Progress"
2834 Entity_Instance_ID: k8s_application=<appname>
2836 Proposed_Repair_Action: "No action required."
2840 Probable_Cause: unknown
2841 Service_Affecting: True
2843 Management_Affecting_Severity: warning
2844 Degrade_Affecting_Severity: none
2848 Description: "Application Update In Progress"
2849 Entity_Instance_ID: k8s_application=<appname>
2851 Proposed_Repair_Action: "No action required."
2855 Probable_Cause: unknown
2856 Service_Affecting: True
2858 Management_Affecting_Severity: warning
2859 Degrade_Affecting_Severity: none
2863 Description: "Automatic Application Re-Apply Is Pending"
2864 Entity_Instance_ID: k8s_application=<appname>
2866 Proposed_Repair_Action: "Ensure all hosts are either locked or unlocked. When the system is stable the application will be automatically reapplied."
2870 Probable_Cause: unknown
2871 Service_Affecting: False
2873 Management_Affecting_Severity: none
2874 Degrade_Affecting_Severity: none
2876 #---------------------------------------------------------------------------
2878 #---------------------------------------------------------------------------
2883 Storage Alarm Condition:
2884 1 mons down, quorum 1,2 controller-1,storage-0
2885 Entity_Instance_ID: cluster=<dist-fs-uuid>
2886 Severity: [critical, major]
2887 Proposed_Repair_Action: "If problem persists, contact next level of support."
2890 Alarm_Type: equipment
2891 Probable_Cause: equipment-malfunction
2896 Management_Affecting_Severity: warning
2897 Degrade_Affecting_Severity: none
2902 Potential data loss. No available OSDs in storage replication group.
2903 Entity_Instance_ID: cluster=<dist-fs-uuid>.peergroup=<group-x>
2904 Severity: [critical]
2905 Proposed_Repair_Action: "Ensure storage hosts from replication group are unlocked and available.
2906 Check if OSDs of each storage host are up and running.
2907 If problem persists, contact next level of support."
2910 Alarm_Type: equipment
2911 Probable_Cause: equipment-malfunction
2915 Management_Affecting_Severity: warning
2916 Degrade_Affecting_Severity: none
2921 Loss of replication in peergroup.
2922 Entity_Instance_ID: cluster=<dist-fs-uuid>.peergroup=<group-x>
2924 Proposed_Repair_Action: "Ensure storage hosts from replication group are unlocked and available.
2925 Check if OSDs of each storage host are up and running.
2926 If problem persists, contact next level of support."
2929 Alarm_Type: equipment
2930 Probable_Cause: equipment-malfunction
2934 Management_Affecting_Severity: warning
2935 Degrade_Affecting_Severity: none
2939 Description: ["Image storage media is full: There is not enough disk space on the image storage media.",
2940 "Instance <instance name> snapshot failed: There is not enough disk space on the image storage media.",
2941 "Supplied <attrs> (<supplied>) and <attrs> generated from uploaded image (<actual>) did not match. Setting image status to 'killed'.",
2942 "Error in store configuration. Adding images to store is disabled.",
2943 "Forbidden upload attempt: <exception>",
2944 "Insufficient permissions on image storage media: <exception>",
2945 "Denying attempt to upload image larger than <size> bytes.",
2946 "Denying attempt to upload image because it exceeds the quota: <exception>",
2947 "Received HTTP error while uploading image <image_id>",
2948 "Client disconnected before sending all data to backend",
2949 "Failed to upload image <image_id>"]
2950 Entity_Instance_ID: ["image=<image-uuid>, instance=<instance-uuid>",
2951 "tenant=<tenant-uuid>, instance=<instance-uuid>",
2952 "image=<image-uuid>, instance=<instance-uuid>",
2953 "image=<image-uuid>, instance=<instance-uuid>",
2954 "image=<image-uuid>, instance=<instance-uuid>",
2955 "image=<image-uuid>, instance=<instance-uuid>",
2956 "image=<image-uuid>, instance=<instance-uuid>",
2957 "image=<image-uuid>, instance=<instance-uuid>",
2958 "image=<image-uuid>, instance=<instance-uuid>",
2959 "image=<image-uuid>, instance=<instance-uuid>",
2960 "image=<image-uuid>, instance=<instance-uuid>"]
2961 Alarm_Type: [physical-violation,
2963 integrity-violation,
2964 integrity-violation,
2965 security-service-or-mechanism-violation,
2966 security-service-or-mechanism-violation,
2967 security-service-or-mechanism-violation,
2968 security-service-or-mechanism-violation,
2971 operational-violation]
2973 Proposed_Repair_Action:
2976 Probable_Cause: unspecified-reason
2977 Service_Affecting: False
2979 Management_Affecting_Severity: none
2980 Degrade_Affecting_Severity: none
2985 Storage Alarm Condition:
2986 Cinder I/O Congestion is above normal range and is building
2987 Entity_Instance_ID: cinder_io_monitor
2989 Proposed_Repair_Action: "Reduce the I/O load on the Cinder LVM backend. Use
2990 Cinder QoS mechanisms on high usage volumes."
2994 Probable_Cause: congestion
2995 Service_Affecting: False
2997 Management_Affecting_Severity: none
2998 Degrade_Affecting_Severity: none
3003 Storage Alarm Condition:
3004 Cinder I/O Congestion is high and impacting guest performance
3005 Entity_Instance_ID: cinder_io_monitor
3007 Proposed_Repair_Action: "Reduce the I/O load on the Cinder LVM backend.
3008 Cinder actions may fail until congestion is reduced.
3009 Use Cinder QoS mechanisms on high usage volumes."
3013 Probable_Cause: congestion
3014 Service_Affecting: False
3016 Management_Affecting_Severity: warning
3017 Degrade_Affecting_Severity: none
3022 Storage Alarm Condition:
3023 [ Metadata usage for LVM thin pool <VG name>/<Pool name> exceeded threshold and automatic extension failed,
3024 Metadata usage for LVM thin pool <VG name>/<Pool name> exceeded threshold ]; threshold x%, actual y%.
3025 Entity_Instance_ID: <hostname>.lvmthinpool=<VG name>/<Pool name>
3027 Proposed_Repair_Action: "Increase Storage Space Allotment for Cinder on the 'lvm' backend.
3028 Consult the System Administration Manual for more details.
3029 If problem persists, contact next level of support."
3032 Alarm_Type: operational-violation
3033 Probable_Cause: threshold-crossed
3034 Service_Affecting: False
3036 Management_Affecting_Severity: major
3037 Degrade_Affecting_Severity: none
3042 Storage Alarm Condition:
3043 <storage-backend-name> configuration failed to apply on host: <host-uuid>.
3044 Entity_Instance_ID: storage_backend=<storage-backend-name>
3046 Proposed_Repair_Action: "Update backend setting to reapply configuration.
3047 Consult the System Administration Manual for more details.
3048 If problem persists, contact next level of support."
3051 Alarm_Type: equipment
3052 Probable_Cause: configuration-or-customization-error
3053 Service_Affecting: True
3055 Management_Affecting_Severity: major
3056 Degrade_Affecting_Severity: none
3058 #---------------------------------------------------------------------------
3060 #---------------------------------------------------------------------------
3064 Description: Persistent Volume Migration Error
3065 Entity_Instance_ID: kubernetes=PV-migration-failed
3067 Proposed_Repair_Action: "Manually execute /usr/bin/ceph_k8s_update_monitors.sh
3068 to confirm PVs are updated, then lock/unlock to clear
3069 alarms. If problem persists, contact next level of
3073 Alarm_Type: processing-error
3074 Probable_Cause: communication-subsystem-failure
3075 Service_Affecting: False
3077 Management_Affecting_Severity: none
3078 Degrade_Affecting_Severity: none
3080 #---------------------------------------------------------------------------
3082 #---------------------------------------------------------------------------
3086 Description: Patching operation in progress.
3087 Entity_Instance_ID: host=controller
3089 Proposed_Repair_Action: Complete reboots of affected hosts.
3092 Alarm_Type: environmental
3093 Probable_Cause: unspecified-reason
3094 Service_Affecting: False
3096 Management_Affecting_Severity: warning
3097 Degrade_Affecting_Severity: none
3101 Description: Patch host install failure. Command "sw-patch host-install" failed.
3102 Entity_Instance_ID: host=<hostname>
3104 Proposed_Repair_Action: Undo patching operation. Check patch logs on the target host (i.e. /var/log/patching.log).
3107 Alarm_Type: environmental
3108 Probable_Cause: unspecified-reason
3109 Service_Affecting: False
3111 Management_Affecting_Severity: warning
3112 Degrade_Affecting_Severity: none
3116 Description: A patch with state 'obsolete' in its metadata has been uploaded.
3117 Entity_Instance_ID: host=controller
3119 Proposed_Repair_Action: Remove and delete obsolete patches.
3122 Alarm_Type: environmental
3123 Probable_Cause: unspecified-reason
3124 Service_Affecting: False
3126 Management_Affecting_Severity: warning
3127 Degrade_Affecting_Severity: none
3131 Description: The upgrade and running software version do not match. Command host-upgrade failed.
3132 Entity_Instance_ID: host=<hostname>
3134 Proposed_Repair_Action: Reinstall host to update applied load.
3137 Alarm_Type: operational-violation
3138 Probable_Cause: unspecified-reason
3139 Service_Affecting: True
3141 Management_Affecting_Severity: warning
3142 Degrade_Affecting_Severity: none
3146 Description: System Upgrade in progress.
3147 Entity_Instance_ID: host=controller
3149 Proposed_Repair_Action: No action required.
3152 Alarm_Type: operational-violation
3153 Probable_Cause: unspecified-reason
3154 Service_Affecting: False
3156 Management_Affecting_Severity: warning
3157 Degrade_Affecting_Severity: none
3161 Description: Device image update operation in progress.
3162 Entity_Instance_ID: host=controller
3164 Proposed_Repair_Action: Complete reboots of affected hosts.
3167 Alarm_Type: environmental
3168 Probable_Cause: unspecified-reason
3169 Service_Affecting: False
3171 Management_Affecting_Severity: warning
3172 Degrade_Affecting_Severity: none
3176 Description: Kubernetes upgrade in progress.
3177 Entity_Instance_ID: host=controller
3179 Proposed_Repair_Action: No action required.
3182 Alarm_Type: operational-violation
3183 Probable_Cause: unspecified-reason
3184 Service_Affecting: False
3186 Management_Affecting_Severity: warning
3187 Degrade_Affecting_Severity: none
3191 Description: Kubernetes rootca update in progress.
3192 Entity_Instance_ID: host=controller
3194 Proposed_Repair_Action: Wait for kubernetes rootca procedure to complete.
3197 Alarm_Type: operational-violation
3198 Probable_Cause: unspecified-reason
3199 Service_Affecting: False
3201 Management_Affecting_Severity: warning
3202 Degrade_Affecting_Severity: none
3206 Description: Kubernetes root CA update aborted, certificates may not be fully updated. Command "system kube-rootca-update-abort" has been run.
3207 Entity_Instance_ID: host=controller
3209 Proposed_Repair_Action: Fully update certificates by a new root CA update.
3212 Alarm_Type: operational-violation
3213 Probable_Cause: unspecified-reason
3214 Service_Affecting: False
3216 Management_Affecting_Severity: warning
3217 Degrade_Affecting_Severity: none
3221 Description: Software patch auto-apply in progress
3222 Entity_Instance_ID: orchestration=sw-patch
3224 Proposed_Repair_Action: Wait for software patch auto-apply to complete; if problem persists contact next level of support
3227 Alarm_Type: equipment
3228 Probable_Cause: unspecified-reason
3229 Service_Affecting: True
3231 Management_Affecting_Severity: warning
3232 Degrade_Affecting_Severity: none
3236 Description: Software patch auto-apply aborting
3237 Entity_Instance_ID: orchestration=sw-patch
3239 Proposed_Repair_Action: Wait for software patch auto-apply abort to complete; if problem persists contact next level of support
3242 Alarm_Type: equipment
3243 Probable_Cause: unspecified-reason
3244 Service_Affecting: True
3246 Management_Affecting_Severity: warning
3247 Degrade_Affecting_Severity: none
3251 Description: Software patch auto-apply failed. Command "sw-manager patch-strategy apply" failed.
3252 Entity_Instance_ID: orchestration=sw-patch
3254 Proposed_Repair_Action: Attempt to apply software patches manually; if problem persists contact next level of support
3257 Alarm_Type: equipment
3258 Probable_Cause: underlying-resource-unavailable
3259 Service_Affecting: True
3261 Management_Affecting_Severity: warning
3262 Degrade_Affecting_Severity: none
3266 Description: Software patch auto-apply start
3267 Entity_Instance_ID: orchestration=sw-patch
3269 Alarm_Type: equipment
3270 Probable_Cause: unspecified-reason
3271 Service_Affecting: False
3275 Description: Software patch auto-apply in progress
3276 Entity_Instance_ID: orchestration=sw-patch
3278 Alarm_Type: equipment
3279 Probable_Cause: unspecified-reason
3280 Service_Affecting: False
3284 Description: Software patch auto-apply rejected
3285 Entity_Instance_ID: orchestration=sw-patch
3287 Alarm_Type: equipment
3288 Probable_Cause: unspecified-reason
3289 Service_Affecting: False
3293 Description: Software patch auto-apply cancelled
3294 Entity_Instance_ID: orchestration=sw-patch
3296 Alarm_Type: equipment
3297 Probable_Cause: unspecified-reason
3298 Service_Affecting: False
3302 Description: Software patch auto-apply failed
3303 Entity_Instance_ID: orchestration=sw-patch
3305 Alarm_Type: equipment
3306 Probable_Cause: unspecified-reason
3307 Service_Affecting: False
3311 Description: Software patch auto-apply completed
3312 Entity_Instance_ID: orchestration=sw-patch
3314 Alarm_Type: equipment
3315 Probable_Cause: unspecified-reason
3316 Service_Affecting: False
3320 Description: Software patch auto-apply abort
3321 Entity_Instance_ID: orchestration=sw-patch
3323 Alarm_Type: equipment
3324 Probable_Cause: unspecified-reason
3325 Service_Affecting: False
3329 Description: Software patch auto-apply aborting
3330 Entity_Instance_ID: orchestration=sw-patch
3332 Alarm_Type: equipment
3333 Probable_Cause: unspecified-reason
3334 Service_Affecting: False
3338 Description: Software patch auto-apply abort rejected
3339 Entity_Instance_ID: orchestration=sw-patch
3341 Alarm_Type: equipment
3342 Probable_Cause: unspecified-reason
3343 Service_Affecting: False
3347 Description: Software patch auto-apply abort failed
3348 Entity_Instance_ID: orchestration=sw-patch
3350 Alarm_Type: equipment
3351 Probable_Cause: unspecified-reason
3352 Service_Affecting: False
3356 Description: Software patch auto-apply aborted
3357 Entity_Instance_ID: orchestration=sw-patch
3359 Alarm_Type: equipment
3360 Probable_Cause: unspecified-reason
3361 Service_Affecting: False
3365 Description: Software upgrade auto-apply in progress
3366 Entity_Instance_ID: orchestration=sw-upgrade
3368 Proposed_Repair_Action: Wait for software upgrade auto-apply to complete; if problem persists contact next level of support
3371 Alarm_Type: equipment
3372 Probable_Cause: unspecified-reason
3373 Service_Affecting: True
3375 Management_Affecting_Severity: warning
3376 Degrade_Affecting_Severity: none
3380 Description: Software upgrade auto-apply aborting
3381 Entity_Instance_ID: orchestration=sw-upgrade
3383 Proposed_Repair_Action: Wait for software upgrade auto-apply abort to complete; if problem persists contact next level of support
3386 Alarm_Type: equipment
3387 Probable_Cause: unspecified-reason
3388 Service_Affecting: True
3390 Management_Affecting_Severity: warning
3391 Degrade_Affecting_Severity: none
3395 Description: Software upgrade auto-apply failed. Command "sw-manager upgrade-strategy apply" failed.
3396 Entity_Instance_ID: orchestration=sw-upgrade
3398 Proposed_Repair_Action: Attempt to apply software upgrade manually; if problem persists contact next level of support
3401 Alarm_Type: equipment
3402 Probable_Cause: underlying-resource-unavailable
3403 Service_Affecting: True
3405 Management_Affecting_Severity: warning
3406 Degrade_Affecting_Severity: none
3410 Description: Software upgrade auto-apply start
3411 Entity_Instance_ID: orchestration=sw-upgrade
3413 Alarm_Type: equipment
3414 Probable_Cause: unspecified-reason
3415 Service_Affecting: False
3419 Description: Software upgrade auto-apply in progress
3420 Entity_Instance_ID: orchestration=sw-upgrade
3422 Alarm_Type: equipment
3423 Probable_Cause: unspecified-reason
3424 Service_Affecting: False
3428 Description: Software upgrade auto-apply rejected
3429 Entity_Instance_ID: orchestration=sw-upgrade
3431 Alarm_Type: equipment
3432 Probable_Cause: unspecified-reason
3433 Service_Affecting: False
3437 Description: Software upgrade auto-apply cancelled
3438 Entity_Instance_ID: orchestration=sw-upgrade
3440 Alarm_Type: equipment
3441 Probable_Cause: unspecified-reason
3442 Service_Affecting: False
3446 Description: Software upgrade auto-apply failed
3447 Entity_Instance_ID: orchestration=sw-upgrade
3449 Alarm_Type: equipment
3450 Probable_Cause: unspecified-reason
3451 Service_Affecting: False
3455 Description: Software upgrade auto-apply completed
3456 Entity_Instance_ID: orchestration=sw-upgrade
3458 Alarm_Type: equipment
3459 Probable_Cause: unspecified-reason
3460 Service_Affecting: False
3464 Description: Software upgrade auto-apply abort
3465 Entity_Instance_ID: orchestration=sw-upgrade
3467 Alarm_Type: equipment
3468 Probable_Cause: unspecified-reason
3469 Service_Affecting: False
3473 Description: Software upgrade auto-apply aborting
3474 Entity_Instance_ID: orchestration=sw-upgrade
3476 Alarm_Type: equipment
3477 Probable_Cause: unspecified-reason
3478 Service_Affecting: False
3482 Description: Software upgrade auto-apply abort rejected
3483 Entity_Instance_ID: orchestration=sw-upgrade
3485 Alarm_Type: equipment
3486 Probable_Cause: unspecified-reason
3487 Service_Affecting: False
3491 Description: Software upgrade auto-apply abort failed
3492 Entity_Instance_ID: orchestration=sw-upgrade
3494 Alarm_Type: equipment
3495 Probable_Cause: unspecified-reason
3496 Service_Affecting: False
3500 Description: Software upgrade auto-apply aborted
3501 Entity_Instance_ID: orchestration=sw-upgrade
3503 Alarm_Type: equipment
3504 Probable_Cause: unspecified-reason
3505 Service_Affecting: False
3509 Description: Firmware Update auto-apply in progress
3510 Entity_Instance_ID: orchestration=fw-update
3512 Proposed_Repair_Action: Wait for firmware update auto-apply to complete; if problem persists contact next level of support
3515 Alarm_Type: equipment
3516 Probable_Cause: unspecified-reason
3517 Service_Affecting: True
3519 Management_Affecting_Severity: warning
3520 Degrade_Affecting_Severity: none
3524 Description: Firmware Update auto-apply aborting
3525 Entity_Instance_ID: orchestration=fw-update
3527 Proposed_Repair_Action: Wait for firmware update auto-apply abort to complete; if problem persists contact next level of support
3530 Alarm_Type: equipment
3531 Probable_Cause: unspecified-reason
3532 Service_Affecting: True
3534 Management_Affecting_Severity: warning
3535 Degrade_Affecting_Severity: none
3539 Description: Firmware Update auto-apply failed. Command "sw-manager fw-update-strategy apply" failed.
3540 Entity_Instance_ID: orchestration=fw-update
3542 Proposed_Repair_Action: Attempt to apply firmware update manually; if problem persists contact next level of support
3545 Alarm_Type: equipment
3546 Probable_Cause: underlying-resource-unavailable
3547 Service_Affecting: True
3549 Management_Affecting_Severity: warning
3550 Degrade_Affecting_Severity: none
3554 Description: Firmware update auto-apply start
3555 Entity_Instance_ID: orchestration=fw-update
3557 Alarm_Type: equipment
3558 Probable_Cause: unspecified-reason
3559 Service_Affecting: False
3563 Description: Firmware update auto-apply in progress
3564 Entity_Instance_ID: orchestration=fw-update
3566 Alarm_Type: equipment
3567 Probable_Cause: unspecified-reason
3568 Service_Affecting: False
3572 Description: Firmware update auto-apply rejected
3573 Entity_Instance_ID: orchestration=fw-update
3575 Alarm_Type: equipment
3576 Probable_Cause: unspecified-reason
3577 Service_Affecting: False
3581 Description: Firmware update auto-apply cancelled
3582 Entity_Instance_ID: orchestration=fw-update
3584 Alarm_Type: equipment
3585 Probable_Cause: unspecified-reason
3586 Service_Affecting: False
3590 Description: Firmware update auto-apply failed
3591 Entity_Instance_ID: orchestration=fw-update
3593 Alarm_Type: equipment
3594 Probable_Cause: unspecified-reason
3595 Service_Affecting: False
3599 Description: Firmware update auto-apply completed
3600 Entity_Instance_ID: orchestration=fw-update
3602 Alarm_Type: equipment
3603 Probable_Cause: unspecified-reason
3604 Service_Affecting: False
3608 Description: Firmware update auto-apply abort
3609 Entity_Instance_ID: orchestration=fw-update
3611 Alarm_Type: equipment
3612 Probable_Cause: unspecified-reason
3613 Service_Affecting: False
3617 Description: Firmware update auto-apply aborting
3618 Entity_Instance_ID: orchestration=fw-update
3620 Alarm_Type: equipment
3621 Probable_Cause: unspecified-reason
3622 Service_Affecting: False
3626 Description: Firmware update auto-apply abort rejected
3627 Entity_Instance_ID: orchestration=fw-update
3629 Alarm_Type: equipment
3630 Probable_Cause: unspecified-reason
3631 Service_Affecting: False
3635 Description: Firmware update auto-apply abort failed
3636 Entity_Instance_ID: orchestration=fw-update
3638 Alarm_Type: equipment
3639 Probable_Cause: unspecified-reason
3640 Service_Affecting: False
3644 Description: Firmware update auto-apply aborted
3645 Entity_Instance_ID: orchestration=fw-update
3647 Alarm_Type: equipment
3648 Probable_Cause: unspecified-reason
3649 Service_Affecting: False
3653 Description: Kubernetes upgrade auto-apply in progress
3654 Entity_Instance_ID: orchestration=kube-upgrade
3656 Proposed_Repair_Action: Wait for kubernetes upgrade auto-apply to complete; if problem persists contact next level of support
3659 Alarm_Type: equipment
3660 Probable_Cause: unspecified-reason
3661 Service_Affecting: True
3663 Management_Affecting_Severity: warning
3664 Degrade_Affecting_Severity: none
3668 Description: Kubernetes upgrade auto-apply aborting
3669 Entity_Instance_ID: orchestration=kube-upgrade
3671 Proposed_Repair_Action: Wait for kubernetes upgrade auto-apply abort to complete; if problem persists contact next level of support
3674 Alarm_Type: equipment
3675 Probable_Cause: unspecified-reason
3676 Service_Affecting: True
3678 Management_Affecting_Severity: warning
3679 Degrade_Affecting_Severity: none
3683 Description: Kubernetes upgrade auto-apply failed
3684 Entity_Instance_ID: orchestration=kube-upgrade
3686 Proposed_Repair_Action: Attempt to apply kubernetes upgrade manually; if problem persists contact next level of support
3689 Alarm_Type: equipment
3690 Probable_Cause: underlying-resource-unavailable
3691 Service_Affecting: True
3693 Management_Affecting_Severity: warning
3694 Degrade_Affecting_Severity: none
3698 Description: Kubernetes upgrade auto-apply start
3699 Entity_Instance_ID: orchestration=kube-upgrade
3701 Alarm_Type: equipment
3702 Probable_Cause: unspecified-reason
3703 Service_Affecting: False
3707 Description: Kubernetes upgrade auto-apply in progress
3708 Entity_Instance_ID: orchestration=kube-upgrade
3710 Alarm_Type: equipment
3711 Probable_Cause: unspecified-reason
3712 Service_Affecting: False
3716 Description: Kubernetes upgrade auto-apply rejected
3717 Entity_Instance_ID: orchestration=kube-upgrade
3719 Alarm_Type: equipment
3720 Probable_Cause: unspecified-reason
3721 Service_Affecting: False
3725 Description: Kubernetes upgrade auto-apply cancelled
3726 Entity_Instance_ID: orchestration=kube-upgrade
3728 Alarm_Type: equipment
3729 Probable_Cause: unspecified-reason
3730 Service_Affecting: False
3734 Description: Kubernetes upgrade auto-apply failed
3735 Entity_Instance_ID: orchestration=kube-upgrade
3737 Alarm_Type: equipment
3738 Probable_Cause: unspecified-reason
3739 Service_Affecting: False
3743 Description: Kubernetes upgrade auto-apply completed
3744 Entity_Instance_ID: orchestration=kube-upgrade
3746 Alarm_Type: equipment
3747 Probable_Cause: unspecified-reason
3748 Service_Affecting: False
3752 Description: Kubernetes upgrade auto-apply abort
3753 Entity_Instance_ID: orchestration=kube-upgrade
3755 Alarm_Type: equipment
3756 Probable_Cause: unspecified-reason
3757 Service_Affecting: False
3761 Description: Kubernetes upgrade auto-apply aborting
3762 Entity_Instance_ID: orchestration=kube-upgrade
3764 Alarm_Type: equipment
3765 Probable_Cause: unspecified-reason
3766 Service_Affecting: False
3770 Description: Kubernetes upgrade auto-apply abort rejected
3771 Entity_Instance_ID: orchestration=kube-upgrade
3773 Alarm_Type: equipment
3774 Probable_Cause: unspecified-reason
3775 Service_Affecting: False
3779 Description: Kubernetes upgrade auto-apply abort failed
3780 Entity_Instance_ID: orchestration=kube-upgrade
3782 Alarm_Type: equipment
3783 Probable_Cause: unspecified-reason
3784 Service_Affecting: False
3788 Description: Kubernetes upgrade auto-apply aborted
3789 Entity_Instance_ID: orchestration=kube-upgrade
3791 Alarm_Type: equipment
3792 Probable_Cause: unspecified-reason
3793 Service_Affecting: False
3797 Description: Kubernetes rootca update auto-apply in progress
3798 Entity_Instance_ID: orchestration=kube-rootca-update
3800 Proposed_Repair_Action: Wait for kubernetes rootca update auto-apply to complete; if problem persists contact next level of support
3803 Alarm_Type: equipment
3804 Probable_Cause: unspecified-reason
3805 Service_Affecting: True
3807 Management_Affecting_Severity: warning
3808 Degrade_Affecting_Severity: none
3812 Description: Kubernetes rootca update auto-apply aborting
3813 Entity_Instance_ID: orchestration=kube-rootca-update
3815 Proposed_Repair_Action: Wait for kubernetes rootca update auto-apply abort to complete; if problem persists contact next level of support
3818 Alarm_Type: equipment
3819 Probable_Cause: unspecified-reason
3820 Service_Affecting: True
3822 Management_Affecting_Severity: warning
3823 Degrade_Affecting_Severity: none
3827 Description: Kubernetes rootca update auto-apply failed. Command "sw-manager kube-rootca-update-strategy apply" failed.
3828 Entity_Instance_ID: orchestration=kube-rootca-update
3830 Proposed_Repair_Action: Attempt to apply kubernetes rootca update manually; if problem persists contact next level of support
3833 Alarm_Type: equipment
3834 Probable_Cause: underlying-resource-unavailable
3835 Service_Affecting: True
3837 Management_Affecting_Severity: warning
3838 Degrade_Affecting_Severity: none
3842 Description: Kubernetes rootca update auto-apply start
3843 Entity_Instance_ID: orchestration=kube-rootca-update
3845 Alarm_Type: equipment
3846 Probable_Cause: unspecified-reason
3847 Service_Affecting: False
3851 Description: Kubernetes rootca update auto-apply in progress
3852 Entity_Instance_ID: orchestration=kube-rootca-update
3854 Alarm_Type: equipment
3855 Probable_Cause: unspecified-reason
3856 Service_Affecting: False
3860 Description: Kubernetes rootca update auto-apply rejected
3861 Entity_Instance_ID: orchestration=kube-rootca-update
3863 Alarm_Type: equipment
3864 Probable_Cause: unspecified-reason
3865 Service_Affecting: False
3869 Description: Kubernetes rootca update auto-apply cancelled
3870 Entity_Instance_ID: orchestration=kube-rootca-update
3872 Alarm_Type: equipment
3873 Probable_Cause: unspecified-reason
3874 Service_Affecting: False
3878 Description: Kubernetes rootca update auto-apply failed
3879 Entity_Instance_ID: orchestration=kube-rootca-update
3881 Alarm_Type: equipment
3882 Probable_Cause: unspecified-reason
3883 Service_Affecting: False
3887 Description: Kubernetes rootca update auto-apply completed
3888 Entity_Instance_ID: orchestration=kube-rootca-update
3890 Alarm_Type: equipment
3891 Probable_Cause: unspecified-reason
3892 Service_Affecting: False
3896 Description: Kubernetes rootca update auto-apply abort
3897 Entity_Instance_ID: orchestration=kube-rootca-update
3899 Alarm_Type: equipment
3900 Probable_Cause: unspecified-reason
3901 Service_Affecting: False
3905 Description: Kubernetes rootca update auto-apply aborting
3906 Entity_Instance_ID: orchestration=kube-rootca-update
3908 Alarm_Type: equipment
3909 Probable_Cause: unspecified-reason
3910 Service_Affecting: False
3914 Description: Kubernetes rootca update auto-apply abort rejected
3915 Entity_Instance_ID: orchestration=kube-rootca-update
3917 Alarm_Type: equipment
3918 Probable_Cause: unspecified-reason
3919 Service_Affecting: False
3923 Description: Kubernetes rootca update auto-apply abort failed
3924 Entity_Instance_ID: orchestration=kube-rootca-update
3926 Alarm_Type: equipment
3927 Probable_Cause: unspecified-reason
3928 Service_Affecting: False
3932 Description: Kubernetes rootca update auto-apply aborted
3933 Entity_Instance_ID: orchestration=kube-rootca-update
3935 Alarm_Type: equipment
3936 Probable_Cause: unspecified-reason
3937 Service_Affecting: False