configs/events.yaml

   1 ---
   2
   3 #
   4 # Copyright (c) 2013-2024 Wind River Systems, Inc.
   5 #
   6 # SPDX-License-Identifier: Apache-2.0
   7 #
   8
   9 ############################################################################
  10 #
  11 # ALARM & CUSTOMER LOG DOCUMENTATION
  12 #
  13 ############################################################################
  14
  15 ############################################################################
  16 #
  17 # Record Format ... for documentation
  18 #
  19 # 100.001:
  20 #   Type: < Alarm |  Log >
  21 #   Description: < yaml string >
  22 #                OR
  23 #                [ < yaml string >,      // list of yaml strings
  24 #                  < yaml string >  ]
  25 #                OR
  26 #                critical: < yaml string >     // i.e. dictionary of yaml strings indexed by severity
  27 #                major:    < yaml string >
  28 #                minor:    < yaml string >
  29 #                warning:  < yaml string >
  30 #   Entity_Instance_ID: < yaml string ... e.g. host=<hostname>.interface=<ifname> >
  31 #                       OR
  32 #                       [ < yaml string >,      // list of yaml strings
  33 #                         < yaml string >  ]
  34 #   Severity: < critical |  major |  minor |  warning >
  35 #                       OR
  36 #                       [ critical, major ]      // list of severity values
  37 #   Proposed_Repair_Action: < yaml string >      // NOTE ALARM ONLY FIELD
  38 #                           OR
  39 #                           critical: < yaml string >     // i.e. dictionary of yaml strings indexed by severity
  40 #                           major:    < yaml string >
  41 #                           minor:    < yaml string >
  42 #                           warning:  < yaml string >
  43 #   Maintenance_Action: < yaml string >          // NOTE ALARM ONLY FIELD
  44 #                       OR
  45 #                       critical: < yaml string >     // i.e. dictionary of yaml strings indexed by severity
  46 #                       major:    < yaml string >
  47 #                       minor:    < yaml string >
  48 #                       warning:  < yaml string >
  49 #   Inhibit_Alarms: < True | False >            // NOTE ALARM ONLY FIELD
  50 #   Alarm_Type: < operational-violation | ... >
  51 #   Probable_Cause: < timing-problem | ... >
  52 #                   OR
  53 #                   [ < timing-problem | ... >,      // list of probable-causes
  54 #                     < timing-problem | ... >  ]
  55 #   Service_Affecting: < True | False >
  56 #   Suppression: < True | False >               // NOTE ALARM ONLY FIELD
  57 #   Management_Affecting_Severity: < none | critical | major | minor | warning >
  58 #       // lowest alarm level of this type that will block forced upgrades & orchestration actions
  59 #   Degrade_Affecting_Severity: < none | critical | major | minor >
  60 #       // lowest alarm level of this type that will set a host to 'degraded'
  61 #   Context: < none | starlingx | openstack >
  62 #       // Identifies where the alarm/log is used. If it should be ignored by
  63 #       // the documentation generating scripts, the value has to be 'none'.
  64 #       // If any of the other values is used, the alarm/log will be included
  65 #       // in the documentation and classified by the chosen value.
  66 #
  67 #
  68 #   Other Notes:
  69 #      - use general record format above
  70 #      - the only dictionaries allowed are ones indexed by severity
  71 #      - if there are multiple lists in a record,
  72 #        then they should all have the same number of items, and corresponding list items represent one instance of the alarm
  73 #      - if you can't describe the alarm/log based on the above rules,
  74 #        then you can use a multi-line string format
  75 #      - DELETING alarms from events.yaml: alarms should only be deleted when going to a new Titanium Cloud release
  76 #      - if all possible alarm severities are mgmt affecting, the convention is to
  77 #        use 'warning' as the Management_Affecting_Severity, even if warning is not a possible severity for that alarm
  78 #
  79 #   Testing:
  80 #      - Testing of events.yaml can be done by running regular make command
  81 #        and specifying fm-doc:
  82 #                nice -n 20 ionice -c Idle make -C build fm-doc.rebuild
  83 #      - When building, events.yaml will be parsed for correct format, and also
  84 #        to ensure that Alarm IDs defined in constants.py and fmAlarm.h are
  85 #        listed in events.yaml
  86 #
  87 ############################################################################
  88
  89
  90 #---------------------------------------------------------------------------
  91 #   Monitored Resource Alarms
  92 #---------------------------------------------------------------------------
  93
  94
  95 100.101:
  96     Type: Alarm
  97     Description: |-
  98         Platform CPU threshold exceeded; threshold x%, actual y% .
  99              CRITICAL @ 95%
 100              MAJOR    @ 90%
 101     Entity_Instance_ID: host=<hostname>
 102     Severity: [critical, major]
 103     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
 104     Maintenance_Action:
 105         critical: degrade
 106         major: degrade
 107     Inhibit_Alarms:
 108     Alarm_Type: operational-violation
 109     Probable_Cause: threshold-crossed
 110     Service_Affecting: False
 111     Suppression: True
 112     Management_Affecting_Severity: major
 113     Degrade_Affecting_Severity: critical
 114     Context: starlingx
 115
 116 100.102:
 117     Type: Alarm
 118     Description: |-
 119         VSwitch CPU threshold exceeded; threshold x%, actual y% .
 120              CRITICAL @ 95%
 121              MAJOR    @ 90%
 122              MINOR    @ 80%
 123     Entity_Instance_ID: host=<hostname>
 124     Severity: [critical, major, minor]
 125     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
 126     Maintenance_Action:
 127         critical: degrade
 128         major: degrade
 129     Inhibit_Alarms:
 130     Alarm_Type: operational-violation
 131     Probable_Cause: threshold-crossed
 132     Service_Affecting: False
 133     Suppression: True
 134     Management_Affecting_Severity: none
 135     Degrade_Affecting_Severity: none
 136     Context: none
 137
 138 100.103:
 139     Type: Alarm
 140     Description: |-
 141         Memory threshold exceeded; threshold x%, actual y% .
 142              CRITICAL @ 90%
 143              MAJOR    @ 80%
 144     Entity_Instance_ID: |-
 145         host=<hostname>
 146         OR
 147         host=<hostname>.memory=total
 148         OR
 149         host=<hostname>.memory=platform
 150         OR
 151         host=<hostname>.numa=node<number>
 152     Severity: [critical, major]
 153     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support; may require additional memory on Host."
 154     Maintenance_Action:
 155         critical: degrade
 156         major: degrade
 157     Inhibit_Alarms:
 158     Alarm_Type: operational-violation
 159     Probable_Cause: threshold-crossed
 160     Service_Affecting: False
 161     Suppression: True
 162     Management_Affecting_Severity: none
 163     Degrade_Affecting_Severity: critical
 164     Context: starlingx
 165
 166 100.104:    # NOTE This should really be split into two different Alarms.
 167     Type: Alarm
 168     Description: |-
 169         host=<hostname>.filesystem=<mount-dir>
 170             File System threshold exceeded; threshold x%, actual y% .
 171                 CRITICAL @ 90%
 172                 MAJOR    @ 80%
 173         OR
 174         host=<hostname>.volumegroup=<volumegroup-name>
 175             Monitor and if condition persists, consider adding additional physical volumes to the volume group.
 176     Entity_Instance_ID: |-
 177         host=<hostname>.filesystem=<mount-dir>
 178         OR
 179         host=<hostname>.volumegroup=<volumegroup-name>
 180     Severity: [critical, major]
 181     Proposed_Repair_Action: "Reduce usage or resize filesystem."
 182     Maintenance_Action:
 183         critical: degrade
 184         major: degrade
 185     Inhibit_Alarms:
 186     Alarm_Type: operational-violation
 187     Probable_Cause: threshold-crossed
 188     Service_Affecting: False
 189     Suppression: True
 190     Management_Affecting_Severity: critical
 191     Degrade_Affecting_Severity: critical
 192     Context: starlingx
 193
 194 100.105:
 195     Type: Alarm
 196     Description: |-
 197         Filesystem Alarm Condition:
 198         <fs_name> filesystem is not added on both controllers and/or does not have the same size: <hostname>.
 199     Entity_Instance_ID: fs_name=<image-conversion>
 200     Severity: critical
 201     Proposed_Repair_Action: "Add image-conversion filesystem on both controllers.
 202                              See the |prod-long| documentation at |docs-url| for more details.
 203                              If problem persists, contact next level of support."
 204     Maintenance_Action: degrade
 205     Inhibit_Alarms:
 206     Alarm_Type: equipment
 207     Probable_Cause: configuration-or-customization-error
 208     Service_Affecting: True
 209     Suppression: False
 210     Management_Affecting_Severity: major
 211     Degrade_Affecting_Severity: none
 212     Context: openstack
 213
 214 #--------
 215 # 100.105: Retired (with R2 release) in its original form, which monitored the /etc/nova/instances
 216 #          NFS mount from controller to computes; the ID is now reused by the filesystem alarm defined above.
 217 #--------
 218
 219 100.106:
 220     Type: Alarm
 221     Description: "'OAM' Port failed."
 222     Entity_Instance_ID: host=<hostname>.port=<port-name>
 223     Severity: major
 224     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
 225     Maintenance_Action: degrade
 226     Inhibit_Alarms:
 227     Alarm_Type: operational-violation
 228     Probable_Cause: unknown
 229     Service_Affecting: True
 230     Suppression: True
 231     Management_Affecting_Severity: warning
 232     Degrade_Affecting_Severity: major
 233     Context: starlingx
 234
 235 100.107:
 236     Type: Alarm
 237     Description: |-
 238         'OAM' Interface degraded.
 239         OR
 240         'OAM' Interface failed.
 241     Entity_Instance_ID: host=<hostname>.interface=<if-name>
 242     Severity: [critical, major]
 243     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
 244     Maintenance_Action:
 245         critical: degrade
 246         major: degrade
 247     Inhibit_Alarms:
 248     Alarm_Type: operational-violation
 249     Probable_Cause: unknown
 250     Service_Affecting: True
 251     Suppression: True
 252     Management_Affecting_Severity: warning
 253     Degrade_Affecting_Severity: major
 254     Context: starlingx
 255
 256 100.108:
 257     Type: Alarm
 258     Description: "'MGMT' Port failed."
 259     Entity_Instance_ID: host=<hostname>.port=<port-name>
 260     Severity: major
 261     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
 262     Maintenance_Action: degrade
 263     Inhibit_Alarms:
 264     Alarm_Type: operational-violation
 265     Probable_Cause: unknown
 266     Service_Affecting: True
 267     Suppression: True
 268     Management_Affecting_Severity: warning
 269     Degrade_Affecting_Severity: major
 270     Context: starlingx
 271
 272 100.109:
 273     Type: Alarm
 274     Description: |-
 275         'MGMT' Interface degraded.
 276         OR
 277         'MGMT' Interface failed.
 278     Entity_Instance_ID: host=<hostname>.interface=<if-name>
 279     Severity: [critical, major]
 280     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
 281     Maintenance_Action:
 282         critical: degrade
 283         major: degrade
 284     Inhibit_Alarms:
 285     Alarm_Type: operational-violation
 286     Probable_Cause: unknown
 287     Service_Affecting: True
 288     Suppression: True
 289     Management_Affecting_Severity: warning
 290     Degrade_Affecting_Severity: major
 291     Context: starlingx
 292
 293 100.110:
 294     Type: Alarm
 295     Description: "'CLUSTER-HOST' Port failed."
 296     Entity_Instance_ID: host=<hostname>.port=<port-name>
 297     Severity: major
 298     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
 299     Maintenance_Action: degrade
 300     Inhibit_Alarms:
 301     Alarm_Type: operational-violation
 302     Probable_Cause: unknown
 303     Service_Affecting: True
 304     Suppression: True
 305     Management_Affecting_Severity: warning
 306     Degrade_Affecting_Severity: major
 307     Context: starlingx
 308
 309 100.111:
 310     Type: Alarm
 311     Description: |-
 312         'CLUSTER-HOST' Interface degraded.
 313         OR
 314         'CLUSTER-HOST' Interface failed.
 315     Entity_Instance_ID: host=<hostname>.interface=<if-name>
 316     Severity: [critical, major]
 317     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
 318     Maintenance_Action:
 319         critical: degrade
 320         major: degrade
 321     Inhibit_Alarms:
 322     Alarm_Type: operational-violation
 323     Probable_Cause: unknown
 324     Service_Affecting: True
 325     Suppression: True
 326     Management_Affecting_Severity: warning
 327     Degrade_Affecting_Severity: major
 328     Context: starlingx
 329
 330 100.112:
 331     Type: Alarm
 332     Description: "'DATA-VRS' Port down."
 333     Entity_Instance_ID: host=<hostname>.port=<port-name>
 334     Severity: major
 335     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
 336     Maintenance_Action: degrade
 337     Inhibit_Alarms:
 338     Alarm_Type: operational-violation
 339     Probable_Cause: unknown
 340     Service_Affecting: True
 341     Suppression: True
 342     Management_Affecting_Severity: none
 343     Degrade_Affecting_Severity: major
 344     Context: openstack
 345
 346 100.113:
 347     Type: Alarm
 348     Description: |-
 349         'DATA-VRS' Interface degraded.
 350         OR
 351         'DATA-VRS' Interface down.
 352     Entity_Instance_ID: host=<hostname>.interface=<if-name>
 353     Severity: [critical, major]
 354     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
 355     Maintenance_Action:
 356         major: degrade
 357     Inhibit_Alarms:
 358     Alarm_Type: operational-violation
 359     Probable_Cause: unknown
 360     Service_Affecting: True
 361     Suppression: True
 362     Management_Affecting_Severity: none
 363     Degrade_Affecting_Severity: major
 364     Context: openstack
 365
 366 100.114:
 367     Type: Alarm
 368     Description:
 369         major: "NTP configuration does not contain any valid or reachable NTP servers."
 370         minor: "NTP address <IP address> is not a valid or a reachable NTP server."
 371     Entity_Instance_ID:
 372         major: host=<hostname>.ntp
 373         minor: host=<hostname>.ntp=<IP address>
 374     Severity: [major, minor]
 375     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
 376     Maintenance_Action: none
 377     Inhibit_Alarms:
 378     Alarm_Type: communication
 379     Probable_Cause: unknown
 380     Service_Affecting: False
 381     Suppression: False
 382     Management_Affecting_Severity: none
 383     Degrade_Affecting_Severity: none
 384     Context: starlingx
 385
 386 100.115:
 387     Type: Alarm
 388     Description: "VSwitch Memory Usage, processor <processor> threshold exceeded; threshold x%, actual y% ."
 389     Entity_Instance_ID: host=<hostname>.processor=<processor>
 390     Severity: [critical, major, minor]
 391     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
 392     Maintenance_Action:
 393         critical: degrade
 394         major: degrade
 395     Inhibit_Alarms:
 396     Alarm_Type: operational-violation
 397     Probable_Cause: threshold-crossed
 398     Service_Affecting: False
 399     Suppression: True
 400     Management_Affecting_Severity: none
 401     Degrade_Affecting_Severity: critical
 402     Context: none
 403
 404 100.116:
 405     Type: Alarm
 406     Description: "Cinder LVM Thinpool Usage threshold exceeded; threshold x%, actual y% ."
 407     Entity_Instance_ID: host=<hostname>
 408     Severity: [critical, major, minor]
 409     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
 410     Maintenance_Action:
 411         critical: degrade
 412         major: degrade
 413     Inhibit_Alarms:
 414     Alarm_Type: operational-violation
 415     Probable_Cause: threshold-crossed
 416     Service_Affecting: False
 417     Suppression: True
 418     Management_Affecting_Severity: none
 419     Degrade_Affecting_Severity: critical
 420     Context: none
 421
 422 100.117:
 423     Type: Alarm
 424     Description: "Nova LVM Thinpool Usage threshold exceeded; threshold x%, actual y% ."
 425     Entity_Instance_ID: host=<hostname>
 426     Severity: [critical, major, minor]
 427     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
 428     Maintenance_Action:
 429         critical: degrade
 430         major: degrade
 431     Inhibit_Alarms:
 432     Alarm_Type: operational-violation
 433     Probable_Cause: threshold-crossed
 434     Service_Affecting: False
 435     Suppression: True
 436     Management_Affecting_Severity: major
 437     Degrade_Affecting_Severity: critical
 438     Context: none
 439
 440 100.118:
 441     Type: Alarm
 442     Description: Controller cannot establish connection with remote logging server.
 443     Entity_Instance_ID: host=<hostname>
 444     Severity: minor
 445     Proposed_Repair_Action: "Ensure Remote Log Server IP is reachable from Controller through OAM interface; otherwise contact next level of support."
 446     Maintenance_Action: none
 447     Inhibit_Alarms: False
 448     Alarm_Type: communication
 449     Probable_Cause: communication-subsystem-failure
 450     Service_Affecting: False
 451     Suppression: False
 452     Management_Affecting_Severity: none
 453     Degrade_Affecting_Severity: none
 454     Context: starlingx
 455
 456 100.119:
 457     Type: Alarm
 458     Description: |-
 459         <hostname> does not support the provisioned PTP mode
 460         OR
 461         <hostname> PTP clocking is out-of-tolerance
 462         OR
 463         <hostname> is not locked to remote PTP Primary source
 464         OR
 465         <hostname> GNSS signal loss state:<state>
 466         OR
 467         <hostname> 1PPS signal loss state:<state>
 468     Entity_Instance_ID: |-
 469         host=<hostname>.ptp
 470         OR
 471         host=<hostname>.ptp=no-lock
 472         OR
 473         host=<hostname>.ptp=<interface>.unsupported=hardware-timestamping
 474         OR
 475         host=<hostname>.ptp=<interface>.unsupported=software-timestamping
 476         OR
 477         host=<hostname>.ptp=<interface>.unsupported=legacy-timestamping
 478         OR
 479         host=<hostname>.ptp=out-of-tolerance
 480         OR
 481         host=<hostname>.instance=<instance>.ptp=out-of-tolerance
 482         OR
 483         host=<hostname>.interface=<interface>.ptp=signal-loss
 484     Severity: [major, minor]
 485     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
 486     Maintenance_Action: none
 487     Inhibit_Alarms:
 488     Alarm_Type: communication
 489     Probable_Cause: unknown
 490     Service_Affecting: False
 491     Suppression: False
 492     Management_Affecting_Severity: none
 493     Degrade_Affecting_Severity: none
 494     Context: starlingx
 495
 496 100.120:
 497     Type: Alarm
 498     Description: Controllers running mismatched kernels.
 499     Entity_Instance_ID: host=<hostname>.kernel=<kernel>
 500     Severity: minor
 501     Proposed_Repair_Action: "Modify controllers using 'system host-kernel-modify' so that both are running the desired 'standard' or 'lowlatency' kernel."
 502     Maintenance_Action: none
 503     Inhibit_Alarms: False
 504     Alarm_Type: equipment
 505     Probable_Cause: unspecified-reason
 506     Service_Affecting: False
 507     Suppression: False
 508     Management_Affecting_Severity: none
 509     Degrade_Affecting_Severity: none
 510     Context: starlingx
 511
 512 100.121:
 513     Type: Alarm
 514     Description: Host not running the provisioned kernel.
 515     Entity_Instance_ID: host=<hostname>.kernel=<kernel>
 516     Severity: major
 517     Proposed_Repair_Action: "Retry 'system host-kernel-modify' and if condition persists, contact next level of support."
 518     Maintenance_Action: none
 519     Inhibit_Alarms: False
 520     Alarm_Type: equipment
 521     Probable_Cause: unspecified-reason
 522     Service_Affecting: False
 523     Suppression: False
 524     Management_Affecting_Severity: major
 525     Degrade_Affecting_Severity: none
 526     Context: starlingx
 527
 528 100.150:
 529     Type: Alarm
 530     Description:
 531         critical: "service open file descriptor has reached its limit"
 532         major: "service open file descriptor is approaching its limit"
 533     Entity_Instance_ID: |-
 534         host=<hostname>.resource_type=file-descriptor.service_name=<service-name>
 535     Severity: [critical, major]
 536     Proposed_Repair_Action: "swact to the other controller if it is available"
 537     Maintenance_Action: none
 538     Inhibit_Alarms:
 539     Alarm_Type: operational-violation
 540     Probable_Cause: threshold-crossed
 541     Service_Affecting: True
 542     Suppression: False
 543     Management_Affecting_Severity: critical
 544     Degrade_Affecting_Severity: critical
 545     Context: starlingx
 546
 547
 548 #---------------------------------------------------------------------------
 549 #   MAINTENANCE
 550 #---------------------------------------------------------------------------
 551
 552
 553 200.001:
 554     Type: Alarm
 555     Description: <hostname> was administratively locked to take it out-of-service.
 556     Entity_Instance_ID: host=<hostname>
 557     Severity: warning
 558     Proposed_Repair_Action: Administratively unlock Host to bring it back in-service.
 559     Maintenance_Action: none
 560     Inhibit_Alarms: True
 561     Alarm_Type: operational-violation
 562     Probable_Cause: out-of-service
 563     Service_Affecting: True
 564     Suppression: False
 565     Management_Affecting_Severity: warning
 566     Degrade_Affecting_Severity: none
 567     Context: starlingx
 568
 569 200.003:
 570     Type: Alarm
 571     Description: <hostname> pxeboot network communication failure.
 572     Entity_Instance_ID: host=<hostname>
 573     Severity: minor
 574     Proposed_Repair_Action: Administratively Lock and Unlock host to recover. If problem persists, contact next level of support.
 575     Maintenance_Action: none
 576     Inhibit_Alarms: False
 577     Alarm_Type: communication
 578     Probable_Cause: unknown
 579     Service_Affecting: False
 580     Suppression: False
 581     Management_Affecting_Severity: warning
 582     Degrade_Affecting_Severity: none
 583     Context: starlingx
 584
 585 200.004:
 586     Type: Alarm
 587     Description: |-
 588         <hostname> experienced a service-affecting failure.
 589         Host is being auto recovered by Reboot.
 590     Entity_Instance_ID: host=<hostname>
 591     Severity: critical
 592     Proposed_Repair_Action: If auto-recovery is consistently unable to recover host to the unlocked-enabled state contact next level of support or lock and replace failing host.
 593     Maintenance_Action: auto recover
 594     Inhibit_Alarms: False
 595     Alarm_Type: operational-violation
 596     Probable_Cause: application-subsystem-failure
 597     Service_Affecting: True
 598     Suppression: True
 599     Management_Affecting_Severity: warning
 600     Degrade_Affecting_Severity: none
 601     Context: starlingx
 602
 603 200.011:
 604     Type: Alarm
 605     Description: <hostname> experienced a configuration failure during initialization. Host is being re-configured by Reboot.
 606     Entity_Instance_ID: host=<hostname>
 607     Severity: critical
 608     Proposed_Repair_Action: If auto-recovery is consistently unable to recover host to the unlocked-enabled state contact next level of support or lock and replace failing host.
 609     Maintenance_Action: auto-recover
 610     Inhibit_Alarms: False
 611     Alarm_Type: operational-violation
 612     Probable_Cause: configuration-or-customization-error
 613     Service_Affecting: True
 614     Suppression: True
 615     Management_Affecting_Severity: warning
 616     Degrade_Affecting_Severity: none
 617     Context: starlingx
 618
 619 200.010:
 620     Type: Alarm
 621     Description: <hostname> access to board management module has failed.
 622     Entity_Instance_ID: host=<hostname>
 623     Severity: warning
 624     Proposed_Repair_Action: Check Host's board management configuration and connectivity.
 625     Maintenance_Action: auto recover
 626     Inhibit_Alarms: False
 627     Alarm_Type: operational-violation
 628     Probable_Cause: communication-subsystem-failure
 629     Service_Affecting: False
 630     Suppression: False
 631     Management_Affecting_Severity: none
 632     Degrade_Affecting_Severity: none
 633     Context: starlingx
 634
 635 200.013:
 636     Type: Alarm
 637     Description: <hostname> compute service of the only available controller is not operational. Auto-recovery is disabled. Degrading host instead.
 638     Entity_Instance_ID: host=<hostname>
 639     Severity: major
 640     Proposed_Repair_Action: Enable second controller and Switch Activity (Swact) over to it as soon as possible. Then Lock and Unlock host to recover its local compute service.
 641     Maintenance_Action: "degrade - requires manual action"
 642     Inhibit_Alarms: False
 643     Alarm_Type: operational-violation
 644     Probable_Cause: communication-subsystem-failure
 645     Service_Affecting: True
 646     Suppression: True
 647     Management_Affecting_Severity: warning
 648     Degrade_Affecting_Severity: major
 649     Context: starlingx
 650
 651 200.005:
 652     Type: Alarm
 653     Description: |-
 654         Degrade:
 655         <hostname> is experiencing an intermittent 'Management Network' communication failure that has exceeded its lower alarming threshold.
 656
 657         Failure:
 658         <hostname> is experiencing a persistent critical 'Management Network' communication failure.
 659     Entity_Instance_ID: host=<hostname>
 660     Severity: [critical, major]
 661     Proposed_Repair_Action: "Check 'Management Network' connectivity and support for multicast messaging. If problem consistently occurs after that and Host is reset, then contact next level of support or lock and replace failing host."
 662     Maintenance_Action: auto recover
 663     Inhibit_Alarms: False
 664     Alarm_Type: communication
 665     Probable_Cause: unknown
 666     Service_Affecting: True
 667     Suppression: True
 668     Management_Affecting_Severity: warning
 669     Degrade_Affecting_Severity: none
 670     Context: starlingx
 671
 672 200.009:
 673     Type: Alarm
 674     Description: |-
 675         Degrade:
 676         <hostname> is experiencing an intermittent 'Cluster-host Network' communication failure that has exceeded its lower alarming threshold.
 677
 678         Failure:
 679         <hostname> is experiencing a persistent critical 'Cluster-host Network' communication failure.
 680     Entity_Instance_ID: host=<hostname>
 681     Severity: [critical, major]
 682     Proposed_Repair_Action: "Check 'Cluster-host Network' connectivity and support for multicast messaging. If problem consistently occurs after that and Host is reset, then contact next level of support or lock and replace failing host."
 683     Maintenance_Action: auto recover
 684     Inhibit_Alarms: False
 685     Alarm_Type: communication
 686     Probable_Cause: unknown
 687     Service_Affecting: True
 688     Suppression: True
 689     Management_Affecting_Severity: warning
 690     Degrade_Affecting_Severity: none
 691     Context: starlingx
 692
 693
 694 200.006:
 695     Type: Alarm
 696     Description: |-
 697         Main Process Monitor Daemon Failure (major):
 698             <hostname> 'Process Monitor' (pmond) process is not running or functioning properly. The system is trying to recover this process.
 699
 700         Monitored Process Failure (critical/major/minor):
 701             Critical: <hostname> critical '<processname>' process has failed and could not be auto-recovered gracefully.
 702                       Auto-recovery progression by host reboot is required and in progress.
 703             Major:    <hostname> is degraded due to the failure of its '<processname>' process. Auto recovery of this major process is in progress.
 704             Minor:    <hostname> '<processname>' process has failed. Auto recovery of this minor process is in progress.
 705                       OR
 706                       <hostname> '<processname>' process has failed. Manual recovery is required.
 707     Entity_Instance_ID: host=<hostname>.process=<processname>
 708     Severity: [critical, major, minor]
 709     Proposed_Repair_Action: |-
 710         If this alarm does not automatically clear after some time and continues to be asserted after Host is locked and unlocked then contact next level of support for root cause analysis and recovery.
 711
 712         If problem consistently occurs after Host is locked and unlocked then contact next level of support for root cause analysis and recovery.
 713     Maintenance_Action:
 714         critical: auto-recover
 715         major: degrade
 716         minor:
 717     Inhibit_Alarms: False
 718     Alarm_Type: operational-violation
 719     Probable_Cause: unknown
 720     Service_Affecting:
 721         critical: True
 722         major: True
 723         minor: False
 724     Suppression: True
 725     Management_Affecting_Severity: warning
 726     Degrade_Affecting_Severity: major
 727     Context: starlingx
 728
 729 # 200.006:      // NOTE using duplicate ID of a completely analogous Alarm for this
 730 #     Type: Log
 731 #     Description: |-
 732 #         Main Process Monitor Daemon Failure (major)
 733 #             <hostname> 'Process Monitor' (pmond) process is not running or functioning properly.
 734 #             The system is trying to recover this process.
 735 #
 736 #         Monitored Process Failure (critical/major/minor)
 737 #             critical: <hostname> critical '<processname>' process has failed and could not be auto-recovered gracefully.
 738 #                       Auto-recovery progression by host reboot is required and in progress.
 739 #             major:    <hostname> is degraded due to the failure of its '<processname>' process. Auto recovery of this major process is in progress.
 740 #             minor:    <hostname> '<processname>' process has failed. Auto recovery of this minor process is in progress.
 741 #                       OR
 742 #                       <hostname> '<processname>' process has failed. Manual recovery is required.
 743 #     Entity_Instance_ID: host=<hostname>.process=<process-name>
 744 #     Severity: minor
 745 #     Alarm_Type: other
 746 #     Probable_Cause: unspecified-reason
 747 #     Service_Affecting: True
 748
 749
 750 200.007:
 751     Type: Alarm
 752     Description:
 753         critical: "Host is degraded due to a 'critical' out-of-tolerance reading from the '<sensorname>' sensor"
 754         major: "Host is degraded due to a 'major' out-of-tolerance reading from the '<sensorname>' sensor"
 755         minor: "Host is reporting a 'minor' out-of-tolerance reading from the '<sensorname>' sensor"
 756     Entity_Instance_ID: host=<hostname>.sensor=<sensorname>
 757     Severity: [critical, major, minor]
 758     Proposed_Repair_Action: "If problem consistently occurs after Host is power cycled and/or reset, contact next level of support or lock and replace failing host."
 759     Maintenance_Action:
 760         critical: degrade
 761         major: degrade
 762         minor: auto-recover (polling)
 763     Inhibit_Alarms:
 764     Alarm_Type: operational-violation
 765     Probable_Cause: unspecified-reason
 766     Service_Affecting:
 767         critical: True
 768         major: False
 769         minor: False
 770     Suppression: True
 771     Management_Affecting_Severity: none
 772     Degrade_Affecting_Severity: critical
 773     Context: starlingx
 774
 775 200.014:
 776     Type: Alarm
 777     Description: The Hardware Monitor was unable to load, configure and monitor one or more hardware sensors.
 778     Entity_Instance_ID: host=<hostname>
 779     Severity: minor
 780     Proposed_Repair_Action: Check Board Management Controller provisioning. Try reprovisioning the BMC. If problem persists, try power cycling the host and then the entire server including the BMC power. If problem persists, then contact next level of support.
 781     Maintenance_Action: None
 782     Inhibit_Alarms: False
 783     Alarm_Type: operational-violation
 784     Probable_Cause: unknown
 785     Service_Affecting: False
 786     Suppression: True
 787     Management_Affecting_Severity: none
 788     Degrade_Affecting_Severity: none
 789     Context: starlingx
 790
 791 200.015:
 792     Type: Alarm
 793     Description: Unable to read one or more sensor groups from this host's board management controller
 794     Entity_Instance_ID: host=<hostname>
 795     Severity: major
 796     Proposed_Repair_Action: Check board management connectivity and try rebooting the board management controller. If problem persists, contact next level of support or lock and replace failing host.
 797     Maintenance_Action: None
 798     Inhibit_Alarms: False
 799     Alarm_Type: operational-violation
 800     Probable_Cause: unknown
 801     Service_Affecting: False
 802     Suppression: False
 803     Management_Affecting_Severity: none
 804     Degrade_Affecting_Severity: none
 805     Context: starlingx
 806
 807 200.016:
 808     Type: Alarm
 809     Description: Issue in creation or unsealing of LUKS volume
 810     Entity_Instance_ID: host=<hostname>
 811     Severity: critical
 812     Proposed_Repair_Action: If auto-recovery is consistently unable to recover host to the unlocked-enabled state contact next level of support or lock and replace failing host.
 813     Maintenance_Action: None
 814     Inhibit_Alarms: False
 815     Alarm_Type: operational-violation
 816     Probable_Cause: unknown
 817     Service_Affecting: False
 818     Suppression: False
 819     Management_Affecting_Severity: major
 820     Degrade_Affecting_Severity: none
 821     Context: starlingx
 822
 823 200.020:
 824     Type: Log
 825     Description: |-
 826         <hostname> has been 'discovered' on the network
 827         OR
 828         <hostname> has been 'added' to the system
 829         OR
 830         <hostname> has 'entered' multi-node failure avoidance
 831         OR
 832         <hostname> has 'exited' multi-node failure avoidance
 833     Entity_Instance_ID: |-
 834         host=<hostname>.event=discovered
 835         OR
 836         host=<hostname>.event=add
 837         OR
 838         host=<hostname>.event=mnfa_enter
 839         OR
 840         host=<hostname>.event=mnfa_exit
 841     Severity: warning
 842     Alarm_Type: other
 843     Probable_Cause: unspecified-reason
 844     Service_Affecting: True
 845     Context: starlingx
 846
 847
 848 200.021:
 849     Type: Log
 850     Description: |-
 851         <hostname> board management controller has been 'provisioned'
 852         OR
 853         <hostname> board management controller has been 're-provisioned'
 854         OR
 855         <hostname> board management controller has been 'de-provisioned'
 856         OR
 857         <hostname> manual 'unlock' request
 858         OR
 859         <hostname> manual 'reboot' request
 860         OR
 861         <hostname> manual 'reset' request
 862         OR
 863         <hostname> manual 'power-off' request
 864         OR
 865         <hostname> manual 'power-on' request
 866         OR
 867         <hostname> manual 'reinstall' request
 868         OR
 869         <hostname> manual 'force-lock' request
 870         OR
 871         <hostname> manual 'delete' request
 872         OR
 873         <hostname> manual 'controller switchover' request
 874     Entity_Instance_ID: |-
 875         host=<hostname>.command=provision
 876         OR
 877         host=<hostname>.command=reprovision
 878         OR
 879         host=<hostname>.command=deprovision
 880         OR
 881         host=<hostname>.command=unlock
 882         OR
 883         host=<hostname>.command=reboot
 884         OR
 885         host=<hostname>.command=reset
 886         OR
 887         host=<hostname>.command=power-off
 888         OR
 889         host=<hostname>.command=power-on
 890         OR
 891         host=<hostname>.command=reinstall
 892         OR
 893         host=<hostname>.command=force-lock
 894         OR
 895         host=<hostname>.command=delete
 896         OR
 897         host=<hostname>.command=swact
 898     Severity: warning
 899     Alarm_Type: other
 900     Probable_Cause: unspecified-reason
 901     Service_Affecting: False
 902     Context: starlingx
 903
 904
 905 200.022:
 906     Type: Log
 907     Description: |-
 908         <hostname> is now 'disabled'
 909         OR
 910         <hostname> is now 'enabled'
 911         OR
 912         <hostname> is now 'online'
 913         OR
 914         <hostname> is now 'offline'
 915         OR
 916         <hostname> is 'disabled-failed' to the system
 917         OR
 918         <hostname> reinstall failed
 919         OR
 920         <hostname> reinstall completed successfully
 921     Entity_Instance_ID: |-
 922         host=<hostname>.state=disabled
 923         OR
 924         host=<hostname>.state=enabled
 925         OR
 926         host=<hostname>.status=online
 927         OR
 928         host=<hostname>.status=offline
 929         OR
 930         host=<hostname>.status=failed
 931         OR
 932         host=<hostname>.status=reinstall-failed
 933         OR
 934         host=<hostname>.status=reinstall-complete
 935     Severity: warning
 936     Alarm_Type: other
 937     Probable_Cause: unspecified-reason
 938     Service_Affecting: True
 939     Context: starlingx
 940
 941
 942 #---------------------------------------------------------------------------
 943 #   BACKUP AND RESTORE
 944 #---------------------------------------------------------------------------
 945
 946 210.001:
 947     Type: Alarm
 948     Description: System Backup in progress.
 949     Entity_Instance_ID: host=controller
 950     Severity: minor
 951     Proposed_Repair_Action: No action required.
 952     Maintenance_Action:
 953     Inhibit_Alarms:
 954     Alarm_Type: operational-violation
 955     Probable_Cause: unspecified-reason
 956     Service_Affecting: False
 957     Suppression: False
 958     Management_Affecting_Severity: warning
 959     Degrade_Affecting_Severity: none
 960     Context: starlingx
 961
 962 210.002:
 963     Type: Alarm
 964     Description: System Restore in progress.
 965     Entity_Instance_ID: host=controller
 966     Severity: minor
 967     Proposed_Repair_Action: Run 'system restore-complete' to complete restore if running restore manually.
 968     Maintenance_Action:
 969     Inhibit_Alarms:
 970     Alarm_Type: operational-violation
 971     Probable_Cause: unspecified-reason
 972     Service_Affecting: False
 973     Suppression: False
 974     Management_Affecting_Severity: warning
 975     Degrade_Affecting_Severity: none
 976     Context: starlingx
 977
 978
 979 #---------------------------------------------------------------------------
 980 #   SYSTEM CONFIGURATION
 981 #---------------------------------------------------------------------------
 982
 983 250.001:
 984     Type: Alarm
 985     Description: <hostname> Configuration is out-of-date.
 986     Entity_Instance_ID: host=<hostname>
 987     Severity: major
 988     Proposed_Repair_Action: Administratively lock and unlock <hostname> to update config.
 989     Maintenance_Action:
 990     Inhibit_Alarms:
 991     Alarm_Type: operational-violation
 992     Probable_Cause: unspecified-reason
 993     Service_Affecting: True
 994     Suppression: False
 995     Management_Affecting_Severity: warning
 996     Degrade_Affecting_Severity: none
 997     Context: starlingx
 998
 999
1000 250.003:
1001     Type: Alarm
1002     Description: "Kubernetes certificates rotation failed on host[, reason = <reason_text>]"
1003     Entity_Instance_ID: host=<hostname>
1004     Severity: major
1005     Proposed_Repair_Action: Lock and unlock the host to update services with new certificates (Manually renew kubernetes certificates first if renewal failed).
1006     Maintenance_Action:
1007     Inhibit_Alarms:
1008     Alarm_Type: operational-violation
1009     Probable_Cause: unspecified-reason
1010     Service_Affecting: False
1011     Suppression: False
1012     Management_Affecting_Severity: warning
1013     Degrade_Affecting_Severity: none
1014     Context: starlingx
1015
1016 250.004:
1017     Type: Alarm
1018     Description: "IPsec certificates renewal failed on host[, reason = <reason_text>]"
1019     Entity_Instance_ID: host=<hostname>
1020     Severity: major
1021     Proposed_Repair_Action: Check cron.log and ipsec-auth.log, fix the issue and rerun the renewal cron job.
1022     Maintenance_Action:
1023     Inhibit_Alarms:
1024     Alarm_Type: operational-violation
1025     Probable_Cause: unspecified-reason
1026     Service_Affecting: False
1027     Suppression: False
1028     Management_Affecting_Severity: warning
1029     Degrade_Affecting_Severity: none
1030     Context: starlingx
1031
1032 #---------------------------------------------------------------------------
1033 #   DEPLOYMENT
1034 #---------------------------------------------------------------------------
1035 260.001:
1036     Type: Alarm
1037     Description: "Deployment resource not reconciled: <name>"
1038     Entity_Instance_ID: resource=<crd-resource>,name=<resource-name>
1039     Severity: major
1040     Proposed_Repair_Action: Monitor and if condition persists, validate deployment configuration.
1041     Maintenance_Action:
1042     Inhibit_Alarms:
1043     Alarm_Type: operational-violation
1044     Probable_Cause: configuration-out-of-date
1045     Service_Affecting: True
1046     Suppression: True
1047     Management_Affecting_Severity: warning
1048     Degrade_Affecting_Severity: none
1049     Context: starlingx
1050
1051 260.002:
1052     Type: Alarm
1053     Description: "Deployment resource not synchronized: <name>"
1054     Entity_Instance_ID: resource=<crd-resource>,name=<resource-name>
1055     Severity: minor
1056     Proposed_Repair_Action: Monitor and if condition persists, validate deployment configuration.
1057     Maintenance_Action:
1058     Inhibit_Alarms:
1059     Alarm_Type: operational-violation
1060     Probable_Cause: configuration-out-of-date
1061     Service_Affecting: False
1062     Suppression: True
1063     Management_Affecting_Severity: none
1064     Degrade_Affecting_Severity: none
1065     Context: starlingx
1066
1067 #---------------------------------------------------------------------------
1068 #   VM Compute Services
1069 #---------------------------------------------------------------------------
1070 270.101:
1071     Type: Log
1072     Description: "Host <host_name> compute services failure[, reason = <reason_text>]"
1073     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1074     Severity: critical
1075     Alarm_Type: equipment
1076     Probable_Cause: unspecified-reason
1077     Service_Affecting: False
1078     Context: none
1079
1080 270.102:
1081     Type: Log
1082     Description: Host <host_name> compute services enabled
1083     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1084     Severity: critical
1085     Alarm_Type: equipment
1086     Probable_Cause: unspecified-reason
1087     Service_Affecting: False
1088     Context: none
1089
1090 270.103:
1091     Type: Log
1092     Description: Host <host_name> compute services disabled
1093     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1094     Severity: critical
1095     Alarm_Type: equipment
1096     Probable_Cause: unspecified-reason
1097     Service_Affecting: False
1098     Context: none
1099
1100
1101 275.001:
1102     Type: Log
1103     Description: Host <host_name> hypervisor is now <administrative_state>-<operational_state>
1104     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1105     Severity: critical
1106     Alarm_Type: equipment
1107     Probable_Cause: unspecified-reason
1108     Service_Affecting: False
1109     Context: none
1110
1111
1112 #---------------------------------------------------------------------------
1113 #   DISTRIBUTED CLOUD
1114 #---------------------------------------------------------------------------
1115
1116 280.001:
1117     Type: Alarm
1118     Description: <subcloud> is offline
1119     Entity_Instance_ID: subcloud=<subcloud>
1120     Severity: critical
1121     Proposed_Repair_Action: Wait for subcloud to become online; if problem persists contact next level of support
1122     Maintenance_Action:
1123     Inhibit_Alarms:
1124     Alarm_Type: communication
1125     Probable_Cause: loss-of-signal
1126     Service_Affecting: False
1127     Suppression: False
1128     Management_Affecting_Severity: none
1129     Degrade_Affecting_Severity: none
1130     Context: starlingx
1131
1132 280.002:
1133     Type: Alarm
1134     Description: <subcloud> <resource> sync_status is out-of-sync
1135     Entity_Instance_ID: [subcloud=<subcloud>.resource=<compute | network | platform | volumev2>]
1136     Severity: major
1137     Proposed_Repair_Action: If problem persists contact next level of support
1138     Maintenance_Action:
1139     Inhibit_Alarms:
1140     Alarm_Type: other
1141     Probable_Cause: application-subsystem-failure
1142     Service_Affecting: False
1143     Suppression: False
1144     Management_Affecting_Severity: none
1145     Degrade_Affecting_Severity: none
1146     Context: starlingx
1147
1148 280.003:
1149     Type: Alarm
1150     Description: Subcloud backup failure
1151     Entity_Instance_ID: subcloud=<subcloud>
1152     Severity: minor
1153     Proposed_Repair_Action: Retry subcloud backup after checking backup input file. If problem persists, contact next level of support.
1154     Maintenance_Action:
1155     Inhibit_Alarms:
1156     Alarm_Type: processing-error
1157     Probable_Cause: unknown
1158     Service_Affecting: False
1159     Suppression: False
1160     Management_Affecting_Severity: none
1161     Degrade_Affecting_Severity: none
1162     Context: none
1163
1164 280.004:
1165     Type: Alarm
1166     Description: |-
1167         Critical: Peer <peer_uuid> is in disconnected state. The following subcloud peer groups are impacted: <peer-groups>.
1168         Major:    Peer <peer_uuid> connections in disconnected state.
1169     Entity_Instance_ID: |-
1170         peer=<peer_uuid>
1171     Severity: [critical, major]
1172     Proposed_Repair_Action: "Check the connectivity between the current system and the reported peer site. If the peer system is down, migrate the affected peer group(s) to the current system for continued subcloud management."
1173     Maintenance_Action:
1174     Inhibit_Alarms:
1175     Alarm_Type: communication
1176     Probable_Cause: unknown
1177     Service_Affecting: False
1178     Suppression: True
1179     Management_Affecting_Severity: none
1180     Degrade_Affecting_Severity: none
1181     Context: starlingx
1182
1183 280.005:
1184     Type: Alarm
1185     Description: |-
1186         Subcloud peer group <peer_group_name> is managed by remote system <peer_uuid> with a lower priority.
1187     Entity_Instance_ID: peer_group=<peer_group_name>,peer=<peer_uuid>
1188     Severity: [major]
1189     Proposed_Repair_Action: "Check the reported peer group state. Migrate it back to the current system if the state is 'rehomed' and the current system is stable. Otherwise, wait until these conditions are met."
1190     Maintenance_Action:
1191     Inhibit_Alarms: False
1192     Alarm_Type: other
1193     Probable_Cause: unknown
1194     Service_Affecting: False
1195     Suppression: True
1196     Management_Affecting_Severity: none
1197     Degrade_Affecting_Severity: none
1198     Context: starlingx
1199
1200 #---------------------------------------------------------------------------
1201 #   NETWORK
1202 #---------------------------------------------------------------------------
1203
1204 300.001:
1205     Type: Alarm
1206     Description: "'Data' Port failed."
1207     Entity_Instance_ID: host=<hostname>.port=<port-uuid>
1208     Severity: major
1209     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1210     Maintenance_Action:
1211     Inhibit_Alarms:
1212     Alarm_Type: equipment
1213     Probable_Cause: loss-of-signal
1214     Service_Affecting: True
1215     Suppression: False
1216     Management_Affecting_Severity: warning
1217     Degrade_Affecting_Severity: none
1218     Context: starlingx
1219
1220
1221 300.002:
1222     Type: Alarm
1223     Description: |-
1224         'Data' Interface degraded.
1225         OR
1226         'Data' Interface failed.
1227     Entity_Instance_ID: host=<hostname>.interface=<if-uuid>
1228     Severity: [critical, major]
1229     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1230     Maintenance_Action:
1231     Inhibit_Alarms:
1232     Alarm_Type: equipment
1233     Probable_Cause: loss-of-signal
1234     Service_Affecting: True
1235     Suppression: False
1236     Management_Affecting_Severity: warning
1237     Degrade_Affecting_Severity: critical
1238     Context: openstack
1239
1240
1241 300.003:
1242     Type: Alarm
1243     Description: Networking Agent not responding.
1244     Entity_Instance_ID: host=<hostname>.agent=<agent-uuid>
1245     Severity: major
1246     Proposed_Repair_Action: "If condition persists, attempt to clear issue by administratively locking and unlocking the Host."
1247     Maintenance_Action:
1248     Inhibit_Alarms:
1249     Alarm_Type: operational-violation
1250     Probable_Cause: underlying-resource-unavailable
1251     Service_Affecting: True
1252     Suppression: False
1253     Management_Affecting_Severity: warning
1254     Degrade_Affecting_Severity: none
1255     Context: openstack
1256
1257
1258 300.004:
1259     Type: Alarm
1260     Description: No enabled compute host with connectivity to provider network.
1261     Entity_Instance_ID: service=networking.providernet=<pnet-uuid>
1262     Severity: major
1263     Proposed_Repair_Action: Enable compute hosts with required provider network connectivity.
1264     Maintenance_Action:
1265     Inhibit_Alarms:
1266     Alarm_Type: operational-violation
1267     Probable_Cause: underlying-resource-unavailable
1268     Service_Affecting: True
1269     Suppression: False
1270     Management_Affecting_Severity: warning
1271     Degrade_Affecting_Severity: none
1272     Context: openstack
1273
1274
1275 300.005:
1276     Type: Alarm
1277     Description: |-
1278         Communication failure detected over provider network x% for ranges y% on host z%.
1279         OR
1280         Communication failure detected over provider network x% on host z%.
1281     Entity_Instance_ID: host=<hostname>.service=networking.providernet=<pnet-uuid>
1282     Severity: major
1283     Proposed_Repair_Action: Check neighbor switch port VLAN assignments.
1284     Maintenance_Action:
1285     Inhibit_Alarms:
1286     Alarm_Type: operational-violation
1287     Probable_Cause: underlying-resource-unavailable
1288     Service_Affecting: True
1289     Suppression: False
1290     Management_Affecting_Severity: warning
1291     Degrade_Affecting_Severity: none
1292     Context: openstack
1293
1294
1295 300.010:
1296     Type: Alarm
1297     Description: |-
1298         ML2 Driver Agent non-reachable
1299         OR
1300         ML2 Driver Agent reachable but non-responsive
1301         OR
1302         ML2 Driver Agent authentication failure
1303         OR
1304         ML2 Driver Agent is unable to sync Neutron database
1305     Entity_Instance_ID: host=<hostname>.ml2driver=<driver>
1306     Severity: major
1307     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
1308     Maintenance_Action:
1309     Inhibit_Alarms:
1310     Alarm_Type: processing-error
1311     Probable_Cause: underlying-resource-unavailable
1312     Service_Affecting: True
1313     Suppression: True
1314     Management_Affecting_Severity: warning
1315     Degrade_Affecting_Severity: none
1316     Context: openstack
1317
1318
1319 300.012:
1320     Type: Alarm
1321     Description: "Openflow Controller connection failed."
1322     Entity_Instance_ID: host=<hostname>.openflow-controller=<uri>
1323     Severity: major
1324     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1325     Maintenance_Action:
1326     Inhibit_Alarms:
1327     Alarm_Type: equipment
1328     Probable_Cause: loss-of-signal
1329     Service_Affecting: True
1330     Suppression: False
1331     Management_Affecting_Severity: warning
1332     Degrade_Affecting_Severity: critical
1333     Context: openstack
1334
1335
1336 300.013:
1337     Type: Alarm
1338     Description: |-
1339         No active Openflow controller connections found for this network.
1340         OR
1341         One or more Openflow controller connections in disconnected state for this network.
1342     Entity_Instance_ID: host=<hostname>.openflow-network=<name>
1343     Severity: [critical, major]
1344     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1345     Maintenance_Action:
1346     Inhibit_Alarms:
1347     Alarm_Type: equipment
1348     Probable_Cause: loss-of-signal
1349     Service_Affecting: True
1350     Suppression: False
1351     Management_Affecting_Severity: warning
1352     Degrade_Affecting_Severity: critical
1353     Context: openstack
1354
1355
1356 300.014:
1357     Type: Alarm
1358     Description: "OVSDB Manager connection failed."
1359     Entity_Instance_ID: host=<hostname>.sdn-controller=<uuid>
1360     Severity: major
1361     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1362     Maintenance_Action:
1363     Inhibit_Alarms:
1364     Alarm_Type: equipment
1365     Probable_Cause: loss-of-signal
1366     Service_Affecting: True
1367     Suppression: False
1368     Management_Affecting_Severity: warning
1369     Degrade_Affecting_Severity: critical
1370     Context: none
1371
1372
1373 300.015:
1374     Type: Alarm
1375     Description: "No active OVSDB connections found."
1376     Entity_Instance_ID: host=<hostname>
1377     Severity: critical
1378     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1379     Maintenance_Action:
1380     Inhibit_Alarms:
1381     Alarm_Type: equipment
1382     Probable_Cause: loss-of-signal
1383     Service_Affecting: True
1384     Suppression: False
1385     Management_Affecting_Severity: warning
1386     Degrade_Affecting_Severity: critical
1387     Context: openstack
1388
1389 300.016:
1390     Type: Alarm
1391     Description: "Dynamic routing agent x% lost connectivity to peer y%."
1392     Entity_Instance_ID: host=<hostname>,agent=<agent-uuid>,bgp-peer=<bgp-peer>
1393     Severity: major
1394     Proposed_Repair_Action: If condition persists, fix connectivity to peer.
1395     Maintenance_Action:
1396     Inhibit_Alarms:
1397     Alarm_Type: operational-violation
1398     Probable_Cause: loss-of-signal
1399     Service_Affecting: True
1400     Suppression: True
1401     Management_Affecting_Severity: warning
1402     Degrade_Affecting_Severity: none
1403     Context: openstack
1404
1405
1406 #---------------------------------------------------------------------------
1407 #   HIGH AVAILABILITY
1408 #---------------------------------------------------------------------------
1409
1410 400.001:
1411     Type: Alarm
1412     Description: |-
1413         Service group failure; <list of affected services>.
1414         OR
1415         Service group degraded; <list of affected services>.
1416         OR
1417         Service group warning; <list of affected services>.
1418     Entity_Instance_ID: service_domain=<domain_name>.service_group=<group_name>.host=<hostname>
1419     Severity: [critical, major, minor]
1420     Proposed_Repair_Action: Contact next level of support.
1421     Maintenance_Action:
1422     Inhibit_Alarms: False
1423     Alarm_Type: processing-error
1424     Probable_Cause: underlying-resource-unavailable
1425     Service_Affecting: True
1426     Suppression: True
1427     Management_Affecting_Severity: warning
1428     Degrade_Affecting_Severity: major
1429     Context: starlingx
1430
1431
1432 400.002:
1433     Type: Alarm
1434     Description: |-
1435         Service group loss of redundancy; expected <num> standby member<s> but no standby members available.
1436         OR
1437         Service group loss of redundancy; expected <num> standby member<s> but only <num> standby member<s> available.
1438         OR
1439         Service group loss of redundancy; expected <num> active member<s> but no active members available.
1440         OR
1441         Service group loss of redundancy; expected <num> active member<s> but only <num> active member<s> available.
1442     Entity_Instance_ID: service_domain=<domain_name>.service_group=<group_name>
1443     Severity: major
1444     Proposed_Repair_Action: "Bring a controller node back into service, otherwise contact next level of support."
1445     Maintenance_Action:
1446     Inhibit_Alarms: False
1447     Alarm_Type: processing-error
1448     Probable_Cause: underlying-resource-unavailable
1449     Service_Affecting: True
1450     Suppression: True
1451     Management_Affecting_Severity: warning
1452     Degrade_Affecting_Severity: none
1453     Context: starlingx
1454
1455
1456 400.003:
1457     Type: Alarm
1458     Description: |-
1459         License key is not installed; a valid license key is required for operation.
1460         OR
1461         License key has expired or is invalid; a valid license key is required for operation.
1462         OR
1463         Evaluation license key will expire on <date>; there are <num_days> days remaining in this evaluation.
1464         OR
1465         Evaluation license key will expire on <date>; there is only 1 day remaining in this evaluation.
1466     Entity_Instance_ID: host=<hostname>
1467     Severity: critical
1468     Proposed_Repair_Action: Contact next level of support to obtain a new license key.
1469     Maintenance_Action:
1470     Inhibit_Alarms: False
1471     Alarm_Type: processing-error
1472     Probable_Cause: key-expired
1473     Service_Affecting: True
1474     Suppression: False
1475     Management_Affecting_Severity: critical
1476     Degrade_Affecting_Severity: none
1477     Context: starlingx
1478
1479
1480 # 400.004:    // NOTE Removed
1481 #     Type: Alarm
1482 #     Description: Service group software modification detected; <list of affected files>.
1483 #     Entity_Instance_ID: host=<hostname>
1484 #     Severity: major
1485 #     Proposed_Repair_Action: Contact next level of support.
1486 #     Maintenance_Action:
1487 #     Inhibit_Alarms: False
1488 #     Alarm_Type: processing-error
1489 #     Probable_Cause: software-program-error
1490 #     Service_Affecting: True
1491 #     Suppression: False
1492
1493
1494 400.005:
1495     Type: Alarm
1496     Description: |-
1497         Communication failure detected with peer over port <linux-ifname>.
1498         OR
1499         Communication failure detected with peer over port <linux-ifname> within the last 30 seconds.
1500     Entity_Instance_ID: host=<hostname>.network=<mgmt | oam | cluster-host>
1501     Severity: major
1502     Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
1503     Maintenance_Action:
1504     Inhibit_Alarms: False
1505     Alarm_Type: communication
1506     Probable_Cause: underlying-resource-unavailable
1507     Service_Affecting: True
1508     Suppression: True
1509     Management_Affecting_Severity: warning
1510     Degrade_Affecting_Severity: none
1511     Context: starlingx
1512
1513
1514 #---------------------------------------------------------------------------
1515 #   SM
1516 #---------------------------------------------------------------------------
1517
1518 401.001:
1519     Type: Log
1520     Description: Service group <group> state change from <state> to <state> on host <host_name>
1521     Entity_Instance_ID: service_domain=<domain>.service_group=<group>.host=<host_name>
1522     Severity: critical
1523     Alarm_Type: processing-error
1524     Probable_Cause: unspecified-reason
1525     Service_Affecting: True
1526     Context: openstack
1527
1528 401.002:
1529     Type: Log
1530     Description: |-
1531         Service group <group> loss of redundancy; expected <X> standby member but no standby members available
1532         or
1533         Service group <group> loss of redundancy; expected <X> standby member but only <Y> standby member(s) available
1534         or
1535         Service group <group> has no active members available; expected <X> active member(s)
1536         or
1537         Service group <group> loss of redundancy; expected <X> active member(s) but only <Y> active member(s) available
1538     Entity_Instance_ID: service_domain=<domain>.service_group=<group>
1539     Severity: critical
1540     Alarm_Type: processing-error
1541     Probable_Cause: unspecified-reason
1542     Service_Affecting: True
1543     Context: openstack
1544
1545 401.003:
1546     Type: Log
1547     Description: |-
1548         License key has expired or is invalid
1549         or
1550         Evaluation license key will expire on <date>
1551         or
1552         License key is valid
1553     Entity_Instance_ID: host=<host_name>
1554     Severity: critical
1555     Alarm_Type: processing-error
1556     Probable_Cause: unspecified-reason
1557     Service_Affecting: True
1558     Context: starlingx
1559
1560 401.005:
1561     Type: Log
1562     Description: |-
1563         Communication failure detected with peer over port <port> on host <host_name>
1564         or
1565         Communication failure detected with peer over port <port> on host <host_name> within the last <X> seconds
1566         or
1567         Communication established with peer over port <port> on host <host_name>
1568     Entity_Instance_ID: host=<host_name>.network=<network>
1569     Severity: critical
1570     Alarm_Type: processing-error
1571     Probable_Cause: unspecified-reason
1572     Service_Affecting: True
1573     Context: starlingx
1574
1575 401.007:
1576     Type: Log
1577     Description: Swact or swact-force
1578     Entity_Instance_ID: host=<host_name>
1579     Severity: critical
1580     Alarm_Type: processing-error
1581     Probable_Cause: unspecified-reason
1582     Service_Affecting: True
1583     Context: starlingx
1584
1585
1586 #---------------------------------------------------------------------------
1587 #   SECURITY
1588 #---------------------------------------------------------------------------
1589
1590 500.100:
1591     Type: Alarm
1592     Description: TPM initialization failed on host.
1593     Entity_Instance_ID: host=<hostname>
1594     Severity: major
1595     Proposed_Repair_Action: reinstall HTTPS certificate; if problem persists contact next level of support.
1596     Maintenance_Action: degrade
1597     Inhibit_Alarms:
1598     Alarm_Type: equipment
1599     Probable_Cause: procedural-error
1600     Service_Affecting: True
1601     Suppression: False
1602     Management_Affecting_Severity: none
1603     Degrade_Affecting_Severity: none
1604     Context: none
1605
1606 500.101:
1607     Type: Alarm
1608     Description: Developer patch certificate enabled.
1609     Entity_Instance_ID: host=controller
1610     Severity: critical
1611     Proposed_Repair_Action: Reinstall system to disable developer certificate and remove untrusted patches.
1612     Maintenance_Action:
1613     Inhibit_Alarms:
1614     Alarm_Type: operational-violation
1615     Probable_Cause: unspecified-reason
1616     Service_Affecting: False
1617     Suppression: False
1618     Management_Affecting_Severity: none
1619     Degrade_Affecting_Severity: none
1620     Context: starlingx
1621
1622 500.200:
1623     Type: Alarm
1624     Description: |-
1625         Certificate 'system certificate-show <uuid>' (mode=<ssl/ssl_ca/docker_registry/openstack/openstack_ca>) expiring soon on <date>.
1626         OR
1627         Certificate '<Namespace>/<Certificate/Secret>' expiring soon on <date>.
1628         OR
1629         Certificate '<k8sRootCA/EtcdCA>' expiring soon on <date>.
1630     Entity_Instance_ID: |-
1631         system.certificate.mode=<mode>.uuid=<uuid>
1632         OR
1633         namespace=<namespace-name>.certificate=<certificate-name>
1634         OR
1635         namespace=<namespace-name>.secret=<secret-name>
1636         OR
1637         system.certificate.k8sRootCA
1638     Severity: major
1639     Proposed_Repair_Action: Check certificate expiration time. Renew certificate for the entity identified.
1640     Maintenance_Action:
1641     Inhibit_Alarms:
1642     Alarm_Type: operational-violation
1643     Probable_Cause: certificate-expiration
1644     Service_Affecting: False
1645     Suppression: False
1646     Management_Affecting_Severity: none
1647     Degrade_Affecting_Severity: none
1648     Context: starlingx
1649
1650 500.210:
1651     Type: Alarm
1652     Description: |-
1653         Certificate 'system certificate-show <uuid>' (mode=<ssl/ssl_ca/docker_registry/openstack/openstack_ca>) expired.
1654         OR
1655         Certificate '<Namespace>/<Certificate/Secret>' expired.
1656         OR
1657         Certificate '<k8sRootCA/EtcdRootCA>' expired.
1658     Entity_Instance_ID: |-
1659         system.certificate.mode=<mode>.uuid=<uuid>
1660         OR
1661         namespace=<namespace-name>.certificate=<certificate-name>
1662         OR
1663         namespace=<namespace-name>.secret=<secret-name>
1664         OR
1665         system.certificate.k8sRootCA
1666     Severity: critical
1667     Proposed_Repair_Action: Check certificate expiration time. Renew certificate for the entity identified.
1668     Maintenance_Action:
1669     Inhibit_Alarms:
1670     Alarm_Type: operational-violation
1671     Probable_Cause: certificate-expiration
1672     Service_Affecting: False
1673     Suppression: False
1674     Management_Affecting_Severity: none
1675     Degrade_Affecting_Severity: none
1676     Context: starlingx
1677
1678 500.500:
1679     Type: Log
1680     Description: "Host <host_name> has IMA Appraisal failure for service <service> when executing <file>[, reason = <reason_text>]"
1681     Entity_Instance_ID: host=<hostname>.service=<service>
1682     Severity: major
1683     Alarm_Type: integrity-violation
1684     Probable_Cause: information-modification-detected
1685     Service_Affecting: False
1686     Context: none
1687
1688
1689 #---------------------------------------------------------------------------
1690 #   VM
1691 #---------------------------------------------------------------------------
1692
1693 700.001:
1694     Type: Alarm
1695     Description: |-
1696         Instance <instance_name> owned by <tenant_name> has failed on host <host_name>
1697         Instance <instance_name> owned by <tenant_name> has failed to schedule
1698     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1699     Severity: critical
1700     Proposed_Repair_Action: The system will attempt recovery; no repair action required
1701     Maintenance_Action:
1702     Inhibit_Alarms:
1703     Alarm_Type: processing-error
1704     Probable_Cause: software-error
1705     Service_Affecting: True
1706     Suppression: True
1707     Management_Affecting_Severity: warning
1708     Degrade_Affecting_Severity: none
1709     Context: openstack
1710
1711 700.002:
1712     Type: Alarm
1713     Description: Instance <instance_name> owned by <tenant_name> is paused on host <host_name>
1714     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1715     Severity: critical
1716     Proposed_Repair_Action: Un-pause the instance
1717     Maintenance_Action:
1718     Inhibit_Alarms:
1719     Alarm_Type: processing-error
1720     Probable_Cause: procedural-error
1721     Service_Affecting: True
1722     Suppression: True
1723     Management_Affecting_Severity: warning
1724     Degrade_Affecting_Severity: none
1725     Context: openstack
1726
1727 700.003:
1728     Type: Alarm
1729     Description: Instance <instance_name> owned by <tenant_name> is suspended on host <host_name>
1730     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1731     Severity: critical
1732     Proposed_Repair_Action: Resume the instance
1733     Maintenance_Action:
1734     Inhibit_Alarms:
1735     Alarm_Type: processing-error
1736     Probable_Cause: procedural-error
1737     Service_Affecting: True
1738     Suppression: True
1739     Management_Affecting_Severity: warning
1740     Degrade_Affecting_Severity: none
1741     Context: openstack
1742
1743 700.004:
1744     Type: Alarm
1745     Description: Instance <instance_name> owned by <tenant_name> is stopped on host <host_name>
1746     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1747     Severity: critical
1748     Proposed_Repair_Action: Start the instance
1749     Maintenance_Action:
1750     Inhibit_Alarms:
1751     Alarm_Type: processing-error
1752     Probable_Cause: procedural-error
1753     Service_Affecting: True
1754     Suppression: True
1755     Management_Affecting_Severity: warning
1756     Degrade_Affecting_Severity: none
1757     Context: openstack
1758
1759 700.005:
1760     Type: Alarm
1761     Description: Instance <instance_name> owned by <tenant_name> is rebooting on host <host_name>
1762     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1763     Severity: critical
1764     Proposed_Repair_Action: Wait for reboot to complete; if problem persists contact next level of support
1765     Maintenance_Action:
1766     Inhibit_Alarms:
1767     Alarm_Type: processing-error
1768     Probable_Cause: unspecified-reason
1769     Service_Affecting: True
1770     Suppression: True
1771     Management_Affecting_Severity: warning
1772     Degrade_Affecting_Severity: none
1773     Context: openstack
1774
1775 700.006:
1776     Type: Alarm
1777     Description: Instance <instance_name> owned by <tenant_name> is rebuilding on host <host_name>
1778     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1779     Severity: critical
1780     Proposed_Repair_Action: Wait for rebuild to complete; if problem persists contact next level of support
1781     Maintenance_Action:
1782     Inhibit_Alarms:
1783     Alarm_Type: processing-error
1784     Probable_Cause: underlying-resource-unavailable
1785     Service_Affecting: True
1786     Suppression: True
1787     Management_Affecting_Severity: warning
1788     Degrade_Affecting_Severity: none
1789     Context: openstack
1790
1791 700.007:
1792     Type: Alarm
1793     Description: Instance <instance_name> owned by <tenant_name> is evacuating from host <host_name>
1794     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1795     Severity: critical
1796     Proposed_Repair_Action: Wait for evacuate to complete; if problem persists contact next level of support
1797     Maintenance_Action:
1798     Inhibit_Alarms:
1799     Alarm_Type: processing-error
1800     Probable_Cause: underlying-resource-unavailable
1801     Service_Affecting: True
1802     Suppression: True
1803     Management_Affecting_Severity: warning
1804     Degrade_Affecting_Severity: none
1805     Context: openstack
1806
1807 700.008:
1808     Type: Alarm
1809     Description: Instance <instance_name> owned by <tenant_name> is live migrating from host <host_name>
1810     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1811     Severity: warning
1812     Proposed_Repair_Action: Wait for live migration to complete; if problem persists contact next level of support
1813     Maintenance_Action:
1814     Inhibit_Alarms:
1815     Alarm_Type: processing-error
1816     Probable_Cause: unspecified-reason
1817     Service_Affecting: True
1818     Suppression: True
1819     Management_Affecting_Severity: warning
1820     Degrade_Affecting_Severity: none
1821     Context: openstack
1822
1823 700.009:
1824     Type: Alarm
1825     Description: Instance <instance_name> owned by <tenant_name> is cold migrating from host <host_name>
1826     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1827     Severity: critical
1828     Proposed_Repair_Action: Wait for cold migration to complete; if problem persists contact next level of support
1829     Maintenance_Action:
1830     Inhibit_Alarms:
1831     Alarm_Type: processing-error
1832     Probable_Cause: unspecified-reason
1833     Service_Affecting: True
1834     Suppression: True
1835     Management_Affecting_Severity: warning
1836     Degrade_Affecting_Severity: none
1837     Context: openstack
1838
1839 700.010:
1840     Type: Alarm
1841     Description: Instance <instance_name> owned by <tenant_name> has been cold-migrated to host <host_name> waiting for confirmation
1842     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1843     Severity: critical
1844     Proposed_Repair_Action: Confirm or revert cold-migrate of instance
1845     Maintenance_Action:
1846     Inhibit_Alarms:
1847     Alarm_Type: processing-error
1848     Probable_Cause: unspecified-reason
1849     Service_Affecting: True
1850     Suppression: True
1851     Management_Affecting_Severity: warning
1852     Degrade_Affecting_Severity: none
1853     Context: openstack
1854
1855 700.011:
1856     Type: Alarm
1857     Description: Instance <instance_name> owned by <tenant_name> is reverting cold migrate to host <host_name>
1858     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1859     Severity: critical
1860     Proposed_Repair_Action: "Wait for cold migration revert to complete; if problem persists contact next level of support"
1861     Maintenance_Action:
1862     Inhibit_Alarms:
1863     Alarm_Type: other
1864     Probable_Cause: unspecified-reason
1865     Service_Affecting: True
1866     Suppression: True
1867     Management_Affecting_Severity: warning
1868     Degrade_Affecting_Severity: none
1869     Context: openstack
1870
1871 700.012:
1872     Type: Alarm
1873     Description: Instance <instance_name> owned by <tenant_name> is resizing on host <host_name>
1874     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1875     Severity: critical
1876     Proposed_Repair_Action: Wait for resize to complete; if problem persists contact next level of support
1877     Maintenance_Action:
1878     Inhibit_Alarms:
1879     Alarm_Type: processing-error
1880     Probable_Cause: unspecified-reason
1881     Service_Affecting: True
1882     Suppression: True
1883     Management_Affecting_Severity: warning
1884     Degrade_Affecting_Severity: none
1885     Context: openstack
1886
1887 700.013:
1888     Type: Alarm
1889     Description: Instance <instance_name> owned by <tenant_name> has been resized on host <host_name> waiting for confirmation
1890     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1891     Severity: critical
1892     Proposed_Repair_Action: Confirm or revert resize of instance
1893     Maintenance_Action:
1894     Inhibit_Alarms:
1895     Alarm_Type: processing-error
1896     Probable_Cause: unspecified-reason
1897     Service_Affecting: True
1898     Suppression: True
1899     Management_Affecting_Severity: warning
1900     Degrade_Affecting_Severity: none
1901     Context: openstack
1902
1903 700.014:
1904     Type: Alarm
1905     Description: Instance <instance_name> owned by <tenant_name> is reverting resize on host <host_name>
1906     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1907     Severity: critical
1908     Proposed_Repair_Action: "Wait for resize revert to complete; if problem persists contact next level of support"
1909     Maintenance_Action:
1910     Inhibit_Alarms:
1911     Alarm_Type: other
1912     Probable_Cause: unspecified-reason
1913     Service_Affecting: True
1914     Suppression: True
1915     Management_Affecting_Severity: warning
1916     Degrade_Affecting_Severity: none
1917     Context: openstack
1918
1919 700.015:
1920     Type: Alarm
1921     Description: Guest Heartbeat not established for instance <instance_name> owned by <tenant_name> on host <host_name>
1922     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1923     Severity: major
1924     Proposed_Repair_Action: "Verify that the instance is running the Guest-Client daemon, or disable Guest Heartbeat for the instance if no longer needed, otherwise contact next level of support"
1925     Maintenance_Action:
1926     Inhibit_Alarms:
1927     Alarm_Type: communication
1928     Probable_Cause: procedural-error
1929     Service_Affecting: True
1930     Suppression: True
1931     Management_Affecting_Severity: warning
1932     Degrade_Affecting_Severity: none
1933     Context: none
1934
1935 700.016:
1936     Type: Alarm
1937     Description: Multi-Node Recovery Mode
1938     Entity_Instance_ID: subsystem=vim
1939     Severity: minor
1940     Proposed_Repair_Action: "Wait for the system to exit out of this mode"
1941     Maintenance_Action:
1942     Inhibit_Alarms:
1943     Alarm_Type: equipment
1944     Probable_Cause: unspecified-reason
1945     Service_Affecting: True
1946     Suppression: True
1947     Management_Affecting_Severity: warning
1948     Degrade_Affecting_Severity: none
1949     Context: openstack
1950
1951 700.017:
1952     Type: Alarm
1953     Description: Server group <server_group_name> <policy> policy was not satisfied
1954     Entity_Instance_ID: server-group=<server-group-uuid>
1955     Severity: minor
1956     Proposed_Repair_Action: "Migrate instances in an attempt to satisfy the policy; if problem persists contact next level of support"
1957     Maintenance_Action:
1958     Inhibit_Alarms:
1959     Alarm_Type: processing-error
1960     Probable_Cause: procedural-error
1961     Service_Affecting: True
1962     Suppression: True
1963     Management_Affecting_Severity: none
1964     Degrade_Affecting_Severity: none
1965     Context: openstack
1966
1967
1968 700.101:
1969     Type: Log
1970     Description: Instance <instance_name> is enabled on host <host_name>
1971     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1972     Severity: critical
1973     Alarm_Type: equipment
1974     Probable_Cause: unspecified-reason
1975     Service_Affecting: False
1976     Context: openstack
1977
1978 700.102:
1979     Type: Log
1980     Description: Instance <instance_name> owned by <tenant_name> has failed[, reason = <reason_text>] or
1981         Instance <instance_name> owned by <tenant_name> has failed to schedule[, reason = <reason_text>]
1982     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1983     Severity: critical
1984     Alarm_Type: equipment
1985     Probable_Cause: unspecified-reason
1986     Service_Affecting: False
1987     Context: openstack
1988
1989 700.103:
1990     Type: Log
1991     Description: Create issued <by <tenant_name>|by the system> against <instance_name> owned by <tenant_name>
1992     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
1993     Severity: critical
1994     Alarm_Type: equipment
1995     Probable_Cause: unspecified-reason
1996     Service_Affecting: False
1997     Context: openstack
1998
1999 700.104:
2000     Type: Log
2001     Description: Creating instance <instance_name> owned by <tenant_name>
2002     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2003     Severity: critical
2004     Alarm_Type: equipment
2005     Probable_Cause: unspecified-reason
2006     Service_Affecting: False
2007     Context: openstack
2008
2009 700.105:
2010     Type: Log
2011     Description: "Create rejected for instance <instance_name>[, reason = <reason_text>]"
2012     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2013     Severity: critical
2014     Alarm_Type: equipment
2015     Probable_Cause: unspecified-reason
2016     Service_Affecting: False
2017     Context: openstack
2018
2019 700.106:
2020     Type: Log
2021     Description: "Create cancelled for instance <instance_name>[, reason = <reason_text>]"
2022     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2023     Severity: critical
2024     Alarm_Type: equipment
2025     Probable_Cause: unspecified-reason
2026     Service_Affecting: False
2027     Context: openstack
2028
2029 700.107:
2030     Type: Log
2031     Description: "Create failed for instance <instance_name>[, reason = <reason_text>]"
2032     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2033     Severity: critical
2034     Alarm_Type: equipment
2035     Probable_Cause: unspecified-reason
2036     Service_Affecting: False
2037     Context: openstack
2038
2039 700.108:
2040     Type: Log
2041     Description: Instance <instance_name> owned by <tenant_name> has been created
2042     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2043     Severity: critical
2044     Alarm_Type: equipment
2045     Probable_Cause: unspecified-reason
2046     Service_Affecting: False
2047     Context: openstack
2048
2049 700.109:
2050     Type: Log
2051     Description: "Delete issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2052     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2053     Severity: critical
2054     Alarm_Type: equipment
2055     Probable_Cause: unspecified-reason
2056     Service_Affecting: False
2057     Context: openstack
2058
2059 700.110:
2060     Type: Log
2061     Description: Deleting instance <instance_name> owned by <tenant_name>
2062     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2063     Severity: critical
2064     Alarm_Type: equipment
2065     Probable_Cause: unspecified-reason
2066     Service_Affecting: False
2067     Context: openstack
2068
2069 700.111:
2070     Type: Log
2071     Description: "Delete rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2072     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2073     Severity: critical
2074     Alarm_Type: equipment
2075     Probable_Cause: unspecified-reason
2076     Service_Affecting: False
2077     Context: openstack
2078
2079 700.112:
2080     Type: Log
2081     Description: "Delete cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2082     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2083     Severity: critical
2084     Alarm_Type: equipment
2085     Probable_Cause: unspecified-reason
2086     Service_Affecting: False
2087     Context: openstack
2088
2089 700.113:
2090     Type: Log
2091     Description: "Delete failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2092     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2093     Severity: critical
2094     Alarm_Type: equipment
2095     Probable_Cause: unspecified-reason
2096     Service_Affecting: False
2097     Context: openstack
2098
2099 700.114:
2100     Type: Log
2101     Description: Deleted instance <instance_name> owned by <tenant_name>
2102     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2103     Severity: critical
2104     Alarm_Type: equipment
2105     Probable_Cause: unspecified-reason
2106     Service_Affecting: False
2107     Context: openstack
2108
2109 700.115:
2110     Type: Log
2111     Description: "Pause issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2112     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2113     Severity: critical
2114     Alarm_Type: equipment
2115     Probable_Cause: unspecified-reason
2116     Service_Affecting: False
2117     Context: openstack
2118
2119 700.116:
2120     Type: Log
2121     Description: Pause in progress for instance <instance_name> on host <host_name>
2122     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2123     Severity: critical
2124     Alarm_Type: equipment
2125     Probable_Cause: unspecified-reason
2126     Service_Affecting: False
2127     Context: openstack
2128
2129 700.117:
2130     Type: Log
2131     Description: "Pause rejected for instance <instance_name> enabled on host <host_name>[, reason = <reason_text>]"
2132     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2133     Severity: critical
2134     Alarm_Type: equipment
2135     Probable_Cause: unspecified-reason
2136     Service_Affecting: False
2137     Context: openstack
2138
2139 700.118:
2140     Type: Log
2141     Description: "Pause cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2142     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2143     Severity: critical
2144     Alarm_Type: equipment
2145     Probable_Cause: unspecified-reason
2146     Service_Affecting: False
2147     Context: openstack
2148
2149 700.119:
2150     Type: Log
2151     Description: "Pause failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2152     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2153     Severity: critical
2154     Alarm_Type: equipment
2155     Probable_Cause: unspecified-reason
2156     Service_Affecting: False
2157     Context: openstack
2158
2159 700.120:
2160     Type: Log
2161     Description: Pause complete for instance <instance_name> now paused on host <host_name>
2162     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2163     Severity: critical
2164     Alarm_Type: equipment
2165     Probable_Cause: unspecified-reason
2166     Service_Affecting: False
2167     Context: openstack
2168
2169 700.121:
2170     Type: Log
2171     Description: "Un-pause issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2172     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2173     Severity: critical
2174     Alarm_Type: equipment
2175     Probable_Cause: unspecified-reason
2176     Service_Affecting: False
2177     Context: openstack
2178
2179 700.122:
2180     Type: Log
2181     Description: Un-pause in-progress for instance <instance_name> on host <host_name>
2182     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2183     Severity: critical
2184     Alarm_Type: equipment
2185     Probable_Cause: unspecified-reason
2186     Service_Affecting: False
2187     Context: openstack
2188
2189 700.123:
2190     Type: Log
2191     Description: "Un-pause rejected for instance <instance_name> paused on host <host_name>[, reason = <reason_text>]"
2192     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2193     Severity: critical
2194     Alarm_Type: equipment
2195     Probable_Cause: unspecified-reason
2196     Service_Affecting: False
2197     Context: openstack
2198
2199 700.124:
2200     Type: Log
2201     Description: "Un-pause cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2202     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2203     Severity: critical
2204     Alarm_Type: equipment
2205     Probable_Cause: unspecified-reason
2206     Service_Affecting: False
2207     Context: openstack
2208
2209 700.125:
2210     Type: Log
2211     Description: "Un-pause failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2212     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2213     Severity: critical
2214     Alarm_Type: equipment
2215     Probable_Cause: unspecified-reason
2216     Service_Affecting: False
2217     Context: openstack
2218
2219 700.126:
2220     Type: Log
2221     Description: Un-pause complete for instance <instance_name> now enabled on host <host_name>
2222     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2223     Severity: critical
2224     Alarm_Type: equipment
2225     Probable_Cause: unspecified-reason
2226     Service_Affecting: False
2227     Context: openstack
2228
2229 700.127:
2230     Type: Log
2231     Description: "Suspend issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2232     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2233     Severity: critical
2234     Alarm_Type: equipment
2235     Probable_Cause: unspecified-reason
2236     Service_Affecting: False
2237     Context: openstack
2238
2239 700.128:
2240     Type: Log
2241     Description: Suspend in-progress for instance <instance_name> on host <host_name>
2242     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2243     Severity: critical
2244     Alarm_Type: equipment
2245     Probable_Cause: unspecified-reason
2246     Service_Affecting: False
2247     Context: openstack
2248
2249 700.129:
2250     Type: Log
2251     Description: "Suspend rejected for instance <instance_name> enabled on host <host_name>[, reason = <reason_text>]"
2252     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2253     Severity: critical
2254     Alarm_Type: equipment
2255     Probable_Cause: unspecified-reason
2256     Service_Affecting: False
2257     Context: openstack
2258
2259 700.130:
2260     Type: Log
2261     Description: "Suspend cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2262     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2263     Severity: critical
2264     Alarm_Type: equipment
2265     Probable_Cause: unspecified-reason
2266     Service_Affecting: False
2267     Context: openstack
2268
2269 700.131:
2270     Type: Log
2271     Description: "Suspend failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2272     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2273     Severity: critical
2274     Alarm_Type: equipment
2275     Probable_Cause: unspecified-reason
2276     Service_Affecting: False
2277     Context: openstack
2278
2279 700.132:
2280     Type: Log
2281     Description: Suspend complete for instance <instance_name> now suspended on host <host_name>
2282     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2283     Severity: critical
2284     Alarm_Type: equipment
2285     Probable_Cause: unspecified-reason
2286     Service_Affecting: False
2287     Context: openstack
2288
2289 700.133:
2290     Type: Log
2291     Description: "Resume issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2292     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2293     Severity: critical
2294     Alarm_Type: equipment
2295     Probable_Cause: unspecified-reason
2296     Service_Affecting: False
2297     Context: openstack
2298
2299 700.134:
2300     Type: Log
2301     Description: Resume in-progress for instance <instance_name> on host <host_name>
2302     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2303     Severity: critical
2304     Alarm_Type: equipment
2305     Probable_Cause: unspecified-reason
2306     Service_Affecting: False
2307     Context: openstack
2308
2309 700.135:
2310     Type: Log
2311     Description: "Resume rejected for instance <instance_name> suspended on host <host_name>[, reason = <reason_text>]"
2312     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2313     Severity: critical
2314     Alarm_Type: equipment
2315     Probable_Cause: unspecified-reason
2316     Service_Affecting: False
2317     Context: openstack
2318
2319 700.136:
2320     Type: Log
2321     Description: "Resume cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2322     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2323     Severity: critical
2324     Alarm_Type: equipment
2325     Probable_Cause: unspecified-reason
2326     Service_Affecting: False
2327     Context: openstack
2328
2329 700.137:
2330     Type: Log
2331     Description: "Resume failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2332     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2333     Severity: critical
2334     Alarm_Type: equipment
2335     Probable_Cause: unspecified-reason
2336     Service_Affecting: False
2337     Context: openstack
2338
2339 700.138:
2340     Type: Log
2341     Description: Resume complete for instance <instance_name> now enabled on host <host_name>
2342     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2343     Severity: critical
2344     Alarm_Type: equipment
2345     Probable_Cause: unspecified-reason
2346     Service_Affecting: False
2347     Context: openstack
2348
2349 700.139:
2350     Type: Log
2351     Description: "Start issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2352     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2353     Severity: critical
2354     Alarm_Type: equipment
2355     Probable_Cause: unspecified-reason
2356     Service_Affecting: False
2357     Context: openstack
2358
2359 700.140:
2360     Type: Log
2361     Description: Start in-progress for instance <instance_name> on host <host_name>
2362     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2363     Severity: critical
2364     Alarm_Type: equipment
2365     Probable_Cause: unspecified-reason
2366     Service_Affecting: False
2367     Context: openstack
2368
2369 700.141:
2370     Type: Log
2371     Description: "Start rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2372     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2373     Severity: critical
2374     Alarm_Type: equipment
2375     Probable_Cause: unspecified-reason
2376     Service_Affecting: False
2377     Context: openstack
2378
2379 700.142:
2380     Type: Log
2381     Description: "Start cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2382     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2383     Severity: critical
2384     Alarm_Type: equipment
2385     Probable_Cause: unspecified-reason
2386     Service_Affecting: False
2387     Context: openstack
2388
2389 700.143:
2390     Type: Log
2391     Description: "Start failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2392     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2393     Severity: critical
2394     Alarm_Type: equipment
2395     Probable_Cause: unspecified-reason
2396     Service_Affecting: False
2397     Context: openstack
2398
2399 700.144:
2400     Type: Log
2401     Description: Start complete for instance <instance_name> now enabled on host <host_name>
2402     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2403     Severity: critical
2404     Alarm_Type: equipment
2405     Probable_Cause: unspecified-reason
2406     Service_Affecting: False
2407     Context: openstack
2408
2409 700.145:
2410     Type: Log
2411     Description: "Stop issued <by <tenant_name>|by the system|by the instance> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2412     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2413     Severity: critical
2414     Alarm_Type: equipment
2415     Probable_Cause: unspecified-reason
2416     Service_Affecting: False
2417     Context: openstack
2418
2419 700.146:
2420     Type: Log
2421     Description: Stop in progress for instance <instance_name> on host <host_name>
2422     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2423     Severity: critical
2424     Alarm_Type: equipment
2425     Probable_Cause: unspecified-reason
2426     Service_Affecting: False
2427     Context: openstack
2428
2429 700.147:
2430     Type: Log
2431     Description: "Stop rejected for instance <instance_name> enabled on host <host_name>[, reason = <reason_text>]"
2432     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2433     Severity: critical
2434     Alarm_Type: equipment
2435     Probable_Cause: unspecified-reason
2436     Service_Affecting: False
2437     Context: openstack
2438
2439 700.148:
2440     Type: Log
2441     Description: "Stop cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2442     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2443     Severity: critical
2444     Alarm_Type: equipment
2445     Probable_Cause: unspecified-reason
2446     Service_Affecting: False
2447     Context: openstack
2448
2449 700.149:
2450     Type: Log
2451     Description: "Stop failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2452     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2453     Severity: critical
2454     Alarm_Type: equipment
2455     Probable_Cause: unspecified-reason
2456     Service_Affecting: False
2457     Context: openstack
2458
2459 700.150:
2460     Type: Log
2461     Description: Stop complete for instance <instance_name> now disabled on host <host_name>
2462     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2463     Severity: critical
2464     Alarm_Type: equipment
2465     Probable_Cause: unspecified-reason
2466     Service_Affecting: False
2467     Context: openstack
2468
2469 700.151:
2470     Type: Log
2471     Description: "Live-Migrate issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> from host <host_name>[, reason = <reason_text>]"
2472     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2473     Severity: critical
2474     Alarm_Type: equipment
2475     Probable_Cause: unspecified-reason
2476     Service_Affecting: False
2477     Context: openstack
2478
2479 700.152:
2480     Type: Log
2481     Description: Live-Migrate in progress for instance <instance_name> from host <host_name>
2482     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2483     Severity: critical
2484     Alarm_Type: equipment
2485     Probable_Cause: unspecified-reason
2486     Service_Affecting: False
2487     Context: openstack
2488
2489 700.153:
2490     Type: Log
2491     Description: "Live-Migrate rejected for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2492     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2493     Severity: critical
2494     Alarm_Type: equipment
2495     Probable_Cause: unspecified-reason
2496     Service_Affecting: False
2497     Context: openstack
2498
2499 700.154:
2500     Type: Log
2501     Description: "Live-Migrate cancelled for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2502     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2503     Severity: critical
2504     Alarm_Type: equipment
2505     Probable_Cause: unspecified-reason
2506     Service_Affecting: False
2507     Context: openstack
2508
2509 700.155:
2510     Type: Log
2511     Description: "Live-Migrate failed for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2512     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2513     Severity: critical
2514     Alarm_Type: equipment
2515     Probable_Cause: unspecified-reason
2516     Service_Affecting: False
2517     Context: openstack
2518
2519 700.156:
2520     Type: Log
2521     Description: Live-Migrate complete for instance <instance_name> now enabled on host <host_name>
2522     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2523     Severity: critical
2524     Alarm_Type: equipment
2525     Probable_Cause: unspecified-reason
2526     Service_Affecting: False
2527     Context: openstack
2528
2529 700.157:
2530     Type: Log
2531     Description: "Cold-Migrate issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> from host <host_name>[, reason = <reason_text>]"
2532     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2533     Severity: critical
2534     Alarm_Type: equipment
2535     Probable_Cause: unspecified-reason
2536     Service_Affecting: False
2537     Context: openstack
2538
2539 700.158:
2540     Type: Log
2541     Description: Cold-Migrate in progress for instance <instance_name> from host <host_name>
2542     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2543     Severity: critical
2544     Alarm_Type: equipment
2545     Probable_Cause: unspecified-reason
2546     Service_Affecting: False
2547     Context: openstack
2548
2549 700.159:
2550     Type: Log
2551     Description: "Cold-Migrate rejected for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2552     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2553     Severity: critical
2554     Alarm_Type: equipment
2555     Probable_Cause: unspecified-reason
2556     Service_Affecting: False
2557     Context: openstack
2558
2559 700.160:
2560     Type: Log
2561     Description: "Cold-Migrate cancelled for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2562     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2563     Severity: critical
2564     Alarm_Type: equipment
2565     Probable_Cause: unspecified-reason
2566     Service_Affecting: False
2567     Context: openstack
2568
2569 700.161:
2570     Type: Log
2571     Description: "Cold-Migrate failed for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2572     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2573     Severity: critical
2574     Alarm_Type: equipment
2575     Probable_Cause: unspecified-reason
2576     Service_Affecting: False
2577     Context: openstack
2578
2579 700.162:
2580     Type: Log
2581     Description: Cold-Migrate complete for instance <instance_name> now enabled on host <host_name>
2582     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2583     Severity: critical
2584     Alarm_Type: equipment
2585     Probable_Cause: unspecified-reason
2586     Service_Affecting: False
2587     Context: openstack
2588
2589 700.163:
2590     Type: Log
2591     Description: "Cold-Migrate-Confirm issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2592     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2593     Severity: critical
2594     Alarm_Type: equipment
2595     Probable_Cause: unspecified-reason
2596     Service_Affecting: False
2597     Context: openstack
2598
2599 700.164:
2600     Type: Log
2601     Description: Cold-Migrate-Confirm in progress for instance <instance_name> on host <host_name>
2602     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2603     Severity: critical
2604     Alarm_Type: equipment
2605     Probable_Cause: unspecified-reason
2606     Service_Affecting: False
2607     Context: openstack
2608
2609 700.165:
2610     Type: Log
2611     Description: "Cold-Migrate-Confirm rejected for instance <instance_name> now enabled on host <host_name>[, reason = <reason_text>]"
2612     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2613     Severity: critical
2614     Alarm_Type: equipment
2615     Probable_Cause: unspecified-reason
2616     Service_Affecting: False
2617     Context: openstack
2618
2619 700.166:
2620     Type: Log
2621     Description: "Cold-Migrate-Confirm cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2622     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2623     Severity: critical
2624     Alarm_Type: equipment
2625     Probable_Cause: unspecified-reason
2626     Service_Affecting: False
2627     Context: openstack
2628
2629 700.167:
2630     Type: Log
2631     Description: "Cold-Migrate-Confirm failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2632     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2633     Severity: critical
2634     Alarm_Type: equipment
2635     Probable_Cause: unspecified-reason
2636     Service_Affecting: False
2637     Context: openstack
2638
2639 700.168:
2640     Type: Log
2641     Description: Cold-Migrate-Confirm complete for instance <instance_name> enabled on host <host_name>
2642     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2643     Severity: critical
2644     Alarm_Type: equipment
2645     Probable_Cause: unspecified-reason
2646     Service_Affecting: False
2647     Context: openstack
2648
2649 700.169:
2650     Type: Log
2651     Description: "Cold-Migrate-Revert issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2652     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2653     Severity: critical
2654     Alarm_Type: equipment
2655     Probable_Cause: unspecified-reason
2656     Service_Affecting: False
2657     Context: openstack
2658
2659 700.170:
2660     Type: Log
2661     Description: Cold-Migrate-Revert in progress for instance <instance_name> from host <host_name>
2662     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2663     Severity: critical
2664     Alarm_Type: equipment
2665     Probable_Cause: unspecified-reason
2666     Service_Affecting: False
2667     Context: openstack
2668
2669 700.171:
2670     Type: Log
2671     Description: "Cold-Migrate-Revert rejected for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
2672     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2673     Severity: critical
2674     Alarm_Type: equipment
2675     Probable_Cause: unspecified-reason
2676     Service_Affecting: False
2677     Context: openstack
2678
2679 700.172:
2680     Type: Log
2681     Description: "Cold-Migrate-Revert cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2682     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2683     Severity: critical
2684     Alarm_Type: equipment
2685     Probable_Cause: unspecified-reason
2686     Service_Affecting: False
2687     Context: openstack
2688
2689 700.173:
2690     Type: Log
2691     Description: "Cold-Migrate-Revert failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2692     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2693     Severity: critical
2694     Alarm_Type: equipment
2695     Probable_Cause: unspecified-reason
2696     Service_Affecting: False
2697     Context: openstack
2698
2699 700.174:
2700     Type: Log
2701     Description: Cold-Migrate-Revert complete for instance <instance_name> now enabled on host <host_name>
2702     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2703     Severity: critical
2704     Alarm_Type: equipment
2705     Probable_Cause: unspecified-reason
2706     Service_Affecting: False
2707     Context: openstack
2708
2709 700.175:
2710     Type: Log
2711     Description: "Evacuate issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2712     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2713     Severity: critical
2714     Alarm_Type: equipment
2715     Probable_Cause: unspecified-reason
2716     Service_Affecting: False
2717     Context: openstack
2718
2719 700.176:
2720     Type: Log
2721     Description: Evacuating instance <instance_name> owned by <tenant_name> from host <host_name>
2722     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2723     Severity: critical
2724     Alarm_Type: equipment
2725     Probable_Cause: unspecified-reason
2726     Service_Affecting: False
2727     Context: openstack
2728
2729 700.177:
2730     Type: Log
2731     Description: "Evacuate rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2732     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2733     Severity: critical
2734     Alarm_Type: equipment
2735     Probable_Cause: unspecified-reason
2736     Service_Affecting: False
2737     Context: openstack
2738
2739 700.178:
2740     Type: Log
2741     Description: "Evacuate cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2742     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2743     Severity: critical
2744     Alarm_Type: equipment
2745     Probable_Cause: unspecified-reason
2746     Service_Affecting: False
2747     Context: openstack
2748
2749 700.179:
2750     Type: Log
2751     Description: "Evacuate failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2752     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2753     Severity: critical
2754     Alarm_Type: equipment
2755     Probable_Cause: unspecified-reason
2756     Service_Affecting: False
2757     Context: openstack
2758
2759 700.180:
2760     Type: Log
2761     Description: Evacuate complete for instance <instance_name> now enabled on host <host_name>
2762     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2763     Severity: critical
2764     Alarm_Type: equipment
2765     Probable_Cause: unspecified-reason
2766     Service_Affecting: False
2767     Context: openstack
2768
2769 700.181:
2770     Type: Log
2771     Description: "Reboot <(soft-reboot)|(hard-reboot)> issued <by <tenant_name>|by the system|by the instance> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2772     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2773     Severity: critical
2774     Alarm_Type: equipment
2775     Probable_Cause: unspecified-reason
2776     Service_Affecting: False
2777     Context: openstack
2778
2779 700.182:
2780     Type: Log
2781     Description: Reboot in progress for instance <instance_name> on host <host_name>
2782     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2783     Severity: critical
2784     Alarm_Type: equipment
2785     Probable_Cause: unspecified-reason
2786     Service_Affecting: False
2787     Context: openstack
2788
2789 700.183:
2790     Type: Log
2791     Description: "Reboot rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2792     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2793     Severity: critical
2794     Alarm_Type: equipment
2795     Probable_Cause: unspecified-reason
2796     Service_Affecting: False
2797     Context: openstack
2798
2799 700.184:
2800     Type: Log
2801     Description: "Reboot cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2802     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2803     Severity: critical
2804     Alarm_Type: equipment
2805     Probable_Cause: unspecified-reason
2806     Service_Affecting: False
2807     Context: openstack
2808
2809 700.185:
2810     Type: Log
2811     Description: "Reboot failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2812     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2813     Severity: critical
2814     Alarm_Type: equipment
2815     Probable_Cause: unspecified-reason
2816     Service_Affecting: False
2817     Context: openstack
2818
2819 700.186:
2820     Type: Log
2821     Description: Reboot complete for instance <instance_name> now enabled on host <host_name>
2822     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2823     Severity: critical
2824     Alarm_Type: equipment
2825     Probable_Cause: unspecified-reason
2826     Service_Affecting: False
2827     Context: openstack
2828
2829 700.187:
2830     Type: Log
2831     Description: "Rebuild issued <by <tenant_name>|by the system> against instance <instance_name> using image <image_name> on host <host_name>[, reason = <reason_text>]"
2832     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2833     Severity: critical
2834     Alarm_Type: equipment
2835     Probable_Cause: unspecified-reason
2836     Service_Affecting: False
2837     Context: openstack
2838
2839 700.188:
2840     Type: Log
2841     Description: Rebuild in progress for instance <instance_name> on host <host_name>
2842     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2843     Severity: critical
2844     Alarm_Type: equipment
2845     Probable_Cause: unspecified-reason
2846     Service_Affecting: False
2847     Context: openstack
2848
2849 700.189:
2850     Type: Log
2851     Description: "Rebuild rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2852     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2853     Severity: critical
2854     Alarm_Type: equipment
2855     Probable_Cause: unspecified-reason
2856     Service_Affecting: False
2857     Context: openstack
2858
2859 700.190:
2860     Type: Log
2861     Description: "Rebuild cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2862     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2863     Severity: critical
2864     Alarm_Type: equipment
2865     Probable_Cause: unspecified-reason
2866     Service_Affecting: False
2867     Context: openstack
2868
2869 700.191:
2870     Type: Log
2871     Description: "Rebuild failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2872     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2873     Severity: critical
2874     Alarm_Type: equipment
2875     Probable_Cause: unspecified-reason
2876     Service_Affecting: False
2877     Context: openstack
2878
2879 700.192:
2880     Type: Log
2881     Description: Rebuild complete for instance <instance_name> now enabled on host <host_name>
2882     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2883     Severity: critical
2884     Alarm_Type: equipment
2885     Probable_Cause: unspecified-reason
2886     Service_Affecting: False
2887     Context: openstack
2888
2889 700.193:
2890     Type: Log
2891     Description: "Resize issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2892     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2893     Severity: critical
2894     Alarm_Type: equipment
2895     Probable_Cause: unspecified-reason
2896     Service_Affecting: False
2897     Context: openstack
2898
2899 700.194:
2900     Type: Log
2901     Description: Resize in progress for instance <instance_name> on host <host_name>
2902     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2903     Severity: critical
2904     Alarm_Type: equipment
2905     Probable_Cause: unspecified-reason
2906     Service_Affecting: False
2907     Context: openstack
2908
2909 700.195:
2910     Type: Log
2911     Description: "Resize rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2912     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2913     Severity: critical
2914     Alarm_Type: equipment
2915     Probable_Cause: unspecified-reason
2916     Service_Affecting: False
2917     Context: openstack
2918
2919 700.196:
2920     Type: Log
2921     Description: "Resize cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2922     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2923     Severity: critical
2924     Alarm_Type: equipment
2925     Probable_Cause: unspecified-reason
2926     Service_Affecting: False
2927     Context: openstack
2928
2929 700.197:
2930     Type: Log
2931     Description: "Resize failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2932     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2933     Severity: critical
2934     Alarm_Type: equipment
2935     Probable_Cause: unspecified-reason
2936     Service_Affecting: False
2937     Context: openstack
2938
2939 700.198:
2940     Type: Log
2941     Description: Resize complete for instance <instance_name> enabled on host <host_name> waiting for confirmation
2942     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2943     Severity: critical
2944     Alarm_Type: equipment
2945     Probable_Cause: unspecified-reason
2946     Service_Affecting: False
2947     Context: openstack
2948
2949 700.199:
2950     Type: Log
2951     Description: "Resize-Confirm issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
2952     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2953     Severity: critical
2954     Alarm_Type: equipment
2955     Probable_Cause: unspecified-reason
2956     Service_Affecting: False
2957     Context: openstack
2958
2959 700.200:
2960     Type: Log
2961     Description: Resize-Confirm in progress for instance <instance_name> on host <host_name>
2962     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2963     Severity: critical
2964     Alarm_Type: equipment
2965     Probable_Cause: unspecified-reason
2966     Service_Affecting: False
2967     Context: openstack
2968
2969 700.201:
2970     Type: Log
2971     Description: "Resize-Confirm rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2972     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2973     Severity: critical
2974     Alarm_Type: equipment
2975     Probable_Cause: unspecified-reason
2976     Service_Affecting: False
2977     Context: openstack
2978
2979 700.202:
2980     Type: Log
2981     Description: "Resize-Confirm cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2982     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2983     Severity: critical
2984     Alarm_Type: equipment
2985     Probable_Cause: unspecified-reason
2986     Service_Affecting: False
2987     Context: openstack
2988
2989 700.203:
2990     Type: Log
2991     Description: "Resize-Confirm failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
2992     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
2993     Severity: critical
2994     Alarm_Type: equipment
2995     Probable_Cause: unspecified-reason
2996     Service_Affecting: False
2997     Context: openstack
2998
2999 700.204:
3000     Type: Log
3001     Description: Resize-Confirm complete for instance <instance_name> enabled on host <host_name>
3002     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
3003     Severity: critical
3004     Alarm_Type: equipment
3005     Probable_Cause: unspecified-reason
3006     Service_Affecting: False
3007     Context: openstack
3008
3009 700.205:
3010     Type: Log
3011     Description: "Resize-Revert issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
3012     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
3013     Severity: critical
3014     Alarm_Type: equipment
3015     Probable_Cause: unspecified-reason
3016     Service_Affecting: False
3017     Context: openstack
3018
3019 700.206:
3020     Type: Log
3021     Description: Resize-Revert in progress for instance <instance_name> on host <host_name>
3022     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
3023     Severity: critical
3024     Alarm_Type: equipment
3025     Probable_Cause: unspecified-reason
3026     Service_Affecting: False
3027     Context: openstack
3028
3029 700.207:
3030     Type: Log
3031     Description: "Resize-Revert rejected for instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
3032     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
3033     Severity: critical
3034     Alarm_Type: equipment
3035     Probable_Cause: unspecified-reason
3036     Service_Affecting: False
3037     Context: openstack
3038
3039 700.208:
3040     Type: Log
3041     Description: "Resize-Revert cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
3042     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
3043     Severity: critical
3044     Alarm_Type: equipment
3045     Probable_Cause: unspecified-reason
3046     Service_Affecting: False
3047     Context: openstack
3048
3049 700.209:
3050     Type: Log
3051     Description: "Resize-Revert failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
3052     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
3053     Severity: critical
3054     Alarm_Type: equipment
3055     Probable_Cause: unspecified-reason
3056     Service_Affecting: False
3057     Context: openstack
3058
3059 700.210:
3060     Type: Log
3061     Description: Resize-Revert complete for instance <instance_name> enabled on host <host_name>
3062     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
3063     Severity: critical
3064     Alarm_Type: equipment
3065     Probable_Cause: unspecified-reason
3066     Service_Affecting: False
3067     Context: openstack
3068
3069 700.211:
3070     Type: Log
3071     Description: Guest Heartbeat established for instance <instance_name> on host <host_name>
3072     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
3073     Severity: major
3074     Alarm_Type: equipment
3075     Probable_Cause: unspecified-reason
3076     Service_Affecting: False
3077     Context: none
3078
3079 700.212:
3080     Type: Log
3081     Description: Guest Heartbeat disconnected for instance <instance_name> on host <host_name>
3082     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
3083     Severity: major
3084     Alarm_Type: equipment
3085     Probable_Cause: unspecified-reason
3086     Service_Affecting: False
3087     Context: none
3088
3089 700.213:
3090     Type: Log
3091     Description: "Guest Heartbeat failed for instance <instance_name>[, reason = <reason_text>]"
3092     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
3093     Severity: critical
3094     Alarm_Type: equipment
3095     Probable_Cause: unspecified-reason
3096     Service_Affecting: False
3097     Context: none
3098
3099 700.214:
3100     Type: Log
3101     Description: Instance <instance_name> has been renamed to <new_instance_name> owned by <tenant_name> on host <host_name>
3102     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
3103     Severity: critical
3104     Alarm_Type: equipment
3105     Probable_Cause: unspecified-reason
3106     Service_Affecting: False
3107     Context: openstack
3108
3109 700.215:
3110     Type: Log
3111     Description: "Guest Health Check failed for instance <instance_name>[, reason = <reason_text>]"
3112     Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
3113     Severity: critical
3114     Alarm_Type: equipment
3115     Probable_Cause: unspecified-reason
3116     Service_Affecting: False
3117     Context: openstack
3118
3119 700.216:
3120     Type: Log
3121     Description: "Entered Multi-Node Recovery Mode"
3122     Entity_Instance_ID: subsystem=vim
3123     Severity: critical
3124     Alarm_Type: equipment
3125     Probable_Cause: unspecified-reason
3126     Service_Affecting: False
3127     Context: openstack
3128
3129
3130 700.217:
3131     Type: Log
3132     Description: "Exited Multi-Node Recovery Mode"
3133     Entity_Instance_ID: subsystem=vim
3134     Severity: critical
3135     Alarm_Type: equipment
3136     Probable_Cause: unspecified-reason
3137     Service_Affecting: False
3138     Context: openstack
3139
3140 #---------------------------------------------------------------------------
3141 #   APPLICATION
3142 #---------------------------------------------------------------------------
3143
3144 750.001:
3145     Type: Alarm
3146     Description: "Application Upload Failure"
3147     Entity_Instance_ID: k8s_application=<appname>
3148     Severity: warning
3149     Proposed_Repair_Action: "Check system inventory log for cause."
3150     Maintenance_Action:
3151     Inhibit_Alarms:
3152     Alarm_Type: processing-error
3153     Probable_Cause: unknown
3154     Service_Affecting: False
3155     Suppression: True
3156     Management_Affecting_Severity: none
3157     Degrade_Affecting_Severity: none
3158     Context: starlingx
3159
3160 750.002:
3161     Type: Alarm
3162     Description: "Application Apply Failure"
3163     Entity_Instance_ID: k8s_application=<appname>
3164     Severity: major
3165     Proposed_Repair_Action: "Retry applying the application. Check that the application is managed by the system application framework.
3166                              If the issue persists, please check system inventory log for cause."
3167     Maintenance_Action:
3168     Inhibit_Alarms:
3169     Alarm_Type: processing-error
3170     Probable_Cause: unknown
3171     Service_Affecting: True
3172     Suppression: True
3173     Management_Affecting_Severity: none
3174     Degrade_Affecting_Severity: none
3175     Context: starlingx
3176
3177 750.003:
3178     Type: Alarm
3179     Description: "Application Remove Failure"
3180     Entity_Instance_ID: k8s_application=<appname>
3181     Severity: major
3182     Proposed_Repair_Action: "Retry removing the application. If the issue persists, please check system inventory log for cause."
3183     Maintenance_Action:
3184     Inhibit_Alarms:
3185     Alarm_Type: processing-error
3186     Probable_Cause: unknown
3187     Service_Affecting: True
3188     Suppression: True
3189     Management_Affecting_Severity: none
3190     Degrade_Affecting_Severity: none
3191     Context: starlingx
3192
3193 750.004:
3194     Type: Alarm
3195     Description: "Application Apply In Progress"
3196     Entity_Instance_ID: k8s_application=<appname>
3197     Severity: warning
3198     Proposed_Repair_Action: "No action required."
3199     Maintenance_Action:
3200     Inhibit_Alarms:
3201     Alarm_Type: other
3202     Probable_Cause: unknown
3203     Service_Affecting: True
3204     Suppression: True
3205     Management_Affecting_Severity: warning
3206     Degrade_Affecting_Severity: none
3207     Context: starlingx
3208
3209 750.005:
3210     Type: Alarm
3211     Description: "Application Update In Progress"
3212     Entity_Instance_ID: k8s_application=<appname>
3213     Severity: warning
3214     Proposed_Repair_Action: "No action required."
3215     Maintenance_Action:
3216     Inhibit_Alarms:
3217     Alarm_Type: other
3218     Probable_Cause: unknown
3219     Service_Affecting: True
3220     Suppression: True
3221     Management_Affecting_Severity: warning
3222     Degrade_Affecting_Severity: none
3223     Context: starlingx
3224
3225 750.006:
3226     Type: Alarm
3227     Description: "Automatic Application Re-Apply Is Pending"
3228     Entity_Instance_ID: k8s_application=<appname>
3229     Severity: warning
3230     Proposed_Repair_Action: "Ensure all hosts are either locked or unlocked.  When the system is stable the application will be automatically reapplied."
3231     Maintenance_Action:
3232     Inhibit_Alarms:
3233     Alarm_Type: other
3234     Probable_Cause: unknown
3235     Service_Affecting: False
3236     Suppression: True
3237     Management_Affecting_Severity: none
3238     Degrade_Affecting_Severity: none
3239     Context: starlingx
3240
3241 #---------------------------------------------------------------------------
3242 #   STORAGE
3243 #---------------------------------------------------------------------------
3244
3245 800.001:
3246     Type: Alarm
3247     Description: |-
3248         Possible data loss. Any mds, mon or osd is unavailable in storage replication group.
3249     Entity_Instance_ID: cluster=<dist-fs-uuid>
3250     Severity: [critical, major]
3251     Proposed_Repair_Action: "Manually restart Ceph processes and check the state of the Ceph cluster with
3252                              'ceph -s'.
3253                              If problem persists, contact next level of support."
3254     Maintenance_Action:
3255     Inhibit_Alarms:
3256     Alarm_Type: equipment
3257     Probable_Cause: equipment-malfunction
3258     Service_Affecting:
3259         critical: True
3260         major: False
3261     Suppression: False
3262     Management_Affecting_Severity: warning
3263     Degrade_Affecting_Severity: none
3264     Context: starlingx
3265
3266 800.010:
3267     Type: Alarm
3268     Description: |-
3269         Potential data loss. No available OSDs in storage replication group.
3270     Entity_Instance_ID: cluster=<dist-fs-uuid>.peergroup=<group-x>
3271     Severity: [critical]
3272     Proposed_Repair_Action: "Ensure storage hosts from replication group are unlocked and available.
3273                              Check replication group state with 'system host-list'.
3274                              Check if OSDs of each storage host are up and running.
3275                              Manually restart Ceph processes and check the state of the Ceph OSDs with
3276                              'ceph osd stat' OR 'ceph osd tree'.
3277                              If problem persists, contact next level of support."
3278     Maintenance_Action:
3279     Inhibit_Alarms:
3280     Alarm_Type: equipment
3281     Probable_Cause: equipment-malfunction
3282     Service_Affecting:
3283         critical: True
3284     Suppression: False
3285     Management_Affecting_Severity: warning
3286     Degrade_Affecting_Severity: none
3287     Context: starlingx
3288
3289 800.011:
3290     Type: Alarm
3291     Description: |-
3292         Loss of replication in peergroup.
3293     Entity_Instance_ID: cluster=<dist-fs-uuid>.peergroup=<group-x>
3294     Severity: [major]
3295     Proposed_Repair_Action: "Ensure storage hosts from replication group are unlocked and available.
3296                              Check replication group state with 'system host-list'.
3297                              Check if OSDs of each storage host are up and running.
3298                              Manually restart Ceph processes and check the state of the Ceph OSDs with
3299                              'ceph osd stat' AND/OR 'ceph osd tree'.
3300                              If problem persists, contact next level of support."
3301     Maintenance_Action:
3302     Inhibit_Alarms:
3303     Alarm_Type: equipment
3304     Probable_Cause: equipment-malfunction
3305     Service_Affecting:
3306         major: True
3307     Suppression: False
3308     Management_Affecting_Severity: warning
3309     Degrade_Affecting_Severity: none
3310     Context: starlingx
3311
3312 800.002:
3313     Type: Alarm
3314     Description: |-
3315         Image storage media is full: There is not enough disk space on the image storage media.
3316         OR
3317         Instance <instance name> snapshot failed: There is not enough disk space on the image storage media.
3318         OR
3319         Supplied <attrs> (<supplied>) and <attrs> generated from uploaded image (<actual>) did not match. Setting image status to 'killed'.
3320         OR
3321         Error in store configuration. Adding images to store is disabled.
3322         OR
3323         Forbidden upload attempt: <exception>.
3324         OR
3325         Insufficient permissions on image storage media: <exception>.
3326         OR
3327         Denying attempt to upload image larger than <size> bytes.
3328         OR
3329         Denying attempt to upload image because it exceeds the quota: <exception>.
3330         OR
3331         Received HTTP error while uploading image <image_id>.
3332         OR
3333         Client disconnected before sending all data to backend.
3334         OR
3335         Failed to upload image <image_id>.
3336     Entity_Instance_ID:
3337         image=<image-uuid> instance=<instance-uuid>
3338         OR
3339         tenant=<tenant-uuid> instance=<instance-uuid>
3340         OR
3341         image=<image-uuid> instance=<instance-uuid>
3342         OR
3343         image=<image-uuid> instance=<instance-uuid>
3344         OR
3345         image=<image-uuid> instance=<instance-uuid>
3346         OR
3347         image=<image-uuid> instance=<instance-uuid>
3348         OR
3349         image=<image-uuid> instance=<instance-uuid>
3350         OR
3351         image=<image-uuid> instance=<instance-uuid>
3352         OR
3353         image=<image-uuid> instance=<instance-uuid>
3354         OR
3355         image=<image-uuid> instance=<instance-uuid>
3356         OR
3357         image=<image-uuid> instance=<instance-uuid>
3358     Alarm_Type: [physical-violation,
3359                  physical-violation,
3360                  integrity-violation,
3361                  integrity-violation,
3362                  security-service-or-mechanism-violation,
3363                  security-service-or-mechanism-violation,
3364                  security-service-or-mechanism-violation,
3365                  security-service-or-mechanism-violation,
3366                  communication,
3367                  communication,
3368                  operational-violation]
3369     Severity: warning
3370     Proposed_Repair_Action:
3371     Maintenance_Action:
3372     Inhibit_Alarms:
3373     Probable_Cause: unspecified-reason
3374     Service_Affecting: False
3375     Suppression: False
3376     Management_Affecting_Severity: none
3377     Degrade_Affecting_Severity: none
3378     Context: openstack
3379
3380 800.100:
3381     Type: Alarm
3382     Description: |-
3383         Storage Alarm Condition:
3384         Cinder I/O Congestion is above normal range and is building
3385     Entity_Instance_ID: cinder_io_monitor
3386     Severity: major
3387     Proposed_Repair_Action: "Reduce the I/O load on the Cinder LVM backend. Use
3388                              Cinder QoS mechanisms on high usage volumes."
3389     Maintenance_Action:
3390     Inhibit_Alarms:
3391     Alarm_Type: qos
3392     Probable_Cause: congestion
3393     Service_Affecting: False
3394     Suppression: False
3395     Management_Affecting_Severity: none
3396     Degrade_Affecting_Severity: none
3397     Context: openstack
3398
3399 800.101:
3400     Type: Alarm
3401     Description: |-
3402         Storage Alarm Condition:
3403         Cinder I/O Congestion is high and impacting guest performance
3404     Entity_Instance_ID: cinder_io_monitor
3405     Severity: critical
3406     Proposed_Repair_Action: "Reduce the I/O load on the Cinder LVM backend.
3407                              Cinder actions may fail until congestion is reduced.
3408                              Use Cinder QoS mechanisms on high usage volumes."
3409     Maintenance_Action:
3410     Inhibit_Alarms:
3411     Alarm_Type: qos
3412     Probable_Cause: congestion
3413     Service_Affecting: False
3414     Suppression: False
3415     Management_Affecting_Severity: warning
3416     Degrade_Affecting_Severity: none
3417     Context: openstack
3418
3419 800.104:
3420     Type: Alarm
3421     Description: |-
3422         Storage Alarm Condition:
3423         <storage-backend-name> configuration failed to apply on host: <host-uuid>.
3424     Entity_Instance_ID: storage_backend=<storage-backend-name>
3425     Severity: critical
3426     Proposed_Repair_Action: "Update backend setting to reapply configuration.
3427                              Use the following commands to try again:
3428                              'system storage-backend-delete <storage-backend-name>'
3429                              AND
3430                              'system storage-backend-add <storage-backend-name>'
3431                              See the |prod-long| documentation at |docs-url| for more details.
3432                              If problem persists, contact next level of support."
3433     Maintenance_Action:
3434     Inhibit_Alarms:
3435     Alarm_Type: equipment
3436     Probable_Cause: configuration-or-customization-error
3437     Service_Affecting: True
3438     Suppression: False
3439     Management_Affecting_Severity: major
3440     Degrade_Affecting_Severity: none
3441     Context: starlingx
3442
3443 800.105:
3444     Type: Alarm
3445     Description: |-
3446         Filesystem Alarm Condition:
3447         <controllerfs_name> controller filesystem was not created/deleted successfully.
3448     Entity_Instance_ID: host=<hostname>.controllerfs=<controllerfs_name>
3449     Severity: major
3450     Proposed_Repair_Action: "Use the create or delete command again:
3451                              'system controllerfs-delete' or 'system controllerfs-add'.
3452                              If problem persists, contact next level of support."
3453     Maintenance_Action:
3454     Inhibit_Alarms:
3455     Alarm_Type: processing-error
3456     Probable_Cause: unspecified-reason
3457     Service_Affecting: True
3458     Suppression: False
3459     Management_Affecting_Severity: major
3460     Degrade_Affecting_Severity: none
3461     Context: starlingx
3462
3463 #---------------------------------------------------------------------------
3464 #   KUBERNETES
3465 #---------------------------------------------------------------------------
3466
3467 850.001:
3468     Type: Alarm
3469     Description: Persistent Volume Migration Error
3470     Entity_Instance_ID: kubernetes=PV-migration-failed
3471     Severity: major
3472     Proposed_Repair_Action: "Manually execute /usr/bin/ceph_k8s_update_monitors.sh
3473                              to confirm PVs are updated, then lock/unlock to clear
3474                              alarms. If problem persists, contact next level of
3475                              support."
3476     Maintenance_Action:
3477     Inhibit_Alarms:
3478     Alarm_Type: processing-error
3479     Probable_Cause: communication-subsystem-failure
3480     Service_Affecting: False
3481     Suppression: False
3482     Management_Affecting_Severity: none
3483     Degrade_Affecting_Severity: none
3484     Context: none
3485
3486 850.002:
3487     Type: Alarm
3488     Description: Kubernetes cluster unreachable
3489     Entity_Instance_ID: kubernetes=k8s-health-check-failed
3490     Severity: major
3491     Proposed_Repair_Action: "If problem persists,
3492                              contact next level of support."
3493     Maintenance_Action:
3494     Inhibit_Alarms:
3495     Alarm_Type: communication
3496     Probable_Cause: out-of-service
3497     Service_Affecting: True
3498     Suppression: False
3499     Management_Affecting_Severity: major
3500     Degrade_Affecting_Severity: none
3501     Context: none
3502
3503 #---------------------------------------------------------------------------
3504 #   SOFTWARE
3505 #---------------------------------------------------------------------------
3506
3507 900.001:
3508     Type: Alarm
3509     Description: Patching operation in progress.
3510     Entity_Instance_ID: host=controller
3511     Severity: minor
3512     Proposed_Repair_Action: Complete reboots of affected hosts.
3513     Maintenance_Action:
3514     Inhibit_Alarms:
3515     Alarm_Type: environmental
3516     Probable_Cause: unspecified-reason
3517     Service_Affecting: False
3518     Suppression: False
3519     Management_Affecting_Severity: warning
3520     Degrade_Affecting_Severity: none
3521     Context: starlingx
3522
3523 900.002:
3524     Type: Alarm
3525     Description: Patch host install failure. Command "sw-patch host-install" failed.
3526     Entity_Instance_ID: host=<hostname>
3527     Severity: major
3528     Proposed_Repair_Action: Undo patching operation. Check patch logs on the target host (i.e. /var/log/patching.log)
3529     Maintenance_Action:
3530     Inhibit_Alarms:
3531     Alarm_Type: environmental
3532     Probable_Cause: unspecified-reason
3533     Service_Affecting: False
3534     Suppression: False
3535     Management_Affecting_Severity: warning
3536     Degrade_Affecting_Severity: none
3537     Context: starlingx
3538
3539 900.003:
3540     Type: Alarm
3541     Description: A patch with state 'obsolete' in its metadata has been uploaded.
3542     Entity_Instance_ID: host=controller
3543     Severity: warning
3544     Proposed_Repair_Action: Remove and delete obsolete patches.
3545     Maintenance_Action:
3546     Inhibit_Alarms:
3547     Alarm_Type: environmental
3548     Probable_Cause: unspecified-reason
3549     Service_Affecting: False
3550     Suppression: False
3551     Management_Affecting_Severity: warning
3552     Degrade_Affecting_Severity: none
3553     Context: starlingx
3554
3555 900.004:
3556     Type: Alarm
3557     Description: The upgrade and running software version do not match. Command host-upgrade failed.
3558     Entity_Instance_ID: host=<hostname>
3559     Severity: major
3560     Proposed_Repair_Action: Reinstall host to update applied load.
3561     Maintenance_Action:
3562     Inhibit_Alarms:
3563     Alarm_Type: operational-violation
3564     Probable_Cause: unspecified-reason
3565     Service_Affecting: True
3566     Suppression: False
3567     Management_Affecting_Severity: warning
3568     Degrade_Affecting_Severity: none
3569     Context: starlingx
3570
3571 900.005:
3572     Type: Alarm
3573     Description: System Upgrade in progress.
3574     Entity_Instance_ID: host=controller
3575     Severity: minor
3576     Proposed_Repair_Action: No action required.
3577     Maintenance_Action:
3578     Inhibit_Alarms:
3579     Alarm_Type: operational-violation
3580     Probable_Cause: unspecified-reason
3581     Service_Affecting: False
3582     Suppression: False
3583     Management_Affecting_Severity: warning
3584     Degrade_Affecting_Severity: none
3585     Context: starlingx
3586
3587 900.006:
3588     Type: Alarm
3589     Description: Device image update operation in progress.
3590     Entity_Instance_ID: host=controller
3591     Severity: minor
3592     Proposed_Repair_Action: Complete reboots of affected hosts.
3593     Maintenance_Action:
3594     Inhibit_Alarms:
3595     Alarm_Type: environmental
3596     Probable_Cause: unspecified-reason
3597     Service_Affecting: False
3598     Suppression: False
3599     Management_Affecting_Severity: warning
3600     Degrade_Affecting_Severity: none
3601     Context: starlingx
3602
3603 900.007:
3604     Type: Alarm
3605     Description: Kubernetes upgrade in progress.
3606     Entity_Instance_ID: host=controller
3607     Severity: minor
3608     Proposed_Repair_Action: No action required.
3609     Maintenance_Action:
3610     Inhibit_Alarms:
3611     Alarm_Type: operational-violation
3612     Probable_Cause: unspecified-reason
3613     Service_Affecting: False
3614     Suppression: False
3615     Management_Affecting_Severity: warning
3616     Degrade_Affecting_Severity: none
3617     Context: starlingx
3618
3619 900.008:
3620     Type: Alarm
3621     Description: Kubernetes rootca update in progress
3622     Entity_Instance_ID: host=controller
3623     Severity: minor
3624     Proposed_Repair_Action: Wait for kubernetes rootca procedure to complete
3625     Maintenance_Action:
3626     Inhibit_Alarms:
3627     Alarm_Type: operational-violation
3628     Probable_Cause: unspecified-reason
3629     Service_Affecting: False
3630     Suppression: False
3631     Management_Affecting_Severity: warning
3632     Degrade_Affecting_Severity: none
3633     Context: starlingx
3634
3635 900.009:
3636     Type: Alarm
3637     Description: Kubernetes root CA update aborted, certificates may not be fully updated. Command "system kube-rootca-update-abort" has been run.
3638     Entity_Instance_ID: host=controller
3639     Severity: minor
3640     Proposed_Repair_Action: Fully update certificates by a new root CA update.
3641     Maintenance_Action:
3642     Inhibit_Alarms:
3643     Alarm_Type: operational-violation
3644     Probable_Cause: unspecified-reason
3645     Service_Affecting: False
3646     Suppression: False
3647     Management_Affecting_Severity: warning
3648     Degrade_Affecting_Severity: none
3649     Context: starlingx
3650
3651 900.010:
3652     Type: Alarm
3653     Description: System Config update in progress
3654     Entity_Instance_ID: host=controller
3655     Severity: minor
3656     Proposed_Repair_Action: Wait for system config update to complete
3657     Maintenance_Action:
3658     Inhibit_Alarms:
3659     Alarm_Type: operational-violation
3660     Probable_Cause: unspecified-reason
3661     Service_Affecting: False
3662     Suppression: False
3663     Management_Affecting_Severity: warning
3664     Degrade_Affecting_Severity: none
3665     Context: starlingx
3666
3667 900.011:
3668     Type: Alarm
3669     Description: System Config update aborted, configurations may not be fully updated
3670     Entity_Instance_ID: host=<hostname>
3671     Severity: minor
3672     Proposed_Repair_Action: Lock the host, wait for the host resource in the deployment namespace to become in-sync, then unlock the host
3673     Maintenance_Action:
3674     Inhibit_Alarms:
3675     Alarm_Type: operational-violation
3676     Probable_Cause: unspecified-reason
3677     Service_Affecting: False
3678     Suppression: False
3679     Management_Affecting_Severity: warning
3680     Degrade_Affecting_Severity: none
3681     Context: starlingx
3682
3683 900.020:
3684     Type: Alarm
3685     Description: Deploy host completed with success
3686     Entity_Instance_ID: host=<hostname>
3687     Severity: warning
3688     Proposed_Repair_Action: Unlock host
3689     Maintenance_Action:
3690     Inhibit_Alarms:
3691     Alarm_Type: equipment
3692     Probable_Cause: unspecified-reason
3693     Service_Affecting: False
3694     Suppression: False
3695     Management_Affecting_Severity: none
3696     Degrade_Affecting_Severity: none
3697     Context: starlingx
3698
3699 900.021:
3700     Type: Alarm
3701     Description: Deploy host failed
3702     Entity_Instance_ID: host=<hostname>
3703     Severity: major
3704     Proposed_Repair_Action: Check the logs for errors, fix the issues manually and retry
3705     Maintenance_Action:
3706     Inhibit_Alarms:
3707     Alarm_Type: equipment
3708     Probable_Cause: unspecified-reason
3709     Service_Affecting: True
3710     Suppression: False
3711     Management_Affecting_Severity: warning
3712     Degrade_Affecting_Severity: none
3713     Context: starlingx
3714
3715 900.101:
3716     Type: Alarm
3717     Description: Software patch auto-apply in progress
3718     Entity_Instance_ID: orchestration=sw-patch
3719     Severity: major
3720     Proposed_Repair_Action: Wait for software patch auto-apply to complete; if problem persists contact next level of support
3721     Maintenance_Action:
3722     Inhibit_Alarms:
3723     Alarm_Type: equipment
3724     Probable_Cause: unspecified-reason
3725     Service_Affecting: True
3726     Suppression: True
3727     Management_Affecting_Severity: warning
3728     Degrade_Affecting_Severity: none
3729     Context: starlingx
3730
3731 900.102:
3732     Type: Alarm
3733     Description: Software patch auto-apply aborting
3734     Entity_Instance_ID: orchestration=sw-patch
3735     Severity: major
3736     Proposed_Repair_Action: Wait for software patch auto-apply abort to complete; if problem persists contact next level of support
3737     Maintenance_Action:
3738     Inhibit_Alarms:
3739     Alarm_Type: equipment
3740     Probable_Cause: unspecified-reason
3741     Service_Affecting: True
3742     Suppression: True
3743     Management_Affecting_Severity: warning
3744     Degrade_Affecting_Severity: none
3745     Context: starlingx
3746
3747 900.103:
3748     Type: Alarm
3749     Description: Software patch auto-apply failed. Command "sw-manager patch-strategy apply" failed.
3750     Entity_Instance_ID: orchestration=sw-patch
3751     Severity: critical
3752     Proposed_Repair_Action: Attempt to apply software patches manually; if problem persists contact next level of support
3753     Maintenance_Action:
3754     Inhibit_Alarms:
3755     Alarm_Type: equipment
3756     Probable_Cause: underlying-resource-unavailable
3757     Service_Affecting: True
3758     Suppression: True
3759     Management_Affecting_Severity: warning
3760     Degrade_Affecting_Severity: none
3761     Context: starlingx
3762
3763 900.111:
3764     Type: Log
3765     Description: Software patch auto-apply start
3766     Entity_Instance_ID: orchestration=sw-patch
3767     Severity: critical
3768     Alarm_Type: equipment
3769     Probable_Cause: unspecified-reason
3770     Service_Affecting: False
3771     Context: starlingx
3772
3773 900.112:
3774     Type: Log
3775     Description: Software patch auto-apply in progress
3776     Entity_Instance_ID: orchestration=sw-patch
3777     Severity: critical
3778     Alarm_Type: equipment
3779     Probable_Cause: unspecified-reason
3780     Service_Affecting: False
3781     Context: starlingx
3782
3783 900.113:
3784     Type: Log
3785     Description: Software patch auto-apply rejected
3786     Entity_Instance_ID: orchestration=sw-patch
3787     Severity: critical
3788     Alarm_Type: equipment
3789     Probable_Cause: unspecified-reason
3790     Service_Affecting: False
3791     Context: starlingx
3792
3793 900.114:
3794     Type: Log
3795     Description: Software patch auto-apply cancelled
3796     Entity_Instance_ID: orchestration=sw-patch
3797     Severity: critical
3798     Alarm_Type: equipment
3799     Probable_Cause: unspecified-reason
3800     Service_Affecting: False
3801     Context: starlingx
3802
3803 900.115:
3804     Type: Log
3805     Description: Software patch auto-apply failed
3806     Entity_Instance_ID: orchestration=sw-patch
3807     Severity: critical
3808     Alarm_Type: equipment
3809     Probable_Cause: unspecified-reason
3810     Service_Affecting: False
3811     Context: starlingx
3812
3813 900.116:
3814     Type: Log
3815     Description: Software patch auto-apply completed
3816     Entity_Instance_ID: orchestration=sw-patch
3817     Severity: critical
3818     Alarm_Type: equipment
3819     Probable_Cause: unspecified-reason
3820     Service_Affecting: False
3821     Context: starlingx
3822
3823 900.117:
3824     Type: Log
3825     Description: Software patch auto-apply abort
3826     Entity_Instance_ID: orchestration=sw-patch
3827     Severity: critical
3828     Alarm_Type: equipment
3829     Probable_Cause: unspecified-reason
3830     Service_Affecting: False
3831     Context: starlingx
3832
3833 900.118:
3834     Type: Log
3835     Description: Software patch auto-apply aborting
3836     Entity_Instance_ID: orchestration=sw-patch
3837     Severity: critical
3838     Alarm_Type: equipment
3839     Probable_Cause: unspecified-reason
3840     Service_Affecting: False
3841     Context: starlingx
3842
3843 900.119:
3844     Type: Log
3845     Description: Software patch auto-apply abort rejected
3846     Entity_Instance_ID: orchestration=sw-patch
3847     Severity: critical
3848     Alarm_Type: equipment
3849     Probable_Cause: unspecified-reason
3850     Service_Affecting: False
3851     Context: starlingx
3852
3853 900.120:
3854     Type: Log
3855     Description: Software patch auto-apply abort failed
3856     Entity_Instance_ID: orchestration=sw-patch
3857     Severity: critical
3858     Alarm_Type: equipment
3859     Probable_Cause: unspecified-reason
3860     Service_Affecting: False
3861     Context: starlingx
3862
3863 900.121:
3864     Type: Log
3865     Description: Software patch auto-apply aborted
3866     Entity_Instance_ID: orchestration=sw-patch
3867     Severity: critical
3868     Alarm_Type: equipment
3869     Probable_Cause: unspecified-reason
3870     Service_Affecting: False
3871     Context: starlingx
3872
3873 900.201:
3874     Type: Alarm
3875     Description: Software upgrade auto-apply in progress
3876     Entity_Instance_ID: orchestration=sw-upgrade
3877     Severity: major
3878     Proposed_Repair_Action: Wait for software upgrade auto-apply to complete; if problem persists contact next level of support
3879     Maintenance_Action:
3880     Inhibit_Alarms:
3881     Alarm_Type: equipment
3882     Probable_Cause: unspecified-reason
3883     Service_Affecting: True
3884     Suppression: True
3885     Management_Affecting_Severity: warning
3886     Degrade_Affecting_Severity: none
3887     Context: starlingx
3888
3889 900.202:
3890     Type: Alarm
3891     Description: Software upgrade auto-apply aborting
3892     Entity_Instance_ID: orchestration=sw-upgrade
3893     Severity: major
3894     Proposed_Repair_Action: Wait for software upgrade auto-apply abort to complete; if problem persists contact next level of support
3895     Maintenance_Action:
3896     Inhibit_Alarms:
3897     Alarm_Type: equipment
3898     Probable_Cause: unspecified-reason
3899     Service_Affecting: True
3900     Suppression: True
3901     Management_Affecting_Severity: warning
3902     Degrade_Affecting_Severity: none
3903     Context: starlingx
3904
3905 900.203:
3906     Type: Alarm
3907     Description: Software upgrade auto-apply failed. Command "sw-manager update-strategy apply" failed
3908     Entity_Instance_ID: orchestration=sw-upgrade
3909     Severity: critical
3910     Proposed_Repair_Action: Attempt to apply software upgrade manually; if problem persists contact next level of support
3911     Maintenance_Action:
3912     Inhibit_Alarms:
3913     Alarm_Type: equipment
3914     Probable_Cause: underlying-resource-unavailable
3915     Service_Affecting: True
3916     Suppression: True
3917     Management_Affecting_Severity: warning
3918     Degrade_Affecting_Severity: none
3919     Context: starlingx
3920
3921 900.211:
3922     Type: Log
3923     Description: Software upgrade auto-apply start
3924     Entity_Instance_ID: orchestration=sw-upgrade
3925     Severity: critical
3926     Alarm_Type: equipment
3927     Probable_Cause: unspecified-reason
3928     Service_Affecting: False
3929     Context: starlingx
3930
3931 900.212:
3932     Type: Log
3933     Description: Software upgrade auto-apply in progress
3934     Entity_Instance_ID: orchestration=sw-upgrade
3935     Severity: critical
3936     Alarm_Type: equipment
3937     Probable_Cause: unspecified-reason
3938     Service_Affecting: False
3939     Context: starlingx
3940
3941 900.213:
3942     Type: Log
3943     Description: Software upgrade auto-apply rejected
3944     Entity_Instance_ID: orchestration=sw-upgrade
3945     Severity: critical
3946     Alarm_Type: equipment
3947     Probable_Cause: unspecified-reason
3948     Service_Affecting: False
3949     Context: starlingx
3950
3951 900.214:
3952     Type: Log
3953     Description: Software upgrade auto-apply cancelled
3954     Entity_Instance_ID: orchestration=sw-upgrade
3955     Severity: critical
3956     Alarm_Type: equipment
3957     Probable_Cause: unspecified-reason
3958     Service_Affecting: False
3959     Context: starlingx
3960
3961 900.215:
3962     Type: Log
3963     Description: Software upgrade auto-apply failed
3964     Entity_Instance_ID: orchestration=sw-upgrade
3965     Severity: critical
3966     Alarm_Type: equipment
3967     Probable_Cause: unspecified-reason
3968     Service_Affecting: False
3969     Context: starlingx
3970
3971 900.216:
3972     Type: Log
3973     Description: Software upgrade auto-apply completed
3974     Entity_Instance_ID: orchestration=sw-upgrade
3975     Severity: critical
3976     Alarm_Type: equipment
3977     Probable_Cause: unspecified-reason
3978     Service_Affecting: False
3979     Context: starlingx
3980
3981 900.217:
3982     Type: Log
3983     Description: Software upgrade auto-apply abort
3984     Entity_Instance_ID: orchestration=sw-upgrade
3985     Severity: critical
3986     Alarm_Type: equipment
3987     Probable_Cause: unspecified-reason
3988     Service_Affecting: False
3989     Context: starlingx
3990
3991 900.218:
3992     Type: Log
3993     Description: Software upgrade auto-apply aborting
3994     Entity_Instance_ID: orchestration=sw-upgrade
3995     Severity: critical
3996     Alarm_Type: equipment
3997     Probable_Cause: unspecified-reason
3998     Service_Affecting: False
3999     Context: starlingx
4000
4001 900.219:
4002     Type: Log
4003     Description: Software upgrade auto-apply abort rejected
4004     Entity_Instance_ID: orchestration=sw-upgrade
4005     Severity: critical
4006     Alarm_Type: equipment
4007     Probable_Cause: unspecified-reason
4008     Service_Affecting: False
4009     Context: starlingx
4010
4011 900.220:
4012     Type: Log
4013     Description: Software upgrade auto-apply abort failed
4014     Entity_Instance_ID: orchestration=sw-upgrade
4015     Severity: critical
4016     Alarm_Type: equipment
4017     Probable_Cause: unspecified-reason
4018     Service_Affecting: False
4019     Context: starlingx
4020
4021 900.221:
4022     Type: Log
4023     Description: Software upgrade auto-apply aborted
4024     Entity_Instance_ID: orchestration=sw-upgrade
4025     Severity: critical
4026     Alarm_Type: equipment
4027     Probable_Cause: unspecified-reason
4028     Service_Affecting: False
4029     Context: starlingx
4030
4031 900.231:
4032     Type: Alarm
4033     Description: Software deploy state out of sync
4034     Entity_Instance_ID: orchestration=sw-upgrade
4035     Severity: major
4036     Proposed_Repair_Action: Wait for the deployment on the active controller to complete. If problem persists contact next level of support
4037     Maintenance_Action:
4038     Inhibit_Alarms:
4039     Alarm_Type: equipment
4040     Probable_Cause: unspecified-reason
4041     Service_Affecting: True
4042     Suppression: False
4043     Management_Affecting_Severity: warning
4044     Degrade_Affecting_Severity: none
4045     Context: starlingx
4046
4047 900.301:
4048     Type: Alarm
4049     Description: Firmware Update auto-apply in progress
4050     Entity_Instance_ID: orchestration=fw-update
4051     Severity: major
4052     Proposed_Repair_Action: Wait for firmware update auto-apply to complete; if problem persists contact next level of support
4053     Maintenance_Action:
4054     Inhibit_Alarms:
4055     Alarm_Type: equipment
4056     Probable_Cause: unspecified-reason
4057     Service_Affecting: True
4058     Suppression: True
4059     Management_Affecting_Severity: warning
4060     Degrade_Affecting_Severity: none
4061     Context: starlingx
4062
4063 900.302:
4064     Type: Alarm
4065     Description: Firmware Update auto-apply aborting
4066     Entity_Instance_ID: orchestration=fw-update
4067     Severity: major
4068     Proposed_Repair_Action: Wait for firmware update auto-apply abort to complete; if problem persists contact next level of support
4069     Maintenance_Action:
4070     Inhibit_Alarms:
4071     Alarm_Type: equipment
4072     Probable_Cause: unspecified-reason
4073     Service_Affecting: True
4074     Suppression: True
4075     Management_Affecting_Severity: warning
4076     Degrade_Affecting_Severity: none
4077     Context: starlingx
4078
4079 900.303:
4080     Type: Alarm
4081     Description: Firmware Update auto-apply failed. Command "sw-manager fw-update-strategy apply" failed.
4082     Entity_Instance_ID: orchestration=fw-update
4083     Severity: critical
4084     Proposed_Repair_Action: Attempt to apply firmware update manually; if problem persists contact next level of support
4085     Maintenance_Action:
4086     Inhibit_Alarms:
4087     Alarm_Type: equipment
4088     Probable_Cause: underlying-resource-unavailable
4089     Service_Affecting: True
4090     Suppression: True
4091     Management_Affecting_Severity: warning
4092     Degrade_Affecting_Severity: none
4093     Context: starlingx
4094
4095 900.311:
4096     Type: Log
4097     Description: Firmware update auto-apply start
4098     Entity_Instance_ID: orchestration=fw-update
4099     Severity: critical
4100     Alarm_Type: equipment
4101     Probable_Cause: unspecified-reason
4102     Service_Affecting: False
4103     Context: starlingx
4104
4105 900.312:
4106     Type: Log
4107     Description: Firmware update auto-apply in progress
4108     Entity_Instance_ID: orchestration=fw-update
4109     Severity: critical
4110     Alarm_Type: equipment
4111     Probable_Cause: unspecified-reason
4112     Service_Affecting: False
4113     Context: starlingx
4114
4115 900.313:
4116     Type: Log
4117     Description: Firmware update auto-apply rejected
4118     Entity_Instance_ID: orchestration=fw-update
4119     Severity: critical
4120     Alarm_Type: equipment
4121     Probable_Cause: unspecified-reason
4122     Service_Affecting: False
4123     Context: starlingx
4124
4125 900.314:
4126     Type: Log
4127     Description: Firmware update auto-apply cancelled
4128     Entity_Instance_ID: orchestration=fw-update
4129     Severity: critical
4130     Alarm_Type: equipment
4131     Probable_Cause: unspecified-reason
4132     Service_Affecting: False
4133     Context: starlingx
4134
4135 900.315:
4136     Type: Log
4137     Description: Firmware update auto-apply failed
4138     Entity_Instance_ID: orchestration=fw-update
4139     Severity: critical
4140     Alarm_Type: equipment
4141     Probable_Cause: unspecified-reason
4142     Service_Affecting: False
4143     Context: starlingx
4144
4145 900.316:
4146     Type: Log
4147     Description: Firmware update auto-apply completed
4148     Entity_Instance_ID: orchestration=fw-update
4149     Severity: critical
4150     Alarm_Type: equipment
4151     Probable_Cause: unspecified-reason
4152     Service_Affecting: False
4153     Context: starlingx
4154
4155 900.317:
4156     Type: Log
4157     Description: Firmware update auto-apply abort
4158     Entity_Instance_ID: orchestration=fw-update
4159     Severity: critical
4160     Alarm_Type: equipment
4161     Probable_Cause: unspecified-reason
4162     Service_Affecting: False
4163     Context: starlingx
4164
4165 900.318:
4166     Type: Log
4167     Description: Firmware update auto-apply aborting
4168     Entity_Instance_ID: orchestration=fw-update
4169     Severity: critical
4170     Alarm_Type: equipment
4171     Probable_Cause: unspecified-reason
4172     Service_Affecting: False
4173     Context: starlingx
4174
4175 900.319:
4176     Type: Log
4177     Description: Firmware update auto-apply abort rejected
4178     Entity_Instance_ID: orchestration=fw-update
4179     Severity: critical
4180     Alarm_Type: equipment
4181     Probable_Cause: unspecified-reason
4182     Service_Affecting: False
4183     Context: starlingx
4184
4185 900.320:
4186     Type: Log
4187     Description: Firmware update auto-apply abort failed
4188     Entity_Instance_ID: orchestration=fw-update
4189     Severity: critical
4190     Alarm_Type: equipment
4191     Probable_Cause: unspecified-reason
4192     Service_Affecting: False
4193     Context: starlingx
4194
4195 900.321:
4196     Type: Log
4197     Description: Firmware update auto-apply aborted
4198     Entity_Instance_ID: orchestration=fw-update
4199     Severity: critical
4200     Alarm_Type: equipment
4201     Probable_Cause: unspecified-reason
4202     Service_Affecting: False
4203     Context: starlingx
4204
4205 900.401:
4206     Type: Alarm
4207     Description: Kubernetes upgrade auto-apply in progress
4208     Entity_Instance_ID: orchestration=kube-upgrade
4209     Severity: major
4210     Proposed_Repair_Action: Wait for kubernetes upgrade auto-apply to complete; if problem persists contact next level of support
4211     Maintenance_Action:
4212     Inhibit_Alarms:
4213     Alarm_Type: equipment
4214     Probable_Cause: unspecified-reason
4215     Service_Affecting: True
4216     Suppression: True
4217     Management_Affecting_Severity: warning
4218     Degrade_Affecting_Severity: none
4219     Context: none
4220
4221 900.402:
4222     Type: Alarm
4223     Description: Kubernetes upgrade auto-apply aborting
4224     Entity_Instance_ID: orchestration=kube-upgrade
4225     Severity: major
4226     Proposed_Repair_Action: Wait for kubernetes upgrade auto-apply abort to complete; if problem persists contact next level of support
4227     Maintenance_Action:
4228     Inhibit_Alarms:
4229     Alarm_Type: equipment
4230     Probable_Cause: unspecified-reason
4231     Service_Affecting: True
4232     Suppression: True
4233     Management_Affecting_Severity: warning
4234     Degrade_Affecting_Severity: none
4235     Context: none
4236
4237 900.403:
4238     Type: Alarm
4239     Description: Kubernetes upgrade auto-apply failed
4240     Entity_Instance_ID: orchestration=kube-upgrade
4241     Severity: critical
4242     Proposed_Repair_Action: Attempt to apply kubernetes upgrade manually; if problem persists contact next level of support
4243     Maintenance_Action:
4244     Inhibit_Alarms:
4245     Alarm_Type: equipment
4246     Probable_Cause: underlying-resource-unavailable
4247     Service_Affecting: True
4248     Suppression: True
4249     Management_Affecting_Severity: warning
4250     Degrade_Affecting_Severity: none
4251     Context: none
4252
4253 900.411:
4254     Type: Log
4255     Description: Kubernetes upgrade auto-apply start
4256     Entity_Instance_ID: orchestration=kube-upgrade
4257     Severity: critical
4258     Alarm_Type: equipment
4259     Probable_Cause: unspecified-reason
4260     Service_Affecting: False
4261     Context: none
4262
4263 900.412:
4264     Type: Log
4265     Description: Kubernetes upgrade auto-apply in progress
4266     Entity_Instance_ID: orchestration=kube-upgrade
4267     Severity: critical
4268     Alarm_Type: equipment
4269     Probable_Cause: unspecified-reason
4270     Service_Affecting: False
4271     Context: none
4272
4273 900.413:
4274     Type: Log
4275     Description: Kubernetes upgrade auto-apply rejected
4276     Entity_Instance_ID: orchestration=kube-upgrade
4277     Severity: critical
4278     Alarm_Type: equipment
4279     Probable_Cause: unspecified-reason
4280     Service_Affecting: False
4281     Context: none
4282
4283 900.414:
4284     Type: Log
4285     Description: Kubernetes upgrade auto-apply cancelled
4286     Entity_Instance_ID: orchestration=kube-upgrade
4287     Severity: critical
4288     Alarm_Type: equipment
4289     Probable_Cause: unspecified-reason
4290     Service_Affecting: False
4291     Context: none
4292
4293 900.415:
4294     Type: Log
4295     Description: Kubernetes upgrade auto-apply failed
4296     Entity_Instance_ID: orchestration=kube-upgrade
4297     Severity: critical
4298     Alarm_Type: equipment
4299     Probable_Cause: unspecified-reason
4300     Service_Affecting: False
4301     Context: none
4302
4303 900.416:
4304     Type: Log
4305     Description: Kubernetes upgrade auto-apply completed
4306     Entity_Instance_ID: orchestration=kube-upgrade
4307     Severity: critical
4308     Alarm_Type: equipment
4309     Probable_Cause: unspecified-reason
4310     Service_Affecting: False
4311     Context: none
4312
4313 900.417:
4314     Type: Log
4315     Description: Kubernetes upgrade auto-apply abort
4316     Entity_Instance_ID: orchestration=kube-upgrade
4317     Severity: critical
4318     Alarm_Type: equipment
4319     Probable_Cause: unspecified-reason
4320     Service_Affecting: False
4321     Context: none
4322
4323 900.418:
4324     Type: Log
4325     Description: Kubernetes upgrade auto-apply aborting
4326     Entity_Instance_ID: orchestration=kube-upgrade
4327     Severity: critical
4328     Alarm_Type: equipment
4329     Probable_Cause: unspecified-reason
4330     Service_Affecting: False
4331     Context: none
4332
4333 900.419:
4334     Type: Log
4335     Description: Kubernetes upgrade auto-apply abort rejected
4336     Entity_Instance_ID: orchestration=kube-upgrade
4337     Severity: critical
4338     Alarm_Type: equipment
4339     Probable_Cause: unspecified-reason
4340     Service_Affecting: False
4341     Context: none
4342
4343 900.420:
4344     Type: Log
4345     Description: Kubernetes upgrade auto-apply abort failed
4346     Entity_Instance_ID: orchestration=kube-upgrade
4347     Severity: critical
4348     Alarm_Type: equipment
4349     Probable_Cause: unspecified-reason
4350     Service_Affecting: False
4351     Context: none
4352
4353 900.421:
4354     Type: Log
4355     Description: Kubernetes upgrade auto-apply aborted
4356     Entity_Instance_ID: orchestration=kube-upgrade
4357     Severity: critical
4358     Alarm_Type: equipment
4359     Probable_Cause: unspecified-reason
4360     Service_Affecting: False
4361     Context: none
4362
4363 900.501:
4364     Type: Alarm
4365     Description: Kubernetes rootca update auto-apply in progress
4366     Entity_Instance_ID: orchestration=kube-rootca-update
4367     Severity: major
4368     Proposed_Repair_Action: Wait for kubernetes rootca update auto-apply to complete; if problem persists contact next level of support
4369     Maintenance_Action:
4370     Inhibit_Alarms:
4371     Alarm_Type: equipment
4372     Probable_Cause: unspecified-reason
4373     Service_Affecting: True
4374     Suppression: True
4375     Management_Affecting_Severity: warning
4376     Degrade_Affecting_Severity: none
4377     Context: starlingx
4378
4379 900.502:
4380     Type: Alarm
4381     Description: Kubernetes rootca update auto-apply aborting
4382     Entity_Instance_ID: orchestration=kube-rootca-update
4383     Severity: major
4384     Proposed_Repair_Action: Wait for kubernetes rootca update auto-apply abort to complete; if problem persists contact next level of support
4385     Maintenance_Action:
4386     Inhibit_Alarms:
4387     Alarm_Type: equipment
4388     Probable_Cause: unspecified-reason
4389     Service_Affecting: True
4390     Suppression: True
4391     Management_Affecting_Severity: warning
4392     Degrade_Affecting_Severity: none
4393     Context: starlingx
4394
4395 900.503:
4396     Type: Alarm
4397     Description: Kubernetes rootca update auto-apply failed. Command "sw-manager kube-rootca-update-strategy apply" failed.
4398     Entity_Instance_ID: orchestration=kube-rootca-update
4399     Severity: critical
4400     Proposed_Repair_Action: Attempt to apply kubernetes rootca update manually; if problem persists contact next level of support
4401     Maintenance_Action:
4402     Inhibit_Alarms:
4403     Alarm_Type: equipment
4404     Probable_Cause: underlying-resource-unavailable
4405     Service_Affecting: True
4406     Suppression: True
4407     Management_Affecting_Severity: warning
4408     Degrade_Affecting_Severity: none
4409     Context: starlingx
4410
4411 900.511:
4412     Type: Log
4413     Description: Kubernetes rootca update auto-apply start
4414     Entity_Instance_ID: orchestration=kube-rootca-update
4415     Severity: critical
4416     Alarm_Type: equipment
4417     Probable_Cause: unspecified-reason
4418     Service_Affecting: False
4419     Context: starlingx
4420
4421 900.512:
4422     Type: Log
4423     Description: Kubernetes rootca update auto-apply in progress
4424     Entity_Instance_ID: orchestration=kube-rootca-update
4425     Severity: critical
4426     Alarm_Type: equipment
4427     Probable_Cause: unspecified-reason
4428     Service_Affecting: False
4429     Context: starlingx
4430
4431 900.513:
4432     Type: Log
4433     Description: Kubernetes rootca update auto-apply rejected
4434     Entity_Instance_ID: orchestration=kube-rootca-update
4435     Severity: critical
4436     Alarm_Type: equipment
4437     Probable_Cause: unspecified-reason
4438     Service_Affecting: False
4439     Context: starlingx
4440
4441 900.514:
4442     Type: Log
4443     Description: Kubernetes rootca update auto-apply cancelled
4444     Entity_Instance_ID: orchestration=kube-rootca-update
4445     Severity: critical
4446     Alarm_Type: equipment
4447     Probable_Cause: unspecified-reason
4448     Service_Affecting: False
4449     Context: starlingx
4450
4451 900.515:
4452     Type: Log
4453     Description: Kubernetes rootca update auto-apply failed
4454     Entity_Instance_ID: orchestration=kube-rootca-update
4455     Severity: critical
4456     Alarm_Type: equipment
4457     Probable_Cause: unspecified-reason
4458     Service_Affecting: False
4459     Context: starlingx
4460
4461 900.516:
4462     Type: Log
4463     Description: Kubernetes rootca update auto-apply completed
4464     Entity_Instance_ID: orchestration=kube-rootca-update
4465     Severity: critical
4466     Alarm_Type: equipment
4467     Probable_Cause: unspecified-reason
4468     Service_Affecting: False
4469     Context: starlingx
4470
4471 900.517:
4472     Type: Log
4473     Description: Kubernetes rootca update auto-apply abort
4474     Entity_Instance_ID: orchestration=kube-rootca-update
4475     Severity: critical
4476     Alarm_Type: equipment
4477     Probable_Cause: unspecified-reason
4478     Service_Affecting: False
4479     Context: starlingx
4480
4481 900.518:
4482     Type: Log
4483     Description: Kubernetes rootca update auto-apply aborting
4484     Entity_Instance_ID: orchestration=kube-rootca-update
4485     Severity: critical
4486     Alarm_Type: equipment
4487     Probable_Cause: unspecified-reason
4488     Service_Affecting: False
4489     Context: starlingx
4490
4491 900.519:
4492     Type: Log
4493     Description: Kubernetes rootca update auto-apply abort rejected
4494     Entity_Instance_ID: orchestration=kube-rootca-update
4495     Severity: critical
4496     Alarm_Type: equipment
4497     Probable_Cause: unspecified-reason
4498     Service_Affecting: False
4499     Context: starlingx
4500
4501 900.520:
4502     Type: Log
4503     Description: Kubernetes rootca update auto-apply abort failed
4504     Entity_Instance_ID: orchestration=kube-rootca-update
4505     Severity: critical
4506     Alarm_Type: equipment
4507     Probable_Cause: unspecified-reason
4508     Service_Affecting: False
4509     Context: starlingx
4510
4511 900.521:
4512     Type: Log
4513     Description: Kubernetes rootca update auto-apply aborted
4514     Entity_Instance_ID: orchestration=kube-rootca-update
4515     Severity: critical
4516     Alarm_Type: equipment
4517     Probable_Cause: unspecified-reason
4518     Service_Affecting: False
4519     Context: starlingx
4520
4521 900.601:
4522     Type: Alarm
4523     Description: System config update auto-apply in progress
4524     Entity_Instance_ID: orchestration=system-config-update
4525     Severity: major
4526     Proposed_Repair_Action: Wait for system config update auto-apply to complete; if problem persists contact next level of support
4527     Maintenance_Action:
4528     Inhibit_Alarms:
4529     Alarm_Type: equipment
4530     Probable_Cause: unspecified-reason
4531     Service_Affecting: True
4532     Suppression: True
4533     Management_Affecting_Severity: warning
4534     Degrade_Affecting_Severity: none
4535     Context: starlingx
4536
4537 900.602:
4538     Type: Alarm
4539     Description: System config update auto-apply aborting
4540     Entity_Instance_ID: orchestration=system-config-update
4541     Severity: major
4542     Proposed_Repair_Action: Wait for system config update auto-apply abort to complete; if problem persists contact next level of support
4543     Maintenance_Action:
4544     Inhibit_Alarms:
4545     Alarm_Type: equipment
4546     Probable_Cause: unspecified-reason
4547     Service_Affecting: True
4548     Suppression: True
4549     Management_Affecting_Severity: warning
4550     Degrade_Affecting_Severity: none
4551     Context: starlingx
4552
4553 900.603:
4554     Type: Alarm
4555     Description: System config update auto-apply failed. Command "sw-manager system-config-update-strategy apply" failed
4556     Entity_Instance_ID: orchestration=system-config-update
4557     Severity: critical
4558     Proposed_Repair_Action: Attempt to apply system config update manually; if problem persists contact next level of support
4559     Maintenance_Action:
4560     Inhibit_Alarms:
4561     Alarm_Type: equipment
4562     Probable_Cause: underlying-resource-unavailable
4563     Service_Affecting: True
4564     Suppression: True
4565     Management_Affecting_Severity: warning
4566     Degrade_Affecting_Severity: none
4567     Context: starlingx
4568
4569 900.611:
4570     Type: Log
4571     Description: System config update auto-apply start
4572     Entity_Instance_ID: orchestration=system-config-update
4573     Severity: critical
4574     Alarm_Type: equipment
4575     Probable_Cause: unspecified-reason
4576     Service_Affecting: False
4577     Context: starlingx
4578
4579 900.612:
4580     Type: Log
4581     Description: System config update auto-apply in progress
4582     Entity_Instance_ID: orchestration=system-config-update
4583     Severity: critical
4584     Alarm_Type: equipment
4585     Probable_Cause: unspecified-reason
4586     Service_Affecting: False
4587     Context: starlingx
4588
4589 900.613:
4590     Type: Log
4591     Description: System config update auto-apply rejected
4592     Entity_Instance_ID: orchestration=system-config-update
4593     Severity: critical
4594     Alarm_Type: equipment
4595     Probable_Cause: unspecified-reason
4596     Service_Affecting: False
4597     Context: starlingx
4598
4599 900.614:
4600     Type: Log
4601     Description: System config update auto-apply cancelled
4602     Entity_Instance_ID: orchestration=system-config-update
4603     Severity: critical
4604     Alarm_Type: equipment
4605     Probable_Cause: unspecified-reason
4606     Service_Affecting: False
4607     Context: starlingx
4608
4609 900.615:
4610     Type: Log
4611     Description: System config update auto-apply failed
4612     Entity_Instance_ID: orchestration=system-config-update
4613     Severity: critical
4614     Alarm_Type: equipment
4615     Probable_Cause: unspecified-reason
4616     Service_Affecting: False
4617     Context: starlingx
4618
4619 900.616:
4620     Type: Log
4621     Description: System config update auto-apply completed
4622     Entity_Instance_ID: orchestration=system-config-update
4623     Severity: critical
4624     Alarm_Type: equipment
4625     Probable_Cause: unspecified-reason
4626     Service_Affecting: False
4627     Context: starlingx
4628
4629 900.617:
4630     Type: Log
4631     Description: System config update auto-apply abort
4632     Entity_Instance_ID: orchestration=system-config-update
4633     Severity: critical
4634     Alarm_Type: equipment
4635     Probable_Cause: unspecified-reason
4636     Service_Affecting: False
4637     Context: starlingx
4638
4639 900.618:
4640     Type: Log
4641     Description: System config update auto-apply aborting
4642     Entity_Instance_ID: orchestration=system-config-update
4643     Severity: critical
4644     Alarm_Type: equipment
4645     Probable_Cause: unspecified-reason
4646     Service_Affecting: False
4647     Context: starlingx
4648
4649 900.619:
4650     Type: Log
4651     Description: System config update auto-apply abort rejected
4652     Entity_Instance_ID: orchestration=system-config-update
4653     Severity: critical
4654     Alarm_Type: equipment
4655     Probable_Cause: unspecified-reason
4656     Service_Affecting: False
4657     Context: starlingx
4658
4659 900.620:
4660     Type: Log
4661     Description: System config update auto-apply abort failed
4662     Entity_Instance_ID: orchestration=system-config-update
4663     Severity: critical
4664     Alarm_Type: equipment
4665     Probable_Cause: unspecified-reason
4666     Service_Affecting: False
4667     Context: starlingx
4668
4669 900.621:
4670     Type: Log
4671     Description: System config update auto-apply aborted
4672     Entity_Instance_ID: orchestration=system-config-update
4673     Severity: critical
4674     Alarm_Type: equipment
4675     Probable_Cause: unspecified-reason
4676     Service_Affecting: False
4677     Context: starlingx
4678
4679 900.701:
4680     Type: Alarm
4681     Description: Node <hostname> tainted.
4682     Entity_Instance_ID: host=<hostname>
4683     Severity: major
4684     Proposed_Repair_Action: |-
4685             Execute 'kubectl taint nodes <hostname> services=disabled:NoExecute-'.
4686             If it fails, execute 'system host-lock <hostname>' followed by
4687             'system host-unlock <hostname>'.
4688             If the issue still persists, contact next level of support.
4689     Maintenance_Action: none
4690     Inhibit_Alarms:
4691     Alarm_Type: operational-violation
4692     Probable_Cause: unknown
4693     Service_Affecting: True
4694     Suppression: False
4695     Management_Affecting_Severity: warning
4696     Degrade_Affecting_Severity: major
4697     Context: starlingx
4698 ...