Source code for saltext.sap_control._states.sap_control

"""
SaltStack extension for sapcontrol
Copyright (C) 2022 SAP UCC Magdeburg

sapcontrol state module
=======================
SaltStack module that implements states based on sapcontrol functionality.

:codeauthor:    Benjamin Wegener, Alexander Wilke
:maturity:      new
:depends:       N/A
:platform:      Linux

This module implements states that utilize sapcontrol functionality.

.. note::
    This module can only run on Linux platforms.
"""
import logging
import os
import re
import time
from datetime import datetime as dt


# Globals
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Syslog message texts that are to be ignored when checking the system health.
# Entries are matched against the complete message text (list membership), so
# every character - including the trailing blank below - is significant.
NON_CRITICAL_SYSLOG_ERRORS = [
    'Monitoring: Program RSUSR003 Reports "Security check passed" ',
]

# Name returned by ``__virtual__()`` for this module.
__virtualname__ = "sap_control"


def __virtual__():
    """
    Return the virtual name of this module.

    No checks are performed here; ``__virtualname__`` is returned unconditionally.
    """
    return __virtualname__


def _which(executable, runas=None):
    """
    Look up the path of ``executable`` by running ``which`` via ``cmd.run_all``.

    Differences to ``salt.utils.path.which()``:
     - Only works on Linux
     - Allows ``runas``

    executable
        Name of the executable to look up.

    runas
        User in whose context ``which`` is executed. If no ``runas`` is given,
        the salt minion user is used.

    Returns the standard output of ``which`` if it exited with return code 0,
    otherwise ``None``.
    """
    lookup = __salt__["cmd.run_all"](cmd=f"which {executable}", runas=runas)
    return None if lookup["retcode"] else lookup["stdout"]


# pylint: disable=unused-argument
def running(name, instance, username, password, restart=False, **kwargs):
    """
    Ensure that sapcontrol is started for an SID / instance.

    name
        The SID for which sapcontrol should be running.

    instance
        The instance for which sapcontrol should be running.

    username
        User with which to run all operations.

    password
        Password for the user.

    restart
        Boolean if sapcontrol should be restarted if it is already running, default is ``False``.

    Returns the state dictionary (``name``, ``changes``, ``result``, ``comment``). ``result`` is
    ``False`` if the status cannot be retrieved or the start / restart fails, ``None`` if running
    in test mode with pending changes and ``True`` otherwise.

    Example:

    .. code-block:: jinja

        sapcontrol for S4H / instance 00 is running:
          sap_control.running:
            - name: S4H
            - instance: '00'
            - username: sapadm
            - password: __slot__:salt:vault.read_secret(path="os", key="sapadm")

    .. note::
        This should not be used. Instead, a proper systemd service should be created that handles
        sapcontrol.
    """
    log.debug("Running function")
    ret = {
        "name": name,
        "changes": {},
        "result": False,
        "comment": "",
    }
    test_mode = __opts__["test"]

    is_running = __salt__["sap_control.status"](
        instance_number=instance, username=username, password=password
    )
    # anything but a boolean means that the status could not be determined
    if not isinstance(is_running, bool):
        msg = f"Cannot retrieve status for sapcontrol / instance {instance}"
        log.error(msg)
        ret["comment"] = msg
        return ret

    if is_running:
        log.debug("sapcontrol is running")
        if not restart:
            ret["comment"] = "sapcontrol is already running"
        elif test_mode:
            log.debug("Restarting sapcontrol")
            ret["comment"] = "sapcontrol would have been restarted"
            ret["changes"] = {
                "old": f"sapcontrol for {name} / {instance} was running",
                "new": f"sapcontrol for {name} / {instance} would have been restarted",
            }
        else:
            log.debug("Restarting sapcontrol")
            restarted = __salt__["sap_control.restart"](
                sid=name, instance_number=instance, username=username, password=password
            )
            if not isinstance(restarted, bool) or not restarted:
                msg = f"Cannot restart sapcontrol {name} / {instance}"
                log.error(msg)
                ret["comment"] = msg
                ret["result"] = False
                return ret
            ret["comment"] = "sapcontrol was restarted"
            ret["changes"] = {
                "old": f"sapcontrol for {name} / {instance} was running",
                "new": f"sapcontrol for {name} / {instance} was restarted",
            }
    else:
        log.debug("sapcontrol is not running, starting")
        if test_mode:
            ret["comment"] = "sapcontrol would have been started"
            ret["changes"] = {
                "old": f"sapcontrol for {name} / {instance} was not running",
                "new": f"sapcontrol for {name} / {instance} would have been started",
            }
        else:
            started = __salt__["sap_control.start"](
                sid=name, instance_number=instance, username=username, password=password
            )
            if not isinstance(started, bool) or not started:
                log.error(f"Cannot start sapcontrol for {name} / {instance}")
                ret["comment"] = f"Cannot start sapcontrol {name} / {instance}"
                return ret
            log.debug("sapcontrol was started")
            ret["comment"] = "sapcontrol was started"
            ret["changes"] = {
                "old": f"sapcontrol for {name} / {instance} was not running",
                "new": f"sapcontrol for {name} / {instance} was started",
            }

    # In test mode, pending changes are reported with result None; a test run
    # without pending changes (and every real run that got here) is a success.
    ret["result"] = None if (test_mode and ret["changes"]) else True
    return ret
# pylint: disable=unused-argument
def dead(name, instance, username, password, **kwargs):
    """
    Ensure that sapcontrol is stopped for an SID / instance.

    name
        The SID for which sapcontrol should be stopped.

    instance
        The instance for which sapcontrol should be stopped.

    username
        User with which to run all operations.

    password
        Password for the user.

    Returns the state dictionary (``name``, ``changes``, ``result``, ``comment``). ``result`` is
    ``False`` if the status cannot be retrieved or stopping fails, ``None`` if a stop would be
    required in test mode and ``True`` otherwise.

    Example:

    .. code-block:: jinja

        sapcontrol for S4H / instance 00 is stopped:
          sap_control.dead:
            - name: S4H
            - instance: '00'
            - username: sapadm
            - password: __slot__:salt:vault.read_secret(path="os", key="sapadm")

    .. note::
        This should not be used. Instead, a proper systemd service should be created that handles
        sapcontrol.
    """
    log.debug("Running function")
    ret = {"name": name, "changes": {}, "result": False, "comment": ""}
    connection = {"instance_number": instance, "username": username, "password": password}

    is_running = __salt__["sap_control.status"](**connection)
    if not isinstance(is_running, bool):
        msg = f"Cannot retrieve status for sapcontrol / instance {instance}"
        log.error(msg)
        ret["comment"] = msg
        return ret

    # nothing to do if sapcontrol is not running
    if not is_running:
        log.debug("sapcontrol is already stopped")
        ret["comment"] = "sapcontrol is already stopped"
        ret["result"] = True
        return ret

    log.debug("sapcontrol is running, stopping")

    # test mode: only report what would happen, result None signals pending changes
    if __opts__["test"]:
        ret["comment"] = "sapcontrol would have been stopped"
        ret["changes"] = {
            "old": f"sapcontrol for {name} / {instance} is running",
            "new": f"sapcontrol for {name} / {instance} would have been stopped",
        }
        ret["result"] = None
        return ret

    stopped = __salt__["sap_control.stop"](**connection)
    if not (isinstance(stopped, bool) and stopped):
        log.error("Cannot stop sapcontrol")
        ret["comment"] = "Cannot stop sapcontrol"
        return ret

    log.debug("sapcontrol was stopped")
    ret["comment"] = "sapcontrol was stopped"
    ret["changes"] = {
        "old": f"sapcontrol for {name} / {instance} was running",
        "new": f"sapcontrol for {name} / {instance} is not running",
    }
    ret["result"] = True
    return ret
def sld_registered(
    name,
    sid,
    instance_number,
    username,
    password,
    sld_user,
    sld_password,
    sld_host,
    sld_port,
    log_files=None,
    remove_logs=True,
    overwrite=False,
    sld_check_timeout=60,
    **kwargs,
):
    """
    Ensure that a sapcontrol instance is registered at an SLD / LMDB.

    If log files are defined (see argument ``log_files``), then each file will be checked for
    a correct HTTP return code.

    name
        Target slddest.cfg file.

    sid
        SID of the system.

    instance_number
        Instance number for which the SLD registration should take place.

    username
        Username for the sapcontrol connection.

    password
        Password for the sapcontrol connection.

    sld_user
        SLD connection username.

    sld_password
        SLD connection password.

    sld_host
        SLD connection fqdn.

    sld_port
        SLD connection port.

    log_files
        List of log files to check for success (full path).

    remove_logs
        Remove the logs before restarting the service. Default is ``True``.

    overwrite
        Configuration will not be checked but overwritten. Default is ``False``.

    sld_check_timeout
        How long the system will wait for a positive HTTP return code from the SLD
        in the defined logs. Default is ``60``.

    Returns the state dictionary (``name``, ``changes``, ``result``, ``comment``). ``result`` is
    ``False`` on any error, ``None`` if changes would be made in test mode and ``True`` otherwise.

    .. warning::
        In order to trigger the data transfer, sapcontrol will be restarted!

    .. note::
        No password check will be performed if all other configuration parameters fit. To circumvent
        this, set overwrite=True.

    Example:

    .. code-block:: jinja

        SLD is configured and data is transferred for S4H / 00:
          sap_control.sld_registered:
            - name: /usr/sap/S4H/SYS/global/slddest.cfg
            - sid: S4H
            - instance_number: '00'
            - username: s4hadm
            - password: __slot__:salt:vault.read_secret(path="os", key="s4hadm")
            - sld_user: SLD_DS_USER
            - sld_password: __slot__:salt:vault.read_secret(path="sld", key="SLD_DS_USER")
            - sld_host: sol.my.domain
            - sld_port: 50000
            - log_files:
              - /usr/sap/S4H/D00/work/dev_sldregs
              - /usr/sap/S4H/D00/work/dev_sldregk
              - /usr/sap/S4H/D00/work/dev_krnlreg
    """
    log.debug("Running function")
    ret = {
        "name": name,
        "changes": {},
        "result": False,
        "comment": "",
    }
    log_files = log_files or []
    test_mode = __opts__["test"]

    sldreg_bin = _which("sldreg", runas=username)
    if not sldreg_bin:
        msg = f"Could not determine path of sldreg for user {username}"
        log.error(msg)
        ret["comment"] = msg
        return ret
    # sldreg is executed with the directory of its binary as LD_LIBRARY_PATH
    sldreg_env = {"LD_LIBRARY_PATH": os.path.dirname(sldreg_bin)}

    log.debug("Checking for existing config")
    update_cfg = True
    if __salt__["file.file_exists"](name):
        log.debug("Getting existing config")
        cmd = " ".join([sldreg_bin, "-showconnect", name])
        result = __salt__["cmd.run_all"](cmd=cmd, runas=username, env=sldreg_env)
        if result["retcode"]:
            # a state must always return its result dictionary, never a bare boolean
            msg = f"Could not read existing configuration {name}"
            log.error(msg)
            ret["comment"] = msg
            return ret
        log.debug("Parse output")
        existing_config = _parse_sldreg_config(result["stdout"])
        # the parsed port is a string while sld_port may be an integer (e.g. from YAML)
        update_cfg = not (
            sld_user == existing_config.get("user_param")
            and sld_host == existing_config.get("host_param")
            and str(sld_port) == existing_config.get("port_param")
            and existing_config.get("https_param") == "y"
        )

    if not update_cfg and not overwrite:
        ret["comment"] = "No changes required"
        ret["result"] = True
        return ret

    log.debug("Updating configuration")
    if test_mode:
        ret["changes"]["config"] = f"Configuration {name} would have been updated"
    else:
        cmd = " ".join(
            [
                sldreg_bin,
                "-configure",
                name,
                "-usekeyfile",
                "-noninteractive",
                "-user",
                sld_user,
                "-pass",
                sld_password,
                "-host",
                sld_host,
                "-port",
                str(sld_port),
                "-usehttps",
            ]
        )
        result = __salt__["cmd.run_all"](cmd=cmd, runas=username, env=sldreg_env)
        if result["retcode"]:
            msg = "Could not update configuration"
            log.error(msg)
            ret["comment"] = msg
            return ret
        ret["changes"]["config"] = f"Configuration {name} updated"

    if remove_logs:
        log.debug("Removing log files")
        for log_file in log_files:
            if test_mode:
                # never touch the file system in test mode, only report
                if __salt__["file.file_exists"](log_file):
                    ret["changes"].setdefault("log_files", []).append(
                        f"Would have removed {log_file}"
                    )
            elif __salt__["file.remove"](log_file):
                ret["changes"].setdefault("log_files", []).append(f"Removed {log_file}")

    log.debug("Restarting sapcontrol to trigger SLD data transfer")
    if test_mode:
        ret["changes"]["sapcontrol"] = "Would have been restarted"
    else:
        result = __salt__["sap_control.restart"](
            sid=sid, instance_number=instance_number, username=username, password=password
        )
        if not result:
            ret["comment"] = "Could not restart sapcontrol"
            ret["result"] = False
            return ret
        ret["changes"]["sapcontrol"] = "Restarted"

    # the logs can only be evaluated if the registration was really triggered
    if not test_mode and log_files:
        log.debug("Checking log files for success")
        if not _wait_for_sld_success(log_files, sld_check_timeout):
            ret["comment"] = "SLD data transfer not successful"
            ret["result"] = False
            return ret

    subject = "SLD registration and data transfer" if log_files else "SLD registration"
    outcome = "would have been successful" if test_mode else "successful"
    ret["comment"] = f"{subject} {outcome}"
    ret["result"] = None if (test_mode and ret["changes"]) else True
    return ret


def _parse_sldreg_config(stdout):
    """
    Extract the connection parameters from the output of ``sldreg -showconnect``.

    Assumes lines that contain ``<param>=<value>`` with the value optionally enclosed in
    single quotes. Returns a dictionary with the keys ``host_param``, ``https_param``,
    ``port_param`` and ``user_param`` for every parameter that was found.
    """
    config = {}
    for line in stdout.splitlines():
        for param in ("host_param", "https_param", "port_param", "user_param"):
            idx = line.find(param)
            if idx < 0:
                continue
            _, sep, value = line[idx:].partition("=")
            # ignore lines that only mention the parameter without assigning it
            if sep:
                config[param] = value.strip().strip("'")
    return config


def _wait_for_sld_success(log_files, timeout):
    """
    Poll ``log_files`` until the last ``Return code`` of every file is 200.

    The files are checked at least once and then every 0.5 seconds until ``timeout`` seconds
    have passed. Returns ``True`` on success and ``False`` if the timeout was reached.
    """
    re_rc = re.compile(r"Return code: ([0-9]{3})")
    deadline = time.time() + timeout
    while True:
        all_success = True
        for log_file in log_files:
            log.debug(f"Checking {log_file}")
            try:
                log_file_data = __salt__["file.read"](log_file)
            except FileNotFoundError:
                log.debug(f"{log_file} does not (yet?) exist")
                all_success = False
                break
            return_codes = re_rc.findall(log_file_data)
            log.debug(f"Got result from checkup: {return_codes}")
            # only the most recent return code of a file is relevant
            if not return_codes or int(return_codes[-1]) != 200:
                all_success = False
                break
        if all_success:
            return True
        if time.time() >= deadline:
            return False
        time.sleep(0.5)
# pylint: disable=unused-argument
def system_health_ok(name, check_from, instance_number, username, password, **kwargs):
    """
    This state checks the system health by looking for Critical Syslog Entries and
    Work Process Errors. If errors are present in the system, the state will return
    ``False`` as result.

    name
        SID of the SAP system.

    check_from
        Date from which on the system health should be checked (e.g. for log entries)
        in the format 31129999 or 01012000. A value with fewer than eight characters
        (e.g. an integer that lost its leading zero) is left-padded with zeros.

    instance_number
        Instance number for which syslog errors should be retrieved.

    username
        Username for the sapcontrol connection.

    password
        Password for the sapcontrol connection.

    Returns the state dictionary (``name``, ``changes``, ``result``, ``comment``). On errors,
    ``comment`` is the list of findings and ``result`` is ``False``.

    .. note::
        This state does not implement ``__opts__["test"]`` since no data is changed.

    Example:

    .. code-block:: jinja

        System health is OK for SAP NetWeaver AS ABAP system S4H (SM50 / SM21):
          sap_control.system_health_ok:
            - name: 'S4H'
            - check_from: {{ None | strftime("%d%m%Y") }}  {# renders to current date, e.g. 31082002 #}
            - instance_number: '00'
            - username: s4hadm
            - password: __slot__:salt:vault.read_secret(path="os", key="s4hadm")
    """
    log.debug("Running function")
    ret = {
        "name": name,
        "changes": {},
        "result": False,
        "comment": [],
    }

    # An unquoted date may arrive as an integer without its leading zero; without padding,
    # e.g. 1012000 would silently be parsed as the 10th of January instead of the 1st.
    date_str = str(check_from).zfill(8)
    try:
        from_datetime = dt.strptime(f"{date_str}000000", "%d%m%Y%H%M%S")
    except ValueError:
        msg = f"Invalid value '{check_from}' for check_from, expected format DDMMYYYY"
        log.error(msg)
        ret["comment"] = msg
        return ret

    log.debug("Checking system log")
    syslog_errors = __salt__["sap_control.get_syslog_errors"](
        timestamp_from=from_datetime,
        instance_number=instance_number,
        username=username,
        password=password,
    )
    if syslog_errors:
        log.error("Syslog errors:")
        processed_errors = []
        for err in syslog_errors:
            # skip non-critical errors
            if err.Text in NON_CRITICAL_SYSLOG_ERRORS:
                continue
            log.error(err)
            processed_errors.append(re.sub(" +", " ", f"SM21: {err.Text}"))
        # de-duplicate while keeping the order in which the errors were reported
        ret["comment"] += list(dict.fromkeys(processed_errors))

    log.debug("Checking for work process errors")
    wp_table = __salt__["sap_control.get_workprocess_table"](
        instance_number=instance_number,
        username=username,
        password=password,
    )
    if not isinstance(wp_table, list):
        # without a table there is nothing to iterate over
        msg = "Cannot retrieve workprocess table"
        log.error(msg)
        ret["comment"].append(msg)
    else:
        for wproc in wp_table:
            if wproc.Status == "Ended" or wproc.Err:
                reason = f" (reason: {wproc.Reason})" if wproc.Reason else ""
                msg = (
                    f"SM50: {wproc.Typ} work process {wproc.No} (PID: {wproc.Pid}) is in "
                    f"status {wproc.Status}{reason} with error '{wproc.Err}'"
                )
                log.error(msg)
                ret["comment"].append(msg)

    if ret["comment"]:
        ret["result"] = False
    else:
        ret["comment"] = "System health OK"
        ret["result"] = True
    return ret