File: //sbin/ie-watchdog
#!/opt/imunify360/venv/bin/python3
from contextlib import suppress
import yaml
import json
import logging
import logging.handlers
import os
import sentry_sdk
import subprocess
import shutil
from typing import Optional
# YAML file read when reporting to Sentry; its `sentry.dsn` key selects the DSN.
QUARANTINE_CONFIG_PATH = '/etc/imunifyemail/quarantine.yaml'
# DSN used when the YAML config does not define `sentry.dsn`.
QUARANTINE_DEFAULT_DSN = 'https://7099cd4667794ff8b3601d1ca96221be@im360.sentry.cloudlinux.com/14'
# Service name handed to `systemctl` / `service` for status checks and restarts.
QUARANTINE_SVC_NAME = 'ie-quarantine'
# Text looked up in `ps` output to find the quarantine process and its RSS.
QUARANTINE_BIN_PATH = '/usr/bin/ie-quarantine'
# Default RSS limit in KB (1024 * 500 = 512000); the IE_QUARANTINE_MAX_RSS_KB
# environment variable overrides it.
QUARANTINE_RSS_THRESHOLD_KB = 1024 * 500
# JSON file whose `id` field is sent as the `server_id` Sentry tag.
LICENSE_FILE_PATH = '/var/imunify360/license.json'
# Value reported as the server id when the license file does not exist.
UNKNOWN_LICENSE_ID = 'UNKNOWN'
# Default timeout, in seconds, for commands started through run_cmd().
SUBPROCESS_TIMEOUT_SEC = 60
def run_cmd(cmd, *, timeout=SUBPROCESS_TIMEOUT_SEC, check=False, **kwargs) -> subprocess.CompletedProcess:
    """Execute *cmd* through ``subprocess.run`` and never raise on timeout.

    *timeout*, *check* and any extra keyword arguments are forwarded to
    ``subprocess.run`` unchanged.

    When the command exceeds *timeout*, the ``TimeoutExpired`` exception is
    converted into a ``CompletedProcess`` whose ``returncode`` is ``None``
    and whose ``stdout``/``stderr`` are whatever the exception carried.
    Any other exception (for instance ``CalledProcessError`` when *check*
    is true) propagates to the caller.
    """
    try:
        completed = subprocess.run(cmd, timeout=timeout, check=check, **kwargs)
    except subprocess.TimeoutExpired as expired:
        # returncode=None is the marker callers use to recognise a timeout.
        completed = subprocess.CompletedProcess(
            expired.cmd,
            returncode=None,
            stdout=expired.stdout,
            stderr=expired.stderr,
        )
    return completed
def setup_logging(level) -> logging.Logger:
    """Return the 'ie-watchdog' logger wired to the local syslog socket.

    The logger level is set to *level* and a ``SysLogHandler`` bound to
    ``/dev/log`` is attached; records are rendered as
    ``<logger name>: <message>``.
    """
    watchdog_logger = logging.getLogger('ie-watchdog')
    watchdog_logger.setLevel(level)

    syslog = logging.handlers.SysLogHandler('/dev/log')
    syslog.setFormatter(logging.Formatter('%(name)s: %(message)s'))
    watchdog_logger.addHandler(syslog)

    return watchdog_logger
def is_service_running(sysctl_exec: Optional[str], name: str) -> bool:
    """Tell whether service *name* is running, judged by its status command.

    ``<sysctl_exec> status <name>`` is used when *sysctl_exec* is set,
    otherwise ``service <name> status``. The command runs with all standard
    streams detached. The result is True only for exit status 0, so a
    command that timed out (return code ``None``) counts as not running.
    """
    status_cmd = [sysctl_exec, 'status', name] if sysctl_exec else ['service', name, 'status']
    detached_streams = dict.fromkeys(('stdin', 'stdout', 'stderr'), subprocess.DEVNULL)
    result = run_cmd(status_cmd, **detached_streams)
    return result.returncode == 0
def restart_service(sysctl_exec: Optional[str], name: str) -> None:
    """Restart service *name* with the [systemctl|service] command.

    ``<sysctl_exec> restart <name>`` is used when *sysctl_exec* is set,
    otherwise ``service <name> restart``. The command runs with all standard
    streams detached and its outcome is not inspected.
    """
    restart_cmd = [sysctl_exec, 'restart', name] if sysctl_exec else ['service', name, 'restart']
    detached_streams = dict.fromkeys(('stdin', 'stdout', 'stderr'), subprocess.DEVNULL)
    run_cmd(restart_cmd, **detached_streams)
def get_license_id() -> str:
    """Return the ``id`` field of the license file, or ``UNKNOWN_LICENSE_ID``.

    The id is only used to tag a Sentry event, so an unusable license file
    must not abort the report. The fallback is returned when the file is
    missing, is not valid JSON, is not a JSON object, or has no ``id`` key.
    Other I/O errors (e.g. permission problems) still propagate.
    """
    try:
        with open(LICENSE_FILE_PATH) as file:
            return json.load(file)['id']
    except (FileNotFoundError, ValueError, KeyError, TypeError):
        # ValueError covers json.JSONDecodeError; TypeError covers a
        # top-level JSON value that is not an object (list, string, null...).
        return UNKNOWN_LICENSE_ID
def get_rpm_package_version(name: str) -> str:
    """Return ``VERSION-RELEASE`` of the installed RPM package *name*.

    ``'UNKNOWN'`` is returned when the version cannot be determined:
    the ``rpm`` executable is not found, the query timed out, or ``rpm``
    exited with a non-zero status (in which case its output is an error
    message rather than a version).
    """
    unknown_version = 'UNKNOWN'
    cmd = ['rpm', '-q', '--queryformat=%{VERSION}-%{RELEASE}', name]
    try:
        cp = run_cmd(cmd, stdout=subprocess.PIPE, text=True)
    except FileNotFoundError:
        # No `rpm` executable could be started on this host.
        return unknown_version
    # A timeout is reported by run_cmd as returncode None, and the captured
    # stdout may then be None or bytes instead of str.
    if cp.returncode != 0 or not isinstance(cp.stdout, str):
        return unknown_version
    return cp.stdout.strip()
def check_quarantine(sysctl_exec: Optional[str], max_kb: int, l: logging.Logger) -> None:
    """Restart the quarantine service when its RSS reaches *max_kb*.

    Nothing is done when the service is not running or when the measured
    RSS is below *max_kb*; both cases are logged at INFO level through *l*.
    Otherwise the service is restarted, a warning is logged and the event
    is reported to Sentry.
    """
    service_is_up = is_service_running(sysctl_exec, QUARANTINE_SVC_NAME)
    if not service_is_up:
        l.info('quarantine service is not active')
        return

    # rss_value() yields 0 / -1 when no figure is available; those values
    # fall under any positive limit and therefore never trigger a restart.
    rss_kb = rss_value(QUARANTINE_BIN_PATH)
    if rss_kb >= max_kb:
        restart_service(sysctl_exec, QUARANTINE_SVC_NAME)
        # The exception is never raised; it only carries the message to Sentry.
        report = Exception(f'ie-quarantine restarted due to high RSS: {rss_kb} KB, limit {max_kb} KB')
        l.warning(str(report))
        sentry_capture_exception(report)
    else:
        l.info(f'quarantine rss is {rss_kb} KB, less than {max_kb} KB')
def _load_sentry_dsn() -> Optional[str]:
    """Return the DSN from the quarantine config, or the built-in default.

    The default is used when the config file does not exist, when it is
    empty or not a mapping, and when it has no usable ``sentry`` section or
    no ``dsn`` key. A ``dsn`` key that is present is returned as is, even if
    its value is empty or null. YAML syntax errors propagate.
    """
    try:
        with open(QUARANTINE_CONFIG_PATH) as cfg:
            config = yaml.safe_load(cfg)
    except FileNotFoundError:
        return QUARANTINE_DEFAULT_DSN
    # An empty file loads as None, and `sentry:` without a value loads as
    # None too; neither supports .get().
    sentry_section = config.get('sentry') if isinstance(config, dict) else None
    if not isinstance(sentry_section, dict):
        return QUARANTINE_DEFAULT_DSN
    return sentry_section.get('dsn', QUARANTINE_DEFAULT_DSN)


def sentry_capture_exception(e: Exception) -> None:
    """Send *e* to Sentry, tagged with the server id and package version.

    The SDK is initialised with the DSN from the quarantine config (or the
    default DSN), the ``server_id`` and ``version`` tags are set, the
    exception is captured and the client is flushed for up to 10 seconds
    so the event leaves before this short-lived process exits.
    """
    sentry_sdk.init(dsn=_load_sentry_dsn())
    # Tags are set on the current scope through the top-level API, so
    # capture_exception() needs no explicit `scope` argument.
    sentry_sdk.set_tag('server_id', get_license_id())
    sentry_sdk.set_tag('version', get_rpm_package_version('imunifyemail'))
    sentry_sdk.capture_exception(e)
    sentry_sdk.flush(timeout=10)
def rss_value(command_name: str) -> int:
    """Return the RSS, in KB, of the first process matching *command_name*.

    The process list comes from ``ps -eo rss,command``; a process matches
    when *command_name* occurs anywhere in its output line.

    Returns:
        the RSS of the first matching line;
        ``0`` when no line matches;
        ``-1`` when the RSS cannot be determined (``ps`` timed out, or the
        first column of the matching line is not an integer).
    """
    cp = run_cmd(
        ['ps', '-eo', 'rss,command'],
        stdout=subprocess.PIPE,
        text=True,
    )
    output = cp.stdout
    # run_cmd reports a timeout as returncode None; the captured stdout is
    # then None or bytes instead of str, so it cannot be parsed below.
    if cp.returncode is None or not isinstance(output, str):
        return -1

    for line in output.splitlines():
        # NOTE(review): substring match - any process whose command line
        # mentions command_name qualifies, and only the first one is used.
        if command_name not in line:
            continue
        columns = line.split()
        try:
            return int(columns[0])
        except (ValueError, IndexError):
            return -1
    return 0
def sysctl_executable() -> Optional[str]:
    """Locate ``systemctl`` on the default search path.

    Only the directories in ``os.defpath`` are searched. The result is the
    path found by ``shutil.which`` or ``None`` when there is no match.
    """
    default_search_path = os.defpath
    return shutil.which('systemctl', path=default_search_path)
if __name__ == "__main__":
    # Set up logging first so a bad threshold override can be reported.
    logger = setup_logging(logging.DEBUG)
    logger.info('Starting ie-watchdog')

    sysctl_exec = sysctl_executable()

    # IE_QUARANTINE_MAX_RSS_KB overrides the default RSS limit; a value that
    # is not an integer is reported and the default is kept.
    quarantine_max_rss_kb = QUARANTINE_RSS_THRESHOLD_KB
    max_rss_override = os.getenv('IE_QUARANTINE_MAX_RSS_KB')
    if max_rss_override is not None:
        try:
            quarantine_max_rss_kb = int(max_rss_override)
        except ValueError:
            logger.warning(
                'ignoring invalid IE_QUARANTINE_MAX_RSS_KB=%r, using default %d KB',
                max_rss_override,
                QUARANTINE_RSS_THRESHOLD_KB,
            )

    check_quarantine(sysctl_exec=sysctl_exec, max_kb=quarantine_max_rss_kb, l=logger)