
# Source code for ding.utils.k8s_helper

import os
import json
from typing import Tuple
from easydict import EasyDict
import yaml
import subprocess
from enum import Enum, unique
from ding.interaction.base import split_http_address
from .default_helper import one_time_warning

# Fallback pod name used by ``get_operator_server_kwargs`` when the
# ``KUBERNETES_POD_NAME`` environment variable is not set.
# NOTE(review): ``get_operator_server_kwargs`` also references ``DEFAULT_NAMESPACE`` and
# ``DEFAULT_API_VERSION``, which are not defined in this view - confirm they exist in the module.
DEFAULT_POD_NAME = 'dijob-example-coordinator'


def get_operator_server_kwargs(cfg: EasyDict) -> dict:
    """
    Overview:
        Get kwarg dict from config file
    Arguments:
        - cfg (:obj:`EasyDict`) System config. The optional keys ``system_addr`` and ``api_version`` \
            take precedence over the corresponding environment variables.
    Returns:
        - result (:obj:`dict`) Containing ``api_version``, ``namespace``, ``name``, ``port``, ``host``.
    Raises:
        - AssertionError: Neither ``cfg.system_addr`` nor the ``KUBERNETES_SERVER_URL`` environment \
            variable provides a server address.
    """
    namespace = os.environ.get('KUBERNETES_POD_NAMESPACE', DEFAULT_NAMESPACE)
    name = os.environ.get('KUBERNETES_POD_NAME', DEFAULT_POD_NAME)

    url = cfg.get('system_addr', None) or os.environ.get('KUBERNETES_SERVER_URL', None)
    if not url:
        # Raised explicitly (instead of a bare ``assert``) so the check survives ``python -O``
        # while callers still see the same exception type.
        raise AssertionError('please set environment variable KUBERNETES_SERVER_URL in Kubenetes platform.')

    api_version = cfg.get('api_version', None) \
        or os.environ.get('KUBERNETES_SERVER_API_VERSION', DEFAULT_API_VERSION)

    try:
        # Fast path for a plain ``host:port`` address.
        parts = url.split(":")
        host, port = parts[0], int(parts[1])
    except (IndexError, ValueError):
        # No port part, or the second part is not an integer (e.g. ``http://host:port``):
        # let the project's address parser handle it. Only its first two fields are used.
        host, port, _, _ = split_http_address(url)

    return {
        'api_version': api_version,
        'namespace': namespace,
        'name': name,
        'host': host,
        'port': port,
    }
def exist_operator_server() -> bool:
    """
    Overview:
        Check if the 'KUBERNETES_SERVER_URL' environment variable exists.
    Returns:
        - exists (:obj:`bool`) ``True`` if the variable is set (even to an empty string), otherwise ``False``.
    """
    return 'KUBERNETES_SERVER_URL' in os.environ
def pod_exec_command(kubeconfig: str, name: str, namespace: str, cmd: str) -> Tuple[int, str]:
    """
    Overview:
        Execute command in pod
    Arguments:
        - kubeconfig (:obj:`str`) The path of kubeconfig file
        - name (:obj:`str`) The name of pod
        - namespace (:obj:`str`) The namespace of pod
        - cmd (:obj:`str`) The command line, executed in the pod as ``/bin/sh -c <cmd>``
    Returns:
        - result (:obj:`Tuple[int, str]`) ``(-1, reason)`` if the pod lookup fails with a non-404 API error, \
            the pod does not exist, or the pod is not in the ``Running`` phase; otherwise the ``code`` and \
            ``message`` fields decoded from the command's stdout.
    Raises:
        - SystemExit: The ``kubernetes`` package is not installed (exit status ``-1``).
    """
    try:
        from kubernetes import config
        from kubernetes.client import CoreV1Api
        from kubernetes.client.rest import ApiException
        from kubernetes.stream import stream
    except ModuleNotFoundError:
        one_time_warning("You have not installed kubernetes package! Please try 'pip install DI-engine[k8s]'.")
        # Same effect as ``exit(-1)`` without depending on the ``site``-provided builtin.
        raise SystemExit(-1)

    config.load_kube_config(config_file=kubeconfig)
    core_v1 = CoreV1Api()

    pod = None
    try:
        pod = core_v1.read_namespaced_pod(name=name, namespace=namespace)
    except ApiException as e:
        # A 404 leaves ``pod`` as None and is reported below as "does not exist".
        if e.status != 404:
            return -1, f"Unknown error: {e}"
    if not pod:
        return -1, f"Pod {name} does not exist."
    if pod.status.phase != 'Running':
        return -1, f"Pod {name} is not in Running."

    exec_command = ['/bin/sh', '-c', cmd]
    output = stream(
        core_v1.connect_get_namespaced_pod_exec,
        name,
        namespace,
        command=exec_command,
        stderr=False,
        stdin=False,
        stdout=True,
        tty=False
    )
    # Textual rewrite of the captured stdout into something ``json.loads`` accepts: single quotes
    # become double quotes, ``None`` -> ``null``, ``: False`` / ``: True`` -> ``: 0`` / ``: 1``.
    # NOTE(review): presumably the output is a Python-dict-style repr; the last replacement escapes
    # the quotes around one specific regex literal so it can sit inside a JSON string - confirm.
    output = output.replace("\'", "\"") \
        .replace('None', 'null') \
        .replace(': False', ': 0') \
        .replace(': True', ': 1') \
        .replace('"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$"', '\\"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$\\"')
    result = json.loads(output)
    return result['code'], result['message']
@unique
class K8sType(Enum):
    """
    Overview:
        Cluster types that ``K8sLauncher`` can be configured with.
    """
    # With this type, the cluster operations of ``K8sLauncher`` return without invoking k3d.
    Local = 1
    # With this type, ``K8sLauncher`` creates/deletes the cluster and imports images via the ``k3d`` CLI.
    K3s = 2
class K8sLauncher(object):
    """
    Overview:
        object to manage the K8s cluster
    Interfaces:
        ``__init__``, ``_load``, ``create_cluster``, ``_check_k3d_tools``, ``delete_cluster``, ``preload_images``
    """

    def __init__(self, config_path: str) -> None:
        """
        Overview:
            Initialize the K8sLauncher object.
        Arguments:
            - config_path (:obj:`str`): The path of the config file.
        """
        # Defaults; each one may be overridden by the config file in ``_load``.
        self.name = None
        self.servers = 1
        self.agents = 0
        self.type = K8sType.Local
        self._images = []

        self._load(config_path)
        self._check_k3d_tools()

    def _load(self, config_path: str) -> None:
        """
        Overview:
            Load the config file.
        Arguments:
            - config_path (:obj:`str`): The path of the config file. Recognized keys are ``name``, \
                ``servers``, ``agents``, ``type`` (``'k3s'`` or ``'local'``) and ``preload_images``. \
                Missing or falsy values keep the current attribute value.
        Raises:
            - TypeError: ``servers``/``agents`` is not an int, or ``preload_images`` is not a list.
            - ValueError: ``type`` is neither ``'k3s'`` nor ``'local'``.
        """
        with open(config_path, 'r') as f:
            # ``safe_load`` returns None for an empty file; treat that as "no overrides".
            data = yaml.safe_load(f) or {}

        name = data.get('name')
        if name:
            self.name = name

        servers = data.get('servers')
        if servers:
            if type(servers) is not int:
                raise TypeError(f"servers' type is expected int, actual {type(servers)}")
            self.servers = servers

        agents = data.get('agents')
        if agents:
            if type(agents) is not int:
                raise TypeError(f"agents' type is expected int, actual {type(agents)}")
            self.agents = agents

        cluster_type = data.get('type')
        if cluster_type:
            if cluster_type == 'k3s':
                self.type = K8sType.K3s
            elif cluster_type == 'local':
                self.type = K8sType.Local
            else:
                raise ValueError(f"no type found for {cluster_type}")

        images = data.get('preload_images')
        if images:
            if type(images) is not list:
                raise TypeError(f"preload_images' type is expected list, actual {type(images)}")
            self._images = images

    def _check_k3d_tools(self) -> None:
        """
        Overview:
            Check if the k3d tools exist.
        Raises:
            - FileNotFoundError: The cluster type is K3s and no ``k3d`` executable is found on ``PATH``.
        """
        if self.type != K8sType.K3s:
            return
        import shutil  # local import: only needed for this lookup
        if shutil.which('k3d') is None:
            # NOTE(review): the message appears truncated after "./ding/scripts/" - confirm the
            # installer script name that should follow.
            raise FileNotFoundError(
                "No k3d tools found, please install by executing ./ding/scripts/"
            )

    @staticmethod
    def _run_k3d(args: list) -> str:
        """
        Overview:
            Run a command, wait for it to finish and return its stripped, UTF-8 decoded stderr.
        Arguments:
            - args (:obj:`list`): The command line as a list of strings.
        Returns:
            - err_str (:obj:`str`): The stripped stderr output of the command.
        """
        completed = subprocess.run(args, stderr=subprocess.PIPE)
        return completed.stderr.decode('utf-8').strip()

    def create_cluster(self) -> None:
        """
        Overview:
            Create the k8s cluster.
        Raises:
            - RuntimeError: k3d wrote to stderr and the output contains neither ``WARN`` nor \
                ``already exists``.
        """
        print('Creating k8s cluster...')
        if self.type != K8sType.K3s:
            return
        args = ['k3d', 'cluster', 'create', f'{self.name}', f'--servers={self.servers}', f'--agents={self.agents}']
        err_str = self._run_k3d(args)
        # Success/failure is judged from stderr text: empty output or output containing 'WARN'
        # is accepted, and an already existing cluster is not an error.
        if err_str != '' and 'WARN' not in err_str:
            if 'already exists' in err_str:
                print('K8s cluster already exists')
            else:
                raise RuntimeError(f'Failed to create cluster {self.name}: {err_str}')

        # preload images
        self.preload_images(self._images)

    def delete_cluster(self) -> None:
        """
        Overview:
            Delete the k8s cluster.
        Raises:
            - RuntimeError: k3d wrote to stderr and the output contains neither ``WARN`` nor ``NotFound``.
        """
        print('Deleting k8s cluster...')
        if self.type != K8sType.K3s:
            return
        err_str = self._run_k3d(['k3d', 'cluster', 'delete', f'{self.name}'])
        # A cluster that does not exist ('NotFound') is treated as already deleted.
        if err_str != '' and 'WARN' not in err_str and 'NotFound' not in err_str:
            raise RuntimeError(f'Failed to delete cluster {self.name}: {err_str}')

    def preload_images(self, images: list) -> None:
        """
        Overview:
            Preload images.
        Arguments:
            - images (:obj:`list`): Image names passed to ``k3d image import``; nothing is done \
                when it is empty or the cluster type is not K3s.
        Raises:
            - RuntimeError: k3d wrote to stderr and the output does not contain ``WARN``.
        """
        if self.type != K8sType.K3s or not images:
            return
        args = ['k3d', 'image', 'import', f'--cluster={self.name}']
        args += images
        err_str = self._run_k3d(args)
        if err_str != '' and 'WARN' not in err_str:
            raise RuntimeError(f'Failed to preload images: {err_str}')