Add status-change notifications to the jobs system so the server responds faster when jobs are created or change status.

Michael Woods
2025-12-28 11:53:42 -05:00
parent e7d308ab69
commit d3e66f45b2
5 changed files with 62 additions and 10 deletions

View File

@@ -1,5 +1,5 @@
 """Package runs arbitrary commands/jobs via different mechanisms."""
-from typing import Union,Optional,Iterable,Self
+from typing import Union, Optional, Iterable, Self, Callable, List
 from enum import Enum
 import datetime
 from uuid import UUID, uuid4
@@ -73,8 +73,8 @@ class Runner:
     """Abstract class to take arguments and run a job and track the status and results."""
     def __init__(self, username: str, args: Union[str, list[str]], job_id: int, environment: Optional[dict] = None,
                  timeout_secs: str = 300, labels: Optional[list] = None,
-                 files: list[RunnerFile] = None):
-        self.files = []
+                 files: list[RunnerFile] = None, notify_function: Callable = None):
+        self.files: List[RunnerFile] = []
        if files is not None:
             for f in files:
                 self.files.append(f)
@@ -87,6 +87,7 @@ class Runner:
         self.finished_at = None
         self._result = (0,(b'', b''))
         self._artifact_archive = b''
+        self.notify_function = notify_function
        if environment:
             for key in environment:
                 self.env[key] = environment[key]
@@ -98,6 +99,10 @@ class Runner:
         self.timeout_seconds = timeout_secs
         self.created_at = datetime.datetime.now(datetime.UTC)
 
+    def notify(self):
+        if self.notify_function:
+            self.notify_function()
+
     def __repr__(self):
         return f"<{type(self).__name__}: {self.username}[{self.job_id}] - {self.status.name}>"
@@ -150,6 +155,7 @@ class Orchestrator:
     def __init__(self):
         self.runners = []
         self.runner_lock = Lock()
+        self.listeners = []
 
     def get_finished_runners(self) -> list[Runner]:
         return [r for r in self.runners if r.is_finished()]
@@ -178,6 +184,11 @@ class Orchestrator:
                    files: list[RunnerFile] = None) -> Runner:
         pass
 
+    def notify_listeners(self):
+        """If any runners change status, call all listener functions."""
+        for func in self.listeners:
+            func()
+
     def manage_lifecycle(self):
         """When called, updates runner statuses and performs any housekeeping."""
         pass
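
A minimal sketch of the notification chain these abstract classes now support: a Runner calls its notify_function whenever its status changes, and the Orchestrator fans that call out to every registered listener. Only the names shown in the diff (listeners, notify_listeners, notify_function, notify) come from the commit; the rest is illustrative.

# Illustrative sketch, not the packetserver implementation: shows how a
# Runner's notify() reaches every Orchestrator listener.
from typing import Callable, List, Optional

class Orchestrator:
    def __init__(self):
        self.listeners: List[Callable[[], None]] = []

    def notify_listeners(self):
        # Fan a status change out to every registered listener.
        for func in self.listeners:
            func()

class Runner:
    def __init__(self, notify_function: Optional[Callable[[], None]] = None):
        self.notify_function = notify_function

    def notify(self):
        if self.notify_function:
            self.notify_function()

orch = Orchestrator()
orch.listeners.append(lambda: print("a runner changed status"))
runner = Runner(notify_function=lambda: orch.notify_listeners())
runner.notify()  # prints "a runner changed status"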

View File

@@ -1,5 +1,6 @@
 """Uses podman to run jobs in containers."""
 import time
+from collections.abc import Callable
 from ZEO import client
@@ -24,6 +25,7 @@ from packetserver import VERSION as packetserver_version
 import re
 from threading import Thread
 from io import BytesIO
+from typing import Callable
 
 env_splitter_rex = '''([a-zA-Z0-9]+)=([a-zA-Z0-9]*)'''
@@ -33,9 +35,9 @@ PodmanOptions = namedtuple("PodmanOptions", ["default_timeout", "max_timeout", "
 class PodmanRunner(Runner):
     def __init__(self, username: str, args: Union[str, list[str]], job_id: int, container: Container,
                  environment: Optional[dict] = None, timeout_secs: str = 300, labels: Optional[list] = None,
-                 files: list[RunnerFile] = None):
+                 files: list[RunnerFile] = None, notify_function: Callable = None):
         super().__init__(username, args, job_id, environment=environment, timeout_secs=timeout_secs,
-                         labels=labels, files=files)
+                         labels=labels, files=files, notify_function=notify_function)
         self._artifact_archive = b''
         if not container.inspect()['State']['Running']:
             raise ValueError(f"Container {container} is not in state Running.")
@@ -48,6 +50,7 @@ class PodmanRunner(Runner):
     def thread_runner(self):
         self.status = RunnerStatus.RUNNING
         logging.debug(f"Thread for runner {self.job_id} started. Command for {(type(self.args))}:\n{self.args}")
+        self.notify()
         # run the exec call
         if type(self.args) is str:
             logging.debug(f"Running string: {self.args}")
@@ -60,6 +63,7 @@ class PodmanRunner(Runner):
         logging.debug(str(res))
         # cleanup housekeeping
         self.status = RunnerStatus.STOPPING
+        self.notify()
         self._result = res
         # run cleanup script
         logging.debug(f"Running cleanup script for {self.job_id}")
@@ -87,6 +91,7 @@ class PodmanRunner(Runner):
             self.status = RunnerStatus.SUCCESSFUL
         else:
             self.status = RunnerStatus.FAILED
+        self.notify()
 
     @property
     def has_artifacts(self) -> bool:
@@ -411,7 +416,7 @@ class PodmanOrchestrator(Orchestrator):
         self.touch_user_container(username)
         logging.debug(f"Queuing a runner on container {con}, with command '{args}' of type '{type(args)}'")
         runner = PodmanRunner(username, args, job_id, con, environment=environment, timeout_secs=timeout_secs,
-                              labels=labels, files=files)
+                              labels=labels, files=files, notify_function=lambda : self.notify_listeners())
         self.runners.append(runner)
         runner.start()
         return runner

View File

@@ -112,6 +112,7 @@ class Server:
             if val in ['podman']:
                 logging.debug(f"Enabling {val} orchestrator")
                 self.orchestrator = get_orchestrator_from_config(conn.root.config['jobs_config'])
+                self.orchestrator.listeners.append(lambda : self.ping_job_queue())
 
         self.app = pe.app.Application()
         PacketServerConnection.receive_subscribers.append(lambda x: self.server_receiver(x))
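
On the server side the hook is a single listener: whenever any runner changes status, the orchestrator calls back into the server's ping_job_queue, so jobs are picked up as soon as their status changes rather than on the next polling pass. A rough sketch of the wiring, with ping_job_queue modeled as setting an event (its real body is not part of this diff):

# Rough sketch only: StubOrchestrator and StubServer are stand-ins, and
# ping_job_queue is assumed to wake the job-processing loop.
import threading
from typing import Callable, List

class StubOrchestrator:
    def __init__(self):
        self.listeners: List[Callable[[], None]] = []

    def notify_listeners(self):
        for func in self.listeners:
            func()

class StubServer:
    def __init__(self, orchestrator: StubOrchestrator):
        self.wake = threading.Event()
        self.orchestrator = orchestrator
        self.orchestrator.listeners.append(lambda: self.ping_job_queue())

    def ping_job_queue(self):
        # Assumed behavior: wake the job loop right away instead of
        # waiting for the next polling interval.
        self.wake.set()

server = StubServer(StubOrchestrator())
server.orchestrator.notify_listeners()
assert server.wake.is_set()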

View File

@@ -5,15 +5,17 @@ import persistent
 import persistent.list
 from persistent.mapping import PersistentMapping
 import datetime
-from typing import Self,Union,Optional,Tuple
+from typing import Self,Union,Optional,Tuple,List
 from traceback import format_exc
 from packetserver.common import PacketServerConnection, Request, Response, Message, send_response, send_blank_response
 from packetserver.common.constants import no_values, yes_values
 from packetserver.server.db import get_user_db_json
 import ZODB
+from ZODB.Connection import Connection
 from persistent.list import PersistentList
 import logging
-from packetserver.server.users import user_authorized
+from packetserver.server.users import user_authorized, User
+from packetserver.server.objects import Object
 import gzip
 import tarfile
 import time
@@ -23,6 +25,7 @@ from packetserver.runner import Orchestrator, Runner, RunnerStatus, RunnerFile
 from enum import Enum
 from io import BytesIO
 import base64
+from uuid import UUID
 
 class JobStatus(Enum):
     CREATED = 1
@@ -58,6 +61,33 @@ def get_new_job_id(root: PersistentMapping) -> int:
     root['job_counter'] = current + 1
     return current
 
+def add_object_to_file_list(object_id: Union[str,UUID], file_list: List[RunnerFile], username: str, conn: Connection):
+    if type(object_id) is str:
+        object_id = UUID(object_id)
+    root = conn.root()
+    obj = Object.get_object_by_uuid(object_id, root)
+    if obj is None:
+        raise KeyError(f"Object '{object_id}' does not exist.")
+    if obj.private:
+        owner_uuid = obj.owner
+        owner = User.get_user_by_uuid(owner_uuid, root)
+        if not (owner.username.lower() == username.lower()):
+            raise PermissionError(f"Specified object {object_id} not public and not owned by user.")
+    # Pick a destination path that does not collide with files already attached to the job.
+    unique_path = obj.name
+    runner_paths = []
+    for i in file_list:
+        runner_paths.append(i.destination_path)
+    suffix = 1
+    while unique_path in runner_paths:
+        unique_path = obj.name + f"_{suffix}"
+        suffix = suffix + 1
+    rf = RunnerFile(unique_path, data=obj.data_bytes)
+    file_list.append(rf)
+
 class Job(persistent.Persistent):
     @classmethod
     def update_job_from_runner(cls, runner: Runner, db_root: PersistentMapping) -> True:
@@ -297,7 +327,7 @@ def handle_new_job_post(req: Request, conn: PacketServerConnection, db: ZODB.DB)
     if type(req.payload['cmd']) not in [str, list]:
         send_blank_response(conn, req, 401, "job post must contain cmd key containing str or list[str]")
         return
-    files = []
+    files: List[RunnerFile] = []
     if 'db' in req.payload:
         logging.debug(f"Fetching a user db as requested.")
         try:
@@ -313,6 +343,11 @@ def handle_new_job_post(req: Request, conn: PacketServerConnection, db: ZODB.DB)
             val = req.payload['files'][key]
             if type(val) is bytes:
                 files.append(RunnerFile(key, data=val))
+    if 'objs' in req.payload:
+        if type(req.payload['objs']) is list:
+            with db.transaction() as db_connection:
+                for obj in req.payload['objs']:
+                    add_object_to_file_list(obj, files, username, db_connection)
     env = {}
     if 'env' in req.payload:
         if type(req.payload['env']) is dict:
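
For the new 'objs' key, a job-post payload would look roughly like the sketch below. Each entry is the UUID of a stored Object (the UUID shown is a placeholder); the object is attached to the job as a RunnerFile, and add_object_to_file_list appends a numeric suffix to the destination path if that name is already taken.

# Hypothetical payload for a job post using the new 'objs' key; 'cmd', 'env',
# and 'files' are the keys already handled above, and the UUID is made up.
payload = {
    "cmd": ["ls", "-l"],
    "env": {"LANG": "C"},
    "objs": ["3f9c2a1e-7b44-4d7a-9a3f-0c1d2e3f4a5b"],
}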

View File

@@ -120,7 +120,7 @@ class Object(persistent.Persistent):
         raise KeyError(f"User '{un}' not found.")
 
     @classmethod
-    def get_object_by_uuid(cls, obj: UUID, db_root: PersistentMapping):
+    def get_object_by_uuid(cls, obj: UUID, db_root: PersistentMapping) -> Union[None,Self]:
         return db_root['objects'].get(obj)
 
     @classmethod