Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions coldfront/config/plugins/ecs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from coldfront.config.env import ENV
from coldfront.config.logging import LOGGING
from coldfront.config.base import INSTALLED_APPS

# Settings consumed by the ECS plugin, read from the environment.
ECS_USER = ENV.str('ECS_USER', default='')
ECS_PASS = ENV.str('ECS_PASS', default='')
ECS_CLIENT_VERSION = ENV.str('ECS_CLIENT_VERSION', default='3')

# Register the ECS plugin as a Django app.
INSTALLED_APPS += ['coldfront.plugins.ecs']

# Dedicated log file for the plugin, rotated daily.
LOGGING['handlers']['ecs'] = {
    'level': 'DEBUG',
    'formatter': 'default',
    'class': 'logging.handlers.TimedRotatingFileHandler',
    'filename': 'logs/ecs.log',
    'when': 'D',
    'backupCount': 10,  # how many backup files to keep
}

# Route everything logged under the plugin's package to that handler.
LOGGING['loggers']['coldfront.plugins.ecs'] = {
    'handlers': ['ecs'],
}
1 change: 1 addition & 0 deletions coldfront/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
'PLUGIN_IFX': 'plugins/ifx.py',
'PLUGIN_SLURM': 'plugins/slurm.py',
'PLUGIN_IQUOTA': 'plugins/iquota.py',
'PLUGIN_ECS': 'plugins/ecs.py',
'PLUGIN_FREEIPA': 'plugins/freeipa.py',
'PLUGIN_SYSMON': 'plugins/system_monitor.py',
'PLUGIN_XDMOD': 'plugins/xdmod.py',
Expand Down
10 changes: 5 additions & 5 deletions coldfront/core/allocation/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2444,22 +2444,22 @@ def post(self, request, *args, **kwargs):
)
preupdate_replies = [p[1] for p in preupdate_responses if p[1]]
if not preupdate_replies:
error = ('this allocation\'s resource has no autoupdate options '
'at this time. Please manually create the resource '
'before approving this request.')
error = (
"This allocation's resource has no automation options at this time."
" Please manually update the share before approving this request.")
messages.error(request, error)
return self.redirect_to_detail(pk)
logger.info(
"Auto-updated allocation %s quota from %s to %s",
alloc_change_obj.allocation, old_quota, new_quota_value,
extra={'category': 'integration:isilon', 'status': 'success'},
extra={'category': 'integration', 'status': 'success'},
)
except Exception as e:
logger.exception(
'Auto-update of allocation quota failed. requesting_user=%s,allocation_pk=%s,change_request_pk=%s,error=%s',
request.user, alloc_change_obj.allocation.pk,
alloc_change_obj.pk, str(e),
extra={'category': 'integration:isilon', 'status': 'error'}
extra={'category': 'integration', 'status': 'error'}
)
err = ("An error was encountered while auto-updating"
"the allocation quota. Please contact Coldfront "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def handle(self, *args, **options):
('GPU Count', 'Int'),
('Features', 'Text'),
('slurm_integration', 'Text'),
('url', 'Text'),
# UBCCR
('Core Count', 'Int'),
# ('expiry_time', 'Int'),
Expand Down
Empty file.
9 changes: 9 additions & 0 deletions coldfront/plugins/ecs/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from django.apps import AppConfig


class SlurmConfig(AppConfig):
    """Django application config for the ``coldfront.plugins.ecs`` app.

    NOTE(review): the class name looks carried over from the Slurm plugin;
    it is left unchanged here so existing references keep working.
    """

    name = 'coldfront.plugins.ecs'

    def ready(self):
        """Import the plugin's signals module once the app is ready."""
        # Imported purely for its side effect of loading the module.
        from coldfront.plugins.ecs import signals  # noqa: F401
60 changes: 60 additions & 0 deletions coldfront/plugins/ecs/signals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import logging

from django.dispatch import receiver
from coldfront.core.allocation.signals import (
allocation_autocreate,
allocation_autoupdate,
)
from coldfront.plugins.ecs.utils import ECSResourceManager

logger = logging.getLogger(__name__)

@receiver(allocation_autocreate)
def activate_allocation(sender, **kwargs):
    """Create an ECS bucket for a newly approved allocation.

    Receiver for the ``allocation_autocreate`` signal. Resources whose name
    does not contain ``'ecs'`` are ignored.

    Args:
        sender: the signal sender (unused).
        **kwargs: must contain ``allocation_obj`` and ``resource``.

    Returns:
        ``'ecs'`` when this plugin handled the resource, otherwise ``None``.

    Raises:
        Exception: any error raised while creating the bucket is logged and
            re-raised unchanged.
    """
    allocation_obj = kwargs['allocation_obj']
    resource = kwargs['resource']

    # The approval form's automation options are not used by this plugin, so
    # they are deliberately not read here: iterating a missing value used to
    # raise TypeError before the resource was even checked.
    if 'ecs' not in resource.name:
        return None

    try:
        ecs_manager = ECSResourceManager(resource)
        block_limit_tb = allocation_obj.size_tb
        ecs_manager.create_allocation_bucket(
            allocation_obj.lab.name, block_limit_tb
        )
    except Exception as e:
        logger.exception(
            "error creating ecs allocation. allocation_pk=%s,error=%s",
            allocation_obj.pk, e,
            extra={'category': 'integration:ecs', 'status': 'error'},
        )
        raise
    return 'ecs'

@receiver(allocation_autoupdate)
def update_allocation(sender, **kwargs):
    """Apply an approved quota change to the allocation's ECS bucket.

    Receiver for the ``allocation_autoupdate`` signal. Only the allocation's
    first resource is considered; allocations with no resource, or whose
    resource name does not contain ``'ecs'``, are ignored.

    Args:
        sender: the signal sender (unused).
        **kwargs: must contain ``allocation_obj`` and ``new_quota_value``
            (the new quota, treated as TB).

    Returns:
        ``'ecs'`` when this plugin handled the allocation, otherwise ``None``.

    Raises:
        Exception: any error raised while changing the quota is logged and
            re-raised unchanged.
    """
    allocation_obj = kwargs['allocation_obj']
    new_quota_value_tb = kwargs['new_quota_value']
    resource = allocation_obj.resources.first()

    # ``first()`` yields None for an allocation without resources; treat that
    # the same as a non-ECS resource instead of failing on ``.name``.
    if resource is None or 'ecs' not in resource.name:
        return None

    lab_name = allocation_obj.lab.name
    try:
        ecs_manager = ECSResourceManager(resource)
        ecs_manager.change_bucket_quota(
            bucket_name=f"lab-{lab_name}-bucket",
            new_block_size_tb=new_quota_value_tb,
            # Buckets are created inside the lab's namespace, so the quota
            # change has to target that same namespace.
            namespace_name=lab_name,
        )
        logger.info(
            "Auto-updated allocation %s bucket quota from %s to %s",
            allocation_obj, allocation_obj.size, new_quota_value_tb,
            extra={'category': 'integration:ecs', 'status': 'success'},
        )
    except Exception as e:
        logger.exception(
            "error updating bucket allocation quota. allocation_pk=%s,error=%s",
            allocation_obj.pk, e,
            extra={'category': 'integration:ecs', 'status': 'error'},
        )
        raise
    return 'ecs'
106 changes: 106 additions & 0 deletions coldfront/plugins/ecs/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import logging

from ecsclient.client import Client

from coldfront.core.utils.common import import_from_settings

ECS_CLIENT_VERSION = import_from_settings('ECS_CLIENT_VERSION', '3')
ECS_USER = import_from_settings('ECS_USER')
ECS_PASS = import_from_settings('ECS_PASS')

logger = logging.getLogger(__name__)


class ECSResourceManager():
    """Manage buckets, quotas and usage data on the ECS cluster behind a resource."""

    def __init__(self, resource, username=ECS_USER, password=ECS_PASS):
        """Read the cluster URL from the resource and open a client.

        Args:
            resource: resource object whose ``url`` resource attribute holds
                the base URL of the cluster.
            username: login name for the ECS API.
            password: password for the ECS API.
        """
        self.resource = resource
        self.url = resource.resourceattribute_set.get(
            resource_attribute_type__name='url'
        ).value
        self._username = username
        self._password = password
        self.client = self.connect()

    def connect(self):
        """Build and return an ECS client pointed at port 4443 of ``self.url``."""
        client = Client(
            ECS_CLIENT_VERSION,
            username=self._username,
            password=self._password,
            token_endpoint=f'{self.url}:4443/login',
            ecs_endpoint=f'{self.url}:4443'
        )
        return client

    def generate_token(self, username, password):
        """Generate a token for ECS API access.

        Not implemented yet: this is a placeholder that returns ``None``.
        """

    @staticmethod
    def _quota_sizes_gb(size_tb):
        """Convert a quota in TB to ``(block_size_gb, notification_size_gb)``.

        Both values are whole numbers of GB; the notification threshold is
        90% of the block size.
        """
        block_size_gb = int(size_tb * 1024)
        notification_size_gb = int(block_size_gb * 0.9)
        return block_size_gb, notification_size_gb

    def create_allocation_bucket(self, lab_name, block_limit_tb):
        """Create the lab's bucket and set its quota.

        The bucket is named ``lab-<lab_name>-bucket`` and is created in the
        namespace ``lab_name``.

        Args:
            lab_name: lab name, used for both the bucket name and namespace.
            block_limit_tb: hard quota for the bucket, in TB.

        Raises:
            Exception: errors from bucket creation are logged and re-raised;
                errors from setting the quota propagate unchanged.
        """
        bucket_name = f"lab-{lab_name}-bucket"
        # Convert before touching the cluster so a bad size fails early,
        # before any bucket has been created.
        block_limit_gb, notification_limit_gb = self._quota_sizes_gb(block_limit_tb)
        try:
            self.client.bucket.create(
                bucket_name, namespace=lab_name,
                replication_group='', filesystem_enabled=False,
                head_type=None, stale_allowed=None,
                metadata=None, encryption_enabled=False
            )
        except Exception as e:
            logger.exception("Error creating bucket %s: %s", bucket_name, str(e))
            raise
        # The quota must target the namespace the bucket was just created in.
        self.client.bucket.set_quota(
            bucket_name,
            namespace=lab_name,
            block_size=block_limit_gb,
            notification_size=notification_limit_gb,
        )

    def change_bucket_quota(self, bucket_name, new_block_size_tb, namespace_name=None):
        """Set a new quota on an existing bucket.

        Args:
            bucket_name: name of the bucket to update.
            new_block_size_tb: new hard quota, in TB.
            namespace_name: namespace of the bucket; passed through to the
                client as ``namespace`` (``None`` by default).
        """
        new_block_size_gb, new_notification_size_gb = self._quota_sizes_gb(
            new_block_size_tb
        )
        self.client.bucket.set_quota(
            bucket_name,
            namespace=namespace_name,
            block_size=new_block_size_gb,
            notification_size=new_notification_size_gb
        )

    def delete_allocation_bucket(self, bucket_name, namespace_name):
        """Delete the bucket ``bucket_name`` in namespace ``namespace_name``."""
        self.client.bucket.delete(bucket_name, namespace=namespace_name)

    def update_resource_usage_data(self):
        """Fetch cluster capacity and store it on the resource's attributes.

        Writes the ``allocated_tb``, ``free_tb`` and ``capacity_tb`` resource
        attributes, where capacity is the sum of allocated and free.

        Returns:
            The capacity dict as returned by the client.
        """
        capacity_dict = self.client.capacity.get_cluster_capacity()
        allocated_tb = capacity_dict['totalProvisioned_gb'] / 1024
        free_tb = capacity_dict['totalFree_gb'] / 1024
        tb_dict = {
            'allocated_tb': allocated_tb,
            'free_tb': free_tb,
            'capacity_tb': allocated_tb + free_tb,
        }
        for attr_name, value_tb in tb_dict.items():
            logger.info("ECS Capacity %s: %.2f TB", attr_name, value_tb)
            attribute = self.resource.resourceattribute_set.get(
                resource_attribute_type__name=attr_name
            )
            attribute.value = value_tb
            attribute.save()
        return capacity_dict

    def update_bucket_allocation_usage_data(self, allocation, bucket_name, namespace_name):
        """Fetch a bucket's size and record it as usage on the allocation.

        Updates the ``usage`` of the allocation's ``Quota_In_Bytes`` and
        ``Storage Quota (TB)`` attributes.

        Args:
            allocation: allocation whose attributes are updated.
            bucket_name: bucket to query.
            namespace_name: namespace the bucket lives in.
        """
        # Billing info is requested in KB, so both conversions start from KB.
        bucket_stats = self.client.billing.get_bucket_billing_info(
            bucket_name, namespace_name, sizeunit='KB')
        total_size_kb = bucket_stats['total_size']
        total_size_bytes = total_size_kb * 1024
        total_size_tb = total_size_kb / (1024 * 1024 * 1024)

        # update usage in bytes
        quota_bytes_attr = allocation.allocationattribute_set.get(
            allocation_attribute_type__name='Quota_In_Bytes')
        quota_bytes_attr.usage = total_size_bytes
        quota_bytes_attr.save()

        # update usage in TB
        quota_tb_attr = allocation.allocationattribute_set.get(
            allocation_attribute_type__name='Storage Quota (TB)'
        )
        quota_tb_attr.usage = total_size_tb
        quota_tb_attr.save()
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ idna==3.7
protobuf==6.30.0
pyparsing==3.1.2
python-dateutil==2.9.0.post0
python-ecsclient==1.1.12
python-memcached==1.62
pytz==2024.1
redis==5.0.0
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
'logging_tree==1.9',
'mysqlclient==2.2.0',
'pandas==2.2.1',
'python-ecsclient==1.1.12',
'reportlab==4.0.5',
'xhtml2pdf==0.2.15',
'XlsxWriter',
Expand Down