Coverage for drivers/linstorvolumemanager.py : 10%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python3
2#
3# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr
4#
5# This program is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation, either version 3 of the License, or
8# (at your option) any later version.
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16#
18from sm_typing import override
20import errno
21import json
22import linstor
23import os.path
24import re
25import shutil
26import socket
27import stat
28import time
29import util
30import uuid
# Persistent prefix to add to RAW persistent volumes.
PERSISTENT_PREFIX = 'xcp-persistent-'

# Contains the data of the "/var/lib/linstor" directory.
DATABASE_VOLUME_NAME = PERSISTENT_PREFIX + 'database'
DATABASE_SIZE = 1 << 30  # 1GB.
DATABASE_PATH = '/var/lib/linstor'
DATABASE_MKFS = 'mkfs.ext4'

# Matches a "<resource> role:Primary" entry in `drbdadm status` output.
REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary")
# Captures the address part of a "<name> <addr>:<port>" drbdsetup field.
REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$')

DRBD_BY_RES_PATH = '/dev/drbd/by-res/'

# Name of the XAPI plugin invoked on pool hosts for LINSTOR helpers.
PLUGIN = 'linstor-manager'
49# ==============================================================================
def get_local_volume_openers(resource_name, volume):
    """
    Read the DRBD debugfs "openers" file of a local volume and return,
    as a JSON string, the processes that currently hold it open.
    :param str resource_name: The DRBD resource name.
    :param int volume: The volume number inside the resource.
    :return: JSON object mapping pid -> {process-name, open-duration}.
    :rtype: str
    """
    if not resource_name or volume is None:
        raise Exception('Cannot get DRBD openers without resource name and/or volume.')

    openers_path = '/sys/kernel/debug/drbd/resources/{}/volumes/{}/openers'.format(
        resource_name, volume
    )

    # Small file, so reading it entirely is cheap.
    with open(openers_path, 'r') as handle:
        entries = handle.readlines()

    # Each line is: "<process name> <pid> <open duration in ms>".
    entry_re = re.compile('(.*)\\s+([0-9]+)\\s+([0-9]+)')

    openers = {}
    for entry in entries:
        match = entry_re.match(entry)
        assert match

        process_name, pid, open_duration_ms = match.groups()
        openers[pid] = {
            'process-name': process_name,
            'open-duration': open_duration_ms
        }

    return json.dumps(openers)
def get_all_volume_openers(resource_name, volume):
    """
    Collect the DRBD openers of a volume on every live host of the pool,
    by calling the linstor-manager plugin on each of them.
    :param str resource_name: The DRBD resource name.
    :param volume: The volume number (converted to str for the plugin).
    :return: Map of node name -> parsed openers of that node.
    :rtype: dict
    """
    PLUGIN_CMD = 'getDrbdOpeners'

    plugin_args = {'resourceName': resource_name, 'volume': str(volume)}
    openers = {}

    # Make sure this call never stucks because this function can be called
    # during HA init and in this case we can wait forever.
    session = util.timeout_call(10, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            metrics = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )
            if not metrics['live']:
                # Ensure we call plugin on online hosts only.
                continue

            raw_openers = session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, plugin_args
            )
            openers[node_name] = json.loads(raw_openers)
        except Exception as e:
            util.SMlog('Failed to get openers of `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))

    return openers
115# ==============================================================================
def round_up(value, divisor):
    """Round *value* up to the nearest multiple of *divisor*."""
    assert divisor
    divisor = int(divisor)
    # Ceiling division via negated floor division, then scale back.
    return -(-int(value) // divisor) * divisor
def round_down(value, divisor):
    """Round *value* down to the nearest multiple of *divisor*."""
    assert divisor
    step = int(divisor)
    return (int(value) // step) * step
129# ==============================================================================
def get_remote_host_ip(node_name):
    """
    Find, using `drbdsetup show` on the database volume, the IP address
    DRBD uses to reach *node_name*. Returns None on any failure
    (best-effort helper).
    :param str node_name: The DRBD peer name to look up.
    :rtype: str or None
    """
    (ret, stdout, stderr) = util.doexec([
        'drbdsetup', 'show', DATABASE_VOLUME_NAME, '--json'
    ])
    if ret != 0:
        return

    try:
        conf = json.loads(stdout)
        if not conf:
            return

        for connection in conf[0]['connections']:
            if connection['net']['_name'] != node_name:
                continue
            # "_remote_host" looks like "<something> <addr>:<port>".
            match = REG_DRBDSETUP_IP.match(connection['path']['_remote_host'])
            if match:
                return match.groups()[0]
            break
    except Exception:
        # Best-effort: any parse problem is reported as "not found".
        pass
def _get_controller_uri():
    """
    Find the URI of the running LINSTOR controller, trying first the local
    DRBD status of the database volume, then the DRBD peer list, and
    finally by asking every pool host through the linstor-manager plugin.
    :return: 'linstor://<host>' if a controller was found, otherwise None.
    :rtype: str or None
    """
    PLUGIN_CMD = 'hasControllerRunning'

    # Try to find controller using drbdadm.
    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])
    if ret == 0:
        # If we are here, the database device exists locally.

        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            # Nice case, we have the controller running on this local host.
            return 'linstor://localhost'

        # Try to find the host using DRBD connections.
        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            node_name = res.groups()[0]
            ip = get_remote_host_ip(node_name)
            if ip:
                return 'linstor://' + ip

    # Worst case: we use many hosts in the pool (>= 4), so we can't find the
    # primary using drbdadm because we don't have all connections to the
    # replicated volume. `drbdadm status xcp-persistent-database` returns
    # 3 connections by default.
    try:
        session = util.timeout_call(10, util.get_localAPI_session)

        for host_ref, host_record in session.xenapi.host.get_all_records().items():
            node_name = host_record['hostname']
            try:
                if util.strtobool(
                    session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {})
                ):
                    return 'linstor://' + host_record['address']
            except Exception as e:
                # Can throw an exception if a host is offline. So catch it.
                util.SMlog('Unable to search controller on `{}`: {}'.format(
                    node_name, e
                ))
    except Exception:
        # Fix: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Only ordinary errors should be ignored here.
        # Not found, maybe we are trying to create the SR...
        pass
def get_controller_uri():
    """
    Find the LINSTOR controller URI, retrying up to 10 times with a
    one second pause between attempts.
    :return: The controller URI or None if not found after all retries.
    :rtype: str or None
    """
    MAX_ATTEMPTS = 10
    for attempt in range(MAX_ATTEMPTS):
        if attempt:
            # Pause before every retry, but not before the first attempt.
            time.sleep(1)
        uri = _get_controller_uri()
        if uri:
            return uri
def get_controller_node_name():
    """
    Find which node runs the LINSTOR controller.
    :return: 'localhost' if it runs here, otherwise the node name found
    via DRBD status or the linstor-manager plugin; None if not found.
    :rtype: str or None
    """
    PLUGIN_CMD = 'hasControllerRunning'

    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])

    if ret == 0:
        # The database volume exists locally: check who is Primary on it.
        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            return 'localhost'

        match = REG_DRBDADM_PRIMARY.search(stdout)
        if match:
            return match.groups()[0]

    # Fall back to asking every live host of the pool.
    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            metrics = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )
            if not metrics['live']:
                continue

            if util.strtobool(session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {}
            )):
                return node_name
        except Exception as e:
            util.SMlog('Failed to call plugin to get controller on `{}`: {}'.format(
                node_name, e
            ))
def demote_drbd_resource(node_name, resource_name):
    """
    Demote (make Secondary) a DRBD resource on a specific node using the
    linstor-manager plugin. A demotion failure on the node is only logged;
    not finding the node at all raises.
    :param str node_name: The host that must demote the resource.
    :param str resource_name: The DRBD resource to demote.
    :raises Exception: If no pool host matches *node_name*.
    """
    PLUGIN_CMD = 'demoteDrbdResource'

    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        if host_record['hostname'] != node_name:
            continue

        try:
            session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {'resource_name': resource_name}
            )
        except Exception as e:
            util.SMlog('Failed to demote resource `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))
        # Fix: stop once the target node has been handled. Without this
        # return, the "unable to find node" exception below was raised
        # even when the node was found.
        return

    raise Exception(
        'Can\'t demote resource `{}`, unable to find node `{}`'
        .format(resource_name, node_name)
    )
269# ==============================================================================
class LinstorVolumeManagerError(Exception):
    """
    Error raised by LinstorVolumeManager, carrying a numeric code so
    callers can distinguish failure causes.
    """

    # Fix: the original declarations ended with trailing commas
    # (`ERR_GENERIC = 0,`), which made these constants 1-tuples instead
    # of plain ints. Comparisons against the constants still worked, but
    # any serialization or direct int comparison of `code` did not.
    ERR_GENERIC = 0
    ERR_VOLUME_EXISTS = 1
    ERR_VOLUME_NOT_EXISTS = 2
    ERR_VOLUME_DESTROY = 3
    ERR_GROUP_NOT_EXISTS = 4

    def __init__(self, message, code=ERR_GENERIC):
        """
        :param str message: Human readable error description.
        :param int code: One of the ERR_* constants above.
        """
        super(LinstorVolumeManagerError, self).__init__(message)
        self._code = code

    @property
    def code(self):
        # Numeric error code given at construction time.
        return self._code
287# ==============================================================================
289# Note:
290# If a storage pool is not accessible after a network change:
291# linstor node interface modify <NODE> default --ip <IP>
class LinstorVolumeManager(object):
    """
    API to manage LINSTOR volumes in XCP-ng.
    A volume in this context is a physical part of the storage layer.
    """

    # Slots: LINSTOR client, logging, group names, volume/pool bookkeeping
    # and the caches (plus their dirty flags) used to limit request count
    # to the LINSTOR services.
    __slots__ = (
        '_linstor', '_logger', '_redundancy',
        '_base_group_name', '_group_name', '_ha_group_name',
        '_volumes', '_storage_pools', '_storage_pools_time',
        '_kv_cache', '_resource_cache', '_volume_info_cache',
        '_kv_cache_dirty', '_resource_cache_dirty', '_volume_info_cache_dirty'
    )
    # Root directory where DRBD devices are exposed.
    DEV_ROOT_PATH = DRBD_BY_RES_PATH

    # Default sector size.
    BLOCK_SIZE = 512

    # List of volume properties (keys in the LINSTOR KV store).
    PROP_METADATA = 'metadata'
    PROP_NOT_EXISTS = 'not-exists'
    PROP_VOLUME_NAME = 'volume-name'
    PROP_IS_READONLY_TIMESTAMP = 'readonly-timestamp'

    # A volume can only be locked for a limited duration.
    # The goal is to give enough time to slaves to execute some actions on
    # a device before an UUID update or a coalesce for example.
    # Expiration is expressed in seconds.
    LOCKED_EXPIRATION_DELAY = 1 * 60

    # Used when volume uuid is being updated.
    PROP_UPDATING_UUID_SRC = 'updating-uuid-src'

    # States of property PROP_NOT_EXISTS.
    STATE_EXISTS = '0'
    STATE_NOT_EXISTS = '1'
    STATE_CREATING = '2'

    # Property namespaces.
    NAMESPACE_SR = 'xcp/sr'
    NAMESPACE_VOLUME = 'xcp/volume'

    # Regex to match properties: "<namespace prefix>/<property name>".
    REG_PROP = '^([^/]+)/{}$'

    REG_METADATA = re.compile(REG_PROP.format(PROP_METADATA))
    REG_NOT_EXISTS = re.compile(REG_PROP.format(PROP_NOT_EXISTS))
    REG_VOLUME_NAME = re.compile(REG_PROP.format(PROP_VOLUME_NAME))
    REG_UPDATING_UUID_SRC = re.compile(REG_PROP.format(PROP_UPDATING_UUID_SRC))

    # Prefixes of SR/VOLUME in the LINSTOR DB.
    # A LINSTOR (resource, group, ...) name cannot start with a number.
    # So we add a prefix behind our SR/VOLUME uuids.
    PREFIX_SR = 'xcp-sr-'
    PREFIX_HA = 'xcp-ha-'
    PREFIX_VOLUME = 'xcp-volume-'

    # Limit request number when storage pool info is asked, we fetch
    # the current pool status after N elapsed seconds.
    STORAGE_POOLS_FETCH_INTERVAL = 15
    @staticmethod
    def default_logger(*args):
        """Fallback logger: print the raw argument tuple to stdout."""
        print(args)
360 # --------------------------------------------------------------------------
361 # API.
362 # --------------------------------------------------------------------------
364 class VolumeInfo(object):
365 __slots__ = (
366 'name',
367 'allocated_size', # Allocated size, place count is not used.
368 'virtual_size', # Total virtual available size of this volume
369 # (i.e. the user size at creation).
370 'diskful' # Array of nodes that have a diskful volume.
371 )
373 def __init__(self, name):
374 self.name = name
375 self.allocated_size = 0
376 self.virtual_size = 0
377 self.diskful = []
379 @override
380 def __repr__(self) -> str:
381 return 'VolumeInfo("{}", {}, {}, {})'.format(
382 self.name, self.allocated_size, self.virtual_size,
383 self.diskful
384 )
386 # --------------------------------------------------------------------------
    def __init__(
        self, uri, group_name, repair=False, logger=default_logger.__func__,
        attempt_count=30
    ):
        """
        Create a new LinstorVolumeManager object.
        :param str uri: URI to communicate with the LINSTOR controller.
        :param str group_name: The SR group name to use.
        :param bool repair: If true we try to remove bad volumes due to a crash
        or unexpected behavior.
        :param function logger: Function to log messages.
        :param int attempt_count: Number of attempts to join the controller.
        :raises LinstorVolumeManagerError: If the resource group is missing.
        """

        self._linstor = self._create_linstor_instance(
            uri, attempt_count=attempt_count
        )
        self._base_group_name = group_name

        # Ensure group exists.
        group_name = self._build_group_name(group_name)
        groups = self._linstor.resource_group_list_raise([group_name]).resource_groups
        if not groups:
            raise LinstorVolumeManagerError(
                'Unable to find `{}` Linstor SR'.format(group_name)
            )

        # Ok. ;)
        self._logger = logger
        # Redundancy is taken from the group's configured place count.
        self._redundancy = groups[0].select_filter.place_count
        self._group_name = group_name
        self._ha_group_name = self._build_ha_group_name(self._base_group_name)
        self._volumes = set()
        self._storage_pools_time = 0

        # To increase performance and limit request count to LINSTOR services,
        # we use caches.
        self._kv_cache = self._create_kv_cache()
        self._resource_cache = None
        self._resource_cache_dirty = True
        self._volume_info_cache = None
        self._volume_info_cache_dirty = True
        # Populate self._volumes (and optionally clean up bad ones).
        self._build_volumes(repair=repair)
432 @property
433 def group_name(self):
434 """
435 Give the used group name.
436 :return: The group name.
437 :rtype: str
438 """
439 return self._base_group_name
441 @property
442 def redundancy(self):
443 """
444 Give the used redundancy.
445 :return: The redundancy.
446 :rtype: int
447 """
448 return self._redundancy
450 @property
451 def volumes(self):
452 """
453 Give the volumes uuid set.
454 :return: The volumes uuid set.
455 :rtype: set(str)
456 """
457 return self._volumes
459 @property
460 def max_volume_size_allowed(self):
461 """
462 Give the max volume size currently available in B.
463 :return: The current size.
464 :rtype: int
465 """
467 candidates = self._find_best_size_candidates()
468 if not candidates:
469 raise LinstorVolumeManagerError(
470 'Failed to get max volume size allowed'
471 )
473 size = candidates[0].max_volume_size
474 if size < 0:
475 raise LinstorVolumeManagerError(
476 'Invalid max volume size allowed given: {}'.format(size)
477 )
478 return self.round_down_volume_size(size * 1024)
480 @property
481 def physical_size(self):
482 """
483 Give the total physical size of the SR.
484 :return: The physical size.
485 :rtype: int
486 """
487 return self._compute_size('total_capacity')
489 @property
490 def physical_free_size(self):
491 """
492 Give the total free physical size of the SR.
493 :return: The physical free size.
494 :rtype: int
495 """
496 return self._compute_size('free_capacity')
498 @property
499 def allocated_volume_size(self):
500 """
501 Give the allocated size for all volumes. The place count is not
502 used here. When thick lvm is used, the size for one volume should
503 be equal to the virtual volume size. With thin lvm, the size is equal
504 or lower to the volume size.
505 :return: The allocated size of all volumes.
506 :rtype: int
507 """
509 # Paths: /res_name/vol_number/size
510 sizes = {}
512 for resource in self._get_resource_cache().resources:
513 if resource.name not in sizes:
514 current = sizes[resource.name] = {}
515 else:
516 current = sizes[resource.name]
518 for volume in resource.volumes:
519 # We ignore diskless pools of the form "DfltDisklessStorPool".
520 if volume.storage_pool_name != self._group_name:
521 continue
523 current_size = volume.allocated_size
524 if current_size < 0:
525 raise LinstorVolumeManagerError(
526 'Failed to get allocated size of `{}` on `{}`'
527 .format(resource.name, volume.storage_pool_name)
528 )
529 current[volume.number] = max(current_size, current.get(volume.number) or 0)
531 total_size = 0
532 for volumes in sizes.values():
533 for size in volumes.values():
534 total_size += size
536 return total_size * 1024
538 def get_min_physical_size(self):
539 """
540 Give the minimum physical size of the SR.
541 I.e. the size of the smallest disk + the number of pools.
542 :return: The physical min size.
543 :rtype: tuple(int, int)
544 """
545 size = None
546 pool_count = 0
547 for pool in self._get_storage_pools(force=True):
548 space = pool.free_space
549 if space:
550 pool_count += 1
551 current_size = space.total_capacity
552 if current_size < 0:
553 raise LinstorVolumeManagerError(
554 'Failed to get pool total_capacity attr of `{}`'
555 .format(pool.node_name)
556 )
557 if size is None or current_size < size:
558 size = current_size
559 return (pool_count, (size or 0) * 1024)
561 @property
562 def metadata(self):
563 """
564 Get the metadata of the SR.
565 :return: Dictionary that contains metadata.
566 :rtype: dict(str, dict)
567 """
569 sr_properties = self._get_sr_properties()
570 metadata = sr_properties.get(self.PROP_METADATA)
571 if metadata is not None:
572 metadata = json.loads(metadata)
573 if isinstance(metadata, dict):
574 return metadata
575 raise LinstorVolumeManagerError(
576 'Expected dictionary in SR metadata: {}'.format(
577 self._group_name
578 )
579 )
581 return {}
583 @metadata.setter
584 def metadata(self, metadata):
585 """
586 Set the metadata of the SR.
587 :param dict metadata: Dictionary that contains metadata.
588 """
590 assert isinstance(metadata, dict)
591 sr_properties = self._get_sr_properties()
592 sr_properties[self.PROP_METADATA] = json.dumps(metadata)
594 @property
595 def disconnected_hosts(self):
596 """
597 Get the list of disconnected hosts.
598 :return: Set that contains disconnected hosts.
599 :rtype: set(str)
600 """
602 disconnected_hosts = set()
603 for pool in self._get_storage_pools():
604 for report in pool.reports:
605 if report.ret_code & linstor.consts.WARN_NOT_CONNECTED == \
606 linstor.consts.WARN_NOT_CONNECTED:
607 disconnected_hosts.add(pool.node_name)
608 break
609 return disconnected_hosts
611 def check_volume_exists(self, volume_uuid):
612 """
613 Check if a volume exists in the SR.
614 :return: True if volume exists.
615 :rtype: bool
616 """
617 return volume_uuid in self._volumes
    def create_volume(
        self,
        volume_uuid,
        size,
        persistent=True,
        volume_name=None,
        high_availability=False
    ):
        """
        Create a new volume on the SR.
        :param str volume_uuid: The volume uuid to use.
        :param int size: volume size in B.
        :param bool persistent: If false the volume will be unavailable
        on the next constructor call LinstorSR(...).
        :param str volume_name: If set, this name is used in the LINSTOR
        database instead of a generated name.
        :param bool high_availability: If set, the volume is created in
        the HA group.
        :return: The current device path of the volume.
        :rtype: str
        """

        self._logger('Creating LINSTOR volume {}...'.format(volume_uuid))
        if not volume_name:
            volume_name = self.build_volume_name(util.gen_uuid())
        volume_properties = self._create_volume_with_properties(
            volume_uuid,
            volume_name,
            size,
            True,  # place_resources
            high_availability
        )

        # Volume created! Now try to find the device path.
        try:
            self._logger(
                'Find device path of LINSTOR volume {}...'.format(volume_uuid)
            )
            device_path = self._find_device_path(volume_uuid, volume_name)
            if persistent:
                # Clearing "not exists" keeps the volume across restarts.
                volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
            self._volumes.add(volume_uuid)
            self._logger(
                'LINSTOR volume {} created!'.format(volume_uuid)
            )
            return device_path
        except Exception:
            # There is an issue to find the path.
            # At this point the volume has just been created, so force flag can be used.
            self._destroy_volume(volume_uuid, force=True)
            raise
671 def mark_volume_as_persistent(self, volume_uuid):
672 """
673 Mark volume as persistent if created with persistent=False.
674 :param str volume_uuid: The volume uuid to mark.
675 """
677 self._ensure_volume_exists(volume_uuid)
679 # Mark volume as persistent.
680 volume_properties = self._get_volume_properties(volume_uuid)
681 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
683 def destroy_volume(self, volume_uuid):
684 """
685 Destroy a volume.
686 :param str volume_uuid: The volume uuid to destroy.
687 """
689 self._ensure_volume_exists(volume_uuid)
690 self.ensure_volume_is_not_locked(volume_uuid)
692 # Mark volume as destroyed.
693 volume_properties = self._get_volume_properties(volume_uuid)
694 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS
696 try:
697 self._volumes.remove(volume_uuid)
698 self._destroy_volume(volume_uuid)
699 except Exception as e:
700 raise LinstorVolumeManagerError(
701 str(e),
702 LinstorVolumeManagerError.ERR_VOLUME_DESTROY
703 )
705 def lock_volume(self, volume_uuid, locked=True):
706 """
707 Prevent modifications of the volume properties during
708 "self.LOCKED_EXPIRATION_DELAY" seconds. The SR must be locked
709 when used. This method is useful to attach/detach correctly a volume on
710 a slave. Without it the GC can rename a volume, in this case the old
711 volume path can be used by a slave...
712 :param str volume_uuid: The volume uuid to protect/unprotect.
713 :param bool locked: Lock/unlock the volume.
714 """
716 self._ensure_volume_exists(volume_uuid)
718 self._logger(
719 '{} volume {} as locked'.format(
720 'Mark' if locked else 'Unmark',
721 volume_uuid
722 )
723 )
725 volume_properties = self._get_volume_properties(volume_uuid)
726 if locked:
727 volume_properties[
728 self.PROP_IS_READONLY_TIMESTAMP
729 ] = str(time.time())
730 elif self.PROP_IS_READONLY_TIMESTAMP in volume_properties:
731 volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
733 def ensure_volume_is_not_locked(self, volume_uuid, timeout=None):
734 """
735 Ensure a volume is not locked. Wait if necessary.
736 :param str volume_uuid: The volume uuid to check.
737 :param int timeout: If the volume is always locked after the expiration
738 of the timeout, an exception is thrown.
739 """
740 return self.ensure_volume_list_is_not_locked([volume_uuid], timeout)
    def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None):
        """
        Wait until none of the given volumes is locked (readonly timestamp
        unset or expired). Unknown uuids are ignored.
        :param list volume_uuids: Volume uuids to check.
        :param int timeout: Max seconds to wait; None waits forever.
        :raises LinstorVolumeManagerError: If still locked after *timeout*.
        """
        # Only consider volumes this SR actually knows.
        checked = set()
        for volume_uuid in volume_uuids:
            if volume_uuid in self._volumes:
                checked.add(volume_uuid)

        if not checked:
            return

        waiting = False

        volume_properties = self._get_kv_cache()

        start = time.time()
        while True:
            # Can't delete in for loop, use a copy of the list.
            remaining = checked.copy()
            for volume_uuid in checked:
                volume_properties.namespace = \
                    self._build_volume_namespace(volume_uuid)
                timestamp = volume_properties.get(
                    self.PROP_IS_READONLY_TIMESTAMP
                )
                if timestamp is None:
                    # Not locked at all.
                    remaining.remove(volume_uuid)
                    continue

                now = time.time()
                if now - float(timestamp) > self.LOCKED_EXPIRATION_DELAY:
                    # Lock expired: clear it and consider the volume free.
                    self._logger(
                        'Remove readonly timestamp on {}'.format(volume_uuid)
                    )
                    volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
                    remaining.remove(volume_uuid)
                    continue

                # Found a live lock: stop scanning and wait.
                if not waiting:
                    self._logger(
                        'Volume {} is locked, waiting...'.format(volume_uuid)
                    )
                    waiting = True
                break

            if not remaining:
                break
            checked = remaining

            # NOTE: `now` is necessarily bound here: reaching this point
            # implies at least one volume had a live timestamp above.
            if timeout is not None and now - start > timeout:
                raise LinstorVolumeManagerError(
                    'volume `{}` is locked and timeout has been reached'
                    .format(volume_uuid),
                    LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
                )

            # We must wait to use the volume. After that we can modify it
            # ONLY if the SR is locked to avoid bad reads on the slaves.
            time.sleep(1)
            # Refresh the KV view before the next scan.
            volume_properties = self._create_kv_cache()

        if waiting:
            self._logger('No volume locked now!')
804 def remove_volume_if_diskless(self, volume_uuid):
805 """
806 Remove disless path from local node.
807 :param str volume_uuid: The volume uuid to remove.
808 """
810 self._ensure_volume_exists(volume_uuid)
812 volume_properties = self._get_volume_properties(volume_uuid)
813 volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
815 node_name = socket.gethostname()
817 for resource in self._get_resource_cache().resources:
818 if resource.name == volume_name and resource.node_name == node_name:
819 if linstor.consts.FLAG_TIE_BREAKER in resource.flags:
820 return
821 break
823 result = self._linstor.resource_delete_if_diskless(
824 node_name=node_name, rsc_name=volume_name
825 )
826 if not linstor.Linstor.all_api_responses_no_error(result):
827 raise LinstorVolumeManagerError(
828 'Unable to delete diskless path of `{}` on node `{}`: {}'
829 .format(volume_name, node_name, ', '.join(
830 [str(x) for x in result]))
831 )
833 def introduce_volume(self, volume_uuid):
834 pass # TODO: Implement me.
    def resize_volume(self, volume_uuid, new_size):
        """
        Resize a volume.
        :param str volume_uuid: The volume uuid to resize.
        :param int new_size: New size in B.
        :raises LinstorVolumeManagerError: On unrecoverable modify failure.
        """

        volume_name = self.get_volume_name(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)
        # LINSTOR sizes are handled in KiB (see get_volume_size).
        new_size = self.round_up_volume_size(new_size) // 1024

        retry_count = 30
        while True:
            result = self._linstor.volume_dfn_modify(
                rsc_name=volume_name,
                volume_nr=0,
                size=new_size
            )

            # Resource state changed (or may have): invalidate the cache.
            self._mark_resource_cache_as_dirty()

            error_str = self._get_error_str(result)
            if not error_str:
                break

            # After volume creation, DRBD volume can be unusable during many seconds.
            # So we must retry the definition change if the device is not up to date.
            # Often the case for thick provisioning.
            if retry_count and error_str.find('non-UpToDate DRBD device') >= 0:
                time.sleep(2)
                retry_count -= 1
                continue

            raise LinstorVolumeManagerError(
                'Could not resize volume `{}` from SR `{}`: {}'
                .format(volume_uuid, self._group_name, error_str)
            )
874 def get_volume_name(self, volume_uuid):
875 """
876 Get the name of a particular volume.
877 :param str volume_uuid: The volume uuid of the name to get.
878 :return: The volume name.
879 :rtype: str
880 """
882 self._ensure_volume_exists(volume_uuid)
883 volume_properties = self._get_volume_properties(volume_uuid)
884 volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
885 if volume_name:
886 return volume_name
887 raise LinstorVolumeManagerError(
888 'Failed to get volume name of {}'.format(volume_uuid)
889 )
891 def get_volume_size(self, volume_uuid):
892 """
893 Get the size of a particular volume.
894 :param str volume_uuid: The volume uuid of the size to get.
895 :return: The volume size.
896 :rtype: int
897 """
899 volume_name = self.get_volume_name(volume_uuid)
900 dfns = self._linstor.resource_dfn_list_raise(
901 query_volume_definitions=True,
902 filter_by_resource_definitions=[volume_name]
903 ).resource_definitions
905 size = dfns[0].volume_definitions[0].size
906 if size < 0:
907 raise LinstorVolumeManagerError(
908 'Failed to get volume size of: {}'.format(volume_uuid)
909 )
910 return size * 1024
912 def set_auto_promote_timeout(self, volume_uuid, timeout):
913 """
914 Define the blocking time of open calls when a DRBD
915 is already open on another host.
916 :param str volume_uuid: The volume uuid to modify.
917 """
919 volume_name = self.get_volume_name(volume_uuid)
920 result = self._linstor.resource_dfn_modify(volume_name, {
921 'DrbdOptions/Resource/auto-promote-timeout': timeout
922 })
923 error_str = self._get_error_str(result)
924 if error_str:
925 raise LinstorVolumeManagerError(
926 'Could not change the auto promote timeout of `{}`: {}'
927 .format(volume_uuid, error_str)
928 )
930 def get_volume_info(self, volume_uuid):
931 """
932 Get the volume info of a particular volume.
933 :param str volume_uuid: The volume uuid of the volume info to get.
934 :return: The volume info.
935 :rtype: VolumeInfo
936 """
938 volume_name = self.get_volume_name(volume_uuid)
939 return self._get_volumes_info()[volume_name]
941 def get_device_path(self, volume_uuid):
942 """
943 Get the dev path of a volume, create a diskless if necessary.
944 :param str volume_uuid: The volume uuid to get the dev path.
945 :return: The current device path of the volume.
946 :rtype: str
947 """
949 volume_name = self.get_volume_name(volume_uuid)
950 return self._find_device_path(volume_uuid, volume_name)
952 def get_volume_uuid_from_device_path(self, device_path):
953 """
954 Get the volume uuid of a device_path.
955 :param str device_path: The dev path to find the volume uuid.
956 :return: The volume uuid of the local device path.
957 :rtype: str
958 """
960 expected_volume_name = \
961 self.get_volume_name_from_device_path(device_path)
963 volume_names = self.get_volumes_with_name()
964 for volume_uuid, volume_name in volume_names.items():
965 if volume_name == expected_volume_name:
966 return volume_uuid
968 raise LinstorVolumeManagerError(
969 'Unable to find volume uuid from dev path `{}`'.format(device_path)
970 )
972 def get_volume_name_from_device_path(self, device_path):
973 """
974 Get the volume name of a device_path.
975 :param str device_path: The dev path to find the volume name.
976 :return: The volume name of the device path.
977 :rtype: str
978 """
980 # Assume that we have a path like this:
981 # - "/dev/drbd/by-res/xcp-volume-<UUID>/0"
982 # - "../xcp-volume-<UUID>/0"
983 if device_path.startswith(DRBD_BY_RES_PATH):
984 prefix_len = len(DRBD_BY_RES_PATH)
985 else:
986 assert device_path.startswith('../')
987 prefix_len = 3
989 res_name_end = device_path.find('/', prefix_len)
990 assert res_name_end != -1
991 return device_path[prefix_len:res_name_end]
993 def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False):
994 """
995 Change the uuid of a volume.
996 :param str volume_uuid: The volume to modify.
997 :param str new_volume_uuid: The new volume uuid to use.
998 :param bool force: If true we doesn't check if volume_uuid is in the
999 volume list. I.e. the volume can be marked as deleted but the volume
1000 can still be in the LINSTOR KV store if the deletion has failed.
1001 In specific cases like "undo" after a failed clone we must rename a bad
1002 deleted VDI.
1003 """
1005 self._logger(
1006 'Trying to update volume UUID {} to {}...'
1007 .format(volume_uuid, new_volume_uuid)
1008 )
1009 assert volume_uuid != new_volume_uuid, 'can\'t update volume UUID, same value'
1011 if not force:
1012 self._ensure_volume_exists(volume_uuid)
1013 self.ensure_volume_is_not_locked(volume_uuid)
1015 if new_volume_uuid in self._volumes:
1016 raise LinstorVolumeManagerError(
1017 'Volume `{}` already exists'.format(new_volume_uuid),
1018 LinstorVolumeManagerError.ERR_VOLUME_EXISTS
1019 )
1021 volume_properties = self._get_volume_properties(volume_uuid)
1022 if volume_properties.get(self.PROP_UPDATING_UUID_SRC):
1023 raise LinstorVolumeManagerError(
1024 'Cannot update volume uuid {}: invalid state'
1025 .format(volume_uuid)
1026 )
1028 # 1. Copy in temp variables metadata and volume_name.
1029 metadata = volume_properties.get(self.PROP_METADATA)
1030 volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
1032 # 2. Switch to new volume namespace.
1033 volume_properties.namespace = self._build_volume_namespace(
1034 new_volume_uuid
1035 )
1037 if list(volume_properties.items()):
1038 raise LinstorVolumeManagerError(
1039 'Cannot update volume uuid {} to {}: '
1040 .format(volume_uuid, new_volume_uuid) +
1041 'this last one is not empty'
1042 )
1044 try:
1045 # 3. Mark new volume properties with PROP_UPDATING_UUID_SRC.
1046 # If we crash after that, the new properties can be removed
1047 # properly.
1048 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS
1049 volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid
1051 # 4. Copy the properties.
1052 # Note: On new volumes, during clone for example, the metadata
1053 # may be missing. So we must test it to avoid this error:
1054 # "None has to be a str/unicode, but is <type 'NoneType'>"
1055 if metadata:
1056 volume_properties[self.PROP_METADATA] = metadata
1057 volume_properties[self.PROP_VOLUME_NAME] = volume_name
1059 # 5. Ok!
1060 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
1061 except Exception as e:
1062 try:
1063 # Clear the new volume properties in case of failure.
1064 assert volume_properties.namespace == \
1065 self._build_volume_namespace(new_volume_uuid)
1066 volume_properties.clear()
1067 except Exception as e:
1068 self._logger(
1069 'Failed to clear new volume properties: {} (ignoring...)'
1070 .format(e)
1071 )
1072 raise LinstorVolumeManagerError(
1073 'Failed to copy volume properties: {}'.format(e)
1074 )
1076 try:
1077 # 6. After this point, it's ok we can remove the
1078 # PROP_UPDATING_UUID_SRC property and clear the src properties
1079 # without problems.
1081 # 7. Switch to old volume namespace.
1082 volume_properties.namespace = self._build_volume_namespace(
1083 volume_uuid
1084 )
1085 volume_properties.clear()
1087 # 8. Switch a last time to new volume namespace.
1088 volume_properties.namespace = self._build_volume_namespace(
1089 new_volume_uuid
1090 )
1091 volume_properties.pop(self.PROP_UPDATING_UUID_SRC)
1092 except Exception as e:
1093 raise LinstorVolumeManagerError(
1094 'Failed to clear volume properties '
1095 'after volume uuid update: {}'.format(e)
1096 )
1098 self._volumes.remove(volume_uuid)
1099 self._volumes.add(new_volume_uuid)
1101 self._logger(
1102 'UUID update succeeded of {} to {}! (properties={})'
1103 .format(
1104 volume_uuid, new_volume_uuid,
1105 self._get_filtered_properties(volume_properties)
1106 )
1107 )
1109 def update_volume_name(self, volume_uuid, volume_name):
1110 """
1111 Change the volume name of a volume.
1112 :param str volume_uuid: The volume to modify.
1113 :param str volume_name: The volume_name to use.
1114 """
1116 self._ensure_volume_exists(volume_uuid)
1117 self.ensure_volume_is_not_locked(volume_uuid)
1118 if not volume_name.startswith(self.PREFIX_VOLUME):
1119 raise LinstorVolumeManagerError(
1120 'Volume name `{}` must be start with `{}`'
1121 .format(volume_name, self.PREFIX_VOLUME)
1122 )
1124 if volume_name not in self._fetch_resource_names():
1125 raise LinstorVolumeManagerError(
1126 'Volume `{}` doesn\'t exist'.format(volume_name)
1127 )
1129 volume_properties = self._get_volume_properties(volume_uuid)
1130 volume_properties[self.PROP_VOLUME_NAME] = volume_name
1132 def get_usage_states(self, volume_uuid):
1133 """
1134 Check if a volume is currently used.
1135 :param str volume_uuid: The volume uuid to check.
1136 :return: A dictionnary that contains states.
1137 :rtype: dict(str, bool or None)
1138 """
1140 states = {}
1142 volume_name = self.get_volume_name(volume_uuid)
1143 for resource_state in self._linstor.resource_list_raise(
1144 filter_by_resources=[volume_name]
1145 ).resource_states:
1146 states[resource_state.node_name] = resource_state.in_use
1148 return states
1150 def get_volume_openers(self, volume_uuid):
1151 """
1152 Get openers of a volume.
1153 :param str volume_uuid: The volume uuid to monitor.
1154 :return: A dictionnary that contains openers.
1155 :rtype: dict(str, obj)
1156 """
1157 return get_all_volume_openers(self.get_volume_name(volume_uuid), '0')
1159 def get_volumes_with_name(self):
1160 """
1161 Give a volume dictionnary that contains names actually owned.
1162 :return: A volume/name dict.
1163 :rtype: dict(str, str)
1164 """
1165 return self._get_volumes_by_property(self.REG_VOLUME_NAME)
1167 def get_volumes_with_info(self):
1168 """
1169 Give a volume dictionnary that contains VolumeInfos.
1170 :return: A volume/VolumeInfo dict.
1171 :rtype: dict(str, VolumeInfo)
1172 """
1174 volumes = {}
1176 all_volume_info = self._get_volumes_info()
1177 volume_names = self.get_volumes_with_name()
1178 for volume_uuid, volume_name in volume_names.items():
1179 if volume_name:
1180 volume_info = all_volume_info.get(volume_name)
1181 if volume_info:
1182 volumes[volume_uuid] = volume_info
1183 continue
1185 # Well I suppose if this volume is not available,
1186 # LINSTOR has been used directly without using this API.
1187 volumes[volume_uuid] = self.VolumeInfo('')
1189 return volumes
1191 def get_volumes_with_metadata(self):
1192 """
1193 Give a volume dictionnary that contains metadata.
1194 :return: A volume/metadata dict.
1195 :rtype: dict(str, dict)
1196 """
1198 volumes = {}
1200 metadata = self._get_volumes_by_property(self.REG_METADATA)
1201 for volume_uuid, volume_metadata in metadata.items():
1202 if volume_metadata:
1203 volume_metadata = json.loads(volume_metadata)
1204 if isinstance(volume_metadata, dict):
1205 volumes[volume_uuid] = volume_metadata
1206 continue
1207 raise LinstorVolumeManagerError(
1208 'Expected dictionary in volume metadata: {}'
1209 .format(volume_uuid)
1210 )
1212 volumes[volume_uuid] = {}
1214 return volumes
1216 def get_volume_metadata(self, volume_uuid):
1217 """
1218 Get the metadata of a volume.
1219 :return: Dictionary that contains metadata.
1220 :rtype: dict
1221 """
1223 self._ensure_volume_exists(volume_uuid)
1224 volume_properties = self._get_volume_properties(volume_uuid)
1225 metadata = volume_properties.get(self.PROP_METADATA)
1226 if metadata:
1227 metadata = json.loads(metadata)
1228 if isinstance(metadata, dict):
1229 return metadata
1230 raise LinstorVolumeManagerError(
1231 'Expected dictionary in volume metadata: {}'
1232 .format(volume_uuid)
1233 )
1234 return {}
1236 def set_volume_metadata(self, volume_uuid, metadata):
1237 """
1238 Set the metadata of a volume.
1239 :param dict metadata: Dictionary that contains metadata.
1240 """
1242 self._ensure_volume_exists(volume_uuid)
1243 self.ensure_volume_is_not_locked(volume_uuid)
1245 assert isinstance(metadata, dict)
1246 volume_properties = self._get_volume_properties(volume_uuid)
1247 volume_properties[self.PROP_METADATA] = json.dumps(metadata)
1249 def update_volume_metadata(self, volume_uuid, metadata):
1250 """
1251 Update the metadata of a volume. It modify only the given keys.
1252 It doesn't remove unreferenced key instead of set_volume_metadata.
1253 :param dict metadata: Dictionary that contains metadata.
1254 """
1256 self._ensure_volume_exists(volume_uuid)
1257 self.ensure_volume_is_not_locked(volume_uuid)
1259 assert isinstance(metadata, dict)
1260 volume_properties = self._get_volume_properties(volume_uuid)
1262 current_metadata = json.loads(
1263 volume_properties.get(self.PROP_METADATA, '{}')
1264 )
1265 if not isinstance(metadata, dict):
1266 raise LinstorVolumeManagerError(
1267 'Expected dictionary in volume metadata: {}'
1268 .format(volume_uuid)
1269 )
1271 for key, value in metadata.items():
1272 current_metadata[key] = value
1273 volume_properties[self.PROP_METADATA] = json.dumps(current_metadata)
1275 def shallow_clone_volume(self, volume_uuid, clone_uuid, persistent=True):
1276 """
1277 Clone a volume. Do not copy the data, this method creates a new volume
1278 with the same size.
1279 :param str volume_uuid: The volume to clone.
1280 :param str clone_uuid: The cloned volume.
1281 :param bool persistent: If false the volume will be unavailable
1282 on the next constructor call LinstorSR(...).
1283 :return: The current device path of the cloned volume.
1284 :rtype: str
1285 """
1287 volume_name = self.get_volume_name(volume_uuid)
1288 self.ensure_volume_is_not_locked(volume_uuid)
1290 # 1. Find ideal nodes + size to use.
1291 ideal_node_names, size = self._get_volume_node_names_and_size(
1292 volume_name
1293 )
1294 if size <= 0:
1295 raise LinstorVolumeManagerError(
1296 'Invalid size of {} for volume `{}`'.format(size, volume_name)
1297 )
1299 # 2. Create clone!
1300 return self.create_volume(clone_uuid, size, persistent)
1302 def remove_resourceless_volumes(self):
1303 """
1304 Remove all volumes without valid or non-empty name
1305 (i.e. without LINSTOR resource). It's different than
1306 LinstorVolumeManager constructor that takes a `repair` param that
1307 removes volumes with `PROP_NOT_EXISTS` to 1.
1308 """
1310 resource_names = self._fetch_resource_names()
1311 for volume_uuid, volume_name in self.get_volumes_with_name().items():
1312 if not volume_name or volume_name not in resource_names:
1313 # Don't force, we can be sure of what's happening.
1314 self.destroy_volume(volume_uuid)
    def destroy(self):
        """
        Destroy this SR. Object should not be used after that.
        All managed volumes must already have been destroyed, otherwise
        a LinstorVolumeManagerError is raised and nothing is touched.
        :raise LinstorVolumeManagerError: If volumes (managed or external)
        still exist in this pool.
        """

        # 1. Ensure volume list is empty. No cost.
        if self._volumes:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes'
            )

        # 2. Fetch ALL resource names.
        # This list may therefore contain volumes created outside
        # the scope of the driver.
        resource_names = self._fetch_resource_names(ignore_deleted=False)
        try:
            resource_names.remove(DATABASE_VOLUME_NAME)
        except KeyError:
            # Really strange to reach that point.
            # Normally we always have the database volume in the list.
            pass

        # 3. Ensure the resource name list is entirely empty...
        if resource_names:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes (created externally or being deleted)'
            )

        # 4. Destroying...
        # Remember the controller state so it can be restored on failure.
        controller_is_running = self._controller_is_running()
        uri = 'linstor://localhost'
        try:
            if controller_is_running:
                self._start_controller(start=False)

            # 4.1. Umount LINSTOR database.
            self._mount_database_volume(
                self.build_device_path(DATABASE_VOLUME_NAME),
                mount=False,
                force=True
            )

            # 4.2. Refresh instance (controller must be up to talk to it).
            self._start_controller(start=True)
            self._linstor = self._create_linstor_instance(
                uri, keep_uri_unmodified=True
            )

            # 4.3. Destroy database volume.
            self._destroy_resource(DATABASE_VOLUME_NAME)

            # 4.4. Refresh linstor connection.
            # Without we get this error:
            # "Cannot delete resource group 'xcp-sr-linstor_group_thin_device' because it has existing resource definitions.."
            # Because the deletion of the databse was not seen by Linstor for some reason.
            # It seems a simple refresh of the Linstor connection make it aware of the deletion.
            self._linstor.disconnect()
            self._linstor.connect()

            # 4.5. Destroy remaining drbd nodes on hosts.
            # We check if there is a DRBD node on hosts that could mean blocking when destroying resource groups.
            # It needs to be done locally by each host so we go through the linstor-manager plugin.
            # If we don't do this sometimes, the destroy will fail when trying to destroy the resource groups with:
            # "linstor-manager:destroy error: Failed to destroy SP `xcp-sr-linstor_group_thin_device` on node `r620-s2`: The specified storage pool 'xcp-sr-linstor_group_thin_device' on node 'r620-s2' can not be deleted as volumes / snapshot-volumes are still using it."
            session = util.timeout_call(5, util.get_localAPI_session)
            for host_ref in session.xenapi.host.get_all():
                try:
                    # Best effort: a failure on one host is logged but does
                    # not abort the whole destruction.
                    response = session.xenapi.host.call_plugin(
                        host_ref, 'linstor-manager', 'destroyDrbdVolumes', {'volume_group': self._group_name}
                    )
                except Exception as e:
                    util.SMlog('Calling destroyDrbdVolumes on host {} failed with error {}'.format(host_ref, e))

            # 4.6. Destroy group and storage pools.
            self._destroy_resource_group(self._linstor, self._group_name)
            self._destroy_resource_group(self._linstor, self._ha_group_name)
            for pool in self._get_storage_pools(force=True):
                self._destroy_storage_pool(
                    self._linstor, pool.name, pool.node_name
                )
        except Exception as e:
            # Restore the controller to its initial state before bailing out.
            self._start_controller(start=controller_is_running)
            raise e

        # Final cleanup of the local database directory; failures here are
        # only logged since the SR itself is already gone.
        try:
            self._start_controller(start=False)
            for file in os.listdir(DATABASE_PATH):
                if file != 'lost+found':
                    os.remove(DATABASE_PATH + '/' + file)
        except Exception as e:
            util.SMlog(
                'Ignoring failure after LINSTOR SR destruction: {}'
                .format(e)
            )
1414 def find_up_to_date_diskful_nodes(self, volume_uuid):
1415 """
1416 Find all nodes that contain a specific volume using diskful disks.
1417 The disk must be up to data to be used.
1418 :param str volume_uuid: The volume to use.
1419 :return: The available nodes.
1420 :rtype: tuple(set(str), str)
1421 """
1423 volume_name = self.get_volume_name(volume_uuid)
1425 in_use_by = None
1426 node_names = set()
1428 resource_states = filter(
1429 lambda resource_state: resource_state.name == volume_name,
1430 self._get_resource_cache().resource_states
1431 )
1433 for resource_state in resource_states:
1434 volume_state = resource_state.volume_states[0]
1435 if volume_state.disk_state == 'UpToDate':
1436 node_names.add(resource_state.node_name)
1437 if resource_state.in_use:
1438 in_use_by = resource_state.node_name
1440 return (node_names, in_use_by)
1442 def invalidate_resource_cache(self):
1443 """
1444 If resources are impacted by external commands like vhdutil,
1445 it's necessary to call this function to invalidate current resource
1446 cache.
1447 """
1448 self._mark_resource_cache_as_dirty()
1450 def has_node(self, node_name):
1451 """
1452 Check if a node exists in the LINSTOR database.
1453 :rtype: bool
1454 """
1455 result = self._linstor.node_list()
1456 error_str = self._get_error_str(result)
1457 if error_str:
1458 raise LinstorVolumeManagerError(
1459 'Failed to list nodes using `{}`: {}'
1460 .format(node_name, error_str)
1461 )
1462 return bool(result[0].node(node_name))
1464 def create_node(self, node_name, ip):
1465 """
1466 Create a new node in the LINSTOR database.
1467 :param str node_name: Node name to use.
1468 :param str ip: Host IP to communicate.
1469 """
1470 result = self._linstor.node_create(
1471 node_name,
1472 linstor.consts.VAL_NODE_TYPE_CMBD,
1473 ip
1474 )
1475 errors = self._filter_errors(result)
1476 if errors:
1477 error_str = self._get_error_str(errors)
1478 raise LinstorVolumeManagerError(
1479 'Failed to create node `{}`: {}'.format(node_name, error_str)
1480 )
1482 def destroy_node(self, node_name):
1483 """
1484 Destroy a node in the LINSTOR database.
1485 :param str node_name: Node name to remove.
1486 """
1487 result = self._linstor.node_delete(node_name)
1488 errors = self._filter_errors(result)
1489 if errors:
1490 error_str = self._get_error_str(errors)
1491 raise LinstorVolumeManagerError(
1492 'Failed to destroy node `{}`: {}'.format(node_name, error_str)
1493 )
1495 def create_node_interface(self, node_name, name, ip):
1496 """
1497 Create a new node interface in the LINSTOR database.
1498 :param str node_name: Node name of the interface to use.
1499 :param str name: Interface to create.
1500 :param str ip: IP of the interface.
1501 """
1502 result = self._linstor.netinterface_create(node_name, name, ip)
1503 errors = self._filter_errors(result)
1504 if errors:
1505 error_str = self._get_error_str(errors)
1506 raise LinstorVolumeManagerError(
1507 'Failed to create node interface on `{}`: {}'.format(node_name, error_str)
1508 )
1510 def destroy_node_interface(self, node_name, name):
1511 """
1512 Destroy a node interface in the LINSTOR database.
1513 :param str node_name: Node name of the interface to remove.
1514 :param str name: Interface to remove.
1515 """
1517 if name == 'default':
1518 raise LinstorVolumeManagerError(
1519 'Unable to delete the default interface of a node!'
1520 )
1522 result = self._linstor.netinterface_delete(node_name, name)
1523 errors = self._filter_errors(result)
1524 if errors:
1525 error_str = self._get_error_str(errors)
1526 raise LinstorVolumeManagerError(
1527 'Failed to destroy node interface on `{}`: {}'.format(node_name, error_str)
1528 )
1530 def modify_node_interface(self, node_name, name, ip):
1531 """
1532 Modify a node interface in the LINSTOR database. Create it if necessary.
1533 :param str node_name: Node name of the interface to use.
1534 :param str name: Interface to modify or create.
1535 :param str ip: IP of the interface.
1536 """
1537 result = self._linstor.netinterface_create(node_name, name, ip)
1538 errors = self._filter_errors(result)
1539 if not errors:
1540 return
1542 if self._check_errors(errors, [linstor.consts.FAIL_EXISTS_NET_IF]):
1543 result = self._linstor.netinterface_modify(node_name, name, ip)
1544 errors = self._filter_errors(result)
1545 if not errors:
1546 return
1548 error_str = self._get_error_str(errors)
1549 raise LinstorVolumeManagerError(
1550 'Unable to modify interface on `{}`: {}'.format(node_name, error_str)
1551 )
1553 def list_node_interfaces(self, node_name):
1554 """
1555 List all node interfaces.
1556 :param str node_name: Node name to use to list interfaces.
1557 :rtype: list
1558 :
1559 """
1560 result = self._linstor.net_interface_list(node_name)
1561 if not result:
1562 raise LinstorVolumeManagerError(
1563 'Unable to list interfaces on `{}`: no list received'.format(node_name)
1564 )
1566 interfaces = {}
1567 for interface in result:
1568 interface = interface._rest_data
1569 interfaces[interface['name']] = {
1570 'address': interface['address'],
1571 'active': interface['is_active']
1572 }
1573 return interfaces
1575 def get_node_preferred_interface(self, node_name):
1576 """
1577 Get the preferred interface used by a node.
1578 :param str node_name: Node name of the interface to get.
1579 :rtype: str
1580 """
1581 try:
1582 nodes = self._linstor.node_list_raise([node_name]).nodes
1583 if nodes:
1584 properties = nodes[0].props
1585 return properties.get('PrefNic', 'default')
1586 return nodes
1587 except Exception as e:
1588 raise LinstorVolumeManagerError(
1589 'Failed to get preferred interface: `{}`'.format(e)
1590 )
1592 def set_node_preferred_interface(self, node_name, name):
1593 """
1594 Set the preferred interface to use on a node.
1595 :param str node_name: Node name of the interface.
1596 :param str name: Preferred interface to use.
1597 """
1598 result = self._linstor.node_modify(node_name, property_dict={'PrefNic': name})
1599 errors = self._filter_errors(result)
1600 if errors:
1601 error_str = self._get_error_str(errors)
1602 raise LinstorVolumeManagerError(
1603 'Failed to set preferred node interface on `{}`: {}'.format(node_name, error_str)
1604 )
1606 def get_nodes_info(self):
1607 """
1608 Get all nodes + statuses, used or not by the pool.
1609 :rtype: dict(str, dict)
1610 """
1611 try:
1612 nodes = {}
1613 for node in self._linstor.node_list_raise().nodes:
1614 nodes[node.name] = node.connection_status
1615 return nodes
1616 except Exception as e:
1617 raise LinstorVolumeManagerError(
1618 'Failed to get all nodes: `{}`'.format(e)
1619 )
1621 def get_storage_pools_info(self):
1622 """
1623 Give all storage pools of current group name.
1624 :rtype: dict(str, list)
1625 """
1626 storage_pools = {}
1627 for pool in self._get_storage_pools(force=True):
1628 if pool.node_name not in storage_pools:
1629 storage_pools[pool.node_name] = []
1631 size = -1
1632 capacity = -1
1634 space = pool.free_space
1635 if space:
1636 size = space.free_capacity
1637 if size < 0:
1638 size = -1
1639 else:
1640 size *= 1024
1641 capacity = space.total_capacity
1642 if capacity <= 0:
1643 capacity = -1
1644 else:
1645 capacity *= 1024
1647 storage_pools[pool.node_name].append({
1648 'name': pool.name,
1649 'linstor-uuid': pool.uuid,
1650 'free-size': size,
1651 'capacity': capacity
1652 })
1654 return storage_pools
1656 def get_resources_info(self):
1657 """
1658 Give all resources of current group name.
1659 :rtype: dict(str, list)
1660 """
1661 resources = {}
1662 resource_list = self._get_resource_cache()
1663 volume_names = self.get_volumes_with_name()
1664 for resource in resource_list.resources:
1665 if resource.name not in resources:
1666 resources[resource.name] = { 'nodes': {}, 'uuid': '' }
1667 resource_nodes = resources[resource.name]['nodes']
1669 resource_nodes[resource.node_name] = {
1670 'volumes': [],
1671 'diskful': linstor.consts.FLAG_DISKLESS not in resource.flags,
1672 'tie-breaker': linstor.consts.FLAG_TIE_BREAKER in resource.flags
1673 }
1674 resource_volumes = resource_nodes[resource.node_name]['volumes']
1676 for volume in resource.volumes:
1677 # We ignore diskless pools of the form "DfltDisklessStorPool".
1678 if volume.storage_pool_name != self._group_name:
1679 continue
1681 usable_size = volume.usable_size
1682 if usable_size < 0:
1683 usable_size = -1
1684 else:
1685 usable_size *= 1024
1687 allocated_size = volume.allocated_size
1688 if allocated_size < 0:
1689 allocated_size = -1
1690 else:
1691 allocated_size *= 1024
1693 resource_volumes.append({
1694 'storage-pool-name': volume.storage_pool_name,
1695 'linstor-uuid': volume.uuid,
1696 'number': volume.number,
1697 'device-path': volume.device_path,
1698 'usable-size': usable_size,
1699 'allocated-size': allocated_size
1700 })
1702 for resource_state in resource_list.resource_states:
1703 resource = resources[resource_state.rsc_name]['nodes'][resource_state.node_name]
1704 resource['in-use'] = resource_state.in_use
1706 volumes = resource['volumes']
1707 for volume_state in resource_state.volume_states:
1708 volume = next((x for x in volumes if x['number'] == volume_state.number), None)
1709 if volume:
1710 volume['disk-state'] = volume_state.disk_state
1712 for volume_uuid, volume_name in volume_names.items():
1713 resource = resources.get(volume_name)
1714 if resource:
1715 resource['uuid'] = volume_uuid
1717 return resources
1719 def get_database_path(self):
1720 """
1721 Get the database path.
1722 :return: The current database path.
1723 :rtype: str
1724 """
1725 return self._request_database_path(self._linstor)
1727 @classmethod
1728 def get_all_group_names(cls, base_name):
1729 """
1730 Get all group names. I.e. list of current group + HA.
1731 :param str base_name: The SR group_name to use.
1732 :return: List of group names.
1733 :rtype: list
1734 """
1735 return [cls._build_group_name(base_name), cls._build_ha_group_name(base_name)]
1737 @classmethod
1738 def create_sr(
1739 cls, group_name, ips, redundancy,
1740 thin_provisioning, auto_quorum,
1741 logger=default_logger.__func__
1742 ):
1743 """
1744 Create a new SR on the given nodes.
1745 :param str group_name: The SR group_name to use.
1746 :param set(str) ips: Node ips.
1747 :param int redundancy: How many copy of volumes should we store?
1748 :param bool thin_provisioning: Use thin or thick provisioning.
1749 :param bool auto_quorum: DB quorum is monitored by LINSTOR.
1750 :param function logger: Function to log messages.
1751 :return: A new LinstorSr instance.
1752 :rtype: LinstorSr
1753 """
1755 try:
1756 cls._start_controller(start=True)
1757 sr = cls._create_sr(
1758 group_name,
1759 ips,
1760 redundancy,
1761 thin_provisioning,
1762 auto_quorum,
1763 logger
1764 )
1765 finally:
1766 # Controller must be stopped and volume unmounted because
1767 # it is the role of the drbd-reactor daemon to do the right
1768 # actions.
1769 cls._start_controller(start=False)
1770 cls._mount_volume(
1771 cls.build_device_path(DATABASE_VOLUME_NAME),
1772 DATABASE_PATH,
1773 mount=False
1774 )
1775 return sr
    @classmethod
    def _create_sr(
        cls, group_name, ips, redundancy,
        thin_provisioning, auto_quorum,
        logger=default_logger.__func__
    ):
        """
        Do the real SR creation work; see create_sr for the public wrapper
        that handles controller start/stop around this call.
        :param str group_name: The SR group_name to use
        (for thin provisioning it must have the 'VG/LV' form).
        :param dict ips: node_name -> IP mapping of the pool members.
        :param int redundancy: How many copy of volumes should we store?
        :param bool thin_provisioning: Use thin or thick provisioning.
        :param bool auto_quorum: DB quorum is monitored by LINSTOR.
        :param function logger: Function to log messages.
        :return: A new instance built without calling __init__.
        :rtype: LinstorVolumeManager
        """
        # 1. Check if SR already exists.
        uri = 'linstor://localhost'

        lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True)

        node_names = list(ips.keys())
        for node_name, ip in ips.items():
            # Loop until the node is created: at most two iterations
            # (a pre-existing node is deleted, then recreated).
            while True:
                # Try to create node.
                result = lin.node_create(
                    node_name,
                    linstor.consts.VAL_NODE_TYPE_CMBD,
                    ip
                )

                errors = cls._filter_errors(result)
                if cls._check_errors(
                    errors, [linstor.consts.FAIL_EXISTS_NODE]
                ):
                    # If it already exists, remove, then recreate.
                    result = lin.node_delete(node_name)
                    error_str = cls._get_error_str(result)
                    if error_str:
                        raise LinstorVolumeManagerError(
                            'Failed to remove old node `{}`: {}'
                            .format(node_name, error_str)
                        )
                elif not errors:
                    break  # Created!
                else:
                    raise LinstorVolumeManagerError(
                        'Failed to create node `{}` with ip `{}`: {}'.format(
                            node_name, ip, cls._get_error_str(errors)
                        )
                    )

        # The user-supplied name is both the LVM driver pool name and
        # the base used to derive the LINSTOR group/storage-pool names.
        driver_pool_name = group_name
        base_group_name = group_name
        group_name = cls._build_group_name(group_name)
        storage_pool_name = group_name
        pools = lin.storage_pool_list_raise(filter_by_stor_pools=[storage_pool_name]).storage_pools
        if pools:
            existing_node_names = [pool.node_name for pool in pools]
            raise LinstorVolumeManagerError(
                'Unable to create SR `{}`. It already exists on node(s): {}'
                .format(group_name, existing_node_names)
            )

        if lin.resource_group_list_raise(
            cls.get_all_group_names(base_group_name)
        ).resource_groups:
            # A leftover group without any resource definition is treated
            # as stale config: back it up and continue.
            if not lin.resource_dfn_list_raise().resource_definitions:
                backup_path = cls._create_database_backup_path()
                logger(
                    'Group name already exists `{}` without LVs. '
                    'Ignoring and moving the config files in {}'.format(group_name, backup_path)
                )
                cls._move_files(DATABASE_PATH, backup_path)
            else:
                raise LinstorVolumeManagerError(
                    'Unable to create SR `{}`: The group name already exists'
                    .format(group_name)
                )

        if thin_provisioning:
            driver_pool_parts = driver_pool_name.split('/')
            if not len(driver_pool_parts) == 2:
                raise LinstorVolumeManagerError(
                    'Invalid group name using thin provisioning. '
                    'Expected format: \'VG/LV`\''
                )

        # 2. Create storage pool on each node + resource group.
        reg_volume_group_not_found = re.compile(
            ".*Volume group '.*' not found$"
        )

        # `i` tracks the index of the last node attempted so the rollback
        # below knows which storage pools may need cleanup.
        i = 0
        try:
            # 2.a. Create storage pools.
            storage_pool_count = 0
            while i < len(node_names):
                node_name = node_names[i]

                result = lin.storage_pool_create(
                    node_name=node_name,
                    storage_pool_name=storage_pool_name,
                    storage_driver='LVM_THIN' if thin_provisioning else 'LVM',
                    driver_pool_name=driver_pool_name
                )

                errors = linstor.Linstor.filter_api_call_response_errors(
                    result
                )
                if errors:
                    # A missing volume group on one node is tolerated:
                    # that node simply does not take part in the pool.
                    if len(errors) == 1 and errors[0].is_error(
                        linstor.consts.FAIL_STOR_POOL_CONFIGURATION_ERROR
                    ) and reg_volume_group_not_found.match(errors[0].message):
                        logger(
                            'Volume group `{}` not found on `{}`. Ignoring...'
                            .format(group_name, node_name)
                        )
                        cls._destroy_storage_pool(lin, storage_pool_name, node_name)
                    else:
                        error_str = cls._get_error_str(result)
                        raise LinstorVolumeManagerError(
                            'Could not create SP `{}` on node `{}`: {}'
                            .format(group_name, node_name, error_str)
                        )
                else:
                    storage_pool_count += 1
                i += 1

            if not storage_pool_count:
                raise LinstorVolumeManagerError(
                    'Unable to create SR `{}`: No VG group found'.format(
                        group_name,
                    )
                )

            # 2.b. Create resource groups.
            # The HA group uses a fixed redundancy of 3.
            ha_group_name = cls._build_ha_group_name(base_group_name)
            cls._create_resource_group(
                lin,
                group_name,
                storage_pool_name,
                redundancy,
                True
            )
            cls._create_resource_group(
                lin,
                ha_group_name,
                storage_pool_name,
                3,
                True
            )

            # 3. Create the LINSTOR database volume and mount it.
            try:
                logger('Creating database volume...')
                volume_path = cls._create_database_volume(
                    lin, ha_group_name, storage_pool_name, node_names, redundancy, auto_quorum
                )
            except LinstorVolumeManagerError as e:
                if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                    logger('Destroying database volume after creation fail...')
                    cls._force_destroy_database_volume(lin, group_name)
                raise

            try:
                logger('Mounting database volume...')

                # First we must disable the controller to move safely the
                # LINSTOR config.
                cls._start_controller(start=False)

                cls._mount_database_volume(volume_path)
            except Exception as e:
                # Ensure we are connected because controller has been
                # restarted during mount call.
                logger('Destroying database volume after mount fail...')

                try:
                    cls._start_controller(start=True)
                except Exception:
                    pass

                lin = cls._create_linstor_instance(
                    uri, keep_uri_unmodified=True
                )
                cls._force_destroy_database_volume(lin, group_name)
                raise e

            cls._start_controller(start=True)
            lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True)

        # 4. Remove storage pools/resource/volume group in the case of errors.
        except Exception as e:
            logger('Destroying resource group and storage pools after fail...')
            try:
                cls._destroy_resource_group(lin, group_name)
                cls._destroy_resource_group(lin, ha_group_name)
            except Exception as e2:
                logger('Failed to destroy resource group: {}'.format(e2))
                pass
            # Best-effort cleanup of every storage pool attempted so far.
            j = 0
            i = min(i, len(node_names) - 1)
            while j <= i:
                try:
                    cls._destroy_storage_pool(lin, storage_pool_name, node_names[j])
                except Exception as e2:
                    logger('Failed to destroy resource group: {}'.format(e2))
                    pass
                j += 1
            raise e

        # 5. Return new instance.
        # __new__ is used on purpose: __init__ would try to reconnect.
        instance = cls.__new__(cls)
        instance._linstor = lin
        instance._logger = logger
        instance._redundancy = redundancy
        instance._base_group_name = base_group_name
        instance._group_name = group_name
        instance._volumes = set()
        instance._storage_pools_time = 0
        instance._kv_cache = instance._create_kv_cache()
        instance._resource_cache = None
        instance._resource_cache_dirty = True
        instance._volume_info_cache = None
        instance._volume_info_cache_dirty = True
        return instance
1995 @classmethod
1996 def build_device_path(cls, volume_name):
1997 """
1998 Build a device path given a volume name.
1999 :param str volume_name: The volume name to use.
2000 :return: A valid or not device path.
2001 :rtype: str
2002 """
2004 return '{}{}/0'.format(cls.DEV_ROOT_PATH, volume_name)
2006 @classmethod
2007 def build_volume_name(cls, base_name):
2008 """
2009 Build a volume name given a base name (i.e. a UUID).
2010 :param str base_name: The volume name to use.
2011 :return: A valid or not device path.
2012 :rtype: str
2013 """
2014 return '{}{}'.format(cls.PREFIX_VOLUME, base_name)
2016 @classmethod
2017 def round_up_volume_size(cls, volume_size):
2018 """
2019 Align volume size on higher multiple of BLOCK_SIZE.
2020 :param int volume_size: The volume size to align.
2021 :return: An aligned volume size.
2022 :rtype: int
2023 """
2024 return round_up(volume_size, cls.BLOCK_SIZE)
2026 @classmethod
2027 def round_down_volume_size(cls, volume_size):
2028 """
2029 Align volume size on lower multiple of BLOCK_SIZE.
2030 :param int volume_size: The volume size to align.
2031 :return: An aligned volume size.
2032 :rtype: int
2033 """
2034 return round_down(volume_size, cls.BLOCK_SIZE)
2036 # --------------------------------------------------------------------------
2037 # Private helpers.
2038 # --------------------------------------------------------------------------
2040 def _create_kv_cache(self):
2041 self._kv_cache = self._create_linstor_kv('/')
2042 self._kv_cache_dirty = False
2043 return self._kv_cache
2045 def _get_kv_cache(self):
2046 if self._kv_cache_dirty:
2047 self._kv_cache = self._create_kv_cache()
2048 return self._kv_cache
2050 def _create_resource_cache(self):
2051 self._resource_cache = self._linstor.resource_list_raise()
2052 self._resource_cache_dirty = False
2053 return self._resource_cache
2055 def _get_resource_cache(self):
2056 if self._resource_cache_dirty:
2057 self._resource_cache = self._create_resource_cache()
2058 return self._resource_cache
2060 def _mark_resource_cache_as_dirty(self):
2061 self._resource_cache_dirty = True
2062 self._volume_info_cache_dirty = True
2064 # --------------------------------------------------------------------------
2066 def _ensure_volume_exists(self, volume_uuid):
2067 if volume_uuid not in self._volumes:
2068 raise LinstorVolumeManagerError(
2069 'volume `{}` doesn\'t exist'.format(volume_uuid),
2070 LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
2071 )
2073 def _find_best_size_candidates(self):
2074 result = self._linstor.resource_group_qmvs(self._group_name)
2075 error_str = self._get_error_str(result)
2076 if error_str:
2077 raise LinstorVolumeManagerError(
2078 'Failed to get max volume size allowed of SR `{}`: {}'.format(
2079 self._group_name,
2080 error_str
2081 )
2082 )
2083 return result[0].candidates
    def _fetch_resource_names(self, ignore_deleted=True):
        """
        Fetch the names of the LINSTOR resource definitions belonging to one
        of this SR's resource groups.
        :param bool ignore_deleted: When True, the DELETE flag is not
            checked, so definitions pending deletion are also returned.
        :return: The matching resource definition names.
        :rtype: set(str)
        """
        resource_names = set()
        dfns = self._linstor.resource_dfn_list_raise().resource_definitions
        for dfn in dfns:
            # NOTE(review): with ignore_deleted=True the DELETE-flag check is
            # short-circuited, i.e. deleted definitions are INCLUDED. The
            # parameter therefore reads as "ignore the deleted state", not
            # "skip deleted resources" — confirm this is intended.
            if dfn.resource_group_name in self.get_all_group_names(self._base_group_name) and (
                ignore_deleted or
                linstor.consts.FLAG_DELETE not in dfn.flags
            ):
                resource_names.add(dfn.name)
        return resource_names
    def _get_volumes_info(self, volume_name=None):
        """
        Build a map of resource name -> VolumeInfo from the resource cache.
        Sizes are converted to bytes before returning (LINSTOR reports KiB).
        The result is memoized until `_mark_resource_cache_as_dirty` is
        called.
        NOTE(review): `volume_name` is currently unused — kept for interface
        compatibility; verify against callers.
        :rtype: dict(str, VolumeInfo)
        """
        all_volume_info = {}

        if not self._volume_info_cache_dirty:
            return self._volume_info_cache

        for resource in self._get_resource_cache().resources:
            if resource.name not in all_volume_info:
                current = all_volume_info[resource.name] = self.VolumeInfo(
                    resource.name
                )
            else:
                current = all_volume_info[resource.name]

            if linstor.consts.FLAG_DISKLESS not in resource.flags:
                current.diskful.append(resource.node_name)

            for volume in resource.volumes:
                # We ignore diskless pools of the form "DfltDisklessStorPool".
                if volume.storage_pool_name == self._group_name:
                    if volume.allocated_size < 0:
                        raise LinstorVolumeManagerError(
                            'Failed to get allocated size of `{}` on `{}`'
                            .format(resource.name, volume.storage_pool_name)
                        )
                    allocated_size = volume.allocated_size

                    # Keep the maximum allocated size seen across nodes.
                    current.allocated_size = current.allocated_size and \
                        max(current.allocated_size, allocated_size) or \
                        allocated_size

                    # Keep the smallest positive usable size across nodes.
                    usable_size = volume.usable_size
                    if usable_size > 0 and (
                        usable_size < current.virtual_size or
                        not current.virtual_size
                    ):
                        current.virtual_size = usable_size

            if current.virtual_size <= 0:
                raise LinstorVolumeManagerError(
                    'Failed to get usable size of `{}` on `{}`'
                    .format(resource.name, volume.storage_pool_name)
                )

        # LINSTOR sizes are in KiB: convert to bytes.
        for current in all_volume_info.values():
            current.allocated_size *= 1024
            current.virtual_size *= 1024

        self._volume_info_cache_dirty = False
        self._volume_info_cache = all_volume_info

        return all_volume_info
    def _get_volume_node_names_and_size(self, volume_name):
        """
        Return the nodes hosting a volume and its size.
        :param str volume_name: LINSTOR resource name of the volume.
        :return: A tuple (set of node names, size in bytes). The size is the
            minimum usable size found across the nodes, or -1024 when no
            matching volume was found (-1 * 1024).
        :rtype: tuple(set(str), int)
        """
        node_names = set()
        size = -1
        for resource in self._linstor.resource_list_raise(
            filter_by_resources=[volume_name]
        ).resources:
            for volume in resource.volumes:
                # We ignore diskless pools of the form "DfltDisklessStorPool".
                if volume.storage_pool_name == self._group_name:
                    node_names.add(resource.node_name)

                    current_size = volume.usable_size
                    if current_size < 0:
                        raise LinstorVolumeManagerError(
                            'Failed to get usable size of `{}` on `{}`'
                            .format(resource.name, volume.storage_pool_name)
                        )

                    if size < 0:
                        size = current_size
                    else:
                        size = min(size, current_size)

        # LINSTOR sizes are in KiB: convert to bytes.
        return (node_names, size * 1024)
2174 def _compute_size(self, attr):
2175 capacity = 0
2176 for pool in self._get_storage_pools(force=True):
2177 space = pool.free_space
2178 if space:
2179 size = getattr(space, attr)
2180 if size < 0:
2181 raise LinstorVolumeManagerError(
2182 'Failed to get pool {} attr of `{}`'
2183 .format(attr, pool.node_name)
2184 )
2185 capacity += size
2186 return capacity * 1024
2188 def _get_node_names(self):
2189 node_names = set()
2190 for pool in self._get_storage_pools():
2191 node_names.add(pool.node_name)
2192 return node_names
2194 def _get_storage_pools(self, force=False):
2195 cur_time = time.time()
2196 elsaped_time = cur_time - self._storage_pools_time
2198 if force or elsaped_time >= self.STORAGE_POOLS_FETCH_INTERVAL:
2199 self._storage_pools = self._linstor.storage_pool_list_raise(
2200 filter_by_stor_pools=[self._group_name]
2201 ).storage_pools
2202 self._storage_pools_time = time.time()
2204 return self._storage_pools
    def _create_volume(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Create a LINSTOR resource definition for a volume and optionally
        auto-place its resources. The whole creation is retried up to 5
        times; on failure the partially created volume is destroyed.
        :param str volume_uuid: UUID of the volume to create.
        :param str volume_name: LINSTOR resource name to use.
        :param int size: Requested size in B, rounded up to BLOCK_SIZE.
        :param bool place_resources: If True, auto-place with the SR
            redundancy.
        :param bool high_availability: If True, spawn the definition in the
            HA resource group instead of the default one.
        """
        size = self.round_up_volume_size(size)
        # The resource list is about to change: invalidate the caches.
        self._mark_resource_cache_as_dirty()

        group_name = self._ha_group_name if high_availability else self._group_name
        def create_definition():
            # Spawn only the definition (no resources yet). On the first
            # attempt, a missing HA group is created on the fly and the
            # spawn is retried once.
            first_attempt = True
            while True:
                try:
                    self._check_volume_creation_errors(
                        self._linstor.resource_group_spawn(
                            rsc_grp_name=group_name,
                            rsc_dfn_name=volume_name,
                            vlm_sizes=['{}B'.format(size)],
                            definitions_only=True
                        ),
                        volume_uuid,
                        self._group_name
                    )
                    break
                except LinstorVolumeManagerError as e:
                    if (
                        not first_attempt or
                        not high_availability or
                        e.code != LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
                    ):
                        raise

                    first_attempt = False
                    # Create the missing HA group (replication count of 3).
                    self._create_resource_group(
                        self._linstor,
                        group_name,
                        self._group_name,
                        3,
                        True
                    )

            self._configure_volume_peer_slots(self._linstor, volume_name)

        def clean():
            # Best-effort rollback: log instead of raising on failure.
            try:
                self._destroy_volume(volume_uuid, force=True, preserve_properties=True)
            except Exception as e:
                self._logger(
                    'Unable to destroy volume {} after creation fail: {}'
                    .format(volume_uuid, e)
                )

        def create():
            try:
                create_definition()
                if place_resources:
                    # Basic case when we use the default redundancy of the group.
                    self._check_volume_creation_errors(
                        self._linstor.resource_auto_place(
                            rsc_name=volume_name,
                            place_count=self._redundancy,
                            diskless_on_remaining=False
                        ),
                        volume_uuid,
                        self._group_name
                    )
            except LinstorVolumeManagerError as e:
                # ERR_VOLUME_EXISTS must not trigger a rollback: the
                # resource may belong to a concurrent creation.
                if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                    clean()
                raise
            except Exception:
                clean()
                raise

        util.retry(create, maxretry=5)
    def _create_volume_with_properties(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Create a volume and register its KV properties.
        The volume is first marked STATE_CREATING in the KV store so a crash
        during creation can be detected (and repaired) later.
        :return: The KV store scoped to the new volume's namespace.
        :raise LinstorVolumeManagerError: With code ERR_VOLUME_EXISTS if the
            volume is already registered; generic error otherwise.
        """
        if self.check_volume_exists(volume_uuid):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, it already exists'
                .format(volume_uuid, self._group_name) + ' in properties',
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        if volume_name in self._fetch_resource_names():
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    volume_uuid, self._group_name
                ) + 'resource of the same name already exists in LINSTOR'
            )

        # I am paranoid.
        volume_properties = self._get_volume_properties(volume_uuid)
        if (volume_properties.get(self.PROP_NOT_EXISTS) is not None):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}`, '.format(volume_uuid) +
                'properties already exist'
            )

        try:
            # Mark the volume as "being created" before touching LINSTOR.
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_CREATING
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            self._create_volume(
                volume_uuid,
                volume_name,
                size,
                place_resources,
                high_availability
            )

            assert volume_properties.namespace == \
                self._build_volume_namespace(volume_uuid)
            return volume_properties
        except LinstorVolumeManagerError as e:
            # Do not destroy existing resource!
            # In theory we can't get this error because we check this event
            # before the `self._create_volume` case.
            # It can only happen if the same volume uuid is used in the same
            # call in another host.
            if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                self._destroy_volume(volume_uuid, force=True)
            raise
    def _find_device_path(self, volume_uuid, volume_name):
        """
        Return the /dev/drbd/by-res/... path of a volume after activating it
        locally and checking that it resolves to the /dev/drbd<id> device
        reported by LINSTOR.
        :raise LinstorVolumeManagerError: If the symlink does not resolve to
            the expected device.
        :rtype: str
        """
        current_device_path = self._request_device_path(
            volume_uuid, volume_name, activate=True
        )

        # We use realpath here to get the /dev/drbd<id> path instead of
        # /dev/drbd/by-res/<resource_name>.
        expected_device_path = self.build_device_path(volume_name)
        util.wait_for_path(expected_device_path, 5)

        device_realpath = os.path.realpath(expected_device_path)
        if current_device_path != device_realpath:
            raise LinstorVolumeManagerError(
                'Invalid path, current={}, expected={} (realpath={})'
                .format(
                    current_device_path,
                    expected_device_path,
                    device_realpath
                )
            )
        return expected_device_path
    def _request_device_path(self, volume_uuid, volume_name, activate=False):
        """
        Return the local /dev/drbd<id> device path of a volume.
        :param str volume_uuid: Volume UUID (only used in error messages).
        :param str volume_name: LINSTOR resource name.
        :param bool activate: If True and no resource exists on this host,
            create a diskless resource here, then retry once.
        :raise LinstorVolumeManagerError: If no local resource exists and
            activation is not requested (or already failed).
        :rtype: str
        """
        node_name = socket.gethostname()

        resource = next(filter(
            lambda resource: resource.node_name == node_name and
            resource.name == volume_name,
            self._get_resource_cache().resources
        ), None)

        if not resource:
            if activate:
                self._mark_resource_cache_as_dirty()
                self._activate_device_path(
                    self._linstor, node_name, volume_name
                )
                # Retry once, without activation this time.
                return self._request_device_path(volume_uuid, volume_name)
            raise LinstorVolumeManagerError(
                'Empty dev path for `{}`, but definition "seems" to exist'
                .format(volume_uuid)
            )
        # Contains a path of the /dev/drbd<id> form.
        return resource.volumes[0].device_path
2385 def _destroy_resource(self, resource_name, force=False):
2386 result = self._linstor.resource_dfn_delete(resource_name)
2387 error_str = self._get_error_str(result)
2388 if not error_str:
2389 self._mark_resource_cache_as_dirty()
2390 return
2392 if not force:
2393 self._mark_resource_cache_as_dirty()
2394 raise LinstorVolumeManagerError(
2395 'Could not destroy resource `{}` from SR `{}`: {}'
2396 .format(resource_name, self._group_name, error_str)
2397 )
2399 # If force is used, ensure there is no opener.
2400 all_openers = get_all_volume_openers(resource_name, '0')
2401 for openers in all_openers.values():
2402 if openers:
2403 self._mark_resource_cache_as_dirty()
2404 raise LinstorVolumeManagerError(
2405 'Could not force destroy resource `{}` from SR `{}`: {} (openers=`{}`)'
2406 .format(resource_name, self._group_name, error_str, all_openers)
2407 )
2409 # Maybe the resource is blocked in primary mode. DRBD/LINSTOR issue?
2410 resource_states = filter(
2411 lambda resource_state: resource_state.name == resource_name,
2412 self._get_resource_cache().resource_states
2413 )
2415 # Mark only after computation of states.
2416 self._mark_resource_cache_as_dirty()
2418 for resource_state in resource_states:
2419 volume_state = resource_state.volume_states[0]
2420 if resource_state.in_use:
2421 demote_drbd_resource(resource_state.node_name, resource_name)
2422 break
2423 self._destroy_resource(resource_name)
    def _destroy_volume(self, volume_uuid, force=False, preserve_properties=False):
        """
        Destroy the LINSTOR resource of a volume and (optionally) clear its
        KV properties.
        :param bool force: Passed through to `_destroy_resource`.
        :param bool preserve_properties: If True, keep the KV entries.
        :raise LinstorVolumeManagerError: If the destruction fails.
        """
        volume_properties = self._get_volume_properties(volume_uuid)
        try:
            volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
            if volume_name in self._fetch_resource_names():
                self._destroy_resource(volume_name, force)

            # Assume this call is atomic.
            if not preserve_properties:
                volume_properties.clear()
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Cannot destroy volume `{}`: {}'.format(volume_uuid, e)
            )
    def _build_volumes(self, repair):
        """
        Build the in-memory `self._volumes` set from the KV store.
        :param bool repair: True (master) to destroy or rename invalid
            volumes and to finalize pending UUID updates; False (slave) to
            only ignore them.
        :raise LinstorVolumeManagerError: If pending UUID updates exist and
            repair is False.
        """
        properties = self._kv_cache
        resource_names = self._fetch_resource_names()

        self._volumes = set()

        # Volumes that were in the middle of an UUID rename.
        updating_uuid_volumes = self._get_volumes_by_property(
            self.REG_UPDATING_UUID_SRC, ignore_inexisting_volumes=False
        )
        if updating_uuid_volumes and not repair:
            raise LinstorVolumeManagerError(
                'Cannot build LINSTOR volume list: '
                'It exists invalid "updating uuid volumes", repair is required'
            )

        existing_volumes = self._get_volumes_by_property(
            self.REG_NOT_EXISTS, ignore_inexisting_volumes=False
        )
        for volume_uuid, not_exists in existing_volumes.items():
            properties.namespace = self._build_volume_namespace(volume_uuid)

            src_uuid = properties.get(self.PROP_UPDATING_UUID_SRC)
            if src_uuid:
                self._logger(
                    'Ignoring volume during manager initialization with prop '
                    ' PROP_UPDATING_UUID_SRC: {} (properties={})'
                    .format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Insert volume in list if the volume exists. Or if the volume
            # is being created and a slave wants to use it (repair = False).
            #
            # If we are on the master and if repair is True and state is
            # Creating, it's probably a bug or crash: the creation process has
            # been stopped.
            if not_exists == self.STATE_EXISTS or (
                not repair and not_exists == self.STATE_CREATING
            ):
                self._volumes.add(volume_uuid)
                continue

            if not repair:
                self._logger(
                    'Ignoring bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Remove bad volume.
            try:
                self._logger(
                    'Removing bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                volume_name = properties.get(self.PROP_VOLUME_NAME)

                # Little optimization, don't call `self._destroy_volume`,
                # we already have resource name list.
                if volume_name in resource_names:
                    self._destroy_resource(volume_name, force=True)

                # Assume this call is atomic.
                properties.clear()
            except Exception as e:
                # Do not raise, we don't want to block user action.
                self._logger(
                    'Cannot clean volume {}: {}'.format(volume_uuid, e)
                )

                # The volume can't be removed, maybe it's still in use,
                # in this case rename it with the "DELETED_" prefix.
                # This prefix is mandatory if it exists a snap transaction to
                # rollback because the original VDI UUID can try to be renamed
                # with the UUID we are trying to delete...
                if not volume_uuid.startswith('DELETED_'):
                    self.update_volume_uuid(
                        volume_uuid, 'DELETED_' + volume_uuid, force=True
                    )

        # Finalize (or roll back) pending UUID renames: src -> dest.
        for dest_uuid, src_uuid in updating_uuid_volumes.items():
            dest_namespace = self._build_volume_namespace(dest_uuid)

            properties.namespace = dest_namespace
            if int(properties.get(self.PROP_NOT_EXISTS)):
                properties.clear()
                continue

            properties.namespace = self._build_volume_namespace(src_uuid)
            properties.clear()

            properties.namespace = dest_namespace
            properties.pop(self.PROP_UPDATING_UUID_SRC)

            if src_uuid in self._volumes:
                self._volumes.remove(src_uuid)
            self._volumes.add(dest_uuid)
2547 def _get_sr_properties(self):
2548 return self._create_linstor_kv(self._build_sr_namespace())
2550 def _get_volumes_by_property(
2551 self, reg_prop, ignore_inexisting_volumes=True
2552 ):
2553 base_properties = self._get_kv_cache()
2554 base_properties.namespace = self._build_volume_namespace()
2556 volume_properties = {}
2557 for volume_uuid in self._volumes:
2558 volume_properties[volume_uuid] = ''
2560 for key, value in base_properties.items():
2561 res = reg_prop.match(key)
2562 if res:
2563 volume_uuid = res.groups()[0]
2564 if not ignore_inexisting_volumes or \
2565 volume_uuid in self._volumes:
2566 volume_properties[volume_uuid] = value
2568 return volume_properties
    def _create_linstor_kv(self, namespace):
        # Open a KV store on the LINSTOR controller, scoped to `namespace`.
        return linstor.KV(
            self._group_name,
            uri=self._linstor.controller_host(),
            namespace=namespace
        )
2577 def _get_volume_properties(self, volume_uuid):
2578 properties = self._get_kv_cache()
2579 properties.namespace = self._build_volume_namespace(volume_uuid)
2580 return properties
2582 @classmethod
2583 def _build_sr_namespace(cls):
2584 return '/{}/'.format(cls.NAMESPACE_SR)
2586 @classmethod
2587 def _build_volume_namespace(cls, volume_uuid=None):
2588 # Return a path to all volumes if `volume_uuid` is not given.
2589 if volume_uuid is None:
2590 return '/{}/'.format(cls.NAMESPACE_VOLUME)
2591 return '/{}/{}/'.format(cls.NAMESPACE_VOLUME, volume_uuid)
2593 @classmethod
2594 def _get_error_str(cls, result):
2595 return ', '.join([
2596 err.message for err in cls._filter_errors(result)
2597 ])
2599 @classmethod
2600 def _create_linstor_instance(
2601 cls, uri, keep_uri_unmodified=False, attempt_count=30
2602 ):
2603 retry = False
2605 def connect(uri):
2606 if not uri:
2607 uri = get_controller_uri()
2608 if not uri:
2609 raise LinstorVolumeManagerError(
2610 'Unable to find controller uri...'
2611 )
2612 instance = linstor.Linstor(uri, keep_alive=True)
2613 instance.connect()
2614 return instance
2616 try:
2617 return connect(uri)
2618 except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError):
2619 pass
2621 if not keep_uri_unmodified:
2622 uri = None
2624 return util.retry(
2625 lambda: connect(uri),
2626 maxretry=attempt_count,
2627 period=1,
2628 exceptions=[
2629 linstor.errors.LinstorNetworkError,
2630 LinstorVolumeManagerError
2631 ]
2632 )
2634 @classmethod
2635 def _configure_volume_peer_slots(cls, lin, volume_name):
2636 result = lin.resource_dfn_modify(volume_name, {}, peer_slots=3)
2637 error_str = cls._get_error_str(result)
2638 if error_str:
2639 raise LinstorVolumeManagerError(
2640 'Could not configure volume peer slots of {}: {}'
2641 .format(volume_name, error_str)
2642 )
    @classmethod
    def _activate_device_path(cls, lin, node_name, volume_name):
        """
        Ensure the volume's device path exists on `node_name` by creating a
        diskless resource there if needed. An already-existing resource is
        not an error.
        :raise LinstorVolumeManagerError: On any other LINSTOR error.
        """
        result = lin.resource_make_available(node_name, volume_name, diskful=False)
        if linstor.Linstor.all_api_responses_no_error(result):
            return
        errors = linstor.Linstor.filter_api_call_response_errors(result)
        if len(errors) == 1 and errors[0].is_error(
            linstor.consts.FAIL_EXISTS_RSC
        ):
            return

        raise LinstorVolumeManagerError(
            'Unable to activate device path of `{}` on node `{}`: {}'
            .format(volume_name, node_name, ', '.join(
                [str(x) for x in result]))
        )
2661 @classmethod
2662 def _request_database_path(cls, lin, activate=False):
2663 node_name = socket.gethostname()
2665 try:
2666 resource = next(filter(
2667 lambda resource: resource.node_name == node_name and
2668 resource.name == DATABASE_VOLUME_NAME,
2669 lin.resource_list_raise().resources
2670 ), None)
2671 except Exception as e:
2672 raise LinstorVolumeManagerError(
2673 'Unable to fetch database resource: {}'
2674 .format(e)
2675 )
2677 if not resource:
2678 if activate:
2679 cls._activate_device_path(
2680 lin, node_name, DATABASE_VOLUME_NAME
2681 )
2682 return cls._request_database_path(
2683 DATABASE_VOLUME_NAME, DATABASE_VOLUME_NAME
2684 )
2685 raise LinstorVolumeManagerError(
2686 'Empty dev path for `{}`, but definition "seems" to exist'
2687 .format(DATABASE_PATH)
2688 )
2689 # Contains a path of the /dev/drbd<id> form.
2690 return resource.volumes[0].device_path
    @classmethod
    def _create_database_volume(
        cls, lin, group_name, storage_pool_name, node_names, redundancy, auto_quorum
    ):
        """
        Create, place, format (ext4) and return the device path of the
        LINSTOR database volume (xcp-persistent-database). The SR must not
        contain any resource definition yet.
        :param linstor.Linstor lin: LINSTOR API instance to use.
        :param str group_name: Resource group used to spawn the definition.
        :param str storage_pool_name: Storage pool of the diskful copies.
        :param list node_names: All nodes of the pool.
        :param int redundancy: Number of diskful copies to create.
        :param bool auto_quorum: If True, configure DRBD quorum options on
            the resource definition.
        :return: The /dev/drbd/by-res/... path of the database volume.
        :rtype: str
        """
        try:
            dfns = lin.resource_dfn_list_raise().resource_definitions
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Unable to get definitions during database creation: {}'
                .format(e)
            )

        # The database must be the very first volume of the SR.
        if dfns:
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    DATABASE_VOLUME_NAME, group_name
                ) + 'LINSTOR volume list must be empty.'
            )

        # Workaround to use thin lvm. Without this line an error is returned:
        # "Not enough available nodes"
        # I don't understand why but this command protect against this bug.
        try:
            pools = lin.storage_pool_list_raise(
                filter_by_stor_pools=[storage_pool_name]
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to get storage pool list before database creation: {}'
                .format(e)
            )

        # Ensure we have a correct list of storage pools.
        assert pools.storage_pools  # We must have at least one storage pool!
        nodes_with_pool = list(map(lambda pool: pool.node_name, pools.storage_pools))
        for node_name in nodes_with_pool:
            assert node_name in node_names
        util.SMlog('Nodes with storage pool: {}'.format(nodes_with_pool))

        # Create the database definition.
        size = cls.round_up_volume_size(DATABASE_SIZE)
        cls._check_volume_creation_errors(lin.resource_group_spawn(
            rsc_grp_name=group_name,
            rsc_dfn_name=DATABASE_VOLUME_NAME,
            vlm_sizes=['{}B'.format(size)],
            definitions_only=True
        ), DATABASE_VOLUME_NAME, group_name)
        cls._configure_volume_peer_slots(lin, DATABASE_VOLUME_NAME)

        # Create real resources on the first nodes.
        resources = []

        # Split the nodes into diskful (backed by our storage pool) and
        # diskless ones.
        diskful_nodes = []
        diskless_nodes = []
        for node_name in node_names:
            if node_name in nodes_with_pool:
                diskful_nodes.append(node_name)
            else:
                diskless_nodes.append(node_name)

        assert diskful_nodes
        for node_name in diskful_nodes[:redundancy]:
            util.SMlog('Create database diskful on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                storage_pool=storage_pool_name
            ))
        # Create diskless resources on the remaining set.
        for node_name in diskful_nodes[redundancy:] + diskless_nodes:
            util.SMlog('Create database diskless on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                diskless=True
            ))

        result = lin.resource_create(resources)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create database volume from SR `{}`: {}'.format(
                    group_name, error_str
                )
            )

        # We must modify the quorum. Otherwise we can't use correctly the
        # drbd-reactor daemon.
        if auto_quorum:
            result = lin.resource_dfn_modify(DATABASE_VOLUME_NAME, {
                'DrbdOptions/auto-quorum': 'disabled',
                'DrbdOptions/Resource/quorum': 'majority'
            })
            error_str = cls._get_error_str(result)
            if error_str:
                raise LinstorVolumeManagerError(
                    'Could not activate quorum on database volume: {}'
                    .format(error_str)
                )

        # Create database and ensure path exists locally and
        # on replicated devices.
        current_device_path = cls._request_database_path(lin, activate=True)

        # Ensure diskless paths exist on other hosts. Otherwise PBDs can't be
        # plugged.
        for node_name in node_names:
            cls._activate_device_path(lin, node_name, DATABASE_VOLUME_NAME)

        # We use realpath here to get the /dev/drbd<id> path instead of
        # /dev/drbd/by-res/<resource_name>.
        expected_device_path = cls.build_device_path(DATABASE_VOLUME_NAME)
        util.wait_for_path(expected_device_path, 5)

        device_realpath = os.path.realpath(expected_device_path)
        if current_device_path != device_realpath:
            raise LinstorVolumeManagerError(
                'Invalid path, current={}, expected={} (realpath={})'
                .format(
                    current_device_path,
                    expected_device_path,
                    device_realpath
                )
            )

        # Format the new device; retried because the DRBD device may not be
        # writable immediately after creation.
        try:
            util.retry(
                lambda: util.pread2([DATABASE_MKFS, expected_device_path]),
                maxretry=5
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to execute {} on database volume: {}'
                .format(DATABASE_MKFS, e)
            )

        return expected_device_path
2830 @classmethod
2831 def _destroy_database_volume(cls, lin, group_name):
2832 error_str = cls._get_error_str(
2833 lin.resource_dfn_delete(DATABASE_VOLUME_NAME)
2834 )
2835 if error_str:
2836 raise LinstorVolumeManagerError(
2837 'Could not destroy resource `{}` from SR `{}`: {}'
2838 .format(DATABASE_VOLUME_NAME, group_name, error_str)
2839 )
    @classmethod
    def _mount_database_volume(cls, volume_path, mount=True, force=False):
        """
        Mount (or unmount) the database volume on DATABASE_PATH, preserving
        the existing directory content through a temporary backup folder.
        :param str volume_path: Device path of the database volume.
        :param bool mount: True to mount, False to unmount.
        :param bool force: Passed to `_move_files` when restoring the files.
        :raise LinstorVolumeManagerError: On failure; a best-effort rollback
            of the mount and of the moved files is attempted first.
        """
        try:
            # 1. Create a backup config folder.
            database_not_empty = bool(os.listdir(DATABASE_PATH))
            backup_path = cls._create_database_backup_path()

            # 2. Move the config in the mounted volume.
            if database_not_empty:
                cls._move_files(DATABASE_PATH, backup_path)

            cls._mount_volume(volume_path, DATABASE_PATH, mount)

            if database_not_empty:
                cls._move_files(backup_path, DATABASE_PATH, force)

            # 3. Remove useless backup directory.
            try:
                os.rmdir(backup_path)
            except Exception as e:
                raise LinstorVolumeManagerError(
                    'Failed to remove backup path {} of LINSTOR config: {}'
                    .format(backup_path, e)
                )
        except Exception as e:
            def force_exec(fn):
                # Rollback steps are best-effort: their failures are ignored.
                try:
                    fn()
                except Exception:
                    pass

            # Undo the mount/unmount and the file moves as far as possible,
            # then re-raise the original error.
            if mount == cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    DATABASE_PATH, backup_path
                ))
                force_exec(lambda: cls._mount_volume(
                    volume_path, DATABASE_PATH, not mount
                ))

            if mount != cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    backup_path, DATABASE_PATH
                ))

            force_exec(lambda: os.rmdir(backup_path))
            raise e
    @classmethod
    def _force_destroy_database_volume(cls, lin, group_name):
        # Best-effort variant of `_destroy_database_volume`: errors are
        # deliberately swallowed.
        try:
            cls._destroy_database_volume(lin, group_name)
        except Exception:
            pass
    @classmethod
    def _destroy_storage_pool(cls, lin, group_name, node_name):
        """
        Delete the storage pool `group_name` on `node_name`, retrying up to
        10 times. A missing pool (or pool definition) is treated as success.
        :raise LinstorVolumeManagerError: On any other deletion error.
        """
        def destroy():
            result = lin.storage_pool_delete(node_name, group_name)
            errors = cls._filter_errors(result)
            if cls._check_errors(errors, [
                linstor.consts.FAIL_NOT_FOUND_STOR_POOL,
                linstor.consts.FAIL_NOT_FOUND_STOR_POOL_DFN
            ]):
                return

            if errors:
                raise LinstorVolumeManagerError(
                    'Failed to destroy SP `{}` on node `{}`: {}'.format(
                        group_name,
                        node_name,
                        cls._get_error_str(errors)
                    )
                )

        # We must retry to avoid errors like:
        # "can not be deleted as volumes / snapshot-volumes are still using it"
        # after LINSTOR database volume destruction.
        return util.retry(destroy, maxretry=10)
    @classmethod
    def _create_resource_group(
        cls,
        lin,
        group_name,
        storage_pool_name,
        redundancy,
        destroy_old_group
    ):
        """
        Create a LINSTOR resource group and its volume group.
        :param str group_name: Name of the resource group to create.
        :param str storage_pool_name: Storage pool backing the group.
        :param int redundancy: Resource place count.
        :param bool destroy_old_group: If True and a group with the same
            name already exists, destroy it once and retry the creation.
        :raise LinstorVolumeManagerError: If the RG or VG creation fails.
        """
        rg_creation_attempt = 0
        while True:
            result = lin.resource_group_create(
                name=group_name,
                place_count=redundancy,
                storage_pool=storage_pool_name,
                diskless_on_remaining=False
            )
            error_str = cls._get_error_str(result)
            if not error_str:
                break

            # On a name collision, destroy the old (empty) group once and
            # retry; any further failure is fatal.
            errors = cls._filter_errors(result)
            if destroy_old_group and cls._check_errors(errors, [
                linstor.consts.FAIL_EXISTS_RSC_GRP
            ]):
                rg_creation_attempt += 1
                if rg_creation_attempt < 2:
                    try:
                        cls._destroy_resource_group(lin, group_name)
                    except Exception as e:
                        error_str = 'Failed to destroy old and empty RG: {}'.format(e)
                    else:
                        continue

            raise LinstorVolumeManagerError(
                'Could not create RG `{}`: {}'.format(
                    group_name, error_str
                )
            )

        result = lin.volume_group_create(group_name)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create VG `{}`: {}'.format(
                    group_name, error_str
                )
            )
2969 @classmethod
2970 def _destroy_resource_group(cls, lin, group_name):
2971 def destroy():
2972 result = lin.resource_group_delete(group_name)
2973 errors = cls._filter_errors(result)
2974 if cls._check_errors(errors, [
2975 linstor.consts.FAIL_NOT_FOUND_RSC_GRP
2976 ]):
2977 return
2979 if errors:
2980 raise LinstorVolumeManagerError(
2981 'Failed to destroy RG `{}`: {}'
2982 .format(group_name, cls._get_error_str(errors))
2983 )
2985 return util.retry(destroy, maxretry=10)
2987 @classmethod
2988 def _build_group_name(cls, base_name):
2989 # If thin provisioning is used we have a path like this:
2990 # `VG/LV`. "/" is not accepted by LINSTOR.
2991 return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_'))
2993 # Used to store important data in a HA context,
2994 # i.e. a replication count of 3.
2995 @classmethod
2996 def _build_ha_group_name(cls, base_name):
2997 return '{}{}'.format(cls.PREFIX_HA, base_name.replace('/', '_'))
    @classmethod
    def _check_volume_creation_errors(cls, result, volume_uuid, group_name):
        """
        Inspect a LINSTOR API result for volume creation failures.
        :raise LinstorVolumeManagerError:
            - with ERR_VOLUME_EXISTS if the resource (definition) exists,
            - with ERR_GROUP_NOT_EXISTS if the resource group is missing,
            - generic error for any other failure.
        """
        errors = cls._filter_errors(result)
        if cls._check_errors(errors, [
            linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN
        ]):
            raise LinstorVolumeManagerError(
                'Failed to create volume `{}` from SR `{}`, it already exists'
                .format(volume_uuid, group_name),
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        if cls._check_errors(errors, [linstor.consts.FAIL_NOT_FOUND_RSC_GRP]):
            raise LinstorVolumeManagerError(
                'Failed to create volume `{}` from SR `{}`, resource group doesn\'t exist'
                .format(volume_uuid, group_name),
                LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
            )

        if errors:
            raise LinstorVolumeManagerError(
                'Failed to create volume `{}` from SR `{}`: {}'.format(
                    volume_uuid,
                    group_name,
                    cls._get_error_str(errors)
                )
            )
    @classmethod
    def _move_files(cls, src_dir, dest_dir, force=False):
        """
        Move every file of `src_dir` into `dest_dir`.
        :param bool force: When False, refuse to overwrite (the destination
            must be empty and no moved file may already exist there) and,
            on failure, move the already-moved files back (best effort).
        :raise LinstorVolumeManagerError: If a file cannot be moved or the
            destination cannot be listed.
        """
        def listdir(dir):
            # 'lost+found' is an ext4 artifact, never part of the config.
            ignored = ['lost+found']
            return [file for file in os.listdir(dir) if file not in ignored]

        try:
            if not force:
                files = listdir(dest_dir)
                if files:
                    raise LinstorVolumeManagerError(
                        'Cannot move files from {} to {} because destination '
                        'contains: {}'.format(src_dir, dest_dir, files)
                    )
        except LinstorVolumeManagerError:
            raise
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Cannot list dir {}: {}'.format(dest_dir, e)
            )

        try:
            for file in listdir(src_dir):
                try:
                    dest_file = os.path.join(dest_dir, file)
                    if not force and os.path.exists(dest_file):
                        raise LinstorVolumeManagerError(
                            'Cannot move {} because it already exists in the '
                            'destination'.format(file)
                        )
                    shutil.move(os.path.join(src_dir, file), dest_file)
                except LinstorVolumeManagerError:
                    raise
                except Exception as e:
                    raise LinstorVolumeManagerError(
                        'Cannot move {}: {}'.format(file, e)
                    )
        except Exception as e:
            if not force:
                # Roll back the partial move before propagating the error.
                try:
                    cls._move_files(dest_dir, src_dir, force=True)
                except Exception:
                    pass

            raise LinstorVolumeManagerError(
                'Failed to move files from {} to {}: {}'.format(
                    src_dir, dest_dir, e
                )
            )
3077 @staticmethod
3078 def _create_database_backup_path():
3079 path = DATABASE_PATH + '-' + str(uuid.uuid4())
3080 try:
3081 os.mkdir(path)
3082 return path
3083 except Exception as e:
3084 raise LinstorVolumeManagerError(
3085 'Failed to create backup path {} of LINSTOR config: {}'
3086 .format(path, e)
3087 )
3089 @staticmethod
3090 def _get_filtered_properties(properties):
3091 return dict(properties.items())
3093 @staticmethod
3094 def _filter_errors(result):
3095 return [
3096 err for err in result
3097 if hasattr(err, 'is_error') and err.is_error()
3098 ]
3100 @staticmethod
3101 def _check_errors(result, codes):
3102 for err in result:
3103 for code in codes:
3104 if err.is_error(code):
3105 return True
3106 return False
    @classmethod
    def _controller_is_running(cls):
        # True if the `linstor-controller` systemd service is active.
        return cls._service_is_running('linstor-controller')
    @classmethod
    def _start_controller(cls, start=True):
        # Start (or stop when `start` is False) the local linstor-controller
        # service.
        return cls._start_service('linstor-controller', start)
3116 @staticmethod
3117 def _start_service(name, start=True):
3118 action = 'start' if start else 'stop'
3119 (ret, out, err) = util.doexec([
3120 'systemctl', action, name
3121 ])
3122 if ret != 0:
3123 raise LinstorVolumeManagerError(
3124 'Failed to {} {}: {} {}'
3125 .format(action, name, out, err)
3126 )
3128 @staticmethod
3129 def _service_is_running(name):
3130 (ret, out, err) = util.doexec([
3131 'systemctl', 'is-active', '--quiet', name
3132 ])
3133 return not ret
3135 @staticmethod
3136 def _is_mounted(mountpoint):
3137 (ret, out, err) = util.doexec(['mountpoint', '-q', mountpoint])
3138 return ret == 0
    @classmethod
    def _mount_volume(cls, volume_path, mountpoint, mount=True):
        """
        Mount (or unmount when `mount` is False) `volume_path` on
        `mountpoint`.
        :raise LinstorVolumeManagerError: If the mount/umount command fails.
        """
        if mount:
            try:
                util.pread(['mount', volume_path, mountpoint])
            except Exception as e:
                raise LinstorVolumeManagerError(
                    'Failed to mount volume {} on {}: {}'
                    .format(volume_path, mountpoint, e)
                )
        else:
            try:
                # Unmounting an already-unmounted path is a no-op.
                if cls._is_mounted(mountpoint):
                    util.pread(['umount', mountpoint])
            except Exception as e:
                raise LinstorVolumeManagerError(
                    'Failed to umount volume {} on {}: {}'
                    .format(volume_path, mountpoint, e)
                )
3161# ==============================================================================
3163# Check if a path is a DRBD resource and log the process name/pid
3164# that opened it.
def log_drbd_openers(path):
    """
    Log which processes/hosts currently hold a DRBD device open.

    Silently returns if `path` is not under the DRBD by-res directory or
    does not resolve to a DRBD block device (major number 147).

    :param str path: Path to inspect, e.g. `/dev/drbd/by-res/<res>/<vol>`.
    """
    # Ignore if it's not a symlink to DRBD resource.
    if not path.startswith(DRBD_BY_RES_PATH):
        return

    # Extract the resource name: between the by-res prefix and the next '/'.
    prefix_len = len(DRBD_BY_RES_PATH)
    res_name_end = path.find('/', prefix_len)
    if res_name_end == -1:
        return
    res_name = path[prefix_len:res_name_end]

    # Extract the volume number: everything after the last '/'.
    volume_end = path.rfind('/')
    if volume_end == res_name_end:
        return
    volume = path[volume_end + 1:]

    try:
        # Ensure path is a DRBD: a block device with DRBD's major (147).
        stats = os.stat(os.path.realpath(path))
        if not stat.S_ISBLK(stats.st_mode) or os.major(stats.st_rdev) != 147:
            return

        # Ask DRBD where the device is open.
        (ret, stdout, stderr) = util.doexec(['drbdadm', 'status', res_name])
        if ret:
            util.SMlog('Failed to execute `drbdadm status` on `{}`: {}'.format(
                res_name, stderr
            ))
            return

        # Is it a local device?
        if stdout.startswith('{} role:Primary'.format(res_name)):
            openers = get_local_volume_openers(res_name, volume)
            util.SMlog(
                'DRBD resource `{}` is open on local host: {}'
                .format(path, openers)
            )
            return

        # Is it a remote device?
        openers = get_all_volume_openers(res_name, volume)
        util.SMlog(
            'DRBD resource `{}` is open on hosts: {}'
            .format(path, openers)
        )
    except Exception as e:
        util.SMlog(
            'Got exception while trying to determine where DRBD resource ' +
            '`{}` is open: {}'.format(path, e)
        )