Coverage for drivers/linstorvolumemanager.py : 10%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python3
2#
3# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr
4#
5# This program is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation, either version 3 of the License, or
8# (at your option) any later version.
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16#
19import distutils.util
20import errno
21import json
22import linstor
23import os.path
24import re
25import shutil
26import socket
27import stat
28import time
29import util
30import uuid
# Persistent prefix to add to RAW persistent volumes.
PERSISTENT_PREFIX = 'xcp-persistent-'

# Contains the data of the "/var/lib/linstor" directory.
DATABASE_VOLUME_NAME = PERSISTENT_PREFIX + 'database'
DATABASE_SIZE = 1 << 30  # 1 GiB.
DATABASE_PATH = '/var/lib/linstor'
DATABASE_MKFS = 'mkfs.ext4'

# Captures the peer node name from a "<node> role:Primary" line of
# `drbdadm status` output.
REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary")
# Captures the address part of a `drbdsetup` "_remote_host" value
# (everything between the first whitespace and the last colon).
REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$')

# Root directory of DRBD device symlinks (one sub-directory per resource).
DRBD_BY_RES_PATH = '/dev/drbd/by-res/'

# Name of the XAPI plugin called on pool hosts for LINSTOR helpers.
PLUGIN = 'linstor-manager'
49# ==============================================================================
def get_local_volume_openers(resource_name, volume):
    """
    Read the local DRBD debugfs "openers" file of a volume and return its
    content as a JSON string mapping PID -> process name + open duration.
    :param str resource_name: The DRBD resource name.
    :param int volume: The volume number inside the resource.
    :return: JSON object string: {pid: {'process-name', 'open-duration'}}.
    :rtype: str
    :raises Exception: If arguments are missing or a line cannot be parsed.
    """
    if not resource_name or volume is None:
        raise Exception('Cannot get DRBD openers without resource name and/or volume.')

    path = '/sys/kernel/debug/drbd/resources/{}/volumes/{}/openers'.format(
        resource_name, volume
    )

    with open(path, 'r') as openers:
        # Not a big cost, so read all lines directly.
        lines = openers.readlines()

    result = {}

    # Each line: "<process name> <pid> <open duration in ms>".
    opener_re = re.compile('(.*)\\s+([0-9]+)\\s+([0-9]+)')
    for line in lines:
        match = opener_re.match(line)
        if not match:
            # Explicit error instead of `assert`: asserts are removed with
            # `python -O` and would silently skip malformed lines.
            raise Exception(
                'Cannot parse DRBD openers line: `{}`'.format(line)
            )

        (process_name, pid, open_duration_ms) = match.groups()
        result[pid] = {
            'process-name': process_name,
            'open-duration': open_duration_ms
        }

    return json.dumps(result)
def get_all_volume_openers(resource_name, volume):
    """
    Collect the DRBD openers of a resource/volume from every live host of
    the pool, using the linstor-manager XAPI plugin.
    :param str resource_name: The DRBD resource name.
    :param int volume: The volume number inside the resource.
    :return: Map of node name -> decoded openers of that node.
    :rtype: dict
    """
    PLUGIN_CMD = 'getDrbdOpeners'

    openers = {}
    volume = str(volume)

    # Never allow this call to hang: it can run during HA init and in that
    # situation we could otherwise wait forever.
    session = util.timeout_call(10, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            metrics = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )
            if not metrics['live']:
                # Ensure we call plugin on online hosts only.
                continue

            raw = session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {
                'resourceName': resource_name,
                'volume': volume
            })
            openers[node_name] = json.loads(raw)
        except Exception as e:
            util.SMlog('Failed to get openers of `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))

    return openers
115# ==============================================================================
def round_up(value, divisor):
    """
    Round `value` up to the nearest multiple of `divisor`.
    :param value: The value to round (converted with int()).
    :param divisor: The rounding step; must be non-zero after int().
    :return: The smallest multiple of divisor >= value.
    :rtype: int
    """
    # Convert BEFORE validating: a truthy fractional divisor like 0.5 used
    # to pass the assert, become 0 after int() and crash with
    # ZeroDivisionError. Now it fails fast in the assert instead.
    divisor = int(divisor)
    assert divisor
    return ((int(value) + divisor - 1) // divisor) * divisor
def round_down(value, divisor):
    """
    Round `value` down to the nearest multiple of `divisor`.
    :param value: The value to round (converted with int()).
    :param divisor: The rounding step; must be non-zero after int().
    :return: The largest multiple of divisor <= value.
    :rtype: int
    """
    # Convert BEFORE validating, for the same reason as in round_up: a
    # fractional divisor < 1 is truthy but becomes 0 after int().
    divisor = int(divisor)
    assert divisor
    value = int(value)
    return value - (value % divisor)
129# ==============================================================================
def get_remote_host_ip(node_name):
    """
    Resolve the IP of `node_name` from the local DRBD database resource
    configuration (`drbdsetup show --json`).
    :param str node_name: The DRBD peer node name to look up.
    :return: The IP address, or None when it cannot be determined.
    :rtype: str
    """
    (ret, stdout, stderr) = util.doexec([
        'drbdsetup', 'show', DATABASE_VOLUME_NAME, '--json'
    ])
    if ret != 0:
        return

    try:
        conf = json.loads(stdout)
        if not conf:
            return

        for connection in conf[0]['connections']:
            if connection['net']['_name'] != node_name:
                continue
            # Found the connection of this peer: extract the address part
            # of its "_remote_host" value.
            res = REG_DRBDSETUP_IP.match(connection['path']['_remote_host'])
            if res:
                return res.groups()[0]
            break
    except Exception:
        # Best effort: any parse failure simply yields None.
        pass
def _get_controller_uri():
    """
    Find the URI of the running LINSTOR controller, first via the local
    DRBD database resource, then by querying every pool host through the
    linstor-manager plugin.
    :return: A 'linstor://...' URI or None if no controller was found.
    :rtype: str
    """
    PLUGIN_CMD = 'hasControllerRunning'

    # Try to find controller using drbdadm.
    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])
    if ret == 0:
        # If we are here, the database device exists locally.

        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            # Nice case, we have the controller running on this local host.
            return 'linstor://localhost'

        # Try to find the host using DRBD connections.
        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            node_name = res.groups()[0]
            ip = get_remote_host_ip(node_name)
            if ip:
                return 'linstor://' + ip

    # Worst case: we use many hosts in the pool (>= 4), so we can't find the
    # primary using drbdadm because we don't have all connections to the
    # replicated volume. `drbdadm status xcp-persistent-database` returns
    # 3 connections by default.
    try:
        session = util.timeout_call(10, util.get_localAPI_session)

        for host_ref, host_record in session.xenapi.host.get_all_records().items():
            node_name = host_record['hostname']
            try:
                if distutils.util.strtobool(
                    session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {})
                ):
                    return 'linstor://' + host_record['address']
            except Exception as e:
                # Can throw an exception if a host is offline. So catch it.
                util.SMlog('Unable to search controller on `{}`: {}'.format(
                    node_name, e
                ))
    except Exception:
        # Narrowed from a bare `except:` which also swallowed
        # SystemExit/KeyboardInterrupt.
        # Not found, maybe we are trying to create the SR...
        pass
def get_controller_uri():
    """
    Find the LINSTOR controller URI, retrying up to 10 times with a one
    second pause between attempts.
    :return: The controller URI or None if it could not be found.
    :rtype: str
    """
    # 10 attempts total, sleeping only between two attempts (9 sleeps).
    for attempt in range(10):
        uri = _get_controller_uri()
        if uri:
            return uri
        if attempt < 9:
            time.sleep(1)
def get_controller_node_name():
    """
    Find the node name of the host running the LINSTOR controller.
    :return: The node name, 'localhost' when the controller runs here,
    or None if it cannot be found.
    :rtype: str
    """
    PLUGIN_CMD = 'hasControllerRunning'

    # First, cheap local check through the DRBD database resource status.
    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])

    if ret == 0:
        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            # The database device is primary here: the controller is local.
            return 'localhost'

        # Otherwise look for a visible primary peer in the status output.
        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            return res.groups()[0]

    # Fall back to asking every live pool host through the plugin.
    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            if not session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )['live']:
                # Only query online hosts.
                continue

            if distutils.util.strtobool(session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {}
            )):
                return node_name
        except Exception as e:
            util.SMlog('Failed to call plugin to get controller on `{}`: {}'.format(
                node_name, e
            ))
def demote_drbd_resource(node_name, resource_name):
    """
    Demote a DRBD resource on a given node through the linstor-manager
    plugin.
    :param str node_name: The host (by hostname) that must demote.
    :param str resource_name: The DRBD resource to demote.
    :raises Exception: If the node cannot be found or the demotion failed.
    """
    PLUGIN_CMD = 'demoteDrbdResource'

    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        if host_record['hostname'] != node_name:
            continue

        try:
            session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {'resource_name': resource_name}
            )
            # Bug fix: without this return a SUCCESSFUL demotion fell
            # through to the misleading "unable to find node" exception
            # below.
            return
        except Exception as e:
            util.SMlog('Failed to demote resource `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))
    raise Exception(
        'Can\'t demote resource `{}`, unable to find node `{}`'
        .format(resource_name, node_name)
    )
269# ==============================================================================
class LinstorVolumeManagerError(Exception):
    """
    Exception raised by LinstorVolumeManager; carries an error `code` so
    callers can react to specific failures.
    """

    # Error codes. Bug fix: these constants previously had trailing commas,
    # which silently turned them into 1-tuples like `(0,)` instead of ints.
    # Comparisons against the class attributes still worked, but literal
    # comparisons (e.g. `e.code == 1`) and logging were broken.
    ERR_GENERIC = 0
    ERR_VOLUME_EXISTS = 1
    ERR_VOLUME_NOT_EXISTS = 2
    ERR_VOLUME_DESTROY = 3
    ERR_GROUP_NOT_EXISTS = 4

    def __init__(self, message, code=ERR_GENERIC):
        super(LinstorVolumeManagerError, self).__init__(message)
        self._code = code

    @property
    def code(self):
        # Read-only access to the error code.
        return self._code
287# ==============================================================================
289# Note:
290# If a storage pool is not accessible after a network change:
291# linstor node interface modify <NODE> default --ip <IP>
class LinstorVolumeManager(object):
    """
    API to manage LINSTOR volumes in XCP-ng.
    A volume in this context is a physical part of the storage layer.
    """

    # Slots (no per-instance __dict__). Caches (_kv_cache, _resource_cache,
    # _volume_info_cache) have matching *_dirty flags used to limit the
    # number of requests sent to the LINSTOR services.
    __slots__ = (
        '_linstor', '_logger', '_redundancy',
        '_base_group_name', '_group_name', '_ha_group_name',
        '_volumes', '_storage_pools', '_storage_pools_time',
        '_kv_cache', '_resource_cache', '_volume_info_cache',
        '_kv_cache_dirty', '_resource_cache_dirty', '_volume_info_cache_dirty'
    )
    # Root of the DRBD device paths exposed to callers.
    DEV_ROOT_PATH = DRBD_BY_RES_PATH

    # Default sector size.
    BLOCK_SIZE = 512

    # List of volume properties (keys in the LINSTOR KV store).
    PROP_METADATA = 'metadata'
    PROP_NOT_EXISTS = 'not-exists'
    PROP_VOLUME_NAME = 'volume-name'
    PROP_IS_READONLY_TIMESTAMP = 'readonly-timestamp'

    # A volume can only be locked for a limited duration.
    # The goal is to give enough time to slaves to execute some actions on
    # a device before an UUID update or a coalesce for example.
    # Expiration is expressed in seconds.
    LOCKED_EXPIRATION_DELAY = 1 * 60

    # Used when volume uuid is being updated.
    PROP_UPDATING_UUID_SRC = 'updating-uuid-src'

    # States of property PROP_NOT_EXISTS (stored as strings in the KV store).
    STATE_EXISTS = '0'
    STATE_NOT_EXISTS = '1'
    STATE_CREATING = '2'

    # Property namespaces.
    NAMESPACE_SR = 'xcp/sr'
    NAMESPACE_VOLUME = 'xcp/volume'

    # Regex to match properties.
    REG_PROP = '^([^/]+)/{}$'

    REG_METADATA = re.compile(REG_PROP.format(PROP_METADATA))
    REG_NOT_EXISTS = re.compile(REG_PROP.format(PROP_NOT_EXISTS))
    REG_VOLUME_NAME = re.compile(REG_PROP.format(PROP_VOLUME_NAME))
    REG_UPDATING_UUID_SRC = re.compile(REG_PROP.format(PROP_UPDATING_UUID_SRC))

    # Prefixes of SR/VOLUME in the LINSTOR DB.
    # A LINSTOR (resource, group, ...) name cannot start with a number.
    # So we add a prefix behind our SR/VOLUME uuids.
    PREFIX_SR = 'xcp-sr-'
    PREFIX_HA = 'xcp-ha-'
    PREFIX_VOLUME = 'xcp-volume-'

    # Limit request number when storage pool info is asked, we fetch
    # the current pool status after N elapsed seconds.
    STORAGE_POOLS_FETCH_INTERVAL = 15
    @staticmethod
    def default_logger(*args):
        # Fallback logger used when no logger is given to the constructor:
        # prints the raw args tuple to stdout.
        print(args)
360 # --------------------------------------------------------------------------
361 # API.
362 # --------------------------------------------------------------------------
    class VolumeInfo(object):
        """
        Lightweight container describing one volume: its LINSTOR name,
        its sizes and the nodes holding a diskful replica.
        """

        __slots__ = (
            'name',
            'allocated_size',  # Allocated size, place count is not used.
            'virtual_size',    # Total virtual available size of this volume
                               # (i.e. the user size at creation).
            'diskful'          # Array of nodes that have a diskful volume.
        )

        def __init__(self, name):
            self.name = name
            self.allocated_size = 0
            self.virtual_size = 0
            self.diskful = []

        def __repr__(self):
            return 'VolumeInfo("{}", {}, {}, {})'.format(
                self.name, self.allocated_size, self.virtual_size,
                self.diskful
            )
385 # --------------------------------------------------------------------------
    def __init__(
        self, uri, group_name, repair=False, logger=default_logger.__func__,
        attempt_count=30
    ):
        """
        Create a new LinstorVolumeManager object.
        :param str uri: URI to communicate with the LINSTOR controller.
        :param str group_name: The SR group name to use.
        :param bool repair: If true we try to remove bad volumes due to a crash
        or unexpected behavior.
        :param function logger: Function to log messages.
        :param int attempt_count: Number of attempts to join the controller.
        :raises LinstorVolumeManagerError: If the resource group is missing.
        """

        self._linstor = self._create_linstor_instance(
            uri, attempt_count=attempt_count
        )
        self._base_group_name = group_name

        # Ensure group exists.
        group_name = self._build_group_name(group_name)
        groups = self._linstor.resource_group_list_raise([group_name]).resource_groups
        if not groups:
            raise LinstorVolumeManagerError(
                'Unable to find `{}` Linstor SR'.format(group_name)
            )

        # Ok. ;)
        self._logger = logger
        self._redundancy = groups[0].select_filter.place_count
        self._group_name = group_name
        self._ha_group_name = self._build_ha_group_name(self._base_group_name)
        self._volumes = set()
        self._storage_pools_time = 0

        # To increase performance and limit request count to LINSTOR services,
        # we use caches.
        self._kv_cache = self._create_kv_cache()
        self._resource_cache = None
        self._resource_cache_dirty = True
        self._volume_info_cache = None
        self._volume_info_cache_dirty = True
        self._build_volumes(repair=repair)
    @property
    def group_name(self):
        """
        Give the used group name.
        :return: The group name.
        :rtype: str
        """
        # The base (user-facing) name, NOT the prefixed LINSTOR group name
        # stored in `_group_name`.
        return self._base_group_name
    @property
    def redundancy(self):
        """
        Give the used redundancy.
        :return: The redundancy (place count of the LINSTOR resource group).
        :rtype: int
        """
        return self._redundancy
    @property
    def volumes(self):
        """
        Give the volumes uuid set.
        :return: The volumes uuid set.
        :rtype: set(str)
        """
        # Returns the internal set itself (built by the constructor);
        # callers must not mutate it.
        return self._volumes
    @property
    def max_volume_size_allowed(self):
        """
        Give the max volume size currently available in B.
        :return: The current size.
        :rtype: int
        :raises LinstorVolumeManagerError: If no candidate or bad size given.
        """

        candidates = self._find_best_size_candidates()
        if not candidates:
            raise LinstorVolumeManagerError(
                'Failed to get max volume size allowed'
            )

        size = candidates[0].max_volume_size
        if size < 0:
            raise LinstorVolumeManagerError(
                'Invalid max volume size allowed given: {}'.format(size)
            )
        # Size from LINSTOR is presumably in KiB (hence * 1024), then
        # rounded down to a usable volume size.
        return self.round_down_volume_size(size * 1024)
    @property
    def physical_size(self):
        """
        Give the total physical size of the SR.
        :return: The physical size.
        :rtype: int
        """
        # Sums 'total_capacity' over all storage pools of the group.
        return self._compute_size('total_capacity')
    @property
    def physical_free_size(self):
        """
        Give the total free physical size of the SR.
        :return: The physical free size.
        :rtype: int
        """
        # Sums 'free_capacity' over all storage pools of the group.
        return self._compute_size('free_capacity')
    @property
    def allocated_volume_size(self):
        """
        Give the allocated size for all volumes. The place count is not
        used here. When thick lvm is used, the size for one volume should
        be equal to the virtual volume size. With thin lvm, the size is equal
        or lower to the volume size.
        :return: The allocated size of all volumes.
        :rtype: int
        """

        # Paths: /res_name/vol_number/size
        sizes = {}

        for resource in self._get_resource_cache().resources:
            if resource.name not in sizes:
                current = sizes[resource.name] = {}
            else:
                current = sizes[resource.name]

            for volume in resource.volumes:
                # We ignore diskless pools of the form "DfltDisklessStorPool".
                if volume.storage_pool_name != self._group_name:
                    continue

                current_size = volume.allocated_size
                if current_size < 0:
                    raise LinstorVolumeManagerError(
                        'Failed to get allocated size of `{}` on `{}`'
                        .format(resource.name, volume.storage_pool_name)
                    )
                # Keep the max over replicas: the same volume number can
                # appear on several nodes with different allocated sizes.
                current[volume.number] = max(current_size, current.get(volume.number) or 0)

        total_size = 0
        for volumes in sizes.values():
            for size in volumes.values():
                total_size += size

        # Sizes from LINSTOR are presumably in KiB, hence * 1024.
        return total_size * 1024
    def get_min_physical_size(self):
        """
        Give the minimum physical size of the SR.
        I.e. the size of the smallest disk + the number of pools.
        :return: The pool count and the physical min size in B.
        :rtype: tuple(int, int)
        """
        size = None
        pool_count = 0
        # force=True: bypass the storage pool cache to get fresh values.
        for pool in self._get_storage_pools(force=True):
            space = pool.free_space
            if space:
                pool_count += 1
                current_size = space.total_capacity
                if current_size < 0:
                    raise LinstorVolumeManagerError(
                        'Failed to get pool total_capacity attr of `{}`'
                        .format(pool.node_name)
                    )
                # Track the smallest pool capacity seen so far.
                if size is None or current_size < size:
                    size = current_size
        return (pool_count, (size or 0) * 1024)
    @property
    def metadata(self):
        """
        Get the metadata of the SR.
        :return: Dictionary that contains metadata.
        :rtype: dict(str, dict)
        :raises LinstorVolumeManagerError: If the stored value is not a dict.
        """

        sr_properties = self._get_sr_properties()
        metadata = sr_properties.get(self.PROP_METADATA)
        if metadata is not None:
            # Stored serialized as JSON (see the setter).
            metadata = json.loads(metadata)
            if isinstance(metadata, dict):
                return metadata
            raise LinstorVolumeManagerError(
                'Expected dictionary in SR metadata: {}'.format(
                    self._group_name
                )
            )

        return {}
    @metadata.setter
    def metadata(self, metadata):
        """
        Set the metadata of the SR.
        :param dict metadata: Dictionary that contains metadata.
        """

        assert isinstance(metadata, dict)
        sr_properties = self._get_sr_properties()
        # Serialized as JSON; the getter deserializes symmetrically.
        sr_properties[self.PROP_METADATA] = json.dumps(metadata)
    @property
    def disconnected_hosts(self):
        """
        Get the list of disconnected hosts.
        :return: Set that contains disconnected hosts.
        :rtype: set(str)
        """

        disconnected_hosts = set()
        for pool in self._get_storage_pools():
            for report in pool.reports:
                # Bitmask test: the pool node is flagged not connected.
                if report.ret_code & linstor.consts.WARN_NOT_CONNECTED == \
                        linstor.consts.WARN_NOT_CONNECTED:
                    disconnected_hosts.add(pool.node_name)
                    break
        return disconnected_hosts
    def check_volume_exists(self, volume_uuid):
        """
        Check if a volume exists in the SR.
        :param str volume_uuid: The volume uuid to check.
        :return: True if volume exists.
        :rtype: bool
        """
        # Only consults the in-memory set built by the constructor; no
        # request is sent to LINSTOR.
        return volume_uuid in self._volumes
    def create_volume(
        self,
        volume_uuid,
        size,
        persistent=True,
        volume_name=None,
        high_availability=False
    ):
        """
        Create a new volume on the SR.
        :param str volume_uuid: The volume uuid to use.
        :param int size: volume size in B.
        :param bool persistent: If false the volume will be unavailable
        on the next constructor call LinstorSR(...).
        :param str volume_name: If set, this name is used in the LINSTOR
        database instead of a generated name.
        :param bool high_availability: If set, the volume is created in
        the HA group.
        :return: The current device path of the volume.
        :rtype: str
        """

        self._logger('Creating LINSTOR volume {}...'.format(volume_uuid))
        if not volume_name:
            volume_name = self.build_volume_name(util.gen_uuid())
        volume_properties = self._create_volume_with_properties(
            volume_uuid,
            volume_name,
            size,
            True,  # place_resources
            high_availability
        )

        # Volume created! Now try to find the device path.
        try:
            self._logger(
                'Find device path of LINSTOR volume {}...'.format(volume_uuid)
            )
            device_path = self._find_device_path(volume_uuid, volume_name)
            if persistent:
                # Only a persistent volume is flagged as existing; a
                # non-persistent one stays in STATE "not exists" and is
                # cleaned up on the next constructor call.
                volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
            self._volumes.add(volume_uuid)
            self._logger(
                'LINSTOR volume {} created!'.format(volume_uuid)
            )
            return device_path
        except Exception:
            # There is an issue to find the path.
            # At this point the volume has just been created, so force flag can be used.
            self._destroy_volume(volume_uuid, force=True)
            raise
    def mark_volume_as_persistent(self, volume_uuid):
        """
        Mark volume as persistent if created with persistent=False.
        :param str volume_uuid: The volume uuid to mark.
        """

        self._ensure_volume_exists(volume_uuid)

        # Mark volume as persistent: flip the "not exists" property so the
        # next constructor call no longer garbage-collects this volume.
        volume_properties = self._get_volume_properties(volume_uuid)
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
    def destroy_volume(self, volume_uuid):
        """
        Destroy a volume.
        :param str volume_uuid: The volume uuid to destroy.
        :raises LinstorVolumeManagerError: With code ERR_VOLUME_DESTROY on
        failure.
        """

        self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        # Mark volume as destroyed first: if the real destruction below
        # fails, the volume is at least flagged for later cleanup.
        volume_properties = self._get_volume_properties(volume_uuid)
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS

        try:
            self._volumes.remove(volume_uuid)
            self._destroy_volume(volume_uuid)
        except Exception as e:
            raise LinstorVolumeManagerError(
                str(e),
                LinstorVolumeManagerError.ERR_VOLUME_DESTROY
            )
    def lock_volume(self, volume_uuid, locked=True):
        """
        Prevent modifications of the volume properties during
        "self.LOCKED_EXPIRATION_DELAY" seconds. The SR must be locked
        when used. This method is useful to attach/detach correctly a volume on
        a slave. Without it the GC can rename a volume, in this case the old
        volume path can be used by a slave...
        :param str volume_uuid: The volume uuid to protect/unprotect.
        :param bool locked: Lock/unlock the volume.
        """

        self._ensure_volume_exists(volume_uuid)

        self._logger(
            '{} volume {} as locked'.format(
                'Mark' if locked else 'Unmark',
                volume_uuid
            )
        )

        volume_properties = self._get_volume_properties(volume_uuid)
        if locked:
            # The lock is a wall-clock timestamp: it auto-expires after
            # LOCKED_EXPIRATION_DELAY seconds (checked by
            # ensure_volume_list_is_not_locked).
            volume_properties[
                self.PROP_IS_READONLY_TIMESTAMP
            ] = str(time.time())
        elif self.PROP_IS_READONLY_TIMESTAMP in volume_properties:
            volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
    def ensure_volume_is_not_locked(self, volume_uuid, timeout=None):
        """
        Ensure a volume is not locked. Wait if necessary.
        :param str volume_uuid: The volume uuid to check.
        :param int timeout: If the volume is always locked after the expiration
        of the timeout, an exception is thrown.
        """
        # Delegates to the list variant with a single-element list.
        return self.ensure_volume_list_is_not_locked([volume_uuid], timeout)
    def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None):
        """
        Wait until none of the given volumes is locked. Expired locks
        (older than LOCKED_EXPIRATION_DELAY) are removed on the fly.
        :param volume_uuids: Iterable of volume uuids to check.
        :param int timeout: If set, raise when volumes are still locked
        after this many seconds.
        :raises LinstorVolumeManagerError: On timeout.
        """
        # Only consider uuids actually known by this SR.
        checked = set()
        for volume_uuid in volume_uuids:
            if volume_uuid in self._volumes:
                checked.add(volume_uuid)

        if not checked:
            return

        waiting = False

        volume_properties = self._get_kv_cache()

        start = time.time()
        while True:
            # Can't delete in for loop, use a copy of the list.
            remaining = checked.copy()
            for volume_uuid in checked:
                volume_properties.namespace = \
                    self._build_volume_namespace(volume_uuid)
                timestamp = volume_properties.get(
                    self.PROP_IS_READONLY_TIMESTAMP
                )
                if timestamp is None:
                    # Not locked (anymore): drop it from the wait set.
                    remaining.remove(volume_uuid)
                    continue

                now = time.time()
                if now - float(timestamp) > self.LOCKED_EXPIRATION_DELAY:
                    # Lock expired: remove it ourselves.
                    self._logger(
                        'Remove readonly timestamp on {}'.format(volume_uuid)
                    )
                    volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
                    remaining.remove(volume_uuid)
                    continue

                # Still locked: stop scanning and wait below.
                if not waiting:
                    self._logger(
                        'Volume {} is locked, waiting...'.format(volume_uuid)
                    )
                    waiting = True
                break

            if not remaining:
                break
            checked = remaining

            # Note: `now` is always bound here because a non-empty
            # `remaining` implies the locked branch above ran.
            if timeout is not None and now - start > timeout:
                raise LinstorVolumeManagerError(
                    'volume `{}` is locked and timeout has been reached'
                    .format(volume_uuid),
                    LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
                )

            # We must wait to use the volume. After that we can modify it
            # ONLY if the SR is locked to avoid bad reads on the slaves.
            time.sleep(1)
            volume_properties = self._create_kv_cache()

        if waiting:
            self._logger('No volume locked now!')
    def remove_volume_if_diskless(self, volume_uuid):
        """
        Remove diskless path from local node.
        :param str volume_uuid: The volume uuid to remove.
        """

        self._ensure_volume_exists(volume_uuid)

        volume_properties = self._get_volume_properties(volume_uuid)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        node_name = socket.gethostname()

        for resource in self._get_resource_cache().resources:
            if resource.name == volume_name and resource.node_name == node_name:
                # Keep tie-breaker resources — presumably required for
                # DRBD quorum (verify against LINSTOR docs).
                if linstor.consts.FLAG_TIE_BREAKER in resource.flags:
                    return
                break

        result = self._linstor.resource_delete_if_diskless(
            node_name=node_name, rsc_name=volume_name
        )
        if not linstor.Linstor.all_api_responses_no_error(result):
            raise LinstorVolumeManagerError(
                'Unable to delete diskless path of `{}` on node `{}`: {}'
                .format(volume_name, node_name, ', '.join(
                    [str(x) for x in result]))
            )
    def introduce_volume(self, volume_uuid):
        """
        Placeholder, not implemented yet.
        :param str volume_uuid: The volume uuid to introduce.
        """
        pass  # TODO: Implement me.
    def resize_volume(self, volume_uuid, new_size):
        """
        Resize a volume.
        :param str volume_uuid: The volume uuid to resize.
        :param int new_size: New size in B.
        """

        volume_name = self.get_volume_name(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)
        # Round up, then convert B -> KiB for the LINSTOR API.
        new_size = self.round_up_volume_size(new_size) // 1024

        retry_count = 30
        while True:
            result = self._linstor.volume_dfn_modify(
                rsc_name=volume_name,
                volume_nr=0,
                size=new_size
            )

            self._mark_resource_cache_as_dirty()

            error_str = self._get_error_str(result)
            if not error_str:
                break

            # After volume creation, DRBD volume can be unusable during many seconds.
            # So we must retry the definition change if the device is not up to date.
            # Often the case for thick provisioning.
            if retry_count and error_str.find('non-UpToDate DRBD device') >= 0:
                time.sleep(2)
                retry_count -= 1
                continue

            raise LinstorVolumeManagerError(
                'Could not resize volume `{}` from SR `{}`: {}'
                .format(volume_uuid, self._group_name, error_str)
            )
    def get_volume_name(self, volume_uuid):
        """
        Get the name of a particular volume.
        :param str volume_uuid: The volume uuid of the name to get.
        :return: The volume name.
        :rtype: str
        :raises LinstorVolumeManagerError: If the name property is missing.
        """

        self._ensure_volume_exists(volume_uuid)
        volume_properties = self._get_volume_properties(volume_uuid)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
        if volume_name:
            return volume_name
        raise LinstorVolumeManagerError(
            'Failed to get volume name of {}'.format(volume_uuid)
        )
    def get_volume_size(self, volume_uuid):
        """
        Get the size of a particular volume.
        :param str volume_uuid: The volume uuid of the size to get.
        :return: The volume size.
        :rtype: int
        """

        volume_name = self.get_volume_name(volume_uuid)
        dfns = self._linstor.resource_dfn_list_raise(
            query_volume_definitions=True,
            filter_by_resource_definitions=[volume_name]
        ).resource_definitions

        # NOTE(review): assumes the filtered list is non-empty; an unknown
        # resource name would raise IndexError here instead of a
        # LinstorVolumeManagerError.
        size = dfns[0].volume_definitions[0].size
        if size < 0:
            raise LinstorVolumeManagerError(
                'Failed to get volume size of: {}'.format(volume_uuid)
            )
        # Size from LINSTOR is presumably in KiB, hence * 1024.
        return size * 1024
    def set_auto_promote_timeout(self, volume_uuid, timeout):
        """
        Define the blocking time of open calls when a DRBD
        is already open on another host.
        :param str volume_uuid: The volume uuid to modify.
        :param timeout: The auto-promote-timeout value passed to DRBD.
        """

        volume_name = self.get_volume_name(volume_uuid)
        result = self._linstor.resource_dfn_modify(volume_name, {
            'DrbdOptions/Resource/auto-promote-timeout': timeout
        })
        error_str = self._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not change the auto promote timeout of `{}`: {}'
                .format(volume_uuid, error_str)
            )
    def get_volume_info(self, volume_uuid):
        """
        Get the volume info of a particular volume.
        :param str volume_uuid: The volume uuid of the volume info to get.
        :return: The volume info.
        :rtype: VolumeInfo
        """

        volume_name = self.get_volume_name(volume_uuid)
        # Volume info map is keyed by LINSTOR volume name, not by uuid.
        return self._get_volumes_info()[volume_name]
    def get_device_path(self, volume_uuid):
        """
        Get the dev path of a volume, create a diskless if necessary.
        :param str volume_uuid: The volume uuid to get the dev path.
        :return: The current device path of the volume.
        :rtype: str
        """

        volume_name = self.get_volume_name(volume_uuid)
        return self._find_device_path(volume_uuid, volume_name)
951 def get_volume_uuid_from_device_path(self, device_path):
952 """
953 Get the volume uuid of a device_path.
954 :param str device_path: The dev path to find the volume uuid.
955 :return: The volume uuid of the local device path.
956 :rtype: str
957 """
959 expected_volume_name = \
960 self.get_volume_name_from_device_path(device_path)
962 volume_names = self.get_volumes_with_name()
963 for volume_uuid, volume_name in volume_names.items():
964 if volume_name == expected_volume_name:
965 return volume_uuid
967 raise LinstorVolumeManagerError(
968 'Unable to find volume uuid from dev path `{}`'.format(device_path)
969 )
971 def get_volume_name_from_device_path(self, device_path):
972 """
973 Get the volume name of a device_path.
974 :param str device_path: The dev path to find the volume name.
975 :return: The volume name of the device path.
976 :rtype: str
977 """
979 # Assume that we have a path like this:
980 # - "/dev/drbd/by-res/xcp-volume-<UUID>/0"
981 # - "../xcp-volume-<UUID>/0"
982 if device_path.startswith(DRBD_BY_RES_PATH):
983 prefix_len = len(DRBD_BY_RES_PATH)
984 else:
985 assert device_path.startswith('../')
986 prefix_len = 3
988 res_name_end = device_path.find('/', prefix_len)
989 assert res_name_end != -1
990 return device_path[prefix_len:res_name_end]
992 def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False):
993 """
994 Change the uuid of a volume.
995 :param str volume_uuid: The volume to modify.
996 :param str new_volume_uuid: The new volume uuid to use.
997 :param bool force: If true we doesn't check if volume_uuid is in the
998 volume list. I.e. the volume can be marked as deleted but the volume
999 can still be in the LINSTOR KV store if the deletion has failed.
1000 In specific cases like "undo" after a failed clone we must rename a bad
1001 deleted VDI.
1002 """
1004 self._logger(
1005 'Trying to update volume UUID {} to {}...'
1006 .format(volume_uuid, new_volume_uuid)
1007 )
1008 assert volume_uuid != new_volume_uuid, 'can\'t update volume UUID, same value'
1010 if not force:
1011 self._ensure_volume_exists(volume_uuid)
1012 self.ensure_volume_is_not_locked(volume_uuid)
1014 if new_volume_uuid in self._volumes:
1015 raise LinstorVolumeManagerError(
1016 'Volume `{}` already exists'.format(new_volume_uuid),
1017 LinstorVolumeManagerError.ERR_VOLUME_EXISTS
1018 )
1020 volume_properties = self._get_volume_properties(volume_uuid)
1021 if volume_properties.get(self.PROP_UPDATING_UUID_SRC):
1022 raise LinstorVolumeManagerError(
1023 'Cannot update volume uuid {}: invalid state'
1024 .format(volume_uuid)
1025 )
1027 # 1. Copy in temp variables metadata and volume_name.
1028 metadata = volume_properties.get(self.PROP_METADATA)
1029 volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
1031 # 2. Switch to new volume namespace.
1032 volume_properties.namespace = self._build_volume_namespace(
1033 new_volume_uuid
1034 )
1036 if list(volume_properties.items()):
1037 raise LinstorVolumeManagerError(
1038 'Cannot update volume uuid {} to {}: '
1039 .format(volume_uuid, new_volume_uuid) +
1040 'this last one is not empty'
1041 )
1043 try:
1044 # 3. Mark new volume properties with PROP_UPDATING_UUID_SRC.
1045 # If we crash after that, the new properties can be removed
1046 # properly.
1047 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS
1048 volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid
1050 # 4. Copy the properties.
1051 # Note: On new volumes, during clone for example, the metadata
1052 # may be missing. So we must test it to avoid this error:
1053 # "None has to be a str/unicode, but is <type 'NoneType'>"
1054 if metadata:
1055 volume_properties[self.PROP_METADATA] = metadata
1056 volume_properties[self.PROP_VOLUME_NAME] = volume_name
1058 # 5. Ok!
1059 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
1060 except Exception as e:
1061 try:
1062 # Clear the new volume properties in case of failure.
1063 assert volume_properties.namespace == \
1064 self._build_volume_namespace(new_volume_uuid)
1065 volume_properties.clear()
1066 except Exception as e:
1067 self._logger(
1068 'Failed to clear new volume properties: {} (ignoring...)'
1069 .format(e)
1070 )
1071 raise LinstorVolumeManagerError(
1072 'Failed to copy volume properties: {}'.format(e)
1073 )
1075 try:
1076 # 6. After this point, it's ok we can remove the
1077 # PROP_UPDATING_UUID_SRC property and clear the src properties
1078 # without problems.
1080 # 7. Switch to old volume namespace.
1081 volume_properties.namespace = self._build_volume_namespace(
1082 volume_uuid
1083 )
1084 volume_properties.clear()
1086 # 8. Switch a last time to new volume namespace.
1087 volume_properties.namespace = self._build_volume_namespace(
1088 new_volume_uuid
1089 )
1090 volume_properties.pop(self.PROP_UPDATING_UUID_SRC)
1091 except Exception as e:
1092 raise LinstorVolumeManagerError(
1093 'Failed to clear volume properties '
1094 'after volume uuid update: {}'.format(e)
1095 )
1097 self._volumes.remove(volume_uuid)
1098 self._volumes.add(new_volume_uuid)
1100 self._logger(
1101 'UUID update succeeded of {} to {}! (properties={})'
1102 .format(
1103 volume_uuid, new_volume_uuid,
1104 self._get_filtered_properties(volume_properties)
1105 )
1106 )
1108 def update_volume_name(self, volume_uuid, volume_name):
1109 """
1110 Change the volume name of a volume.
1111 :param str volume_uuid: The volume to modify.
1112 :param str volume_name: The volume_name to use.
1113 """
1115 self._ensure_volume_exists(volume_uuid)
1116 self.ensure_volume_is_not_locked(volume_uuid)
1117 if not volume_name.startswith(self.PREFIX_VOLUME):
1118 raise LinstorVolumeManagerError(
1119 'Volume name `{}` must be start with `{}`'
1120 .format(volume_name, self.PREFIX_VOLUME)
1121 )
1123 if volume_name not in self._fetch_resource_names():
1124 raise LinstorVolumeManagerError(
1125 'Volume `{}` doesn\'t exist'.format(volume_name)
1126 )
1128 volume_properties = self._get_volume_properties(volume_uuid)
1129 volume_properties[self.PROP_VOLUME_NAME] = volume_name
1131 def get_usage_states(self, volume_uuid):
1132 """
1133 Check if a volume is currently used.
1134 :param str volume_uuid: The volume uuid to check.
1135 :return: A dictionnary that contains states.
1136 :rtype: dict(str, bool or None)
1137 """
1139 states = {}
1141 volume_name = self.get_volume_name(volume_uuid)
1142 for resource_state in self._linstor.resource_list_raise(
1143 filter_by_resources=[volume_name]
1144 ).resource_states:
1145 states[resource_state.node_name] = resource_state.in_use
1147 return states
1149 def get_volume_openers(self, volume_uuid):
1150 """
1151 Get openers of a volume.
1152 :param str volume_uuid: The volume uuid to monitor.
1153 :return: A dictionnary that contains openers.
1154 :rtype: dict(str, obj)
1155 """
1156 return get_all_volume_openers(self.get_volume_name(volume_uuid), '0')
1158 def get_volumes_with_name(self):
1159 """
1160 Give a volume dictionnary that contains names actually owned.
1161 :return: A volume/name dict.
1162 :rtype: dict(str, str)
1163 """
1164 return self._get_volumes_by_property(self.REG_VOLUME_NAME)
1166 def get_volumes_with_info(self):
1167 """
1168 Give a volume dictionnary that contains VolumeInfos.
1169 :return: A volume/VolumeInfo dict.
1170 :rtype: dict(str, VolumeInfo)
1171 """
1173 volumes = {}
1175 all_volume_info = self._get_volumes_info()
1176 volume_names = self.get_volumes_with_name()
1177 for volume_uuid, volume_name in volume_names.items():
1178 if volume_name:
1179 volume_info = all_volume_info.get(volume_name)
1180 if volume_info:
1181 volumes[volume_uuid] = volume_info
1182 continue
1184 # Well I suppose if this volume is not available,
1185 # LINSTOR has been used directly without using this API.
1186 volumes[volume_uuid] = self.VolumeInfo('')
1188 return volumes
1190 def get_volumes_with_metadata(self):
1191 """
1192 Give a volume dictionnary that contains metadata.
1193 :return: A volume/metadata dict.
1194 :rtype: dict(str, dict)
1195 """
1197 volumes = {}
1199 metadata = self._get_volumes_by_property(self.REG_METADATA)
1200 for volume_uuid, volume_metadata in metadata.items():
1201 if volume_metadata:
1202 volume_metadata = json.loads(volume_metadata)
1203 if isinstance(volume_metadata, dict):
1204 volumes[volume_uuid] = volume_metadata
1205 continue
1206 raise LinstorVolumeManagerError(
1207 'Expected dictionary in volume metadata: {}'
1208 .format(volume_uuid)
1209 )
1211 volumes[volume_uuid] = {}
1213 return volumes
1215 def get_volume_metadata(self, volume_uuid):
1216 """
1217 Get the metadata of a volume.
1218 :return: Dictionary that contains metadata.
1219 :rtype: dict
1220 """
1222 self._ensure_volume_exists(volume_uuid)
1223 volume_properties = self._get_volume_properties(volume_uuid)
1224 metadata = volume_properties.get(self.PROP_METADATA)
1225 if metadata:
1226 metadata = json.loads(metadata)
1227 if isinstance(metadata, dict):
1228 return metadata
1229 raise LinstorVolumeManagerError(
1230 'Expected dictionary in volume metadata: {}'
1231 .format(volume_uuid)
1232 )
1233 return {}
1235 def set_volume_metadata(self, volume_uuid, metadata):
1236 """
1237 Set the metadata of a volume.
1238 :param dict metadata: Dictionary that contains metadata.
1239 """
1241 self._ensure_volume_exists(volume_uuid)
1242 self.ensure_volume_is_not_locked(volume_uuid)
1244 assert isinstance(metadata, dict)
1245 volume_properties = self._get_volume_properties(volume_uuid)
1246 volume_properties[self.PROP_METADATA] = json.dumps(metadata)
1248 def update_volume_metadata(self, volume_uuid, metadata):
1249 """
1250 Update the metadata of a volume. It modify only the given keys.
1251 It doesn't remove unreferenced key instead of set_volume_metadata.
1252 :param dict metadata: Dictionary that contains metadata.
1253 """
1255 self._ensure_volume_exists(volume_uuid)
1256 self.ensure_volume_is_not_locked(volume_uuid)
1258 assert isinstance(metadata, dict)
1259 volume_properties = self._get_volume_properties(volume_uuid)
1261 current_metadata = json.loads(
1262 volume_properties.get(self.PROP_METADATA, '{}')
1263 )
1264 if not isinstance(metadata, dict):
1265 raise LinstorVolumeManagerError(
1266 'Expected dictionary in volume metadata: {}'
1267 .format(volume_uuid)
1268 )
1270 for key, value in metadata.items():
1271 current_metadata[key] = value
1272 volume_properties[self.PROP_METADATA] = json.dumps(current_metadata)
1274 def shallow_clone_volume(self, volume_uuid, clone_uuid, persistent=True):
1275 """
1276 Clone a volume. Do not copy the data, this method creates a new volume
1277 with the same size.
1278 :param str volume_uuid: The volume to clone.
1279 :param str clone_uuid: The cloned volume.
1280 :param bool persistent: If false the volume will be unavailable
1281 on the next constructor call LinstorSR(...).
1282 :return: The current device path of the cloned volume.
1283 :rtype: str
1284 """
1286 volume_name = self.get_volume_name(volume_uuid)
1287 self.ensure_volume_is_not_locked(volume_uuid)
1289 # 1. Find ideal nodes + size to use.
1290 ideal_node_names, size = self._get_volume_node_names_and_size(
1291 volume_name
1292 )
1293 if size <= 0:
1294 raise LinstorVolumeManagerError(
1295 'Invalid size of {} for volume `{}`'.format(size, volume_name)
1296 )
1298 # 2. Create clone!
1299 return self.create_volume(clone_uuid, size, persistent)
1301 def remove_resourceless_volumes(self):
1302 """
1303 Remove all volumes without valid or non-empty name
1304 (i.e. without LINSTOR resource). It's different than
1305 LinstorVolumeManager constructor that takes a `repair` param that
1306 removes volumes with `PROP_NOT_EXISTS` to 1.
1307 """
1309 resource_names = self._fetch_resource_names()
1310 for volume_uuid, volume_name in self.get_volumes_with_name().items():
1311 if not volume_name or volume_name not in resource_names:
1312 # Don't force, we can be sure of what's happening.
1313 self.destroy_volume(volume_uuid)
    def destroy(self):
        """
        Destroy this SR. Object should not be used after that.
        :raise LinstorVolumeManagerError: If managed or external volumes
        still exist, or if a destruction step fails.
        """

        # 1. Ensure volume list is empty. No cost.
        if self._volumes:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes'
            )

        # 2. Fetch ALL resource names.
        # This list may therefore contain volumes created outside
        # the scope of the driver.
        resource_names = self._fetch_resource_names(ignore_deleted=False)
        try:
            resource_names.remove(DATABASE_VOLUME_NAME)
        except KeyError:
            # Really strange to reach that point.
            # Normally we always have the database volume in the list.
            pass

        # 3. Ensure the resource name list is entirely empty...
        if resource_names:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes (created externally or being deleted)'
            )

        # 4. Destroying...
        # Remember the controller state so it can be restored on failure.
        controller_is_running = self._controller_is_running()
        uri = 'linstor://localhost'
        try:
            if controller_is_running:
                self._start_controller(start=False)

            # 4.1. Umount LINSTOR database.
            self._mount_database_volume(
                self.build_device_path(DATABASE_VOLUME_NAME),
                mount=False,
                force=True
            )

            # 4.2. Refresh instance.
            self._start_controller(start=True)
            self._linstor = self._create_linstor_instance(
                uri, keep_uri_unmodified=True
            )

            # 4.3. Destroy database volume.
            self._destroy_resource(DATABASE_VOLUME_NAME)

            # 4.4. Refresh linstor connection.
            # Without we get this error:
            # "Cannot delete resource group 'xcp-sr-linstor_group_thin_device' because it has existing resource definitions.."
            # Because the deletion of the databse was not seen by Linstor for some reason.
            # It seems a simple refresh of the Linstor connection make it aware of the deletion.
            self._linstor.disconnect()
            self._linstor.connect()

            # 4.5. Destroy remaining drbd nodes on hosts.
            # We check if there is a DRBD node on hosts that could mean blocking when destroying resource groups.
            # It needs to be done locally by each host so we go through the linstor-manager plugin.
            # If we don't do this sometimes, the destroy will fail when trying to destroy the resource groups with:
            # "linstor-manager:destroy error: Failed to destroy SP `xcp-sr-linstor_group_thin_device` on node `r620-s2`: The specified storage pool 'xcp-sr-linstor_group_thin_device' on node 'r620-s2' can not be deleted as volumes / snapshot-volumes are still using it."
            session = util.timeout_call(5, util.get_localAPI_session)
            for host_ref in session.xenapi.host.get_all():
                try:
                    # Best-effort per host: a failure is only logged.
                    response = session.xenapi.host.call_plugin(
                        host_ref, 'linstor-manager', 'destroyDrbdVolumes', {'volume_group': self._group_name}
                    )
                except Exception as e:
                    util.SMlog('Calling destroyDrbdVolumes on host {} failed with error {}'.format(host_ref, e))

            # 4.6. Destroy group and storage pools.
            self._destroy_resource_group(self._linstor, self._group_name)
            self._destroy_resource_group(self._linstor, self._ha_group_name)
            for pool in self._get_storage_pools(force=True):
                self._destroy_storage_pool(
                    self._linstor, pool.name, pool.node_name
                )
        except Exception as e:
            # Restore the controller to its initial state before failing.
            self._start_controller(start=controller_is_running)
            raise e

        # Final cleanup of the local database directory is best-effort:
        # the SR itself is already gone at this point.
        try:
            self._start_controller(start=False)
            for file in os.listdir(DATABASE_PATH):
                if file != 'lost+found':
                    os.remove(DATABASE_PATH + '/' + file)
        except Exception as e:
            util.SMlog(
                'Ignoring failure after LINSTOR SR destruction: {}'
                .format(e)
            )
1413 def find_up_to_date_diskful_nodes(self, volume_uuid):
1414 """
1415 Find all nodes that contain a specific volume using diskful disks.
1416 The disk must be up to data to be used.
1417 :param str volume_uuid: The volume to use.
1418 :return: The available nodes.
1419 :rtype: tuple(set(str), str)
1420 """
1422 volume_name = self.get_volume_name(volume_uuid)
1424 in_use_by = None
1425 node_names = set()
1427 resource_states = filter(
1428 lambda resource_state: resource_state.name == volume_name,
1429 self._get_resource_cache().resource_states
1430 )
1432 for resource_state in resource_states:
1433 volume_state = resource_state.volume_states[0]
1434 if volume_state.disk_state == 'UpToDate':
1435 node_names.add(resource_state.node_name)
1436 if resource_state.in_use:
1437 in_use_by = resource_state.node_name
1439 return (node_names, in_use_by)
1441 def invalidate_resource_cache(self):
1442 """
1443 If resources are impacted by external commands like vhdutil,
1444 it's necessary to call this function to invalidate current resource
1445 cache.
1446 """
1447 self._mark_resource_cache_as_dirty()
1449 def has_node(self, node_name):
1450 """
1451 Check if a node exists in the LINSTOR database.
1452 :rtype: bool
1453 """
1454 result = self._linstor.node_list()
1455 error_str = self._get_error_str(result)
1456 if error_str:
1457 raise LinstorVolumeManagerError(
1458 'Failed to list nodes using `{}`: {}'
1459 .format(node_name, error_str)
1460 )
1461 return bool(result[0].node(node_name))
1463 def create_node(self, node_name, ip):
1464 """
1465 Create a new node in the LINSTOR database.
1466 :param str node_name: Node name to use.
1467 :param str ip: Host IP to communicate.
1468 """
1469 result = self._linstor.node_create(
1470 node_name,
1471 linstor.consts.VAL_NODE_TYPE_CMBD,
1472 ip
1473 )
1474 errors = self._filter_errors(result)
1475 if errors:
1476 error_str = self._get_error_str(errors)
1477 raise LinstorVolumeManagerError(
1478 'Failed to create node `{}`: {}'.format(node_name, error_str)
1479 )
1481 def destroy_node(self, node_name):
1482 """
1483 Destroy a node in the LINSTOR database.
1484 :param str node_name: Node name to remove.
1485 """
1486 result = self._linstor.node_delete(node_name)
1487 errors = self._filter_errors(result)
1488 if errors:
1489 error_str = self._get_error_str(errors)
1490 raise LinstorVolumeManagerError(
1491 'Failed to destroy node `{}`: {}'.format(node_name, error_str)
1492 )
1494 def create_node_interface(self, node_name, name, ip):
1495 """
1496 Create a new node interface in the LINSTOR database.
1497 :param str node_name: Node name of the interface to use.
1498 :param str name: Interface to create.
1499 :param str ip: IP of the interface.
1500 """
1501 result = self._linstor.netinterface_create(node_name, name, ip)
1502 errors = self._filter_errors(result)
1503 if errors:
1504 error_str = self._get_error_str(errors)
1505 raise LinstorVolumeManagerError(
1506 'Failed to create node interface on `{}`: {}'.format(node_name, error_str)
1507 )
1509 def destroy_node_interface(self, node_name, name):
1510 """
1511 Destroy a node interface in the LINSTOR database.
1512 :param str node_name: Node name of the interface to remove.
1513 :param str name: Interface to remove.
1514 """
1516 if name == 'default':
1517 raise LinstorVolumeManagerError(
1518 'Unable to delete the default interface of a node!'
1519 )
1521 result = self._linstor.netinterface_delete(node_name, name)
1522 errors = self._filter_errors(result)
1523 if errors:
1524 error_str = self._get_error_str(errors)
1525 raise LinstorVolumeManagerError(
1526 'Failed to destroy node interface on `{}`: {}'.format(node_name, error_str)
1527 )
1529 def modify_node_interface(self, node_name, name, ip):
1530 """
1531 Modify a node interface in the LINSTOR database. Create it if necessary.
1532 :param str node_name: Node name of the interface to use.
1533 :param str name: Interface to modify or create.
1534 :param str ip: IP of the interface.
1535 """
1536 result = self._linstor.netinterface_create(node_name, name, ip)
1537 errors = self._filter_errors(result)
1538 if not errors:
1539 return
1541 if self._check_errors(errors, [linstor.consts.FAIL_EXISTS_NET_IF]):
1542 result = self._linstor.netinterface_modify(node_name, name, ip)
1543 errors = self._filter_errors(result)
1544 if not errors:
1545 return
1547 error_str = self._get_error_str(errors)
1548 raise LinstorVolumeManagerError(
1549 'Unable to modify interface on `{}`: {}'.format(node_name, error_str)
1550 )
1552 def list_node_interfaces(self, node_name):
1553 """
1554 List all node interfaces.
1555 :param str node_name: Node name to use to list interfaces.
1556 :rtype: list
1557 :
1558 """
1559 result = self._linstor.net_interface_list(node_name)
1560 if not result:
1561 raise LinstorVolumeManagerError(
1562 'Unable to list interfaces on `{}`: no list received'.format(node_name)
1563 )
1565 interfaces = {}
1566 for interface in result:
1567 interface = interface._rest_data
1568 interfaces[interface['name']] = {
1569 'address': interface['address'],
1570 'active': interface['is_active']
1571 }
1572 return interfaces
    def get_node_preferred_interface(self, node_name):
        """
        Get the preferred interface used by a node.
        :param str node_name: Node name of the interface to get.
        :rtype: str
        """
        try:
            nodes = self._linstor.node_list_raise([node_name]).nodes
            if nodes:
                properties = nodes[0].props
                # 'PrefNic' is the LINSTOR node property naming the
                # preferred NIC; fall back to 'default' when unset.
                return properties.get('PrefNic', 'default')
            # NOTE(review): when the node is unknown, this returns the empty
            # node list ([]) instead of a str as declared by :rtype: —
            # callers likely expect 'default' or an exception; confirm
            # before relying on the return type.
            return nodes
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to get preferred interface: `{}`'.format(e)
            )
1591 def set_node_preferred_interface(self, node_name, name):
1592 """
1593 Set the preferred interface to use on a node.
1594 :param str node_name: Node name of the interface.
1595 :param str name: Preferred interface to use.
1596 """
1597 result = self._linstor.node_modify(node_name, property_dict={'PrefNic': name})
1598 errors = self._filter_errors(result)
1599 if errors:
1600 error_str = self._get_error_str(errors)
1601 raise LinstorVolumeManagerError(
1602 'Failed to set preferred node interface on `{}`: {}'.format(node_name, error_str)
1603 )
1605 def get_nodes_info(self):
1606 """
1607 Get all nodes + statuses, used or not by the pool.
1608 :rtype: dict(str, dict)
1609 """
1610 try:
1611 nodes = {}
1612 for node in self._linstor.node_list_raise().nodes:
1613 nodes[node.name] = node.connection_status
1614 return nodes
1615 except Exception as e:
1616 raise LinstorVolumeManagerError(
1617 'Failed to get all nodes: `{}`'.format(e)
1618 )
1620 def get_storage_pools_info(self):
1621 """
1622 Give all storage pools of current group name.
1623 :rtype: dict(str, list)
1624 """
1625 storage_pools = {}
1626 for pool in self._get_storage_pools(force=True):
1627 if pool.node_name not in storage_pools:
1628 storage_pools[pool.node_name] = []
1630 size = -1
1631 capacity = -1
1633 space = pool.free_space
1634 if space:
1635 size = space.free_capacity
1636 if size < 0:
1637 size = -1
1638 else:
1639 size *= 1024
1640 capacity = space.total_capacity
1641 if capacity <= 0:
1642 capacity = -1
1643 else:
1644 capacity *= 1024
1646 storage_pools[pool.node_name].append({
1647 'name': pool.name,
1648 'linstor-uuid': pool.uuid,
1649 'free-size': size,
1650 'capacity': capacity
1651 })
1653 return storage_pools
    def get_resources_info(self):
        """
        Give all resources of current group name.
        Built in three passes: (1) per-node resource/volume layout,
        (2) usage/disk states merged in, (3) volume UUIDs attached.
        Sizes are converted from KiB (LINSTOR) to bytes; -1 means unknown.
        :rtype: dict(str, list)
        """
        resources = {}
        resource_list = self._get_resource_cache()
        volume_names = self.get_volumes_with_name()
        for resource in resource_list.resources:
            if resource.name not in resources:
                resources[resource.name] = { 'nodes': {}, 'uuid': '' }
            resource_nodes = resources[resource.name]['nodes']

            resource_nodes[resource.node_name] = {
                'volumes': [],
                'diskful': linstor.consts.FLAG_DISKLESS not in resource.flags,
                'tie-breaker': linstor.consts.FLAG_TIE_BREAKER in resource.flags
            }
            resource_volumes = resource_nodes[resource.node_name]['volumes']

            for volume in resource.volumes:
                # We ignore diskless pools of the form "DfltDisklessStorPool".
                if volume.storage_pool_name != self._group_name:
                    continue

                usable_size = volume.usable_size
                if usable_size < 0:
                    usable_size = -1
                else:
                    usable_size *= 1024

                allocated_size = volume.allocated_size
                if allocated_size < 0:
                    allocated_size = -1
                else:
                    allocated_size *= 1024

                resource_volumes.append({
                    'storage-pool-name': volume.storage_pool_name,
                    'linstor-uuid': volume.uuid,
                    'number': volume.number,
                    'device-path': volume.device_path,
                    'usable-size': usable_size,
                    'allocated-size': allocated_size
                })

        # Second pass: merge in-use and disk states reported per node.
        # NOTE(review): assumes every state refers to a resource/node seen
        # in the first pass; a stray state would raise KeyError — confirm.
        for resource_state in resource_list.resource_states:
            resource = resources[resource_state.rsc_name]['nodes'][resource_state.node_name]
            resource['in-use'] = resource_state.in_use

            volumes = resource['volumes']
            for volume_state in resource_state.volume_states:
                volume = next((x for x in volumes if x['number'] == volume_state.number), None)
                if volume:
                    volume['disk-state'] = volume_state.disk_state

        # Third pass: attach the SR-level volume UUID to each resource.
        for volume_uuid, volume_name in volume_names.items():
            resource = resources.get(volume_name)
            if resource:
                resource['uuid'] = volume_uuid

        return resources
1718 def get_database_path(self):
1719 """
1720 Get the database path.
1721 :return: The current database path.
1722 :rtype: str
1723 """
1724 return self._request_database_path(self._linstor)
1726 @classmethod
1727 def get_all_group_names(cls, base_name):
1728 """
1729 Get all group names. I.e. list of current group + HA.
1730 :param str base_name: The SR group_name to use.
1731 :return: List of group names.
1732 :rtype: list
1733 """
1734 return [cls._build_group_name(base_name), cls._build_ha_group_name(base_name)]
    @classmethod
    def create_sr(
        cls, group_name, ips, redundancy,
        thin_provisioning, auto_quorum,
        logger=default_logger.__func__
    ):
        """
        Create a new SR on the given nodes.
        :param str group_name: The SR group_name to use.
        :param set(str) ips: Node ips.
        :param int redundancy: How many copy of volumes should we store?
        :param bool thin_provisioning: Use thin or thick provisioning.
        :param bool auto_quorum: DB quorum is monitored by LINSTOR.
        :param function logger: Function to log messages.
        :return: A new LinstorSr instance.
        :rtype: LinstorSr
        """

        # The controller is started only for the duration of the creation;
        # the finally block below always reverts to the stopped state.
        try:
            cls._start_controller(start=True)
            sr = cls._create_sr(
                group_name,
                ips,
                redundancy,
                thin_provisioning,
                auto_quorum,
                logger
            )
        finally:
            # Controller must be stopped and volume unmounted because
            # it is the role of the drbd-reactor daemon to do the right
            # actions.
            cls._start_controller(start=False)
            cls._mount_volume(
                cls.build_device_path(DATABASE_VOLUME_NAME),
                DATABASE_PATH,
                mount=False
            )
        return sr
    @classmethod
    def _create_sr(
        cls, group_name, ips, redundancy,
        thin_provisioning, auto_quorum,
        logger=default_logger.__func__
    ):
        """
        Implementation of create_sr. The caller is responsible for the
        controller lifecycle (started before this call, stopped after).
        See create_sr for parameter documentation.
        """
        # 1. Check if SR already exists.
        uri = 'linstor://localhost'

        lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True)

        # `ips` maps node names to host IPs.
        node_names = list(ips.keys())
        for node_name, ip in ips.items():
            while True:
                # Try to create node.
                result = lin.node_create(
                    node_name,
                    linstor.consts.VAL_NODE_TYPE_CMBD,
                    ip
                )

                errors = cls._filter_errors(result)
                if cls._check_errors(
                    errors, [linstor.consts.FAIL_EXISTS_NODE]
                ):
                    # If it already exists, remove, then recreate.
                    # NOTE(review): if creation keeps reporting
                    # FAIL_EXISTS_NODE after a successful delete, this loop
                    # never terminates — confirm LINSTOR guarantees progress.
                    result = lin.node_delete(node_name)
                    error_str = cls._get_error_str(result)
                    if error_str:
                        raise LinstorVolumeManagerError(
                            'Failed to remove old node `{}`: {}'
                            .format(node_name, error_str)
                        )
                elif not errors:
                    break  # Created!
                else:
                    raise LinstorVolumeManagerError(
                        'Failed to create node `{}` with ip `{}`: {}'.format(
                            node_name, ip, cls._get_error_str(errors)
                        )
                    )

        # The user-supplied name doubles as the LVM driver pool name
        # (format 'VG' or 'VG/LV'); the LINSTOR group/SP names are derived.
        driver_pool_name = group_name
        base_group_name = group_name
        group_name = cls._build_group_name(group_name)
        storage_pool_name = group_name
        pools = lin.storage_pool_list_raise(filter_by_stor_pools=[storage_pool_name]).storage_pools
        if pools:
            existing_node_names = [pool.node_name for pool in pools]
            raise LinstorVolumeManagerError(
                'Unable to create SR `{}`. It already exists on node(s): {}'
                .format(group_name, existing_node_names)
            )

        if lin.resource_group_list_raise(
            cls.get_all_group_names(base_group_name)
        ).resource_groups:
            # Group exists but holds no resource definitions: treat it as
            # leftover state and move the old config aside instead of failing.
            if not lin.resource_dfn_list_raise().resource_definitions:
                backup_path = cls._create_database_backup_path()
                logger(
                    'Group name already exists `{}` without LVs. '
                    'Ignoring and moving the config files in {}'.format(group_name, backup_path)
                )
                cls._move_files(DATABASE_PATH, backup_path)
            else:
                raise LinstorVolumeManagerError(
                    'Unable to create SR `{}`: The group name already exists'
                    .format(group_name)
                )

        if thin_provisioning:
            driver_pool_parts = driver_pool_name.split('/')
            if not len(driver_pool_parts) == 2:
                raise LinstorVolumeManagerError(
                    'Invalid group name using thin provisioning. '
                    'Expected format: \'VG/LV`\''
                )

        # 2. Create storage pool on each node + resource group.
        reg_volume_group_not_found = re.compile(
            ".*Volume group '.*' not found$"
        )

        # `i` tracks how far node processing went, for cleanup on error.
        i = 0
        try:
            # 2.a. Create storage pools.
            storage_pool_count = 0
            while i < len(node_names):
                node_name = node_names[i]

                result = lin.storage_pool_create(
                    node_name=node_name,
                    storage_pool_name=storage_pool_name,
                    storage_driver='LVM_THIN' if thin_provisioning else 'LVM',
                    driver_pool_name=driver_pool_name
                )

                errors = linstor.Linstor.filter_api_call_response_errors(
                    result
                )
                if errors:
                    # A missing VG on one node is tolerated: that node simply
                    # doesn't join the pool. Any other error is fatal.
                    if len(errors) == 1 and errors[0].is_error(
                        linstor.consts.FAIL_STOR_POOL_CONFIGURATION_ERROR
                    ) and reg_volume_group_not_found.match(errors[0].message):
                        logger(
                            'Volume group `{}` not found on `{}`. Ignoring...'
                            .format(group_name, node_name)
                        )
                        cls._destroy_storage_pool(lin, storage_pool_name, node_name)
                    else:
                        error_str = cls._get_error_str(result)
                        raise LinstorVolumeManagerError(
                            'Could not create SP `{}` on node `{}`: {}'
                            .format(group_name, node_name, error_str)
                        )
                else:
                    storage_pool_count += 1
                i += 1

            if not storage_pool_count:
                raise LinstorVolumeManagerError(
                    'Unable to create SR `{}`: No VG group found'.format(
                        group_name,
                    )
                )

            # 2.b. Create resource groups.
            # The HA group always uses a replica count of 3.
            ha_group_name = cls._build_ha_group_name(base_group_name)
            cls._create_resource_group(
                lin,
                group_name,
                storage_pool_name,
                redundancy,
                True
            )
            cls._create_resource_group(
                lin,
                ha_group_name,
                storage_pool_name,
                3,
                True
            )

            # 3. Create the LINSTOR database volume and mount it.
            try:
                logger('Creating database volume...')
                volume_path = cls._create_database_volume(
                    lin, ha_group_name, storage_pool_name, node_names, redundancy, auto_quorum
                )
            except LinstorVolumeManagerError as e:
                if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                    logger('Destroying database volume after creation fail...')
                    cls._force_destroy_database_volume(lin, group_name)
                raise

            try:
                logger('Mounting database volume...')

                # First we must disable the controller to move safely the
                # LINSTOR config.
                cls._start_controller(start=False)

                cls._mount_database_volume(volume_path)
            except Exception as e:
                # Ensure we are connected because controller has been
                # restarted during mount call.
                logger('Destroying database volume after mount fail...')

                try:
                    cls._start_controller(start=True)
                except Exception:
                    pass

                lin = cls._create_linstor_instance(
                    uri, keep_uri_unmodified=True
                )
                cls._force_destroy_database_volume(lin, group_name)
                raise e

            cls._start_controller(start=True)
            lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True)

        # 4. Remove storage pools/resource/volume group in the case of errors.
        except Exception as e:
            logger('Destroying resource group and storage pools after fail...')
            try:
                # NOTE(review): if the failure happened before step 2.b,
                # `ha_group_name` is unbound here; the resulting NameError is
                # swallowed by this except clause, but the HA group cleanup
                # is then skipped — confirm this is intended.
                cls._destroy_resource_group(lin, group_name)
                cls._destroy_resource_group(lin, ha_group_name)
            except Exception as e2:
                logger('Failed to destroy resource group: {}'.format(e2))
                pass
            j = 0
            i = min(i, len(node_names) - 1)
            while j <= i:
                try:
                    cls._destroy_storage_pool(lin, storage_pool_name, node_names[j])
                except Exception as e2:
                    # NOTE(review): message says "resource group" but this
                    # loop destroys storage pools — likely a copy/paste slip.
                    logger('Failed to destroy resource group: {}'.format(e2))
                    pass
                j += 1
            raise e

        # 5. Return new instance.
        # __new__ bypasses the regular constructor: all instance state is
        # initialized explicitly below.
        instance = cls.__new__(cls)
        instance._linstor = lin
        instance._logger = logger
        instance._redundancy = redundancy
        instance._base_group_name = base_group_name
        instance._group_name = group_name
        instance._volumes = set()
        instance._storage_pools_time = 0
        instance._kv_cache = instance._create_kv_cache()
        instance._resource_cache = None
        instance._resource_cache_dirty = True
        instance._volume_info_cache = None
        instance._volume_info_cache_dirty = True
        return instance
1994 @classmethod
1995 def build_device_path(cls, volume_name):
1996 """
1997 Build a device path given a volume name.
1998 :param str volume_name: The volume name to use.
1999 :return: A valid or not device path.
2000 :rtype: str
2001 """
2003 return '{}{}/0'.format(cls.DEV_ROOT_PATH, volume_name)
2005 @classmethod
2006 def build_volume_name(cls, base_name):
2007 """
2008 Build a volume name given a base name (i.e. a UUID).
2009 :param str base_name: The volume name to use.
2010 :return: A valid or not device path.
2011 :rtype: str
2012 """
2013 return '{}{}'.format(cls.PREFIX_VOLUME, base_name)
2015 @classmethod
2016 def round_up_volume_size(cls, volume_size):
2017 """
2018 Align volume size on higher multiple of BLOCK_SIZE.
2019 :param int volume_size: The volume size to align.
2020 :return: An aligned volume size.
2021 :rtype: int
2022 """
2023 return round_up(volume_size, cls.BLOCK_SIZE)
2025 @classmethod
2026 def round_down_volume_size(cls, volume_size):
2027 """
2028 Align volume size on lower multiple of BLOCK_SIZE.
2029 :param int volume_size: The volume size to align.
2030 :return: An aligned volume size.
2031 :rtype: int
2032 """
2033 return round_down(volume_size, cls.BLOCK_SIZE)
2035 # --------------------------------------------------------------------------
2036 # Private helpers.
2037 # --------------------------------------------------------------------------
2039 def _create_kv_cache(self):
2040 self._kv_cache = self._create_linstor_kv('/')
2041 self._kv_cache_dirty = False
2042 return self._kv_cache
2044 def _get_kv_cache(self):
2045 if self._kv_cache_dirty:
2046 self._kv_cache = self._create_kv_cache()
2047 return self._kv_cache
2049 def _create_resource_cache(self):
2050 self._resource_cache = self._linstor.resource_list_raise()
2051 self._resource_cache_dirty = False
2052 return self._resource_cache
2054 def _get_resource_cache(self):
2055 if self._resource_cache_dirty:
2056 self._resource_cache = self._create_resource_cache()
2057 return self._resource_cache
2059 def _mark_resource_cache_as_dirty(self):
2060 self._resource_cache_dirty = True
2061 self._volume_info_cache_dirty = True
2063 # --------------------------------------------------------------------------
2065 def _ensure_volume_exists(self, volume_uuid):
2066 if volume_uuid not in self._volumes:
2067 raise LinstorVolumeManagerError(
2068 'volume `{}` doesn\'t exist'.format(volume_uuid),
2069 LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
2070 )
2072 def _find_best_size_candidates(self):
2073 result = self._linstor.resource_group_qmvs(self._group_name)
2074 error_str = self._get_error_str(result)
2075 if error_str:
2076 raise LinstorVolumeManagerError(
2077 'Failed to get max volume size allowed of SR `{}`: {}'.format(
2078 self._group_name,
2079 error_str
2080 )
2081 )
2082 return result[0].candidates
2084 def _fetch_resource_names(self, ignore_deleted=True):
2085 resource_names = set()
2086 dfns = self._linstor.resource_dfn_list_raise().resource_definitions
2087 for dfn in dfns:
2088 if dfn.resource_group_name in self.get_all_group_names(self._base_group_name) and (
2089 ignore_deleted or
2090 linstor.consts.FLAG_DELETE not in dfn.flags
2091 ):
2092 resource_names.add(dfn.name)
2093 return resource_names
    def _get_volumes_info(self, volume_name=None):
        """
        Build a {resource name: VolumeInfo} map from the cached resource
        list, converting sizes from KiB to bytes.

        :param str volume_name: Unused here — NOTE(review): presumably a
            leftover filter parameter; confirm before removing.
        :return: Volume info per resource name.
        :rtype: dict(str, VolumeInfo)
        :raise LinstorVolumeManagerError: if LINSTOR reports a negative
            allocated size or no usable size for a volume.
        """
        all_volume_info = {}

        # Serve from cache when nothing has invalidated it.
        if not self._volume_info_cache_dirty:
            return self._volume_info_cache

        for resource in self._get_resource_cache().resources:
            # One VolumeInfo per resource name, shared by all its replicas.
            if resource.name not in all_volume_info:
                current = all_volume_info[resource.name] = self.VolumeInfo(
                    resource.name
                )
            else:
                current = all_volume_info[resource.name]

            if linstor.consts.FLAG_DISKLESS not in resource.flags:
                current.diskful.append(resource.node_name)

            for volume in resource.volumes:
                # We ignore diskless pools of the form "DfltDisklessStorPool".
                if volume.storage_pool_name == self._group_name:
                    if volume.allocated_size < 0:
                        raise LinstorVolumeManagerError(
                            'Failed to get allocated size of `{}` on `{}`'
                            .format(resource.name, volume.storage_pool_name)
                        )
                    allocated_size = volume.allocated_size

                    # Keep the max allocated size across replicas; the
                    # and/or chain falls back to allocated_size when the
                    # accumulator is still 0.
                    current.allocated_size = current.allocated_size and \
                        max(current.allocated_size, allocated_size) or \
                        allocated_size

                    # Keep the smallest positive usable size as virtual size.
                    usable_size = volume.usable_size
                    if usable_size > 0 and (
                        usable_size < current.virtual_size or
                        not current.virtual_size
                    ):
                        current.virtual_size = usable_size

                    if current.virtual_size <= 0:
                        raise LinstorVolumeManagerError(
                            'Failed to get usable size of `{}` on `{}`'
                            .format(resource.name, volume.storage_pool_name)
                        )

        # LINSTOR reports sizes in KiB; expose bytes.
        for current in all_volume_info.values():
            current.allocated_size *= 1024
            current.virtual_size *= 1024

        self._volume_info_cache_dirty = False
        self._volume_info_cache = all_volume_info

        return all_volume_info
2148 def _get_volume_node_names_and_size(self, volume_name):
2149 node_names = set()
2150 size = -1
2151 for resource in self._linstor.resource_list_raise(
2152 filter_by_resources=[volume_name]
2153 ).resources:
2154 for volume in resource.volumes:
2155 # We ignore diskless pools of the form "DfltDisklessStorPool".
2156 if volume.storage_pool_name == self._group_name:
2157 node_names.add(resource.node_name)
2159 current_size = volume.usable_size
2160 if current_size < 0:
2161 raise LinstorVolumeManagerError(
2162 'Failed to get usable size of `{}` on `{}`'
2163 .format(resource.name, volume.storage_pool_name)
2164 )
2166 if size < 0:
2167 size = current_size
2168 else:
2169 size = min(size, current_size)
2171 return (node_names, size * 1024)
2173 def _compute_size(self, attr):
2174 capacity = 0
2175 for pool in self._get_storage_pools(force=True):
2176 space = pool.free_space
2177 if space:
2178 size = getattr(space, attr)
2179 if size < 0:
2180 raise LinstorVolumeManagerError(
2181 'Failed to get pool {} attr of `{}`'
2182 .format(attr, pool.node_name)
2183 )
2184 capacity += size
2185 return capacity * 1024
2187 def _get_node_names(self):
2188 node_names = set()
2189 for pool in self._get_storage_pools():
2190 node_names.add(pool.node_name)
2191 return node_names
2193 def _get_storage_pools(self, force=False):
2194 cur_time = time.time()
2195 elsaped_time = cur_time - self._storage_pools_time
2197 if force or elsaped_time >= self.STORAGE_POOLS_FETCH_INTERVAL:
2198 self._storage_pools = self._linstor.storage_pool_list_raise(
2199 filter_by_stor_pools=[self._group_name]
2200 ).storage_pools
2201 self._storage_pools_time = time.time()
2203 return self._storage_pools
    def _create_volume(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Create a LINSTOR resource definition for a new volume, optionally
        auto-placing diskful replicas. Retried up to 5 times as a whole.

        :param str volume_uuid: Volume UUID (for error reporting/rollback).
        :param str volume_name: LINSTOR resource name to create.
        :param int size: Requested size in bytes; rounded up to BLOCK_SIZE.
        :param bool place_resources: If True, auto-place resources using the
            SR redundancy; otherwise only the definition is created.
        :param bool high_availability: If True, spawn in the HA resource
            group (created on the fly if missing) instead of the default.
        :raise LinstorVolumeManagerError: on failure.
        """
        size = self.round_up_volume_size(size)
        # The resource list is about to change: invalidate the caches.
        self._mark_resource_cache_as_dirty()

        group_name = self._ha_group_name if high_availability else self._group_name

        def create_definition():
            # Create the resource definition only (no placement yet).
            first_attempt = True
            while True:
                try:
                    self._check_volume_creation_errors(
                        self._linstor.resource_group_spawn(
                            rsc_grp_name=group_name,
                            rsc_dfn_name=volume_name,
                            vlm_sizes=['{}B'.format(size)],
                            definitions_only=True
                        ),
                        volume_uuid,
                        self._group_name
                    )
                    break
                except LinstorVolumeManagerError as e:
                    # Only recoverable case: the HA group does not exist yet.
                    # Create it once and retry; anything else is fatal.
                    if (
                        not first_attempt or
                        not high_availability or
                        e.code != LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
                    ):
                        raise

                    first_attempt = False
                    self._create_resource_group(
                        self._linstor,
                        group_name,
                        self._group_name,
                        3,
                        True
                    )

            self._configure_volume_peer_slots(self._linstor, volume_name)

        def clean():
            # Best-effort rollback after a failed creation.
            try:
                self._destroy_volume(volume_uuid, force=True, preserve_properties=True)
            except Exception as e:
                self._logger(
                    'Unable to destroy volume {} after creation fail: {}'
                    .format(volume_uuid, e)
                )

        def create():
            try:
                create_definition()
                if place_resources:
                    # Basic case when we use the default redundancy of the group.
                    self._check_volume_creation_errors(
                        self._linstor.resource_auto_place(
                            rsc_name=volume_name,
                            place_count=self._redundancy,
                            diskless_on_remaining=False
                        ),
                        volume_uuid,
                        self._group_name
                    )
            except LinstorVolumeManagerError as e:
                # Never destroy a volume that already existed beforehand.
                if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                    clean()
                raise
            except Exception:
                clean()
                raise

        util.retry(create, maxretry=5)
    def _create_volume_with_properties(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Create a volume and register its KV properties, guarding against a
        concurrent creation of the same UUID on another host.

        Parameters mirror `_create_volume`.
        :return: The KV properties namespace of the new volume.
        :raise LinstorVolumeManagerError: if the volume, resource name or
            properties already exist, or if creation fails.
        """
        if self.check_volume_exists(volume_uuid):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, it already exists'
                .format(volume_uuid, self._group_name) + ' in properties',
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        if volume_name in self._fetch_resource_names():
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    volume_uuid, self._group_name
                ) + 'resource of the same name already exists in LINSTOR'
            )

        # I am paranoid.
        volume_properties = self._get_volume_properties(volume_uuid)
        if (volume_properties.get(self.PROP_NOT_EXISTS) is not None):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}`, '.format(volume_uuid) +
                'properties already exist'
            )

        try:
            # Mark the volume as "creating" before touching LINSTOR so an
            # interrupted creation can be detected and repaired later.
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_CREATING
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            self._create_volume(
                volume_uuid,
                volume_name,
                size,
                place_resources,
                high_availability
            )

            assert volume_properties.namespace == \
                self._build_volume_namespace(volume_uuid)
            return volume_properties
        except LinstorVolumeManagerError as e:
            # Do not destroy existing resource!
            # In theory we can't get this error because we check this event
            # before the `self._create_volume` case.
            # It can only happen if the same volume uuid is used in the same
            # call in another host.
            if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                self._destroy_volume(volume_uuid, force=True)
            raise
2339 def _find_device_path(self, volume_uuid, volume_name):
2340 current_device_path = self._request_device_path(
2341 volume_uuid, volume_name, activate=True
2342 )
2344 # We use realpath here to get the /dev/drbd<id> path instead of
2345 # /dev/drbd/by-res/<resource_name>.
2346 expected_device_path = self.build_device_path(volume_name)
2347 util.wait_for_path(expected_device_path, 5)
2349 device_realpath = os.path.realpath(expected_device_path)
2350 if current_device_path != device_realpath:
2351 raise LinstorVolumeManagerError(
2352 'Invalid path, current={}, expected={} (realpath={})'
2353 .format(
2354 current_device_path,
2355 expected_device_path,
2356 device_realpath
2357 )
2358 )
2359 return expected_device_path
    def _request_device_path(self, volume_uuid, volume_name, activate=False):
        """
        Return the local DRBD device path (/dev/drbd<id>) of a volume.

        :param str volume_uuid: Volume UUID (error messages only).
        :param str volume_name: LINSTOR resource name to look up.
        :param bool activate: If True and no local resource exists, create a
            diskless resource on this host and retry once (the recursive
            call uses activate=False, so a second miss raises).
        :rtype: str
        :raise LinstorVolumeManagerError: if no device path can be obtained.
        """
        node_name = socket.gethostname()

        # Find a replica of this resource on the local node, if any.
        resource = next(filter(
            lambda resource: resource.node_name == node_name and
            resource.name == volume_name,
            self._get_resource_cache().resources
        ), None)

        if not resource:
            if activate:
                # Activation changes the resource list: drop the cache.
                self._mark_resource_cache_as_dirty()
                self._activate_device_path(
                    self._linstor, node_name, volume_name
                )
                return self._request_device_path(volume_uuid, volume_name)
            raise LinstorVolumeManagerError(
                'Empty dev path for `{}`, but definition "seems" to exist'
                .format(volume_uuid)
            )
        # Contains a path of the /dev/drbd<id> form.
        return resource.volumes[0].device_path
    def _destroy_resource(self, resource_name, force=False):
        """
        Delete a LINSTOR resource definition.

        :param str resource_name: Resource definition to delete.
        :param bool force: If True and the first deletion fails, verify no
            process holds the volume open, demote a resource blocked in
            primary mode if needed, then retry once (without force).
        :raise LinstorVolumeManagerError: if deletion is impossible.
        """
        result = self._linstor.resource_dfn_delete(resource_name)
        error_str = self._get_error_str(result)
        if not error_str:
            self._mark_resource_cache_as_dirty()
            return

        if not force:
            self._mark_resource_cache_as_dirty()
            raise LinstorVolumeManagerError(
                'Could not destroy resource `{}` from SR `{}`: {}'
                .format(resource_name, self._group_name, error_str)
            )

        # If force is used, ensure there is no opener.
        all_openers = get_all_volume_openers(resource_name, '0')
        for openers in all_openers.values():
            if openers:
                self._mark_resource_cache_as_dirty()
                raise LinstorVolumeManagerError(
                    'Could not force destroy resource `{}` from SR `{}`: {} (openers=`{}`)'
                    .format(resource_name, self._group_name, error_str, all_openers)
                )

        # Maybe the resource is blocked in primary mode. DRBD/LINSTOR issue?
        resource_states = filter(
            lambda resource_state: resource_state.name == resource_name,
            self._get_resource_cache().resource_states
        )

        # Mark only after computation of states.
        self._mark_resource_cache_as_dirty()

        for resource_state in resource_states:
            # NOTE(review): volume_state is never used below — confirm intent.
            volume_state = resource_state.volume_states[0]
            if resource_state.in_use:
                demote_drbd_resource(resource_state.node_name, resource_name)
                break
        # Retry once without force after the demotion attempt.
        self._destroy_resource(resource_name)
2424 def _destroy_volume(self, volume_uuid, force=False, preserve_properties=False):
2425 volume_properties = self._get_volume_properties(volume_uuid)
2426 try:
2427 volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
2428 if volume_name in self._fetch_resource_names():
2429 self._destroy_resource(volume_name, force)
2431 # Assume this call is atomic.
2432 if not preserve_properties:
2433 volume_properties.clear()
2434 except Exception as e:
2435 raise LinstorVolumeManagerError(
2436 'Cannot destroy volume `{}`: {}'.format(volume_uuid, e)
2437 )
    def _build_volumes(self, repair):
        """
        Rebuild `self._volumes` from the KV store, optionally cleaning up
        volumes left in a partial state by an interrupted operation.

        :param bool repair: If True (master), bad volumes are destroyed or
            renamed and interrupted UUID renames are finalized; if False
            they are ignored, and pending renames make this raise.
        :raise LinstorVolumeManagerError: if "updating uuid" entries exist
            while repair is disabled.
        """
        properties = self._kv_cache
        resource_names = self._fetch_resource_names()

        self._volumes = set()

        # Volumes whose UUID rename was interrupted mid-flight.
        updating_uuid_volumes = self._get_volumes_by_property(
            self.REG_UPDATING_UUID_SRC, ignore_inexisting_volumes=False
        )
        if updating_uuid_volumes and not repair:
            raise LinstorVolumeManagerError(
                'Cannot build LINSTOR volume list: '
                'It exists invalid "updating uuid volumes", repair is required'
            )

        existing_volumes = self._get_volumes_by_property(
            self.REG_NOT_EXISTS, ignore_inexisting_volumes=False
        )
        for volume_uuid, not_exists in existing_volumes.items():
            properties.namespace = self._build_volume_namespace(volume_uuid)

            # Volumes being renamed are handled in the second pass below.
            src_uuid = properties.get(self.PROP_UPDATING_UUID_SRC)
            if src_uuid:
                self._logger(
                    'Ignoring volume during manager initialization with prop '
                    ' PROP_UPDATING_UUID_SRC: {} (properties={})'
                    .format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Insert volume in list if the volume exists. Or if the volume
            # is being created and a slave wants to use it (repair = False).
            #
            # If we are on the master and if repair is True and state is
            # Creating, it's probably a bug or crash: the creation process has
            # been stopped.
            if not_exists == self.STATE_EXISTS or (
                not repair and not_exists == self.STATE_CREATING
            ):
                self._volumes.add(volume_uuid)
                continue

            if not repair:
                self._logger(
                    'Ignoring bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Remove bad volume.
            try:
                self._logger(
                    'Removing bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                volume_name = properties.get(self.PROP_VOLUME_NAME)

                # Little optimization, don't call `self._destroy_volume`,
                # we already have resource name list.
                if volume_name in resource_names:
                    self._destroy_resource(volume_name, force=True)

                # Assume this call is atomic.
                properties.clear()
            except Exception as e:
                # Do not raise, we don't want to block user action.
                self._logger(
                    'Cannot clean volume {}: {}'.format(volume_uuid, e)
                )

                # The volume can't be removed, maybe it's still in use,
                # in this case rename it with the "DELETED_" prefix.
                # This prefix is mandatory if it exists a snap transaction to
                # rollback because the original VDI UUID can try to be renamed
                # with the UUID we are trying to delete...
                if not volume_uuid.startswith('DELETED_'):
                    self.update_volume_uuid(
                        volume_uuid, 'DELETED_' + volume_uuid, force=True
                    )

        # Second pass: finalize interrupted UUID renames (repair mode only
        # reaches here with entries, see the guard above).
        for dest_uuid, src_uuid in updating_uuid_volumes.items():
            dest_namespace = self._build_volume_namespace(dest_uuid)

            # Destination never completed: drop it and keep the source.
            properties.namespace = dest_namespace
            if int(properties.get(self.PROP_NOT_EXISTS)):
                properties.clear()
                continue

            # Destination is valid: remove the source and the marker.
            properties.namespace = self._build_volume_namespace(src_uuid)
            properties.clear()

            properties.namespace = dest_namespace
            properties.pop(self.PROP_UPDATING_UUID_SRC)

            if src_uuid in self._volumes:
                self._volumes.remove(src_uuid)
            self._volumes.add(dest_uuid)
2546 def _get_sr_properties(self):
2547 return self._create_linstor_kv(self._build_sr_namespace())
2549 def _get_volumes_by_property(
2550 self, reg_prop, ignore_inexisting_volumes=True
2551 ):
2552 base_properties = self._get_kv_cache()
2553 base_properties.namespace = self._build_volume_namespace()
2555 volume_properties = {}
2556 for volume_uuid in self._volumes:
2557 volume_properties[volume_uuid] = ''
2559 for key, value in base_properties.items():
2560 res = reg_prop.match(key)
2561 if res:
2562 volume_uuid = res.groups()[0]
2563 if not ignore_inexisting_volumes or \
2564 volume_uuid in self._volumes:
2565 volume_properties[volume_uuid] = value
2567 return volume_properties
2569 def _create_linstor_kv(self, namespace):
2570 return linstor.KV(
2571 self._group_name,
2572 uri=self._linstor.controller_host(),
2573 namespace=namespace
2574 )
2576 def _get_volume_properties(self, volume_uuid):
2577 properties = self._get_kv_cache()
2578 properties.namespace = self._build_volume_namespace(volume_uuid)
2579 return properties
2581 @classmethod
2582 def _build_sr_namespace(cls):
2583 return '/{}/'.format(cls.NAMESPACE_SR)
2585 @classmethod
2586 def _build_volume_namespace(cls, volume_uuid=None):
2587 # Return a path to all volumes if `volume_uuid` is not given.
2588 if volume_uuid is None:
2589 return '/{}/'.format(cls.NAMESPACE_VOLUME)
2590 return '/{}/{}/'.format(cls.NAMESPACE_VOLUME, volume_uuid)
2592 @classmethod
2593 def _get_error_str(cls, result):
2594 return ', '.join([
2595 err.message for err in cls._filter_errors(result)
2596 ])
    @classmethod
    def _create_linstor_instance(
        cls, uri, keep_uri_unmodified=False, attempt_count=30
    ):
        """
        Open a connected linstor.Linstor client, retrying on network errors.

        :param str uri: Controller URI; when falsy it is auto-discovered
            via `get_controller_uri`.
        :param bool keep_uri_unmodified: If True, retries keep the given
            URI instead of rediscovering the controller.
        :param int attempt_count: Max number of retry attempts.
        :return: A connected client (keep_alive enabled).
        :rtype: linstor.Linstor
        :raise LinstorVolumeManagerError: if no controller URI is found.
        """
        # NOTE(review): `retry` is never used below — confirm it can go.
        retry = False

        def connect(uri):
            if not uri:
                uri = get_controller_uri()
                if not uri:
                    raise LinstorVolumeManagerError(
                        'Unable to find controller uri...'
                    )
            instance = linstor.Linstor(uri, keep_alive=True)
            instance.connect()
            return instance

        # Fast path: one direct attempt before entering the retry loop.
        try:
            return connect(uri)
        except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError):
            pass

        # The controller may have moved: rediscover it on retries unless
        # the caller pinned the URI.
        if not keep_uri_unmodified:
            uri = None

        return util.retry(
            lambda: connect(uri),
            maxretry=attempt_count,
            period=1,
            exceptions=[
                linstor.errors.LinstorNetworkError,
                LinstorVolumeManagerError
            ]
        )
2633 @classmethod
2634 def _configure_volume_peer_slots(cls, lin, volume_name):
2635 result = lin.resource_dfn_modify(volume_name, {}, peer_slots=3)
2636 error_str = cls._get_error_str(result)
2637 if error_str:
2638 raise LinstorVolumeManagerError(
2639 'Could not configure volume peer slots of {}: {}'
2640 .format(volume_name, error_str)
2641 )
2643 @classmethod
2644 def _activate_device_path(cls, lin, node_name, volume_name):
2645 result = lin.resource_make_available(node_name, volume_name, diskful=False)
2646 if linstor.Linstor.all_api_responses_no_error(result):
2647 return
2648 errors = linstor.Linstor.filter_api_call_response_errors(result)
2649 if len(errors) == 1 and errors[0].is_error(
2650 linstor.consts.FAIL_EXISTS_RSC
2651 ):
2652 return
2654 raise LinstorVolumeManagerError(
2655 'Unable to activate device path of `{}` on node `{}`: {}'
2656 .format(volume_name, node_name, ', '.join(
2657 [str(x) for x in result]))
2658 )
2660 @classmethod
2661 def _request_database_path(cls, lin, activate=False):
2662 node_name = socket.gethostname()
2664 try:
2665 resource = next(filter(
2666 lambda resource: resource.node_name == node_name and
2667 resource.name == DATABASE_VOLUME_NAME,
2668 lin.resource_list_raise().resources
2669 ), None)
2670 except Exception as e:
2671 raise LinstorVolumeManagerError(
2672 'Unable to fetch database resource: {}'
2673 .format(e)
2674 )
2676 if not resource:
2677 if activate:
2678 cls._activate_device_path(
2679 lin, node_name, DATABASE_VOLUME_NAME
2680 )
2681 return cls._request_database_path(
2682 DATABASE_VOLUME_NAME, DATABASE_VOLUME_NAME
2683 )
2684 raise LinstorVolumeManagerError(
2685 'Empty dev path for `{}`, but definition "seems" to exist'
2686 .format(DATABASE_PATH)
2687 )
2688 # Contains a path of the /dev/drbd<id> form.
2689 return resource.volumes[0].device_path
    @classmethod
    def _create_database_volume(
        cls, lin, group_name, storage_pool_name, node_names, redundancy, auto_quorum
    ):
        """
        Create, place, activate and format the LINSTOR database volume
        (later mounted on DATABASE_PATH).

        :param linstor.Linstor lin: Connected LINSTOR client.
        :param str group_name: Resource group to spawn the volume in.
        :param str storage_pool_name: Storage pool for diskful replicas.
        :param node_names: All node names of the pool.
        :param int redundancy: Number of diskful replicas to create.
        :param bool auto_quorum: If True, tweak quorum options so the
            drbd-reactor daemon can manage the resource.
        :return: The /dev/drbd/by-res/... path of the formatted volume.
        :rtype: str
        :raise LinstorVolumeManagerError: on any step failure.
        """
        try:
            dfns = lin.resource_dfn_list_raise().resource_definitions
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Unable to get definitions during database creation: {}'
                .format(e)
            )

        # The database must be the very first resource of the SR.
        if dfns:
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    DATABASE_VOLUME_NAME, group_name
                ) + 'LINSTOR volume list must be empty.'
            )

        # Workaround to use thin lvm. Without this line an error is returned:
        # "Not enough available nodes"
        # I don't understand why but this command protect against this bug.
        try:
            pools = lin.storage_pool_list_raise(
                filter_by_stor_pools=[storage_pool_name]
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to get storage pool list before database creation: {}'
                .format(e)
            )

        # Ensure we have a correct list of storage pools.
        assert pools.storage_pools  # We must have at least one storage pool!
        nodes_with_pool = list(map(lambda pool: pool.node_name, pools.storage_pools))
        for node_name in nodes_with_pool:
            assert node_name in node_names
        util.SMlog('Nodes with storage pool: {}'.format(nodes_with_pool))

        # Create the database definition.
        size = cls.round_up_volume_size(DATABASE_SIZE)
        cls._check_volume_creation_errors(lin.resource_group_spawn(
            rsc_grp_name=group_name,
            rsc_dfn_name=DATABASE_VOLUME_NAME,
            vlm_sizes=['{}B'.format(size)],
            definitions_only=True
        ), DATABASE_VOLUME_NAME, group_name)
        cls._configure_volume_peer_slots(lin, DATABASE_VOLUME_NAME)

        # Create real resources on the first nodes.
        resources = []

        # Split nodes: those with the storage pool get diskful replicas.
        diskful_nodes = []
        diskless_nodes = []
        for node_name in node_names:
            if node_name in nodes_with_pool:
                diskful_nodes.append(node_name)
            else:
                diskless_nodes.append(node_name)

        assert diskful_nodes
        for node_name in diskful_nodes[:redundancy]:
            util.SMlog('Create database diskful on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                storage_pool=storage_pool_name
            ))
        # Create diskless resources on the remaining set.
        for node_name in diskful_nodes[redundancy:] + diskless_nodes:
            util.SMlog('Create database diskless on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                diskless=True
            ))

        result = lin.resource_create(resources)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create database volume from SR `{}`: {}'.format(
                    group_name, error_str
                )
            )

        # We must modify the quorum. Otherwise we can't use correctly the
        # drbd-reactor daemon.
        if auto_quorum:
            result = lin.resource_dfn_modify(DATABASE_VOLUME_NAME, {
                'DrbdOptions/auto-quorum': 'disabled',
                'DrbdOptions/Resource/quorum': 'majority'
            })
            error_str = cls._get_error_str(result)
            if error_str:
                raise LinstorVolumeManagerError(
                    'Could not activate quorum on database volume: {}'
                    .format(error_str)
                )

        # Create database and ensure path exists locally and
        # on replicated devices.
        current_device_path = cls._request_database_path(lin, activate=True)

        # Ensure diskless paths exist on other hosts. Otherwise PBDs can't be
        # plugged.
        for node_name in node_names:
            cls._activate_device_path(lin, node_name, DATABASE_VOLUME_NAME)

        # We use realpath here to get the /dev/drbd<id> path instead of
        # /dev/drbd/by-res/<resource_name>.
        expected_device_path = cls.build_device_path(DATABASE_VOLUME_NAME)
        util.wait_for_path(expected_device_path, 5)

        device_realpath = os.path.realpath(expected_device_path)
        if current_device_path != device_realpath:
            raise LinstorVolumeManagerError(
                'Invalid path, current={}, expected={} (realpath={})'
                .format(
                    current_device_path,
                    expected_device_path,
                    device_realpath
                )
            )

        # Format the new device (retried: DRBD may need a moment to settle).
        try:
            util.retry(
                lambda: util.pread2([DATABASE_MKFS, expected_device_path]),
                maxretry=5
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to execute {} on database volume: {}'
                .format(DATABASE_MKFS, e)
            )

        return expected_device_path
2829 @classmethod
2830 def _destroy_database_volume(cls, lin, group_name):
2831 error_str = cls._get_error_str(
2832 lin.resource_dfn_delete(DATABASE_VOLUME_NAME)
2833 )
2834 if error_str:
2835 raise LinstorVolumeManagerError(
2836 'Could not destroy resource `{}` from SR `{}`: {}'
2837 .format(DATABASE_VOLUME_NAME, group_name, error_str)
2838 )
    @classmethod
    def _mount_database_volume(cls, volume_path, mount=True, force=False):
        """
        Mount (or unmount) the database volume on DATABASE_PATH, shuffling
        the existing /var/lib/linstor content through a temporary backup
        directory so no file is lost across the (un)mount.

        :param str volume_path: Device path of the database volume.
        :param bool mount: True to mount, False to unmount.
        :param bool force: Forwarded to `_move_files` when restoring.
        :raise LinstorVolumeManagerError: on failure, after a best-effort
            rollback of the moves and the (un)mount.
        """
        try:
            # 1. Create a backup config folder.
            database_not_empty = bool(os.listdir(DATABASE_PATH))
            backup_path = cls._create_database_backup_path()

            # 2. Move the config in the mounted volume.
            if database_not_empty:
                cls._move_files(DATABASE_PATH, backup_path)

            cls._mount_volume(volume_path, DATABASE_PATH, mount)

            if database_not_empty:
                cls._move_files(backup_path, DATABASE_PATH, force)

            # 3. Remove useless backup directory.
            try:
                os.rmdir(backup_path)
            except Exception as e:
                raise LinstorVolumeManagerError(
                    'Failed to remove backup path {} of LINSTOR config: {}'
                    .format(backup_path, e)
                )
        except Exception as e:
            # NOTE(review): if os.listdir or _create_database_backup_path
            # raised, `backup_path` is unbound here and this handler itself
            # fails with NameError — confirm and guard if needed.
            def force_exec(fn):
                # Swallow rollback failures: the original error is re-raised.
                try:
                    fn()
                except Exception:
                    pass

            # Undo the moves and the (un)mount in reverse order.
            if mount == cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    DATABASE_PATH, backup_path
                ))
                force_exec(lambda: cls._mount_volume(
                    volume_path, DATABASE_PATH, not mount
                ))

            if mount != cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    backup_path, DATABASE_PATH
                ))

            force_exec(lambda: os.rmdir(backup_path))
            raise e
    @classmethod
    def _force_destroy_database_volume(cls, lin, group_name):
        """Best-effort wrapper around `_destroy_database_volume`: never raises."""
        try:
            cls._destroy_database_volume(lin, group_name)
        except Exception:
            pass
    @classmethod
    def _destroy_storage_pool(cls, lin, group_name, node_name):
        """
        Delete the storage pool `group_name` on `node_name`.

        An already-missing pool (or pool definition) counts as success.
        :raise LinstorVolumeManagerError: if deletion keeps failing after
            the retries.
        """
        def destroy():
            result = lin.storage_pool_delete(node_name, group_name)
            errors = cls._filter_errors(result)
            # A pool that no longer exists is fine: nothing to do.
            if cls._check_errors(errors, [
                linstor.consts.FAIL_NOT_FOUND_STOR_POOL,
                linstor.consts.FAIL_NOT_FOUND_STOR_POOL_DFN
            ]):
                return

            if errors:
                raise LinstorVolumeManagerError(
                    'Failed to destroy SP `{}` on node `{}`: {}'.format(
                        group_name,
                        node_name,
                        cls._get_error_str(errors)
                    )
                )

        # We must retry to avoid errors like:
        # "can not be deleted as volumes / snapshot-volumes are still using it"
        # after LINSTOR database volume destruction.
        return util.retry(destroy, maxretry=10)
    @classmethod
    def _create_resource_group(
        cls,
        lin,
        group_name,
        storage_pool_name,
        redundancy,
        destroy_old_group
    ):
        """
        Create a LINSTOR resource group and its volume group.

        :param linstor.Linstor lin: Connected LINSTOR client.
        :param str group_name: Resource group name to create.
        :param str storage_pool_name: Backing storage pool.
        :param int redundancy: Diskful place count of the group.
        :param bool destroy_old_group: If True, one attempt is made to
            destroy a pre-existing group of the same name and recreate it.
        :raise LinstorVolumeManagerError: if RG or VG creation fails.
        """
        rg_creation_attempt = 0
        while True:
            result = lin.resource_group_create(
                name=group_name,
                place_count=redundancy,
                storage_pool=storage_pool_name,
                diskless_on_remaining=False
            )
            error_str = cls._get_error_str(result)
            if not error_str:
                break

            # Only recoverable case: the group already exists and we are
            # allowed to replace it (a single time).
            errors = cls._filter_errors(result)
            if destroy_old_group and cls._check_errors(errors, [
                linstor.consts.FAIL_EXISTS_RSC_GRP
            ]):
                rg_creation_attempt += 1
                if rg_creation_attempt < 2:
                    try:
                        cls._destroy_resource_group(lin, group_name)
                    except Exception as e:
                        # Fall through and report this failure instead.
                        error_str = 'Failed to destroy old and empty RG: {}'.format(e)
                    else:
                        continue

            raise LinstorVolumeManagerError(
                'Could not create RG `{}`: {}'.format(
                    group_name, error_str
                )
            )

        # The RG exists; now attach its volume group.
        result = lin.volume_group_create(group_name)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create VG `{}`: {}'.format(
                    group_name, error_str
                )
            )
    @classmethod
    def _destroy_resource_group(cls, lin, group_name):
        """
        Delete a LINSTOR resource group; an already-missing group counts
        as success. Retried to ride out transient "still in use" errors.

        :raise LinstorVolumeManagerError: if deletion keeps failing.
        """
        def destroy():
            result = lin.resource_group_delete(group_name)
            errors = cls._filter_errors(result)
            if cls._check_errors(errors, [
                linstor.consts.FAIL_NOT_FOUND_RSC_GRP
            ]):
                return

            if errors:
                raise LinstorVolumeManagerError(
                    'Failed to destroy RG `{}`: {}'
                    .format(group_name, cls._get_error_str(errors))
                )

        return util.retry(destroy, maxretry=10)
2986 @classmethod
2987 def _build_group_name(cls, base_name):
2988 # If thin provisioning is used we have a path like this:
2989 # `VG/LV`. "/" is not accepted by LINSTOR.
2990 return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_'))
2992 # Used to store important data in a HA context,
2993 # i.e. a replication count of 3.
2994 @classmethod
2995 def _build_ha_group_name(cls, base_name):
2996 return '{}{}'.format(cls.PREFIX_HA, base_name.replace('/', '_'))
2998 @classmethod
2999 def _check_volume_creation_errors(cls, result, volume_uuid, group_name):
3000 errors = cls._filter_errors(result)
3001 if cls._check_errors(errors, [
3002 linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN
3003 ]):
3004 raise LinstorVolumeManagerError(
3005 'Failed to create volume `{}` from SR `{}`, it already exists'
3006 .format(volume_uuid, group_name),
3007 LinstorVolumeManagerError.ERR_VOLUME_EXISTS
3008 )
3010 if cls._check_errors(errors, [linstor.consts.FAIL_NOT_FOUND_RSC_GRP]):
3011 raise LinstorVolumeManagerError(
3012 'Failed to create volume `{}` from SR `{}`, resource group doesn\'t exist'
3013 .format(volume_uuid, group_name),
3014 LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
3015 )
3017 if errors:
3018 raise LinstorVolumeManagerError(
3019 'Failed to create volume `{}` from SR `{}`: {}'.format(
3020 volume_uuid,
3021 group_name,
3022 cls._get_error_str(errors)
3023 )
3024 )
    @classmethod
    def _move_files(cls, src_dir, dest_dir, force=False):
        """
        Move every file of `src_dir` into `dest_dir`.

        :param str src_dir: Source directory.
        :param str dest_dir: Destination directory.
        :param bool force: If False, the destination must be empty and any
            per-file collision aborts; already-moved files are then moved
            back (best effort).
        :raise LinstorVolumeManagerError: on any failure.
        """
        def listdir(dir):
            # "lost+found" is an ext4 artifact, not user data.
            ignored = ['lost+found']
            return [file for file in os.listdir(dir) if file not in ignored]

        try:
            if not force:
                files = listdir(dest_dir)
                if files:
                    raise LinstorVolumeManagerError(
                        'Cannot move files from {} to {} because destination '
                        'contains: {}'.format(src_dir, dest_dir, files)
                    )
        except LinstorVolumeManagerError:
            raise
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Cannot list dir {}: {}'.format(dest_dir, e)
            )

        try:
            for file in listdir(src_dir):
                try:
                    dest_file = os.path.join(dest_dir, file)
                    if not force and os.path.exists(dest_file):
                        raise LinstorVolumeManagerError(
                            'Cannot move {} because it already exists in the '
                            'destination'.format(file)
                        )
                    shutil.move(os.path.join(src_dir, file), dest_file)
                except LinstorVolumeManagerError:
                    raise
                except Exception as e:
                    raise LinstorVolumeManagerError(
                        'Cannot move {}: {}'.format(file, e)
                    )
        except Exception as e:
            # Roll back the files already moved (best effort, force=True to
            # overwrite whatever is in the way).
            if not force:
                try:
                    cls._move_files(dest_dir, src_dir, force=True)
                except Exception:
                    pass

            raise LinstorVolumeManagerError(
                'Failed to move files from {} to {}: {}'.format(
                    src_dir, dest_dir, e
                )
            )
3076 @staticmethod
3077 def _create_database_backup_path():
3078 path = DATABASE_PATH + '-' + str(uuid.uuid4())
3079 try:
3080 os.mkdir(path)
3081 return path
3082 except Exception as e:
3083 raise LinstorVolumeManagerError(
3084 'Failed to create backup path {} of LINSTOR config: {}'
3085 .format(path, e)
3086 )
3088 @staticmethod
3089 def _get_filtered_properties(properties):
3090 return dict(properties.items())
3092 @staticmethod
3093 def _filter_errors(result):
3094 return [
3095 err for err in result
3096 if hasattr(err, 'is_error') and err.is_error()
3097 ]
3099 @staticmethod
3100 def _check_errors(result, codes):
3101 for err in result:
3102 for code in codes:
3103 if err.is_error(code):
3104 return True
3105 return False
3107 @classmethod
3108 def _controller_is_running(cls):
3109 return cls._service_is_running('linstor-controller')
3111 @classmethod
3112 def _start_controller(cls, start=True):
3113 return cls._start_service('linstor-controller', start)
3115 @staticmethod
3116 def _start_service(name, start=True):
3117 action = 'start' if start else 'stop'
3118 (ret, out, err) = util.doexec([
3119 'systemctl', action, name
3120 ])
3121 if ret != 0:
3122 raise LinstorVolumeManagerError(
3123 'Failed to {} {}: {} {}'
3124 .format(action, name, out, err)
3125 )
3127 @staticmethod
3128 def _service_is_running(name):
3129 (ret, out, err) = util.doexec([
3130 'systemctl', 'is-active', '--quiet', name
3131 ])
3132 return not ret
3134 @staticmethod
3135 def _is_mounted(mountpoint):
3136 (ret, out, err) = util.doexec(['mountpoint', '-q', mountpoint])
3137 return ret == 0
3139 @classmethod
3140 def _mount_volume(cls, volume_path, mountpoint, mount=True):
3141 if mount:
3142 try:
3143 util.pread(['mount', volume_path, mountpoint])
3144 except Exception as e:
3145 raise LinstorVolumeManagerError(
3146 'Failed to mount volume {} on {}: {}'
3147 .format(volume_path, mountpoint, e)
3148 )
3149 else:
3150 try:
3151 if cls._is_mounted(mountpoint):
3152 util.pread(['umount', mountpoint])
3153 except Exception as e:
3154 raise LinstorVolumeManagerError(
3155 'Failed to umount volume {} on {}: {}'
3156 .format(volume_path, mountpoint, e)
3157 )
3160# ==============================================================================
3162# Check if a path is a DRBD resource and log the process name/pid
3163# that opened it.
def log_drbd_openers(path):
    """Log (via util.SMlog) who keeps the DRBD resource behind `path` open.

    Best-effort helper: if this host is DRBD Primary for the resource, the
    local openers are logged; otherwise the openers reported by all hosts
    are logged. Silently returns when `path` is not a DRBD by-res symlink,
    and any unexpected failure is logged instead of raised.

    :param str path: Path under DRBD_BY_RES_PATH
        (expected shape: /dev/drbd/by-res/<res_name>/<volume>).
    """
    # Ignore if it's not a symlink to DRBD resource.
    if not path.startswith(DRBD_BY_RES_PATH):
        return

    # Compute resource name.
    res_name_end = path.find('/', len(DRBD_BY_RES_PATH))
    if res_name_end == -1:
        return
    res_name = path[len(DRBD_BY_RES_PATH):res_name_end]

    # Volume index is the last path component; bail out if missing.
    volume_end = path.rfind('/')
    if volume_end == res_name_end:
        return
    volume = path[volume_end + 1:]

    try:
        # Ensure path is a DRBD.
        # 147 is the block device major number registered for DRBD.
        drbd_path = os.path.realpath(path)
        stats = os.stat(drbd_path)
        if not stat.S_ISBLK(stats.st_mode) or os.major(stats.st_rdev) != 147:
            return

        # Find where the device is open.
        (ret, stdout, stderr) = util.doexec(['drbdadm', 'status', res_name])
        if ret != 0:
            util.SMlog('Failed to execute `drbdadm status` on `{}`: {}'.format(
                res_name, stderr
            ))
            return

        # Is it a local device?
        # `drbdadm status` starts its report with "<res_name> role:<role>".
        if stdout.startswith('{} role:Primary'.format(res_name)):
            util.SMlog(
                'DRBD resource `{}` is open on local host: {}'
                .format(path, get_local_volume_openers(res_name, volume))
            )
            return

        # Is it a remote device?
        util.SMlog(
            'DRBD resource `{}` is open on hosts: {}'
            .format(path, get_all_volume_openers(res_name, volume))
        )
    except Exception as e:
        util.SMlog(
            'Got exception while trying to determine where DRBD resource ' +
            '`{}` is open: {}'.format(path, e)
        )