Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/env python3 

2# 

3# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr 

4# 

5# This program is free software: you can redistribute it and/or modify 

6# it under the terms of the GNU General Public License as published by 

7# the Free Software Foundation, either version 3 of the License, or 

8# (at your option) any later version. 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU General Public License for more details. 

13# 

14# You should have received a copy of the GNU General Public License 

15# along with this program. If not, see <https://www.gnu.org/licenses/>. 

16# 

17 

18 

19import distutils.util 

20import errno 

21import glob 

22import json 

23import linstor 

24import os.path 

25import re 

26import shutil 

27import socket 

28import stat 

29import time 

30import util 

31import uuid 

32 

# Prefix added to the name of persistent RAW volumes stored in LINSTOR.
PERSISTENT_PREFIX = 'xcp-persistent-'

# Contains the data of the "/var/lib/linstor" directory.
DATABASE_VOLUME_NAME = PERSISTENT_PREFIX + 'database'
DATABASE_SIZE = 1 << 30  # 1GB.
DATABASE_PATH = '/var/lib/linstor'
DATABASE_MKFS = 'mkfs.ext4'

# Parses `drbdadm status` output to find the node with the Primary role.
REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary")
# Extracts the host part of a `drbdsetup` "_remote_host" value.
REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$')

# Root directory of the DRBD device paths (one subdir per resource).
DRBD_BY_RES_PATH = '/dev/drbd/by-res/'

# Name of the XAPI plugin used to execute LINSTOR-related commands on hosts.
PLUGIN = 'linstor-manager'

48 

49 

50# ============================================================================== 

51 

def get_local_volume_openers(resource_name, volume):
    """
    Read the DRBD debugfs to know which local processes have a volume open.
    :param str resource_name: Name of the DRBD resource to inspect.
    :param int volume: Volume number inside the resource.
    :return: JSON object mapping each PID to its process name and
    open duration.
    :rtype: str
    """
    if not resource_name or volume is None:
        raise Exception('Cannot get DRBD openers without resource name and/or volume.')

    path = '/sys/kernel/debug/drbd/resources/{}/volumes/{}/openers'.format(
        resource_name, volume
    )

    opener_re = re.compile('(.*)\\s+([0-9]+)\\s+([0-9]+)')

    result = {}
    with open(path, 'r') as openers:
        # Not a big cost, so read all lines directly.
        for line in openers.readlines():
            match = opener_re.match(line)
            assert match
            process_name, pid, open_duration_ms = match.groups()
            result[pid] = {
                'process-name': process_name,
                'open-duration': open_duration_ms
            }

    return json.dumps(result)

81 

def get_all_volume_openers(resource_name, volume):
    """
    Collect the DRBD openers of a volume on every live host of the pool.
    :param str resource_name: Name of the DRBD resource to inspect.
    :param int volume: Volume number inside the resource.
    :return: Dict mapping node name to its openers (parsed JSON).
    :rtype: dict
    """
    PLUGIN_CMD = 'getDrbdOpeners'

    volume = str(volume)
    openers = {}

    # Make sure this call never stucks because this function can be called
    # during HA init and in this case we can wait forever.
    session = util.timeout_call(10, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            metrics = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )
            if not metrics['live']:
                # Ensure we call plugin on online hosts only.
                continue

            args = {
                'resourceName': resource_name,
                'volume': volume
            }
            openers[node_name] = json.loads(
                session.xenapi.host.call_plugin(
                    host_ref, PLUGIN, PLUGIN_CMD, args
                )
            )
        except Exception as e:
            util.SMlog('Failed to get openers of `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))

    return openers

114 

115 

116# ============================================================================== 

117 

def round_up(value, divisor):
    """Round `value` up to the closest multiple of `divisor`."""
    assert divisor
    divisor = int(divisor)
    value = int(value)
    return (value + divisor - 1) // divisor * divisor

122 

123 

def round_down(value, divisor):
    """Round `value` down to the closest multiple of `divisor`."""
    assert divisor
    value = int(value)
    return value - value % int(divisor)

128 

129 

130# ============================================================================== 

131 

def get_remote_host_ip(node_name):
    """
    Find the IP of a remote node using the DRBD connections of the
    database volume.
    :param str node_name: Node to search for.
    :return: The IP if found, None otherwise.
    :rtype: str
    """
    (ret, stdout, stderr) = util.doexec([
        'drbdsetup', 'show', DATABASE_VOLUME_NAME, '--json'
    ])
    if ret != 0:
        return

    try:
        conf = json.loads(stdout)
        if not conf:
            return

        for connection in conf[0]['connections']:
            if connection['net']['_name'] != node_name:
                continue
            # Found the connection of the requested node: extract the IP
            # from its "_remote_host" value.
            value = connection['path']['_remote_host']
            res = REG_DRBDSETUP_IP.match(value)
            if res:
                return res.groups()[0]
            break
    except Exception:
        # Best effort: any parse failure simply means "not found".
        pass

153 

154 

def _get_controller_uri():
    """
    Try to find the host running the LINSTOR controller.
    :return: A "linstor://<host>" URI if a controller is found,
    None otherwise.
    :rtype: str
    """
    PLUGIN_CMD = 'hasControllerRunning'

    # Try to find controller using drbdadm.
    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])
    if ret == 0:
        # If we are here, the database device exists locally.

        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            # Nice case, we have the controller running on this local host.
            return 'linstor://localhost'

        # Try to find the host using DRBD connections.
        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            node_name = res.groups()[0]
            ip = get_remote_host_ip(node_name)
            if ip:
                return 'linstor://' + ip

    # Worst case: we use many hosts in the pool (>= 4), so we can't find the
    # primary using drbdadm because we don't have all connections to the
    # replicated volume. `drbdadm status xcp-persistent-database` returns
    # 3 connections by default.
    try:
        session = util.timeout_call(10, util.get_localAPI_session)

        for host_ref, host_record in session.xenapi.host.get_all_records().items():
            node_name = host_record['hostname']
            try:
                if distutils.util.strtobool(
                    session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {})
                ):
                    return 'linstor://' + host_record['address']
            except Exception as e:
                # Can throw an exception if a host is offline. So catch it.
                util.SMlog('Unable to search controller on `{}`: {}'.format(
                    node_name, e
                ))
    except Exception:
        # Fix: was a bare `except:`, which would also swallow SystemExit
        # and KeyboardInterrupt.
        # Not found, maybe we are trying to create the SR...
        pass

199 

def get_controller_uri():
    """
    Find the LINSTOR controller URI, retrying for a few seconds.
    :return: The controller URI, or None after 10 failed attempts.
    :rtype: str
    """
    # 10 attempts, with a one second pause between consecutive attempts.
    for attempt in range(1, 11):
        uri = _get_controller_uri()
        if uri:
            return uri
        if attempt < 10:
            time.sleep(1)

211 

212 

def get_controller_node_name():
    """
    Find the node name of the host running the LINSTOR controller.
    :return: 'localhost', a remote node name, or None if not found.
    :rtype: str
    """
    PLUGIN_CMD = 'hasControllerRunning'

    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])

    if ret == 0:
        # The database device exists locally: check if this host is the
        # primary, otherwise look for it in the DRBD connections.
        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            return 'localhost'

        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            return res.groups()[0]

    # Fallback: ask every live host of the pool through the plugin.
    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            metrics = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )
            if not metrics['live']:
                continue

            has_controller = distutils.util.strtobool(
                session.xenapi.host.call_plugin(
                    host_ref, PLUGIN, PLUGIN_CMD, {}
                )
            )
            if has_controller:
                return node_name
        except Exception as e:
            util.SMlog('Failed to call plugin to get controller on `{}`: {}'.format(
                node_name, e
            ))

246 

247 

def demote_drbd_resource(node_name, resource_name):
    """
    Demote a DRBD resource on a specific node using the linstor-manager
    plugin.
    :param str node_name: The node on which the resource must be demoted.
    :param str resource_name: The DRBD resource to demote.
    :raise Exception: If the node is not in the pool or the demotion fails.
    """
    PLUGIN_CMD = 'demoteDrbdResource'

    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        if host_record['hostname'] != node_name:
            continue

        try:
            session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {'resource_name': resource_name}
            )
        except Exception as e:
            util.SMlog('Failed to demote resource `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))
            # Fix: report the real failure instead of the misleading
            # "unable to find node" message.
            raise Exception(
                'Can\'t demote resource `{}` on node `{}`: {}'
                .format(resource_name, node_name, e)
            )
        return

    # Fix: previously an unknown node was silently ignored.
    raise Exception(
        'Can\'t demote resource `{}`, unable to find node `{}`'
        .format(resource_name, node_name)
    )

269 

270# ============================================================================== 

271 

class LinstorVolumeManagerError(Exception):
    """
    Exception raised by LinstorVolumeManager, with an error code attached.
    """

    # Error codes. Fix: the original definitions had trailing commas, which
    # turned the first two constants into 1-element tuples instead of ints.
    ERR_GENERIC = 0
    ERR_VOLUME_EXISTS = 1
    ERR_VOLUME_NOT_EXISTS = 2

    def __init__(self, message, code=ERR_GENERIC):
        """
        :param str message: Human readable error message.
        :param int code: One of the ERR_* constants.
        """
        super(LinstorVolumeManagerError, self).__init__(message)
        self._code = code

    @property
    def code(self):
        """Return the error code attached to this exception."""
        return self._code

284 

285 

286# ============================================================================== 

287 

288# Note: 

289# If a storage pool is not accessible after a network change: 

290# linstor node interface modify <NODE> default --ip <IP> 

291 

292 

293class LinstorVolumeManager(object): 

294 """ 

295 API to manager LINSTOR volumes in XCP-ng. 

296 A volume in this context is a physical part of the storage layer. 

297 """ 

298 

299 __slots__ = ( 

300 '_linstor', '_logger', 

301 '_uri', '_base_group_name', 

302 '_redundancy', '_group_name', 

303 '_volumes', '_storage_pools', 

304 '_storage_pools_time', 

305 '_kv_cache', '_resource_cache', '_volume_info_cache', 

306 '_kv_cache_dirty', '_resource_cache_dirty', '_volume_info_cache_dirty' 

307 ) 

308 

309 DEV_ROOT_PATH = DRBD_BY_RES_PATH 

310 

311 # Default LVM extent size. 

312 BLOCK_SIZE = 4 * 1024 * 1024 

313 

314 # List of volume properties. 

315 PROP_METADATA = 'metadata' 

316 PROP_NOT_EXISTS = 'not-exists' 

317 PROP_VOLUME_NAME = 'volume-name' 

318 PROP_IS_READONLY_TIMESTAMP = 'readonly-timestamp' 

319 

320 # A volume can only be locked for a limited duration. 

321 # The goal is to give enough time to slaves to execute some actions on 

322 # a device before an UUID update or a coalesce for example. 

323 # Expiration is expressed in seconds. 

324 LOCKED_EXPIRATION_DELAY = 1 * 60 

325 

326 # Used when volume uuid is being updated. 

327 PROP_UPDATING_UUID_SRC = 'updating-uuid-src' 

328 

329 # States of property PROP_NOT_EXISTS. 

330 STATE_EXISTS = '0' 

331 STATE_NOT_EXISTS = '1' 

332 STATE_CREATING = '2' 

333 

334 # Property namespaces. 

335 NAMESPACE_SR = 'xcp/sr' 

336 NAMESPACE_VOLUME = 'xcp/volume' 

337 

338 # Regex to match properties. 

339 REG_PROP = '^([^/]+)/{}$' 

340 

341 REG_METADATA = re.compile(REG_PROP.format(PROP_METADATA)) 

342 REG_NOT_EXISTS = re.compile(REG_PROP.format(PROP_NOT_EXISTS)) 

343 REG_VOLUME_NAME = re.compile(REG_PROP.format(PROP_VOLUME_NAME)) 

344 REG_UPDATING_UUID_SRC = re.compile(REG_PROP.format(PROP_UPDATING_UUID_SRC)) 

345 

346 # Prefixes of SR/VOLUME in the LINSTOR DB. 

347 # A LINSTOR (resource, group, ...) name cannot start with a number. 

348 # So we add a prefix behind our SR/VOLUME uuids. 

349 PREFIX_SR = 'xcp-sr-' 

350 PREFIX_VOLUME = 'xcp-volume-' 

351 

352 # Limit request number when storage pool info is asked, we fetch 

353 # the current pool status after N elapsed seconds. 

354 STORAGE_POOLS_FETCH_INTERVAL = 15 

355 

356 @staticmethod 

357 def default_logger(*args): 

358 print(args) 

359 

360 # -------------------------------------------------------------------------- 

361 # API. 

362 # -------------------------------------------------------------------------- 

363 

    class VolumeInfo(object):
        """Size and placement information about a single LINSTOR volume."""

        __slots__ = (
            'name',
            'allocated_size',  # Allocated size, place count is not used.
            'virtual_size',    # Total virtual available size of this volume
                               # (i.e. the user size at creation).
            'diskful'          # Array of nodes that have a diskful volume.
        )

        def __init__(self, name):
            self.name = name
            self.allocated_size = 0
            self.virtual_size = 0
            self.diskful = []

        def __repr__(self):
            return 'VolumeInfo("{}", {}, {}, {})'.format(
                self.name, self.allocated_size, self.virtual_size,
                self.diskful
            )

384 

385 # -------------------------------------------------------------------------- 

386 

    def __init__(
        self, uri, group_name, repair=False, logger=default_logger.__func__,
        attempt_count=30
    ):
        """
        Create a new LinstorVolumeManager object.
        :param str uri: URI to communicate with the LINSTOR controller.
        :param str group_name: The SR group name to use.
        :param bool repair: If true we try to remove bad volumes due to a
        crash or unexpected behavior.
        :param function logger: Function to log messages.
        :param int attempt_count: Number of attempts to join the controller.
        """

        self._linstor = self._create_linstor_instance(
            uri, attempt_count=attempt_count
        )
        self._base_group_name = group_name

        # Ensure group exists.
        group_name = self._build_group_name(group_name)
        groups = self._linstor.resource_group_list_raise([group_name])
        groups = groups.resource_groups
        if not groups:
            raise LinstorVolumeManagerError(
                'Unable to find `{}` Linstor SR'.format(group_name)
            )

        # Ok. ;)
        self._logger = logger
        # The redundancy is the place count configured on the resource group.
        self._redundancy = groups[0].select_filter.place_count
        self._group_name = group_name
        self._volumes = set()
        self._storage_pools_time = 0

        # To increase performance and limit request count to LINSTOR services,
        # we use caches.
        self._kv_cache = self._create_kv_cache()
        self._resource_cache = None
        self._resource_cache_dirty = True
        self._volume_info_cache = None
        self._volume_info_cache_dirty = True
        self._build_volumes(repair=repair)

430 

431 @property 

432 def group_name(self): 

433 """ 

434 Give the used group name. 

435 :return: The group name. 

436 :rtype: str 

437 """ 

438 return self._base_group_name 

439 

440 @property 

441 def redundancy(self): 

442 """ 

443 Give the used redundancy. 

444 :return: The redundancy. 

445 :rtype: int 

446 """ 

447 return self._redundancy 

448 

449 @property 

450 def volumes(self): 

451 """ 

452 Give the volumes uuid set. 

453 :return: The volumes uuid set. 

454 :rtype: set(str) 

455 """ 

456 return self._volumes 

457 

458 @property 

459 def max_volume_size_allowed(self): 

460 """ 

461 Give the max volume size currently available in B. 

462 :return: The current size. 

463 :rtype: int 

464 """ 

465 

466 candidates = self._find_best_size_candidates() 

467 if not candidates: 

468 raise LinstorVolumeManagerError( 

469 'Failed to get max volume size allowed' 

470 ) 

471 

472 size = candidates[0].max_volume_size 

473 if size < 0: 

474 raise LinstorVolumeManagerError( 

475 'Invalid max volume size allowed given: {}'.format(size) 

476 ) 

477 return self.round_down_volume_size(size * 1024) 

478 

479 @property 

480 def physical_size(self): 

481 """ 

482 Give the total physical size of the SR. 

483 :return: The physical size. 

484 :rtype: int 

485 """ 

486 return self._compute_size('total_capacity') 

487 

488 @property 

489 def physical_free_size(self): 

490 """ 

491 Give the total free physical size of the SR. 

492 :return: The physical free size. 

493 :rtype: int 

494 """ 

495 return self._compute_size('free_capacity') 

496 

497 @property 

498 def allocated_volume_size(self): 

499 """ 

500 Give the allocated size for all volumes. The place count is not 

501 used here. When thick lvm is used, the size for one volume should 

502 be equal to the virtual volume size. With thin lvm, the size is equal 

503 or lower to the volume size. 

504 :return: The allocated size of all volumes. 

505 :rtype: int 

506 """ 

507 

508 # Paths: /res_name/vol_number/size 

509 sizes = {} 

510 

511 for resource in self._get_resource_cache().resources: 

512 if resource.name not in sizes: 

513 current = sizes[resource.name] = {} 

514 else: 

515 current = sizes[resource.name] 

516 

517 for volume in resource.volumes: 

518 # We ignore diskless pools of the form "DfltDisklessStorPool". 

519 if volume.storage_pool_name != self._group_name: 

520 continue 

521 

522 current_size = volume.allocated_size 

523 if current_size < 0: 

524 raise LinstorVolumeManagerError( 

525 'Failed to get allocated size of `{}` on `{}`' 

526 .format(resource.name, volume.storage_pool_name) 

527 ) 

528 current[volume.number] = max(current_size, current.get(volume.number) or 0) 

529 

530 total_size = 0 

531 for volumes in sizes.values(): 

532 for size in volumes.values(): 

533 total_size += size 

534 

535 return total_size * 1024 

536 

537 def get_min_physical_size(self): 

538 """ 

539 Give the minimum physical size of the SR. 

540 I.e. the size of the smallest disk + the number of pools. 

541 :return: The physical min size. 

542 :rtype: tuple(int, int) 

543 """ 

544 size = None 

545 pool_count = 0 

546 for pool in self._get_storage_pools(force=True): 

547 space = pool.free_space 

548 if space: 

549 pool_count += 1 

550 current_size = space.total_capacity 

551 if current_size < 0: 

552 raise LinstorVolumeManagerError( 

553 'Failed to get pool total_capacity attr of `{}`' 

554 .format(pool.node_name) 

555 ) 

556 if size is None or current_size < size: 

557 size = current_size 

558 return (pool_count, (size or 0) * 1024) 

559 

560 @property 

561 def metadata(self): 

562 """ 

563 Get the metadata of the SR. 

564 :return: Dictionary that contains metadata. 

565 :rtype: dict(str, dict) 

566 """ 

567 

568 sr_properties = self._get_sr_properties() 

569 metadata = sr_properties.get(self.PROP_METADATA) 

570 if metadata is not None: 

571 metadata = json.loads(metadata) 

572 if isinstance(metadata, dict): 

573 return metadata 

574 raise LinstorVolumeManagerError( 

575 'Expected dictionary in SR metadata: {}'.format( 

576 self._group_name 

577 ) 

578 ) 

579 

580 return {} 

581 

582 @metadata.setter 

583 def metadata(self, metadata): 

584 """ 

585 Set the metadata of the SR. 

586 :param dict metadata: Dictionary that contains metadata. 

587 """ 

588 

589 assert isinstance(metadata, dict) 

590 sr_properties = self._get_sr_properties() 

591 sr_properties[self.PROP_METADATA] = json.dumps(metadata) 

592 

593 @property 

594 def disconnected_hosts(self): 

595 """ 

596 Get the list of disconnected hosts. 

597 :return: Set that contains disconnected hosts. 

598 :rtype: set(str) 

599 """ 

600 

601 disconnected_hosts = set() 

602 for pool in self._get_storage_pools(): 

603 for report in pool.reports: 

604 if report.ret_code & linstor.consts.WARN_NOT_CONNECTED == \ 

605 linstor.consts.WARN_NOT_CONNECTED: 

606 disconnected_hosts.add(pool.node_name) 

607 break 

608 return disconnected_hosts 

609 

610 def check_volume_exists(self, volume_uuid): 

611 """ 

612 Check if a volume exists in the SR. 

613 :return: True if volume exists. 

614 :rtype: bool 

615 """ 

616 return volume_uuid in self._volumes 

617 

    def create_volume(
        self, volume_uuid, size, persistent=True, volume_name=None
    ):
        """
        Create a new volume on the SR.
        :param str volume_uuid: The volume uuid to use.
        :param int size: volume size in B.
        :param bool persistent: If false the volume will be unavailable
        on the next constructor call LinstorSR(...).
        :param str volume_name: If set, this name is used in the LINSTOR
        database instead of a generated name.
        :return: The current device path of the volume.
        :rtype: str
        """

        self._logger('Creating LINSTOR volume {}...'.format(volume_uuid))
        if not volume_name:
            volume_name = self.build_volume_name(util.gen_uuid())
        volume_properties = self._create_volume_with_properties(
            volume_uuid, volume_name, size, place_resources=True
        )

        # Volume created! Now try to find the device path.
        try:
            self._logger(
                'Find device path of LINSTOR volume {}...'.format(volume_uuid)
            )
            device_path = self._find_device_path(volume_uuid, volume_name)
            if persistent:
                # Only persistent volumes are flagged as existing; others
                # are dropped by the next LinstorSR(...) construction.
                volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
            self._volumes.add(volume_uuid)
            self._logger(
                'LINSTOR volume {} created!'.format(volume_uuid)
            )
            return device_path
        except Exception as e:
            # There is an issue to find the path.
            # At this point the volume has just been created, so force flag can be used.
            self._destroy_volume(volume_uuid, force=True)
            raise

658 

659 def mark_volume_as_persistent(self, volume_uuid): 

660 """ 

661 Mark volume as persistent if created with persistent=False. 

662 :param str volume_uuid: The volume uuid to mark. 

663 """ 

664 

665 self._ensure_volume_exists(volume_uuid) 

666 

667 # Mark volume as persistent. 

668 volume_properties = self._get_volume_properties(volume_uuid) 

669 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS 

670 

    def destroy_volume(self, volume_uuid):
        """
        Destroy a volume.
        :param str volume_uuid: The volume uuid to destroy.
        """

        self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        # Mark volume as destroyed BEFORE the physical deletion: if the
        # deletion below fails, the volume stays flagged and can be
        # cleaned up later.
        volume_properties = self._get_volume_properties(volume_uuid)
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS

        self._volumes.remove(volume_uuid)
        self._destroy_volume(volume_uuid)

686 

687 def lock_volume(self, volume_uuid, locked=True): 

688 """ 

689 Prevent modifications of the volume properties during 

690 "self.LOCKED_EXPIRATION_DELAY" seconds. The SR must be locked 

691 when used. This method is useful to attach/detach correctly a volume on 

692 a slave. Without it the GC can rename a volume, in this case the old 

693 volume path can be used by a slave... 

694 :param str volume_uuid: The volume uuid to protect/unprotect. 

695 :param bool locked: Lock/unlock the volume. 

696 """ 

697 

698 self._ensure_volume_exists(volume_uuid) 

699 

700 self._logger( 

701 '{} volume {} as locked'.format( 

702 'Mark' if locked else 'Unmark', 

703 volume_uuid 

704 ) 

705 ) 

706 

707 volume_properties = self._get_volume_properties(volume_uuid) 

708 if locked: 

709 volume_properties[ 

710 self.PROP_IS_READONLY_TIMESTAMP 

711 ] = str(time.time()) 

712 elif self.PROP_IS_READONLY_TIMESTAMP in volume_properties: 

713 volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP) 

714 

715 def ensure_volume_is_not_locked(self, volume_uuid, timeout=None): 

716 """ 

717 Ensure a volume is not locked. Wait if necessary. 

718 :param str volume_uuid: The volume uuid to check. 

719 :param int timeout: If the volume is always locked after the expiration 

720 of the timeout, an exception is thrown. 

721 """ 

722 return self.ensure_volume_list_is_not_locked([volume_uuid], timeout) 

723 

    def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None):
        """
        Wait until none of the given volumes is locked.
        Expired locks (older than LOCKED_EXPIRATION_DELAY) are removed.
        :param list volume_uuids: The volume uuids to check.
        :param int timeout: If some volumes are still locked after the
        expiration of the timeout, an exception is thrown.
        """
        # Only consider volumes known by this manager.
        checked = set()
        for volume_uuid in volume_uuids:
            if volume_uuid in self._volumes:
                checked.add(volume_uuid)

        if not checked:
            return

        waiting = False

        volume_properties = self._get_kv_cache()

        start = time.time()
        while True:
            # Can't delete in for loop, use a copy of the list.
            remaining = checked.copy()
            for volume_uuid in checked:
                volume_properties.namespace = \
                    self._build_volume_namespace(volume_uuid)
                timestamp = volume_properties.get(
                    self.PROP_IS_READONLY_TIMESTAMP
                )
                if timestamp is None:
                    # Not locked (anymore): drop it from the wait set.
                    remaining.remove(volume_uuid)
                    continue

                now = time.time()
                if now - float(timestamp) > self.LOCKED_EXPIRATION_DELAY:
                    # The lock expired: remove the stale timestamp.
                    self._logger(
                        'Remove readonly timestamp on {}'.format(volume_uuid)
                    )
                    volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
                    remaining.remove(volume_uuid)
                    continue

                # Still locked: no need to examine the other volumes now.
                if not waiting:
                    self._logger(
                        'Volume {} is locked, waiting...'.format(volume_uuid)
                    )
                    waiting = True
                break

            if not remaining:
                break
            checked = remaining

            if timeout is not None and now - start > timeout:
                raise LinstorVolumeManagerError(
                    'volume `{}` is locked and timeout has been reached'
                    .format(volume_uuid),
                    LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
                )

            # We must wait to use the volume. After that we can modify it
            # ONLY if the SR is locked to avoid bad reads on the slaves.
            time.sleep(1)
            # Refresh the properties to observe changes made by other hosts.
            volume_properties = self._create_kv_cache()

        if waiting:
            self._logger('No volume locked now!')

785 

786 def remove_volume_if_diskless(self, volume_uuid): 

787 """ 

788 Remove disless path from local node. 

789 :param str volume_uuid: The volume uuid to remove. 

790 """ 

791 

792 self._ensure_volume_exists(volume_uuid) 

793 

794 volume_properties = self._get_volume_properties(volume_uuid) 

795 volume_name = volume_properties.get(self.PROP_VOLUME_NAME) 

796 

797 node_name = socket.gethostname() 

798 result = self._linstor.resource_delete_if_diskless( 

799 node_name=node_name, rsc_name=volume_name 

800 ) 

801 if not linstor.Linstor.all_api_responses_no_error(result): 

802 raise LinstorVolumeManagerError( 

803 'Unable to delete diskless path of `{}` on node `{}`: {}' 

804 .format(volume_name, node_name, ', '.join( 

805 [str(x) for x in result])) 

806 ) 

807 

    def introduce_volume(self, volume_uuid):
        """
        Introduce an existing volume in the SR (not implemented yet).
        :param str volume_uuid: The volume uuid to introduce.
        """
        pass  # TODO: Implement me.

810 

811 def resize_volume(self, volume_uuid, new_size): 

812 """ 

813 Resize a volume. 

814 :param str volume_uuid: The volume uuid to resize. 

815 :param int new_size: New size in B. 

816 """ 

817 

818 volume_name = self.get_volume_name(volume_uuid) 

819 self.ensure_volume_is_not_locked(volume_uuid) 

820 new_size = self.round_up_volume_size(new_size) 

821 

822 result = self._linstor.volume_dfn_modify( 

823 rsc_name=volume_name, 

824 volume_nr=0, 

825 size=new_size // 1024 

826 ) 

827 

828 self._mark_resource_cache_as_dirty() 

829 

830 error_str = self._get_error_str(result) 

831 if error_str: 

832 raise LinstorVolumeManagerError( 

833 'Could not resize volume `{}` from SR `{}`: {}' 

834 .format(volume_uuid, self._group_name, error_str) 

835 ) 

836 

837 def get_volume_name(self, volume_uuid): 

838 """ 

839 Get the name of a particular volume. 

840 :param str volume_uuid: The volume uuid of the name to get. 

841 :return: The volume name. 

842 :rtype: str 

843 """ 

844 

845 self._ensure_volume_exists(volume_uuid) 

846 volume_properties = self._get_volume_properties(volume_uuid) 

847 volume_name = volume_properties.get(self.PROP_VOLUME_NAME) 

848 if volume_name: 

849 return volume_name 

850 raise LinstorVolumeManagerError( 

851 'Failed to get volume name of {}'.format(volume_uuid) 

852 ) 

853 

854 def get_volume_size(self, volume_uuid): 

855 """ 

856 Get the size of a particular volume. 

857 :param str volume_uuid: The volume uuid of the size to get. 

858 :return: The volume size. 

859 :rtype: int 

860 """ 

861 

862 volume_name = self.get_volume_name(volume_uuid) 

863 dfns = self._linstor.resource_dfn_list_raise( 

864 query_volume_definitions=True, 

865 filter_by_resource_definitions=[volume_name] 

866 ).resource_definitions 

867 

868 size = dfns[0].volume_definitions[0].size 

869 if size < 0: 

870 raise LinstorVolumeManagerError( 

871 'Failed to get volume size of: {}'.format(volume_uuid) 

872 ) 

873 return size * 1024 

874 

875 def set_auto_promote_timeout(self, volume_uuid, timeout): 

876 """ 

877 Define the blocking time of open calls when a DRBD 

878 is already open on another host. 

879 :param str volume_uuid: The volume uuid to modify. 

880 """ 

881 

882 volume_name = self.get_volume_name(volume_uuid) 

883 result = self._linstor.resource_dfn_modify(volume_name, { 

884 'DrbdOptions/Resource/auto-promote-timeout': timeout 

885 }) 

886 error_str = self._get_error_str(result) 

887 if error_str: 

888 raise LinstorVolumeManagerError( 

889 'Could not change the auto promote timeout of `{}`: {}' 

890 .format(volume_uuid, error_str) 

891 ) 

892 

893 def get_volume_info(self, volume_uuid): 

894 """ 

895 Get the volume info of a particular volume. 

896 :param str volume_uuid: The volume uuid of the volume info to get. 

897 :return: The volume info. 

898 :rtype: VolumeInfo 

899 """ 

900 

901 volume_name = self.get_volume_name(volume_uuid) 

902 return self._get_volumes_info()[volume_name] 

903 

904 def get_device_path(self, volume_uuid): 

905 """ 

906 Get the dev path of a volume, create a diskless if necessary. 

907 :param str volume_uuid: The volume uuid to get the dev path. 

908 :return: The current device path of the volume. 

909 :rtype: str 

910 """ 

911 

912 volume_name = self.get_volume_name(volume_uuid) 

913 return self._find_device_path(volume_uuid, volume_name) 

914 

915 def get_volume_uuid_from_device_path(self, device_path): 

916 """ 

917 Get the volume uuid of a device_path. 

918 :param str device_path: The dev path to find the volume uuid. 

919 :return: The volume uuid of the local device path. 

920 :rtype: str 

921 """ 

922 

923 expected_volume_name = \ 

924 self.get_volume_name_from_device_path(device_path) 

925 

926 volume_names = self.get_volumes_with_name() 

927 for volume_uuid, volume_name in volume_names.items(): 

928 if volume_name == expected_volume_name: 

929 return volume_uuid 

930 

931 raise LinstorVolumeManagerError( 

932 'Unable to find volume uuid from dev path `{}`'.format(device_path) 

933 ) 

934 

935 def get_volume_name_from_device_path(self, device_path): 

936 """ 

937 Get the volume name of a device_path. 

938 :param str device_path: The dev path to find the volume name. 

939 :return: The volume name of the device path. 

940 :rtype: str 

941 """ 

942 

943 # Assume that we have a path like this: 

944 # - "/dev/drbd/by-res/xcp-volume-<UUID>/0" 

945 # - "../xcp-volume-<UUID>/0" 

946 if device_path.startswith(DRBD_BY_RES_PATH): 

947 prefix_len = len(DRBD_BY_RES_PATH) 

948 else: 

949 assert device_path.startswith('../') 

950 prefix_len = 3 

951 

952 res_name_end = device_path.find('/', prefix_len) 

953 assert res_name_end != -1 

954 return device_path[prefix_len:res_name_end] 

955 

956 def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False): 

957 """ 

958 Change the uuid of a volume. 

959 :param str volume_uuid: The volume to modify. 

960 :param str new_volume_uuid: The new volume uuid to use. 

961 :param bool force: If true we doesn't check if volume_uuid is in the 

962 volume list. I.e. the volume can be marked as deleted but the volume 

963 can still be in the LINSTOR KV store if the deletion has failed. 

964 In specific cases like "undo" after a failed clone we must rename a bad 

965 deleted VDI. 

966 """ 

967 

968 assert volume_uuid != new_volume_uuid 

969 

970 self._logger( 

971 'Trying to update volume UUID {} to {}...' 

972 .format(volume_uuid, new_volume_uuid) 

973 ) 

974 if not force: 

975 self._ensure_volume_exists(volume_uuid) 

976 self.ensure_volume_is_not_locked(volume_uuid) 

977 

978 if new_volume_uuid in self._volumes: 

979 raise LinstorVolumeManagerError( 

980 'Volume `{}` already exists'.format(new_volume_uuid), 

981 LinstorVolumeManagerError.ERR_VOLUME_EXISTS 

982 ) 

983 

984 volume_properties = self._get_volume_properties(volume_uuid) 

985 if volume_properties.get(self.PROP_UPDATING_UUID_SRC): 

986 raise LinstorVolumeManagerError( 

987 'Cannot update volume uuid {}: invalid state' 

988 .format(volume_uuid) 

989 ) 

990 

991 # 1. Copy in temp variables metadata and volume_name. 

992 metadata = volume_properties.get(self.PROP_METADATA) 

993 volume_name = volume_properties.get(self.PROP_VOLUME_NAME) 

994 

995 # 2. Switch to new volume namespace. 

996 volume_properties.namespace = self._build_volume_namespace( 

997 new_volume_uuid 

998 ) 

999 

1000 if list(volume_properties.items()): 

1001 raise LinstorVolumeManagerError( 

1002 'Cannot update volume uuid {} to {}: ' 

1003 .format(volume_uuid, new_volume_uuid) + 

1004 'this last one is not empty' 

1005 ) 

1006 

1007 try: 

1008 # 3. Mark new volume properties with PROP_UPDATING_UUID_SRC. 

1009 # If we crash after that, the new properties can be removed 

1010 # properly. 

1011 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS 

1012 volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid 

1013 

1014 # 4. Copy the properties. 

1015 # Note: On new volumes, during clone for example, the metadata 

1016 # may be missing. So we must test it to avoid this error: 

1017 # "None has to be a str/unicode, but is <type 'NoneType'>" 

1018 if metadata: 

1019 volume_properties[self.PROP_METADATA] = metadata 

1020 volume_properties[self.PROP_VOLUME_NAME] = volume_name 

1021 

1022 # 5. Ok! 

1023 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS 

1024 except Exception as e: 

1025 try: 

1026 # Clear the new volume properties in case of failure. 

1027 assert volume_properties.namespace == \ 

1028 self._build_volume_namespace(new_volume_uuid) 

1029 volume_properties.clear() 

1030 except Exception as e: 

1031 self._logger( 

1032 'Failed to clear new volume properties: {} (ignoring...)' 

1033 .format(e) 

1034 ) 

1035 raise LinstorVolumeManagerError( 

1036 'Failed to copy volume properties: {}'.format(e) 

1037 ) 

1038 

1039 try: 

1040 # 6. After this point, it's ok we can remove the 

1041 # PROP_UPDATING_UUID_SRC property and clear the src properties 

1042 # without problems. 

1043 

1044 # 7. Switch to old volume namespace. 

1045 volume_properties.namespace = self._build_volume_namespace( 

1046 volume_uuid 

1047 ) 

1048 volume_properties.clear() 

1049 

1050 # 8. Switch a last time to new volume namespace. 

1051 volume_properties.namespace = self._build_volume_namespace( 

1052 new_volume_uuid 

1053 ) 

1054 volume_properties.pop(self.PROP_UPDATING_UUID_SRC) 

1055 except Exception as e: 

1056 raise LinstorVolumeManagerError( 

1057 'Failed to clear volume properties ' 

1058 'after volume uuid update: {}'.format(e) 

1059 ) 

1060 

1061 self._volumes.remove(volume_uuid) 

1062 self._volumes.add(new_volume_uuid) 

1063 

1064 self._logger( 

1065 'UUID update succeeded of {} to {}! (properties={})' 

1066 .format( 

1067 volume_uuid, new_volume_uuid, 

1068 self._get_filtered_properties(volume_properties) 

1069 ) 

1070 ) 

1071 

1072 def update_volume_name(self, volume_uuid, volume_name): 

1073 """ 

1074 Change the volume name of a volume. 

1075 :param str volume_uuid: The volume to modify. 

1076 :param str volume_name: The volume_name to use. 

1077 """ 

1078 

1079 self._ensure_volume_exists(volume_uuid) 

1080 self.ensure_volume_is_not_locked(volume_uuid) 

1081 if not volume_name.startswith(self.PREFIX_VOLUME): 

1082 raise LinstorVolumeManagerError( 

1083 'Volume name `{}` must be start with `{}`' 

1084 .format(volume_name, self.PREFIX_VOLUME) 

1085 ) 

1086 

1087 if volume_name not in self._fetch_resource_names(): 

1088 raise LinstorVolumeManagerError( 

1089 'Volume `{}` doesn\'t exist'.format(volume_name) 

1090 ) 

1091 

1092 volume_properties = self._get_volume_properties(volume_uuid) 

1093 volume_properties[self.PROP_VOLUME_NAME] = volume_name 

1094 

1095 def get_usage_states(self, volume_uuid): 

1096 """ 

1097 Check if a volume is currently used. 

1098 :param str volume_uuid: The volume uuid to check. 

1099 :return: A dictionnary that contains states. 

1100 :rtype: dict(str, bool or None) 

1101 """ 

1102 

1103 states = {} 

1104 

1105 volume_name = self.get_volume_name(volume_uuid) 

1106 for resource_state in self._linstor.resource_list_raise( 

1107 filter_by_resources=[volume_name] 

1108 ).resource_states: 

1109 states[resource_state.node_name] = resource_state.in_use 

1110 

1111 return states 

1112 

    def get_volume_openers(self, volume_uuid):
        """
        Get openers of a volume.
        :param str volume_uuid: The volume uuid to monitor.
        :return: A dictionary that contains openers.
        :rtype: dict(str, obj)
        """
        # '0' is the DRBD volume index inside the resource — presumably this
        # driver creates a single DRBD volume per resource; confirm if
        # multi-volume resources are ever used.
        return get_all_volume_openers(self.get_volume_name(volume_uuid), '0')

1121 

    def get_volumes_with_name(self):
        """
        Give a volume dictionary that contains names actually owned.
        :return: A volume/name dict, mapping a volume UUID to its LINSTOR
            resource name (the name may be empty/None when the property is
            unset — callers such as get_volumes_with_info check for that).
        :rtype: dict(str, str)
        """
        return self._get_volumes_by_property(self.REG_VOLUME_NAME)

1129 

1130 def get_volumes_with_info(self): 

1131 """ 

1132 Give a volume dictionnary that contains VolumeInfos. 

1133 :return: A volume/VolumeInfo dict. 

1134 :rtype: dict(str, VolumeInfo) 

1135 """ 

1136 

1137 volumes = {} 

1138 

1139 all_volume_info = self._get_volumes_info() 

1140 volume_names = self.get_volumes_with_name() 

1141 for volume_uuid, volume_name in volume_names.items(): 

1142 if volume_name: 

1143 volume_info = all_volume_info.get(volume_name) 

1144 if volume_info: 

1145 volumes[volume_uuid] = volume_info 

1146 continue 

1147 

1148 # Well I suppose if this volume is not available, 

1149 # LINSTOR has been used directly without using this API. 

1150 volumes[volume_uuid] = self.VolumeInfo('') 

1151 

1152 return volumes 

1153 

1154 def get_volumes_with_metadata(self): 

1155 """ 

1156 Give a volume dictionnary that contains metadata. 

1157 :return: A volume/metadata dict. 

1158 :rtype: dict(str, dict) 

1159 """ 

1160 

1161 volumes = {} 

1162 

1163 metadata = self._get_volumes_by_property(self.REG_METADATA) 

1164 for volume_uuid, volume_metadata in metadata.items(): 

1165 if volume_metadata: 

1166 volume_metadata = json.loads(volume_metadata) 

1167 if isinstance(volume_metadata, dict): 

1168 volumes[volume_uuid] = volume_metadata 

1169 continue 

1170 raise LinstorVolumeManagerError( 

1171 'Expected dictionary in volume metadata: {}' 

1172 .format(volume_uuid) 

1173 ) 

1174 

1175 volumes[volume_uuid] = {} 

1176 

1177 return volumes 

1178 

1179 def get_volume_metadata(self, volume_uuid): 

1180 """ 

1181 Get the metadata of a volume. 

1182 :return: Dictionary that contains metadata. 

1183 :rtype: dict 

1184 """ 

1185 

1186 self._ensure_volume_exists(volume_uuid) 

1187 volume_properties = self._get_volume_properties(volume_uuid) 

1188 metadata = volume_properties.get(self.PROP_METADATA) 

1189 if metadata: 

1190 metadata = json.loads(metadata) 

1191 if isinstance(metadata, dict): 

1192 return metadata 

1193 raise LinstorVolumeManagerError( 

1194 'Expected dictionary in volume metadata: {}' 

1195 .format(volume_uuid) 

1196 ) 

1197 return {} 

1198 

1199 def set_volume_metadata(self, volume_uuid, metadata): 

1200 """ 

1201 Set the metadata of a volume. 

1202 :param dict metadata: Dictionary that contains metadata. 

1203 """ 

1204 

1205 self._ensure_volume_exists(volume_uuid) 

1206 self.ensure_volume_is_not_locked(volume_uuid) 

1207 

1208 assert isinstance(metadata, dict) 

1209 volume_properties = self._get_volume_properties(volume_uuid) 

1210 volume_properties[self.PROP_METADATA] = json.dumps(metadata) 

1211 

1212 def update_volume_metadata(self, volume_uuid, metadata): 

1213 """ 

1214 Update the metadata of a volume. It modify only the given keys. 

1215 It doesn't remove unreferenced key instead of set_volume_metadata. 

1216 :param dict metadata: Dictionary that contains metadata. 

1217 """ 

1218 

1219 self._ensure_volume_exists(volume_uuid) 

1220 self.ensure_volume_is_not_locked(volume_uuid) 

1221 

1222 assert isinstance(metadata, dict) 

1223 volume_properties = self._get_volume_properties(volume_uuid) 

1224 

1225 current_metadata = json.loads( 

1226 volume_properties.get(self.PROP_METADATA, '{}') 

1227 ) 

1228 if not isinstance(metadata, dict): 

1229 raise LinstorVolumeManagerError( 

1230 'Expected dictionary in volume metadata: {}' 

1231 .format(volume_uuid) 

1232 ) 

1233 

1234 for key, value in metadata.items(): 

1235 current_metadata[key] = value 

1236 volume_properties[self.PROP_METADATA] = json.dumps(current_metadata) 

1237 

1238 def shallow_clone_volume(self, volume_uuid, clone_uuid, persistent=True): 

1239 """ 

1240 Clone a volume. Do not copy the data, this method creates a new volume 

1241 with the same size. It tries to create the volume on the same host 

1242 than volume source. 

1243 :param str volume_uuid: The volume to clone. 

1244 :param str clone_uuid: The cloned volume. 

1245 :param bool persistent: If false the volume will be unavailable 

1246 on the next constructor call LinstorSR(...). 

1247 :return: The current device path of the cloned volume. 

1248 :rtype: str 

1249 """ 

1250 

1251 volume_name = self.get_volume_name(volume_uuid) 

1252 self.ensure_volume_is_not_locked(volume_uuid) 

1253 

1254 # 1. Find ideal nodes + size to use. 

1255 ideal_node_names, size = self._get_volume_node_names_and_size( 

1256 volume_name 

1257 ) 

1258 if size <= 0: 

1259 raise LinstorVolumeManagerError( 

1260 'Invalid size of {} for volume `{}`'.format(size, volume_name) 

1261 ) 

1262 

1263 # 2. Find the node(s) with the maximum space. 

1264 candidates = self._find_best_size_candidates() 

1265 if not candidates: 

1266 raise LinstorVolumeManagerError( 

1267 'Unable to shallow clone volume `{}`, no free space found.' 

1268 ) 

1269 

1270 # 3. Compute node names and search if we can try to clone 

1271 # on the same nodes than volume. 

1272 def find_best_nodes(): 

1273 for candidate in candidates: 

1274 for node_name in candidate.node_names: 

1275 if node_name in ideal_node_names: 

1276 return candidate.node_names 

1277 

1278 node_names = find_best_nodes() 

1279 if not node_names: 

1280 node_names = candidates[0].node_names 

1281 

1282 if len(node_names) < self._redundancy: 

1283 raise LinstorVolumeManagerError( 

1284 'Unable to shallow clone volume `{}`, '.format(volume_uuid) + 

1285 '{} are required to clone, found: {}'.format( 

1286 self._redundancy, len(node_names) 

1287 ) 

1288 ) 

1289 

1290 # 4. Compute resources to create. 

1291 clone_volume_name = self.build_volume_name(util.gen_uuid()) 

1292 diskless_node_names = self._get_node_names() 

1293 resources = [] 

1294 for node_name in node_names: 

1295 diskless_node_names.remove(node_name) 

1296 resources.append(linstor.ResourceData( 

1297 node_name=node_name, 

1298 rsc_name=clone_volume_name, 

1299 storage_pool=self._group_name 

1300 )) 

1301 

1302 # 5. Create resources! 

1303 def clean(): 

1304 try: 

1305 self._destroy_volume(clone_uuid, force=True) 

1306 except Exception as e: 

1307 self._logger( 

1308 'Unable to destroy volume {} after shallow clone fail: {}' 

1309 .format(clone_uuid, e) 

1310 ) 

1311 

1312 def create(): 

1313 # Note: placed outside try/except block because we create only definition first. 

1314 # There is no reason to call `clean` before the real resource creation. 

1315 volume_properties = self._create_volume_with_properties( 

1316 clone_uuid, clone_volume_name, size, place_resources=False 

1317 ) 

1318 

1319 # After this point, `clean` can be called for any fail because the clone UUID 

1320 # is really unique. No risk to remove existing data. 

1321 try: 

1322 result = self._linstor.resource_create(resources) 

1323 error_str = self._get_error_str(result) 

1324 if error_str: 

1325 raise LinstorVolumeManagerError( 

1326 'Could not create cloned volume `{}` of `{}` from ' 

1327 'SR `{}`: {}'.format( 

1328 clone_uuid, volume_uuid, self._group_name, 

1329 error_str 

1330 ) 

1331 ) 

1332 return volume_properties 

1333 except Exception: 

1334 clean() 

1335 raise 

1336 

1337 # Retry because we can get errors like this: 

1338 # "Resource disappeared while waiting for it to be ready" or 

1339 # "Resource did not became ready on node 'XXX' within reasonable time, check Satellite for errors." 

1340 # in the LINSTOR server. 

1341 volume_properties = util.retry(create, maxretry=5) 

1342 

1343 try: 

1344 device_path = self._find_device_path(clone_uuid, clone_volume_name) 

1345 if persistent: 

1346 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS 

1347 self._volumes.add(clone_uuid) 

1348 return device_path 

1349 except Exception as e: 

1350 clean() 

1351 raise 

1352 

1353 def remove_resourceless_volumes(self): 

1354 """ 

1355 Remove all volumes without valid or non-empty name 

1356 (i.e. without LINSTOR resource). It's different than 

1357 LinstorVolumeManager constructor that takes a `repair` param that 

1358 removes volumes with `PROP_NOT_EXISTS` to 1. 

1359 """ 

1360 

1361 resource_names = self._fetch_resource_names() 

1362 for volume_uuid, volume_name in self.get_volumes_with_name().items(): 

1363 if not volume_name or volume_name not in resource_names: 

1364 # Don't force, we can be sure of what's happening. 

1365 self.destroy_volume(volume_uuid) 

1366 

1367 def destroy(self): 

1368 """ 

1369 Destroy this SR. Object should not be used after that. 

1370 :param bool force: Try to destroy volumes before if true. 

1371 """ 

1372 

1373 # 1. Ensure volume list is empty. No cost. 

1374 if self._volumes: 

1375 raise LinstorVolumeManagerError( 

1376 'Cannot destroy LINSTOR volume manager: ' 

1377 'It exists remaining volumes' 

1378 ) 

1379 

1380 # 2. Fetch ALL resource names. 

1381 # This list may therefore contain volumes created outside 

1382 # the scope of the driver. 

1383 resource_names = self._fetch_resource_names(ignore_deleted=False) 

1384 try: 

1385 resource_names.remove(DATABASE_VOLUME_NAME) 

1386 except KeyError: 

1387 # Really strange to reach that point. 

1388 # Normally we always have the database volume in the list. 

1389 pass 

1390 

1391 # 3. Ensure the resource name list is entirely empty... 

1392 if resource_names: 

1393 raise LinstorVolumeManagerError( 

1394 'Cannot destroy LINSTOR volume manager: ' 

1395 'It exists remaining volumes (created externally or being deleted)' 

1396 ) 

1397 

1398 # 4. Destroying... 

1399 controller_is_running = self._controller_is_running() 

1400 uri = 'linstor://localhost' 

1401 try: 

1402 if controller_is_running: 

1403 self._start_controller(start=False) 

1404 

1405 # 4.1. Umount LINSTOR database. 

1406 self._mount_database_volume( 

1407 self.build_device_path(DATABASE_VOLUME_NAME), 

1408 mount=False, 

1409 force=True 

1410 ) 

1411 

1412 # 4.2. Refresh instance. 

1413 self._start_controller(start=True) 

1414 self._linstor = self._create_linstor_instance( 

1415 uri, keep_uri_unmodified=True 

1416 ) 

1417 

1418 # 4.3. Destroy database volume. 

1419 self._destroy_resource(DATABASE_VOLUME_NAME) 

1420 

1421 # 4.4. Refresh linstor connection. 

1422 # Without we get this error: 

1423 # "Cannot delete resource group 'xcp-sr-linstor_group_thin_device' because it has existing resource definitions.." 

1424 # Because the deletion of the databse was not seen by Linstor for some reason. 

1425 # It seems a simple refresh of the Linstor connection make it aware of the deletion. 

1426 self._linstor.disconnect() 

1427 self._linstor.connect() 

1428 

1429 # 4.5. Destroy group and storage pools. 

1430 self._destroy_resource_group(self._linstor, self._group_name) 

1431 for pool in self._get_storage_pools(force=True): 

1432 self._destroy_storage_pool( 

1433 self._linstor, pool.name, pool.node_name 

1434 ) 

1435 except Exception as e: 

1436 self._start_controller(start=controller_is_running) 

1437 raise e 

1438 

1439 try: 

1440 self._start_controller(start=False) 

1441 for file in glob.glob(DATABASE_PATH + '/'): 

1442 os.remove(file) 

1443 except Exception as e: 

1444 util.SMlog( 

1445 'Ignoring failure after LINSTOR SR destruction: {}' 

1446 .format(e) 

1447 ) 

1448 

1449 def find_up_to_date_diskful_nodes(self, volume_uuid): 

1450 """ 

1451 Find all nodes that contain a specific volume using diskful disks. 

1452 The disk must be up to data to be used. 

1453 :param str volume_uuid: The volume to use. 

1454 :return: The available nodes. 

1455 :rtype: tuple(set(str), str) 

1456 """ 

1457 

1458 volume_name = self.get_volume_name(volume_uuid) 

1459 

1460 in_use_by = None 

1461 node_names = set() 

1462 

1463 resource_states = filter( 

1464 lambda resource_state: resource_state.name == volume_name, 

1465 self._get_resource_cache().resource_states 

1466 ) 

1467 

1468 for resource_state in resource_states: 

1469 volume_state = resource_state.volume_states[0] 

1470 if volume_state.disk_state == 'UpToDate': 

1471 node_names.add(resource_state.node_name) 

1472 if resource_state.in_use: 

1473 in_use_by = resource_state.node_name 

1474 

1475 return (node_names, in_use_by) 

1476 

    def invalidate_resource_cache(self):
        """
        If resources are impacted by external commands like vhdutil,
        it's necessary to call this function to invalidate current resource
        cache.
        """
        # Only marks the cache dirty; the refetch is presumably performed
        # lazily by the next cache access (see `_get_resource_cache` usage
        # elsewhere in this class) — confirm against its implementation.
        self._mark_resource_cache_as_dirty()

1484 

1485 def has_node(self, node_name): 

1486 """ 

1487 Check if a node exists in the LINSTOR database. 

1488 :rtype: bool 

1489 """ 

1490 result = self._linstor.node_list() 

1491 error_str = self._get_error_str(result) 

1492 if error_str: 

1493 raise LinstorVolumeManagerError( 

1494 'Failed to list nodes using `{}`: {}' 

1495 .format(node_name, error_str) 

1496 ) 

1497 return bool(result[0].node(node_name)) 

1498 

1499 def create_node(self, node_name, ip): 

1500 """ 

1501 Create a new node in the LINSTOR database. 

1502 :param str node_name: Node name to use. 

1503 :param str ip: Host IP to communicate. 

1504 """ 

1505 result = self._linstor.node_create( 

1506 node_name, 

1507 linstor.consts.VAL_NODE_TYPE_CMBD, 

1508 ip 

1509 ) 

1510 errors = self._filter_errors(result) 

1511 if errors: 

1512 error_str = self._get_error_str(errors) 

1513 raise LinstorVolumeManagerError( 

1514 'Failed to create node `{}`: {}'.format(node_name, error_str) 

1515 ) 

1516 

1517 def destroy_node(self, node_name): 

1518 """ 

1519 Destroy a node in the LINSTOR database. 

1520 :param str node_name: Node name to remove. 

1521 """ 

1522 result = self._linstor.node_delete(node_name) 

1523 errors = self._filter_errors(result) 

1524 if errors: 

1525 error_str = self._get_error_str(errors) 

1526 raise LinstorVolumeManagerError( 

1527 'Failed to destroy node `{}`: {}'.format(node_name, error_str) 

1528 ) 

1529 

1530 def create_node_interface(self, node_name, name, ip): 

1531 """ 

1532 Create a new node interface in the LINSTOR database. 

1533 :param str node_name: Node name of the interface to use. 

1534 :param str name: Interface to create. 

1535 :param str ip: IP of the interface. 

1536 """ 

1537 result = self._linstor.netinterface_create(node_name, name, ip) 

1538 errors = self._filter_errors(result) 

1539 if errors: 

1540 error_str = self._get_error_str(errors) 

1541 raise LinstorVolumeManagerError( 

1542 'Failed to create node interface on `{}`: {}'.format(node_name, error_str) 

1543 ) 

1544 

1545 def destroy_node_interface(self, node_name, name): 

1546 """ 

1547 Destroy a node interface in the LINSTOR database. 

1548 :param str node_name: Node name of the interface to remove. 

1549 :param str name: Interface to remove. 

1550 """ 

1551 result = self._linstor.netinterface_delete(node_name, name) 

1552 errors = self._filter_errors(result) 

1553 if errors: 

1554 error_str = self._get_error_str(errors) 

1555 raise LinstorVolumeManagerError( 

1556 'Failed to destroy node interface on `{}`: {}'.format(node_name, error_str) 

1557 ) 

1558 

1559 def modify_node_interface(self, node_name, name, ip): 

1560 """ 

1561 Modify a node interface in the LINSTOR database. Create it if necessary. 

1562 :param str node_name: Node name of the interface to use. 

1563 :param str name: Interface to modify or create. 

1564 :param str ip: IP of the interface. 

1565 """ 

1566 result = self._linstor.netinterface_create(node_name, name, ip) 

1567 errors = self._filter_errors(result) 

1568 if not errors: 

1569 return 

1570 

1571 if self._check_errors(errors, [linstor.consts.FAIL_EXISTS_NET_IF]): 

1572 result = self._linstor.netinterface_modify(node_name, name, ip) 

1573 errors = self._filter_errors(result) 

1574 if not errors: 

1575 return 

1576 

1577 error_str = self._get_error_str(errors) 

1578 raise LinstorVolumeManagerError( 

1579 'Unable to modify interface on `{}`: {}'.format(node_name, error_str) 

1580 ) 

1581 

1582 def list_node_interfaces(self, node_name): 

1583 """ 

1584 List all node interfaces. 

1585 :param str node_name: Node name to use to list interfaces. 

1586 :rtype: list 

1587 : 

1588 """ 

1589 result = self._linstor.net_interface_list(node_name) 

1590 if not result: 

1591 raise LinstorVolumeManagerError( 

1592 'Unable to list interfaces on `{}`: no list received'.format(node_name) 

1593 ) 

1594 

1595 interfaces = {} 

1596 for interface in result: 

1597 interface = interface._rest_data 

1598 interfaces[interface['name']] = { 

1599 'address': interface['address'], 

1600 'active': interface['is_active'] 

1601 } 

1602 return interfaces 

1603 

1604 def set_node_preferred_interface(self, node_name, name): 

1605 """ 

1606 Set the preferred interface to use on a node. 

1607 :param str node_name: Node name of the interface. 

1608 :param str name: Preferred interface to use. 

1609 """ 

1610 result = self._linstor.node_modify(node_name, property_dict={'PrefNic': name}) 

1611 errors = self._filter_errors(result) 

1612 if errors: 

1613 error_str = self._get_error_str(errors) 

1614 raise LinstorVolumeManagerError( 

1615 'Failed to set preferred node interface on `{}`: {}'.format(node_name, error_str) 

1616 ) 

1617 

1618 def get_nodes_info(self): 

1619 """ 

1620 Get all nodes + statuses, used or not by the pool. 

1621 :rtype: dict(str, dict) 

1622 """ 

1623 try: 

1624 nodes = {} 

1625 for node in self._linstor.node_list_raise().nodes: 

1626 nodes[node.name] = node.connection_status 

1627 return nodes 

1628 except Exception as e: 

1629 raise LinstorVolumeManagerError( 

1630 'Failed to get all nodes: `{}`'.format(e) 

1631 ) 

1632 

1633 def get_storage_pools_info(self): 

1634 """ 

1635 Give all storage pools of current group name. 

1636 :rtype: dict(str, list) 

1637 """ 

1638 storage_pools = {} 

1639 for pool in self._get_storage_pools(force=True): 

1640 if pool.node_name not in storage_pools: 

1641 storage_pools[pool.node_name] = [] 

1642 

1643 size = -1 

1644 capacity = -1 

1645 

1646 space = pool.free_space 

1647 if space: 

1648 size = space.free_capacity 

1649 if size < 0: 

1650 size = -1 

1651 else: 

1652 size *= 1024 

1653 capacity = space.total_capacity 

1654 if capacity <= 0: 

1655 capacity = -1 

1656 else: 

1657 capacity *= 1024 

1658 

1659 storage_pools[pool.node_name].append({ 

1660 'storage-pool-name': pool.name, 

1661 'uuid': pool.uuid, 

1662 'free-size': size, 

1663 'capacity': capacity 

1664 }) 

1665 

1666 return storage_pools 

1667 

    def get_resources_info(self):
        """
        Give all resources of current group name.
        :return: A nested dict: resource name -> node name -> resource info
            (volumes, diskful/tie-breaker flags, and in-use/disk-state when
            a matching resource state is reported).
        :rtype: dict(str, dict)
        """
        resources = {}
        resource_list = self._linstor.resource_list_raise()

        # First pass: build the static description of each resource
        # deployment (per node).
        for resource in resource_list.resources:
            if resource.name not in resources:
                resources[resource.name] = {}

            resources[resource.name][resource.node_name] = {
                'volumes': [],
                'diskful': linstor.consts.FLAG_DISKLESS not in resource.flags,
                'tie-breaker': linstor.consts.FLAG_TIE_BREAKER in resource.flags
            }

            for volume in resource.volumes:
                # We ignore diskless pools of the form "DfltDisklessStorPool".
                if volume.storage_pool_name != self._group_name:
                    continue

                # Sizes are presumably in KiB (converted to bytes here);
                # a negative value means "unknown" and is normalized to -1.
                usable_size = volume.usable_size
                if usable_size < 0:
                    usable_size = -1
                else:
                    usable_size *= 1024

                allocated_size = volume.allocated_size
                if allocated_size < 0:
                    allocated_size = -1
                else:
                    allocated_size *= 1024

                resources[resource.name][resource.node_name]['volumes'].append({
                    'storage-pool-name': volume.storage_pool_name,
                    'uuid': volume.uuid,
                    'number': volume.number,
                    'device-path': volume.device_path,
                    'usable-size': usable_size,
                    'allocated-size': allocated_size
                })

        # Second pass: overlay the dynamic state (in-use, per-volume DRBD
        # disk state) onto the entries built above.
        for resource_state in resource_list.resource_states:
            resource = resources[resource_state.rsc_name][resource_state.node_name]
            resource['in-use'] = resource_state.in_use

            volumes = resource['volumes']
            for volume_state in resource_state.volume_states:
                # Match states to volumes by DRBD volume number; volumes
                # filtered out in the first pass are simply skipped.
                volume = next((x for x in volumes if x['number'] == volume_state.number), None)
                if volume:
                    volume['disk-state'] = volume_state.disk_state

        return resources

1722 

    def get_database_path(self):
        """
        Get the database path.
        :return: The current database path.
        :rtype: str
        """
        # Simple delegate: the lookup is performed against the LINSTOR
        # client instance owned by this manager.
        return self._request_database_path(self._linstor)

1730 

    @classmethod
    def create_sr(
        cls, group_name, ips, redundancy,
        thin_provisioning, auto_quorum,
        logger=default_logger.__func__
    ):
        """
        Create a new SR on the given nodes.
        :param str group_name: The SR group_name to use.
        :param dict(str, str) ips: Node names mapped to their IPs
            (`_create_sr` iterates it with `.items()`).
        :param int redundancy: How many copy of volumes should we store?
        :param bool thin_provisioning: Use thin or thick provisioning.
        :param bool auto_quorum: DB quorum is monitored by LINSTOR.
        :param function logger: Function to log messages.
        :return: A new LinstorSr instance.
        :rtype: LinstorSr
        """

        try:
            cls._start_controller(start=True)
            sr = cls._create_sr(
                group_name,
                ips,
                redundancy,
                thin_provisioning,
                auto_quorum,
                logger
            )
        finally:
            # Controller must be stopped and volume unmounted because
            # it is the role of the drbd-reactor daemon to do the right
            # actions.
            # Note: this cleanup runs on both the success and failure path;
            # on failure the original exception still propagates and `sr`
            # is never returned.
            cls._start_controller(start=False)
            cls._mount_volume(
                cls.build_device_path(DATABASE_VOLUME_NAME),
                DATABASE_PATH,
                mount=False
            )
        return sr

1770 

1771 @classmethod 

1772 def _create_sr( 

1773 cls, group_name, ips, redundancy, 

1774 thin_provisioning, auto_quorum, 

1775 logger=default_logger.__func__ 

1776 ): 

1777 # 1. Check if SR already exists. 

1778 uri = 'linstor://localhost' 

1779 

1780 lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True) 

1781 

1782 node_names = list(ips.keys()) 

1783 for node_name, ip in ips.items(): 

1784 while True: 

1785 # Try to create node. 

1786 result = lin.node_create( 

1787 node_name, 

1788 linstor.consts.VAL_NODE_TYPE_CMBD, 

1789 ip 

1790 ) 

1791 

1792 errors = cls._filter_errors(result) 

1793 if cls._check_errors( 

1794 errors, [linstor.consts.FAIL_EXISTS_NODE] 

1795 ): 

1796 # If it already exists, remove, then recreate. 

1797 result = lin.node_delete(node_name) 

1798 error_str = cls._get_error_str(result) 

1799 if error_str: 

1800 raise LinstorVolumeManagerError( 

1801 'Failed to remove old node `{}`: {}' 

1802 .format(node_name, error_str) 

1803 ) 

1804 elif not errors: 

1805 break # Created! 

1806 else: 

1807 raise LinstorVolumeManagerError( 

1808 'Failed to create node `{}` with ip `{}`: {}'.format( 

1809 node_name, ip, cls._get_error_str(errors) 

1810 ) 

1811 ) 

1812 

1813 driver_pool_name = group_name 

1814 base_group_name = group_name 

1815 group_name = cls._build_group_name(group_name) 

1816 pools = lin.storage_pool_list_raise(filter_by_stor_pools=[group_name]) 

1817 pools = pools.storage_pools 

1818 if pools: 

1819 existing_node_names = [pool.node_name for pool in pools] 

1820 raise LinstorVolumeManagerError( 

1821 'Unable to create SR `{}`. It already exists on node(s): {}' 

1822 .format(group_name, existing_node_names) 

1823 ) 

1824 

1825 if lin.resource_group_list_raise( 

1826 [group_name] 

1827 ).resource_groups: 

1828 if not lin.resource_dfn_list_raise().resource_definitions: 

1829 backup_path = cls._create_database_backup_path() 

1830 logger( 

1831 'Group name already exists `{}` without LVs. ' 

1832 'Ignoring and moving the config files in {}'.format(group_name, backup_path) 

1833 ) 

1834 cls._move_files(DATABASE_PATH, backup_path) 

1835 else: 

1836 raise LinstorVolumeManagerError( 

1837 'Unable to create SR `{}`: The group name already exists' 

1838 .format(group_name) 

1839 ) 

1840 

1841 if thin_provisioning: 

1842 driver_pool_parts = driver_pool_name.split('/') 

1843 if not len(driver_pool_parts) == 2: 

1844 raise LinstorVolumeManagerError( 

1845 'Invalid group name using thin provisioning. ' 

1846 'Expected format: \'VG/LV`\'' 

1847 ) 

1848 

1849 # 2. Create storage pool on each node + resource group. 

1850 reg_volume_group_not_found = re.compile( 

1851 ".*Volume group '.*' not found$" 

1852 ) 

1853 

1854 i = 0 

1855 try: 

1856 # 2.a. Create storage pools. 

1857 storage_pool_count = 0 

1858 while i < len(node_names): 

1859 node_name = node_names[i] 

1860 

1861 result = lin.storage_pool_create( 

1862 node_name=node_name, 

1863 storage_pool_name=group_name, 

1864 storage_driver='LVM_THIN' if thin_provisioning else 'LVM', 

1865 driver_pool_name=driver_pool_name 

1866 ) 

1867 

1868 errors = linstor.Linstor.filter_api_call_response_errors( 

1869 result 

1870 ) 

1871 if errors: 

1872 if len(errors) == 1 and errors[0].is_error( 

1873 linstor.consts.FAIL_STOR_POOL_CONFIGURATION_ERROR 

1874 ) and reg_volume_group_not_found.match(errors[0].message): 

1875 logger( 

1876 'Volume group `{}` not found on `{}`. Ignoring...' 

1877 .format(group_name, node_name) 

1878 ) 

1879 cls._destroy_storage_pool(lin, group_name, node_name) 

1880 else: 

1881 error_str = cls._get_error_str(result) 

1882 raise LinstorVolumeManagerError( 

1883 'Could not create SP `{}` on node `{}`: {}' 

1884 .format(group_name, node_name, error_str) 

1885 ) 

1886 else: 

1887 storage_pool_count += 1 

1888 i += 1 

1889 

1890 if not storage_pool_count: 

1891 raise LinstorVolumeManagerError( 

1892 'Unable to create SR `{}`: No VG group found'.format( 

1893 group_name, 

1894 ) 

1895 ) 

1896 

1897 # 2.b. Create resource group. 

1898 rg_creation_attempt = 0 

1899 while True: 

1900 result = lin.resource_group_create( 

1901 name=group_name, 

1902 place_count=redundancy, 

1903 storage_pool=group_name, 

1904 diskless_on_remaining=False 

1905 ) 

1906 error_str = cls._get_error_str(result) 

1907 if not error_str: 

1908 break 

1909 

1910 errors = cls._filter_errors(result) 

1911 if cls._check_errors(errors, [linstor.consts.FAIL_EXISTS_RSC_GRP]): 

1912 rg_creation_attempt += 1 

1913 if rg_creation_attempt < 2: 

1914 try: 

1915 cls._destroy_resource_group(lin, group_name) 

1916 except Exception as e: 

1917 error_str = 'Failed to destroy old and empty RG: {}'.format(e) 

1918 else: 

1919 continue 

1920 

1921 raise LinstorVolumeManagerError( 

1922 'Could not create RG `{}`: {}'.format(group_name, error_str) 

1923 ) 

1924 

1925 # 2.c. Create volume group. 

1926 result = lin.volume_group_create(group_name) 

1927 error_str = cls._get_error_str(result) 

1928 if error_str: 

1929 raise LinstorVolumeManagerError( 

1930 'Could not create VG `{}`: {}'.format( 

1931 group_name, error_str 

1932 ) 

1933 ) 

1934 

1935 # 3. Create the LINSTOR database volume and mount it. 

1936 try: 

1937 logger('Creating database volume...') 

1938 volume_path = cls._create_database_volume( 

1939 lin, group_name, node_names, redundancy, auto_quorum 

1940 ) 

1941 except LinstorVolumeManagerError as e: 

1942 if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS: 

1943 logger('Destroying database volume after creation fail...') 

1944 cls._force_destroy_database_volume(lin, group_name) 

1945 raise 

1946 

1947 try: 

1948 logger('Mounting database volume...') 

1949 

1950 # First we must disable the controller to move safely the 

1951 # LINSTOR config. 

1952 cls._start_controller(start=False) 

1953 

1954 cls._mount_database_volume(volume_path) 

1955 except Exception as e: 

1956 # Ensure we are connected because controller has been 

1957 # restarted during mount call. 

1958 logger('Destroying database volume after mount fail...') 

1959 

1960 try: 

1961 cls._start_controller(start=True) 

1962 except Exception: 

1963 pass 

1964 

1965 lin = cls._create_linstor_instance( 

1966 uri, keep_uri_unmodified=True 

1967 ) 

1968 cls._force_destroy_database_volume(lin, group_name) 

1969 raise e 

1970 

1971 cls._start_controller(start=True) 

1972 lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True) 

1973 

1974 # 4. Remove storage pools/resource/volume group in the case of errors. 

1975 except Exception as e: 

1976 logger('Destroying resource group and storage pools after fail...') 

1977 try: 

1978 cls._destroy_resource_group(lin, group_name) 

1979 except Exception as e2: 

1980 logger('Failed to destroy resource group: {}'.format(e2)) 

1981 pass 

1982 j = 0 

1983 i = min(i, len(node_names) - 1) 

1984 while j <= i: 

1985 try: 

1986 cls._destroy_storage_pool(lin, group_name, node_names[j]) 

1987 except Exception as e2: 

1988 logger('Failed to destroy resource group: {}'.format(e2)) 

1989 pass 

1990 j += 1 

1991 raise e 

1992 

1993 # 5. Return new instance. 

1994 instance = cls.__new__(cls) 

1995 instance._linstor = lin 

1996 instance._logger = logger 

1997 instance._redundancy = redundancy 

1998 instance._base_group_name = base_group_name 

1999 instance._group_name = group_name 

2000 instance._volumes = set() 

2001 instance._storage_pools_time = 0 

2002 instance._kv_cache = instance._create_kv_cache() 

2003 instance._resource_cache = None 

2004 instance._resource_cache_dirty = True 

2005 instance._volume_info_cache = None 

2006 instance._volume_info_cache_dirty = True 

2007 return instance 

2008 

    @classmethod
    def build_device_path(cls, volume_name):
        """
        Build a DRBD device path given a volume name.
        :param str volume_name: The volume name to use.
        :return: A device path (which may or may not exist on disk).
        :rtype: str
        """
        # The trailing "/0" is the volume index inside the resource.
        return '{}{}/0'.format(cls.DEV_ROOT_PATH, volume_name)

2019 

    @classmethod
    def build_volume_name(cls, base_name):
        """
        Build a volume name given a base name (i.e. a UUID).
        :param str base_name: The base name to prefix.
        :return: A LINSTOR volume name.
        :rtype: str
        """
        return '{}{}'.format(cls.PREFIX_VOLUME, base_name)

2029 

2030 @classmethod 

2031 def round_up_volume_size(cls, volume_size): 

2032 """ 

2033 Align volume size on higher multiple of BLOCK_SIZE. 

2034 :param int volume_size: The volume size to align. 

2035 :return: An aligned volume size. 

2036 :rtype: int 

2037 """ 

2038 return round_up(volume_size, cls.BLOCK_SIZE) 

2039 

2040 @classmethod 

2041 def round_down_volume_size(cls, volume_size): 

2042 """ 

2043 Align volume size on lower multiple of BLOCK_SIZE. 

2044 :param int volume_size: The volume size to align. 

2045 :return: An aligned volume size. 

2046 :rtype: int 

2047 """ 

2048 return round_down(volume_size, cls.BLOCK_SIZE) 

2049 

2050 # -------------------------------------------------------------------------- 

2051 # Private helpers. 

2052 # -------------------------------------------------------------------------- 

2053 

2054 def _create_kv_cache(self): 

2055 self._kv_cache = self._create_linstor_kv('/') 

2056 self._kv_cache_dirty = False 

2057 return self._kv_cache 

2058 

2059 def _get_kv_cache(self): 

2060 if self._kv_cache_dirty: 

2061 self._kv_cache = self._create_kv_cache() 

2062 return self._kv_cache 

2063 

2064 def _create_resource_cache(self): 

2065 self._resource_cache = self._linstor.resource_list_raise() 

2066 self._resource_cache_dirty = False 

2067 return self._resource_cache 

2068 

2069 def _get_resource_cache(self): 

2070 if self._resource_cache_dirty: 

2071 self._resource_cache = self._create_resource_cache() 

2072 return self._resource_cache 

2073 

    def _mark_resource_cache_as_dirty(self):
        # Invalidate the resource cache; the volume info cache is derived
        # from it, so invalidate that one too.
        self._resource_cache_dirty = True
        self._volume_info_cache_dirty = True

2077 

2078 # -------------------------------------------------------------------------- 

2079 

2080 def _ensure_volume_exists(self, volume_uuid): 

2081 if volume_uuid not in self._volumes: 

2082 raise LinstorVolumeManagerError( 

2083 'volume `{}` doesn\'t exist'.format(volume_uuid), 

2084 LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS 

2085 ) 

2086 

2087 def _find_best_size_candidates(self): 

2088 result = self._linstor.resource_group_qmvs(self._group_name) 

2089 error_str = self._get_error_str(result) 

2090 if error_str: 

2091 raise LinstorVolumeManagerError( 

2092 'Failed to get max volume size allowed of SR `{}`: {}'.format( 

2093 self._group_name, 

2094 error_str 

2095 ) 

2096 ) 

2097 return result[0].candidates 

2098 

2099 def _fetch_resource_names(self, ignore_deleted=True): 

2100 resource_names = set() 

2101 dfns = self._linstor.resource_dfn_list_raise().resource_definitions 

2102 for dfn in dfns: 

2103 if dfn.resource_group_name == self._group_name and ( 

2104 ignore_deleted or 

2105 linstor.consts.FLAG_DELETE not in dfn.flags 

2106 ): 

2107 resource_names.add(dfn.name) 

2108 return resource_names 

2109 

    def _get_volumes_info(self, volume_name=None):
        """
        Build a {resource_name: VolumeInfo} map for every resource of
        the cached resource list. Sizes are converted from KiB (LINSTOR
        units) to bytes. The result is cached until the resource cache
        is marked dirty.
        Note: `volume_name` is currently unused — TODO confirm intent.
        """
        all_volume_info = {}

        # Serve from the cache when it is still valid.
        if not self._volume_info_cache_dirty:
            return self._volume_info_cache

        for resource in self._get_resource_cache().resources:
            if resource.name not in all_volume_info:
                current = all_volume_info[resource.name] = self.VolumeInfo(
                    resource.name
                )
            else:
                current = all_volume_info[resource.name]

            if linstor.consts.FLAG_DISKLESS not in resource.flags:
                current.diskful.append(resource.node_name)

            for volume in resource.volumes:
                # We ignore diskless pools of the form "DfltDisklessStorPool".
                if volume.storage_pool_name == self._group_name:
                    if volume.allocated_size < 0:
                        raise LinstorVolumeManagerError(
                            'Failed to get allocated size of `{}` on `{}`'
                            .format(resource.name, volume.storage_pool_name)
                        )
                    allocated_size = volume.allocated_size

                    # Keep the largest allocation observed across replicas.
                    current.allocated_size = current.allocated_size and \
                        max(current.allocated_size, allocated_size) or \
                        allocated_size

                    # Keep the smallest positive usable size across replicas.
                    usable_size = volume.usable_size
                    if usable_size > 0 and (
                        usable_size < current.virtual_size or
                        not current.virtual_size
                    ):
                        current.virtual_size = usable_size

                    if current.virtual_size <= 0:
                        raise LinstorVolumeManagerError(
                            'Failed to get usable size of `{}` on `{}`'
                            .format(resource.name, volume.storage_pool_name)
                        )

        for current in all_volume_info.values():
            # LINSTOR reports sizes in KiB; convert to bytes.
            current.allocated_size *= 1024
            current.virtual_size *= 1024

        self._volume_info_cache_dirty = False
        self._volume_info_cache = all_volume_info

        return all_volume_info

2162 

2163 def _get_volume_node_names_and_size(self, volume_name): 

2164 node_names = set() 

2165 size = -1 

2166 for resource in self._linstor.resource_list_raise( 

2167 filter_by_resources=[volume_name] 

2168 ).resources: 

2169 for volume in resource.volumes: 

2170 # We ignore diskless pools of the form "DfltDisklessStorPool". 

2171 if volume.storage_pool_name == self._group_name: 

2172 node_names.add(resource.node_name) 

2173 

2174 current_size = volume.usable_size 

2175 if current_size < 0: 

2176 raise LinstorVolumeManagerError( 

2177 'Failed to get usable size of `{}` on `{}`' 

2178 .format(resource.name, volume.storage_pool_name) 

2179 ) 

2180 

2181 if size < 0: 

2182 size = current_size 

2183 else: 

2184 size = min(size, current_size) 

2185 

2186 return (node_names, size * 1024) 

2187 

2188 def _compute_size(self, attr): 

2189 capacity = 0 

2190 for pool in self._get_storage_pools(force=True): 

2191 space = pool.free_space 

2192 if space: 

2193 size = getattr(space, attr) 

2194 if size < 0: 

2195 raise LinstorVolumeManagerError( 

2196 'Failed to get pool {} attr of `{}`' 

2197 .format(attr, pool.node_name) 

2198 ) 

2199 capacity += size 

2200 return capacity * 1024 

2201 

2202 def _get_node_names(self): 

2203 node_names = set() 

2204 for pool in self._get_storage_pools(): 

2205 node_names.add(pool.node_name) 

2206 return node_names 

2207 

2208 def _get_storage_pools(self, force=False): 

2209 cur_time = time.time() 

2210 elsaped_time = cur_time - self._storage_pools_time 

2211 

2212 if force or elsaped_time >= self.STORAGE_POOLS_FETCH_INTERVAL: 

2213 self._storage_pools = self._linstor.storage_pool_list_raise( 

2214 filter_by_stor_pools=[self._group_name] 

2215 ).storage_pools 

2216 self._storage_pools_time = time.time() 

2217 

2218 return self._storage_pools 

2219 

    def _create_volume(
        self, volume_uuid, volume_name, size, place_resources
    ):
        """
        Create the LINSTOR resource definition of a volume and, when
        `place_resources` is True, auto-place its replicas using the
        group redundancy. Retries up to 5 times; a partially-created
        volume is destroyed on failure.
        :raises LinstorVolumeManagerError: On creation failure (code
            ERR_VOLUME_EXISTS if the resource already exists).
        """
        size = self.round_up_volume_size(size)
        self._mark_resource_cache_as_dirty()

        def create_definition():
            # Create the definition only; replicas are placed afterwards.
            self._check_volume_creation_errors(
                self._linstor.resource_group_spawn(
                    rsc_grp_name=self._group_name,
                    rsc_dfn_name=volume_name,
                    vlm_sizes=['{}B'.format(size)],
                    definitions_only=True
                ),
                volume_uuid,
                self._group_name
            )
            self._configure_volume_peer_slots(self._linstor, volume_name)

        def clean():
            # Best-effort cleanup: never raise from this path, only log.
            try:
                self._destroy_volume(volume_uuid, force=True)
            except Exception as e:
                self._logger(
                    'Unable to destroy volume {} after creation fail: {}'
                    .format(volume_uuid, e)
                )

        def create():
            try:
                create_definition()
                if place_resources:
                    # Basic case when we use the default redundancy of the group.
                    self._check_volume_creation_errors(
                        self._linstor.resource_auto_place(
                            rsc_name=volume_name,
                            place_count=self._redundancy,
                            diskless_on_remaining=False
                        ),
                        volume_uuid,
                        self._group_name
                    )
            except LinstorVolumeManagerError as e:
                # Never remove a resource that already existed before us.
                if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                    clean()
                raise
            except Exception:
                clean()
                raise

        util.retry(create, maxretry=5)

2271 

    def _create_volume_with_properties(
        self, volume_uuid, volume_name, size, place_resources
    ):
        """
        Create a volume and its KV properties after checking for UUID,
        resource-name and property collisions.
        :return: The volume's KV properties, namespace set to the volume.
        :raises LinstorVolumeManagerError: On collision (code
            ERR_VOLUME_EXISTS) or creation failure.
        """
        if self.check_volume_exists(volume_uuid):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, it already exists'
                .format(volume_uuid, self._group_name) + ' in properties',
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        if volume_name in self._fetch_resource_names():
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    volume_uuid, self._group_name
                ) + 'resource of the same name already exists in LINSTOR'
            )

        # I am paranoid.
        volume_properties = self._get_volume_properties(volume_uuid)
        if (volume_properties.get(self.PROP_NOT_EXISTS) is not None):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}`, '.format(volume_uuid) +
                'properties already exist'
            )

        try:
            # Record the creation state first so an interrupted creation
            # can be detected and repaired later.
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_CREATING
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            self._create_volume(
                volume_uuid, volume_name, size, place_resources
            )

            assert volume_properties.namespace == \
                self._build_volume_namespace(volume_uuid)
            return volume_properties
        except LinstorVolumeManagerError as e:
            # Do not destroy existing resource!
            # In theory we can't get this error because we check this event
            # before the `self._create_volume` case.
            # It can only happen if the same volume uuid is used in the same
            # call in another host.
            if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                self._destroy_volume(volume_uuid, force=True)
            raise

2317 

2318 def _find_device_path(self, volume_uuid, volume_name): 

2319 current_device_path = self._request_device_path( 

2320 volume_uuid, volume_name, activate=True 

2321 ) 

2322 

2323 # We use realpath here to get the /dev/drbd<id> path instead of 

2324 # /dev/drbd/by-res/<resource_name>. 

2325 expected_device_path = self.build_device_path(volume_name) 

2326 util.wait_for_path(expected_device_path, 5) 

2327 

2328 device_realpath = os.path.realpath(expected_device_path) 

2329 if current_device_path != device_realpath: 

2330 raise LinstorVolumeManagerError( 

2331 'Invalid path, current={}, expected={} (realpath={})' 

2332 .format( 

2333 current_device_path, 

2334 expected_device_path, 

2335 device_realpath 

2336 ) 

2337 ) 

2338 return expected_device_path 

2339 

    def _request_device_path(self, volume_uuid, volume_name, activate=False):
        """
        Return the local /dev/drbd<id> device path of a volume. When the
        resource is absent on this host and `activate` is True, a
        resource is created on this node first.
        :raises LinstorVolumeManagerError: If no local resource exists
            and activation is not requested.
        """
        node_name = socket.gethostname()

        # Find this volume's resource on the local node.
        resource = next(filter(
            lambda resource: resource.node_name == node_name and
            resource.name == volume_name,
            self._get_resource_cache().resources
        ), None)

        if not resource:
            if activate:
                self._mark_resource_cache_as_dirty()
                self._activate_device_path(
                    self._linstor, node_name, volume_name
                )
                # Retry once without activation: the resource must exist now.
                return self._request_device_path(volume_uuid, volume_name)
            raise LinstorVolumeManagerError(
                'Empty dev path for `{}`, but definition "seems" to exist'
                .format(volume_uuid)
            )
        # Contains a path of the /dev/drbd<id> form.
        return resource.volumes[0].device_path

2362 

    def _destroy_resource(self, resource_name, force=False):
        """
        Delete a resource definition. Without `force`, a deletion error
        is fatal. With `force`, verify there is no opener, demote a
        resource stuck in primary mode if needed, then retry the
        deletion once (without force).
        :raises LinstorVolumeManagerError: If the deletion fails.
        """
        result = self._linstor.resource_dfn_delete(resource_name)
        error_str = self._get_error_str(result)
        if not error_str:
            self._mark_resource_cache_as_dirty()
            return

        if not force:
            self._mark_resource_cache_as_dirty()
            raise LinstorVolumeManagerError(
                'Could not destroy resource `{}` from SR `{}`: {}'
                .format(resource_name, self._group_name, error_str)
            )

        # If force is used, ensure there is no opener.
        all_openers = get_all_volume_openers(resource_name, '0')
        for openers in all_openers.values():
            if openers:
                self._mark_resource_cache_as_dirty()
                raise LinstorVolumeManagerError(
                    'Could not force destroy resource `{}` from SR `{}`: {} (openers=`{}`)'
                    .format(resource_name, self._group_name, error_str, all_openers)
                )

        # Maybe the resource is blocked in primary mode. DRBD/LINSTOR issue?
        resource_states = filter(
            lambda resource_state: resource_state.name == resource_name,
            self._get_resource_cache().resource_states
        )

        # Mark only after computation of states.
        self._mark_resource_cache_as_dirty()

        for resource_state in resource_states:
            # NOTE(review): `volume_state` is unused — TODO confirm removal.
            volume_state = resource_state.volume_states[0]
            if resource_state.in_use:
                demote_drbd_resource(resource_state.node_name, resource_name)
                break
        # Retry once, without force, now that a primary was demoted.
        self._destroy_resource(resource_name)

2402 

    def _destroy_volume(self, volume_uuid, force=False):
        """
        Destroy the LINSTOR resource of a volume (if it exists) and
        clear its KV properties.
        :param str volume_uuid: UUID of the volume to destroy.
        :param bool force: Forwarded to `_destroy_resource`.
        :raises LinstorVolumeManagerError: If anything fails.
        """
        volume_properties = self._get_volume_properties(volume_uuid)
        try:
            volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
            if volume_name in self._fetch_resource_names():
                self._destroy_resource(volume_name, force)

            # Assume this call is atomic.
            volume_properties.clear()
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Cannot destroy volume `{}`: {}'.format(volume_uuid, e)
            )

2416 

    def _build_volumes(self, repair):
        """
        Populate `self._volumes` from the KV store. Bad or half-created
        volumes are ignored (repair=False, slave side) or destroyed /
        renamed (repair=True, master side); interrupted UUID renames are
        then resolved.
        :param bool repair: True to clean up bad volumes.
        :raises LinstorVolumeManagerError: If renames are pending and
            repair is not requested.
        """
        properties = self._kv_cache
        resource_names = self._fetch_resource_names()

        self._volumes = set()

        # Volumes caught mid-rename are only tolerated when a repair
        # pass will resolve them below.
        updating_uuid_volumes = self._get_volumes_by_property(
            self.REG_UPDATING_UUID_SRC, ignore_inexisting_volumes=False
        )
        if updating_uuid_volumes and not repair:
            raise LinstorVolumeManagerError(
                'Cannot build LINSTOR volume list: '
                'It exists invalid "updating uuid volumes", repair is required'
            )

        existing_volumes = self._get_volumes_by_property(
            self.REG_NOT_EXISTS, ignore_inexisting_volumes=False
        )
        for volume_uuid, not_exists in existing_volumes.items():
            properties.namespace = self._build_volume_namespace(volume_uuid)

            src_uuid = properties.get(self.PROP_UPDATING_UUID_SRC)
            if src_uuid:
                # Rename destinations are handled in the loop below.
                self._logger(
                    'Ignoring volume during manager initialization with prop '
                    ' PROP_UPDATING_UUID_SRC: {} (properties={})'
                    .format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Insert volume in list if the volume exists. Or if the volume
            # is being created and a slave wants to use it (repair = False).
            #
            # If we are on the master and if repair is True and state is
            # Creating, it's probably a bug or crash: the creation process has
            # been stopped.
            if not_exists == self.STATE_EXISTS or (
                not repair and not_exists == self.STATE_CREATING
            ):
                self._volumes.add(volume_uuid)
                continue

            if not repair:
                self._logger(
                    'Ignoring bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Remove bad volume.
            try:
                self._logger(
                    'Removing bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                volume_name = properties.get(self.PROP_VOLUME_NAME)

                # Little optimization, don't call `self._destroy_volume`,
                # we already have resource name list.
                if volume_name in resource_names:
                    self._destroy_resource(volume_name, force=True)

                # Assume this call is atomic.
                properties.clear()
            except Exception as e:
                # Do not raise, we don't want to block user action.
                self._logger(
                    'Cannot clean volume {}: {}'.format(volume_uuid, e)
                )

                # The volume can't be removed, maybe it's still in use,
                # in this case rename it with the "DELETED_" prefix.
                # This prefix is mandatory if it exists a snap transaction to
                # rollback because the original VDI UUID can try to be renamed
                # with the UUID we are trying to delete...
                if not volume_uuid.startswith('DELETED_'):
                    self.update_volume_uuid(
                        volume_uuid, 'DELETED_' + volume_uuid, force=True
                    )

        # Resolve interrupted UUID renames: drop the dest if it never came
        # into existence, otherwise drop the src and keep the dest.
        for dest_uuid, src_uuid in updating_uuid_volumes.items():
            dest_namespace = self._build_volume_namespace(dest_uuid)

            properties.namespace = dest_namespace
            if int(properties.get(self.PROP_NOT_EXISTS)):
                properties.clear()
                continue

            properties.namespace = self._build_volume_namespace(src_uuid)
            properties.clear()

            properties.namespace = dest_namespace
            properties.pop(self.PROP_UPDATING_UUID_SRC)

            if src_uuid in self._volumes:
                self._volumes.remove(src_uuid)
            self._volumes.add(dest_uuid)

2523 

2524 def _get_sr_properties(self): 

2525 return self._create_linstor_kv(self._build_sr_namespace()) 

2526 

2527 def _get_volumes_by_property( 

2528 self, reg_prop, ignore_inexisting_volumes=True 

2529 ): 

2530 base_properties = self._get_kv_cache() 

2531 base_properties.namespace = self._build_volume_namespace() 

2532 

2533 volume_properties = {} 

2534 for volume_uuid in self._volumes: 

2535 volume_properties[volume_uuid] = '' 

2536 

2537 for key, value in base_properties.items(): 

2538 res = reg_prop.match(key) 

2539 if res: 

2540 volume_uuid = res.groups()[0] 

2541 if not ignore_inexisting_volumes or \ 

2542 volume_uuid in self._volumes: 

2543 volume_properties[volume_uuid] = value 

2544 

2545 return volume_properties 

2546 

2547 def _create_linstor_kv(self, namespace): 

2548 return linstor.KV( 

2549 self._group_name, 

2550 uri=self._linstor.controller_host(), 

2551 namespace=namespace 

2552 ) 

2553 

2554 def _get_volume_properties(self, volume_uuid): 

2555 properties = self._get_kv_cache() 

2556 properties.namespace = self._build_volume_namespace(volume_uuid) 

2557 return properties 

2558 

    @classmethod
    def _build_sr_namespace(cls):
        # All SR-level KV keys live under this namespace.
        return '/{}/'.format(cls.NAMESPACE_SR)

2562 

    @classmethod
    def _build_volume_namespace(cls, volume_uuid=None):
        # Return a path to all volumes if `volume_uuid` is not given.
        if volume_uuid is None:
            return '/{}/'.format(cls.NAMESPACE_VOLUME)
        return '/{}/{}/'.format(cls.NAMESPACE_VOLUME, volume_uuid)

2569 

2570 @classmethod 

2571 def _get_error_str(cls, result): 

2572 return ', '.join([ 

2573 err.message for err in cls._filter_errors(result) 

2574 ]) 

2575 

    @classmethod
    def _create_linstor_instance(
        cls, uri, keep_uri_unmodified=False, attempt_count=30
    ):
        """
        Connect to a LINSTOR controller and return the connected
        instance. On a network failure the connection is retried up to
        `attempt_count` times; unless `keep_uri_unmodified` is set, the
        controller URI is rediscovered on each retry (the controller may
        have moved to another host).
        :raises LinstorVolumeManagerError: If no controller URI can be
            found.
        """
        # NOTE(review): `retry` is unused — TODO confirm removal.
        retry = False

        def connect(uri):
            if not uri:
                uri = get_controller_uri()
                if not uri:
                    raise LinstorVolumeManagerError(
                        'Unable to find controller uri...'
                    )
            instance = linstor.Linstor(uri, keep_alive=True)
            instance.connect()
            return instance

        try:
            # Fast path: first attempt without the retry machinery.
            return connect(uri)
        except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError):
            pass

        # Rediscover the controller URI on each retry unless told not to.
        if not keep_uri_unmodified:
            uri = None

        return util.retry(
            lambda: connect(uri),
            maxretry=attempt_count,
            period=1,
            exceptions=[
                linstor.errors.LinstorNetworkError,
                LinstorVolumeManagerError
            ]
        )

2610 

2611 @classmethod 

2612 def _configure_volume_peer_slots(cls, lin, volume_name): 

2613 result = lin.resource_dfn_modify(volume_name, {}, peer_slots=3) 

2614 error_str = cls._get_error_str(result) 

2615 if error_str: 

2616 raise LinstorVolumeManagerError( 

2617 'Could not configure volume peer slots of {}: {}' 

2618 .format(volume_name, error_str) 

2619 ) 

2620 

    @classmethod
    def _activate_device_path(cls, lin, node_name, volume_name):
        """
        Make a resource available on the given node (diskless) so its
        device path can be opened there. An already-existing resource is
        not treated as an error.
        :raises LinstorVolumeManagerError: On any other failure.
        """
        result = lin.resource_make_available(node_name, volume_name, diskful=False)
        if linstor.Linstor.all_api_responses_no_error(result):
            return
        errors = linstor.Linstor.filter_api_call_response_errors(result)
        # A single "resource exists" error means it is already active.
        if len(errors) == 1 and errors[0].is_error(
            linstor.consts.FAIL_EXISTS_RSC
        ):
            return

        raise LinstorVolumeManagerError(
            'Unable to activate device path of `{}` on node `{}`: {}'
            .format(volume_name, node_name, ', '.join(
                [str(x) for x in result]))
        )

2637 

2638 @classmethod 

2639 def _request_database_path(cls, lin, activate=False): 

2640 node_name = socket.gethostname() 

2641 

2642 try: 

2643 resource = next(filter( 

2644 lambda resource: resource.node_name == node_name and 

2645 resource.name == DATABASE_VOLUME_NAME, 

2646 lin.resource_list_raise().resources 

2647 ), None) 

2648 except Exception as e: 

2649 raise LinstorVolumeManagerError( 

2650 'Unable to get resources during database creation: {}' 

2651 .format(e) 

2652 ) 

2653 

2654 if not resource: 

2655 if activate: 

2656 cls._activate_device_path( 

2657 lin, node_name, DATABASE_VOLUME_NAME 

2658 ) 

2659 return cls._request_database_path( 

2660 DATABASE_VOLUME_NAME, DATABASE_VOLUME_NAME 

2661 ) 

2662 raise LinstorVolumeManagerError( 

2663 'Empty dev path for `{}`, but definition "seems" to exist' 

2664 .format(DATABASE_PATH) 

2665 ) 

2666 # Contains a path of the /dev/drbd<id> form. 

2667 return resource.volumes[0].device_path 

2668 

    @classmethod
    def _create_database_volume(
        cls, lin, group_name, node_names, redundancy, auto_quorum
    ):
        """
        Create, place, format and validate the LINSTOR database volume.
        Diskful replicas are created on `redundancy` nodes that own a
        storage pool; every other node gets a diskless resource so PBDs
        can be plugged everywhere.
        :return: The /dev/drbd/by-res device path of the database volume.
        :rtype: str
        :raises LinstorVolumeManagerError: On any failure.
        """
        try:
            dfns = lin.resource_dfn_list_raise().resource_definitions
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Unable to get definitions during database creation: {}'
                .format(e)
            )

        # The database must be the very first volume of the SR.
        if dfns:
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    DATABASE_VOLUME_NAME, group_name
                ) + 'LINSTOR volume list must be empty.'
            )

        # Workaround to use thin lvm. Without this line an error is returned:
        # "Not enough available nodes"
        # I don't understand why but this command protect against this bug.
        try:
            pools = lin.storage_pool_list_raise(
                filter_by_stor_pools=[group_name]
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to get storage pool list before database creation: {}'
                .format(e)
            )

        # Ensure we have a correct list of storage pools.
        nodes_with_pool = [pool.node_name for pool in pools.storage_pools]
        assert nodes_with_pool  # We must have at least one storage pool!
        for node_name in nodes_with_pool:
            assert node_name in node_names
        util.SMlog('Nodes with storage pool: {}'.format(nodes_with_pool))

        # Create the database definition.
        size = cls.round_up_volume_size(DATABASE_SIZE)
        cls._check_volume_creation_errors(lin.resource_group_spawn(
            rsc_grp_name=group_name,
            rsc_dfn_name=DATABASE_VOLUME_NAME,
            vlm_sizes=['{}B'.format(size)],
            definitions_only=True
        ), DATABASE_VOLUME_NAME, group_name)
        cls._configure_volume_peer_slots(lin, DATABASE_VOLUME_NAME)

        # Create real resources on the first nodes.
        resources = []

        # Split nodes into those that can hold data and those that can't.
        diskful_nodes = []
        diskless_nodes = []
        for node_name in node_names:
            if node_name in nodes_with_pool:
                diskful_nodes.append(node_name)
            else:
                diskless_nodes.append(node_name)

        assert diskful_nodes
        for node_name in diskful_nodes[:redundancy]:
            util.SMlog('Create database diskful on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                storage_pool=group_name
            ))
        # Create diskless resources on the remaining set.
        for node_name in diskful_nodes[redundancy:] + diskless_nodes:
            util.SMlog('Create database diskless on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                diskless=True
            ))

        result = lin.resource_create(resources)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create database volume from SR `{}`: {}'.format(
                    group_name, error_str
                )
            )

        # We must modify the quorum. Otherwise we can't use correctly the
        # drbd-reactor daemon.
        if auto_quorum:
            result = lin.resource_dfn_modify(DATABASE_VOLUME_NAME, {
                'DrbdOptions/auto-quorum': 'disabled',
                'DrbdOptions/Resource/quorum': 'majority'
            })
            error_str = cls._get_error_str(result)
            if error_str:
                raise LinstorVolumeManagerError(
                    'Could not activate quorum on database volume: {}'
                    .format(error_str)
                )

        # Create database and ensure path exists locally and
        # on replicated devices.
        current_device_path = cls._request_database_path(lin, activate=True)

        # Ensure diskless paths exist on other hosts. Otherwise PBDs can't be
        # plugged.
        for node_name in node_names:
            cls._activate_device_path(lin, node_name, DATABASE_VOLUME_NAME)

        # We use realpath here to get the /dev/drbd<id> path instead of
        # /dev/drbd/by-res/<resource_name>.
        expected_device_path = cls.build_device_path(DATABASE_VOLUME_NAME)
        util.wait_for_path(expected_device_path, 5)

        device_realpath = os.path.realpath(expected_device_path)
        if current_device_path != device_realpath:
            raise LinstorVolumeManagerError(
                'Invalid path, current={}, expected={} (realpath={})'
                .format(
                    current_device_path,
                    expected_device_path,
                    device_realpath
                )
            )

        # Format the new volume; retried because the device may not be
        # immediately writable.
        try:
            util.retry(
                lambda: util.pread2([DATABASE_MKFS, expected_device_path]),
                maxretry=5
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to execute {} on database volume: {}'
                .format(DATABASE_MKFS, e)
            )

        return expected_device_path

2806 

2807 @classmethod 

2808 def _destroy_database_volume(cls, lin, group_name): 

2809 error_str = cls._get_error_str( 

2810 lin.resource_dfn_delete(DATABASE_VOLUME_NAME) 

2811 ) 

2812 if error_str: 

2813 raise LinstorVolumeManagerError( 

2814 'Could not destroy resource `{}` from SR `{}`: {}' 

2815 .format(DATABASE_VOLUME_NAME, group_name, error_str) 

2816 ) 

2817 

2818 @classmethod 

2819 def _mount_database_volume(cls, volume_path, mount=True, force=False): 

2820 try: 

2821 # 1. Create a backup config folder. 

2822 database_not_empty = bool(os.listdir(DATABASE_PATH)) 

2823 backup_path = cls._create_database_backup_path() 

2824 

2825 # 2. Move the config in the mounted volume. 

2826 if database_not_empty: 

2827 cls._move_files(DATABASE_PATH, backup_path) 

2828 

2829 cls._mount_volume(volume_path, DATABASE_PATH, mount) 

2830 

2831 if database_not_empty: 

2832 cls._move_files(backup_path, DATABASE_PATH, force) 

2833 

2834 # 3. Remove useless backup directory. 

2835 try: 

2836 os.rmdir(backup_path) 

2837 except Exception as e: 

2838 raise LinstorVolumeManagerError( 

2839 'Failed to remove backup path {} of LINSTOR config: {}' 

2840 .format(backup_path, e) 

2841 ) 

2842 except Exception as e: 

2843 def force_exec(fn): 

2844 try: 

2845 fn() 

2846 except Exception: 

2847 pass 

2848 

2849 if mount == cls._is_mounted(DATABASE_PATH): 

2850 force_exec(lambda: cls._move_files( 

2851 DATABASE_PATH, backup_path 

2852 )) 

2853 force_exec(lambda: cls._mount_volume( 

2854 volume_path, DATABASE_PATH, not mount 

2855 )) 

2856 

2857 if mount != cls._is_mounted(DATABASE_PATH): 

2858 force_exec(lambda: cls._move_files( 

2859 backup_path, DATABASE_PATH 

2860 )) 

2861 

2862 force_exec(lambda: os.rmdir(backup_path)) 

2863 raise e 

2864 

    @classmethod
    def _force_destroy_database_volume(cls, lin, group_name):
        # Best-effort destruction: swallow every error.
        try:
            cls._destroy_database_volume(lin, group_name)
        except Exception:
            pass

2871 

    @classmethod
    def _destroy_storage_pool(cls, lin, group_name, node_name):
        """
        Delete the storage pool of this group on one node. A missing
        pool (or pool definition) is not an error.
        :raises LinstorVolumeManagerError: If the deletion fails after
            all retries.
        """
        def destroy():
            result = lin.storage_pool_delete(node_name, group_name)
            errors = cls._filter_errors(result)
            # A pool that no longer exists is considered destroyed.
            if cls._check_errors(errors, [
                linstor.consts.FAIL_NOT_FOUND_STOR_POOL,
                linstor.consts.FAIL_NOT_FOUND_STOR_POOL_DFN
            ]):
                return

            if errors:
                raise LinstorVolumeManagerError(
                    'Failed to destroy SP `{}` on node `{}`: {}'.format(
                        group_name,
                        node_name,
                        cls._get_error_str(errors)
                    )
                )

        # We must retry to avoid errors like:
        # "can not be deleted as volumes / snapshot-volumes are still using it"
        # after LINSTOR database volume destruction.
        return util.retry(destroy, maxretry=10)

2896 

2897 @classmethod 

2898 def _destroy_resource_group(cls, lin, group_name): 

2899 def destroy(): 

2900 result = lin.resource_group_delete(group_name) 

2901 errors = cls._filter_errors(result) 

2902 if cls._check_errors(errors, [ 

2903 linstor.consts.FAIL_NOT_FOUND_RSC_GRP 

2904 ]): 

2905 return 

2906 

2907 if errors: 

2908 raise LinstorVolumeManagerError( 

2909 'Failed to destroy RG `{}`: {}' 

2910 .format(group_name, cls._get_error_str(errors)) 

2911 ) 

2912 

2913 return util.retry(destroy, maxretry=10) 

2914 

2915 @classmethod 

2916 def _build_group_name(cls, base_name): 

2917 # If thin provisioning is used we have a path like this: 

2918 # `VG/LV`. "/" is not accepted by LINSTOR. 

2919 return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_')) 

2920 

2921 @classmethod 

2922 def _check_volume_creation_errors(cls, result, volume_uuid, group_name): 

2923 errors = cls._filter_errors(result) 

2924 if cls._check_errors(errors, [ 

2925 linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN 

2926 ]): 

2927 raise LinstorVolumeManagerError( 

2928 'Failed to create volume `{}` from SR `{}`, it already exists' 

2929 .format(volume_uuid, group_name), 

2930 LinstorVolumeManagerError.ERR_VOLUME_EXISTS 

2931 ) 

2932 

2933 if errors: 

2934 raise LinstorVolumeManagerError( 

2935 'Failed to create volume `{}` from SR `{}`: {}'.format( 

2936 volume_uuid, 

2937 group_name, 

2938 cls._get_error_str(errors) 

2939 ) 

2940 ) 

2941 

2942 @classmethod 

2943 def _move_files(cls, src_dir, dest_dir, force=False): 

2944 def listdir(dir): 

2945 ignored = ['lost+found'] 

2946 return [file for file in os.listdir(dir) if file not in ignored] 

2947 

2948 try: 

2949 if not force: 

2950 files = listdir(dest_dir) 

2951 if files: 

2952 raise LinstorVolumeManagerError( 

2953 'Cannot move files from {} to {} because destination ' 

2954 'contains: {}'.format(src_dir, dest_dir, files) 

2955 ) 

2956 except LinstorVolumeManagerError: 

2957 raise 

2958 except Exception as e: 

2959 raise LinstorVolumeManagerError( 

2960 'Cannot list dir {}: {}'.format(dest_dir, e) 

2961 ) 

2962 

2963 try: 

2964 for file in listdir(src_dir): 

2965 try: 

2966 dest_file = os.path.join(dest_dir, file) 

2967 if not force and os.path.exists(dest_file): 

2968 raise LinstorVolumeManagerError( 

2969 'Cannot move {} because it already exists in the ' 

2970 'destination'.format(file) 

2971 ) 

2972 shutil.move(os.path.join(src_dir, file), dest_file) 

2973 except LinstorVolumeManagerError: 

2974 raise 

2975 except Exception as e: 

2976 raise LinstorVolumeManagerError( 

2977 'Cannot move {}: {}'.format(file, e) 

2978 ) 

2979 except Exception as e: 

2980 if not force: 

2981 try: 

2982 cls._move_files(dest_dir, src_dir, force=True) 

2983 except Exception: 

2984 pass 

2985 

2986 raise LinstorVolumeManagerError( 

2987 'Failed to move files from {} to {}: {}'.format( 

2988 src_dir, dest_dir, e 

2989 ) 

2990 ) 

2991 

2992 @staticmethod 

2993 def _create_database_backup_path(): 

2994 path = DATABASE_PATH + '-' + str(uuid.uuid4()) 

2995 try: 

2996 os.mkdir(path) 

2997 return path 

2998 except Exception as e: 

2999 raise LinstorVolumeManagerError( 

3000 'Failed to create backup path {} of LINSTOR config: {}' 

3001 .format(path, e) 

3002 ) 

3003 

3004 @staticmethod 

3005 def _get_filtered_properties(properties): 

3006 return dict(properties.items()) 

3007 

3008 @staticmethod 

3009 def _filter_errors(result): 

3010 return [ 

3011 err for err in result 

3012 if hasattr(err, 'is_error') and err.is_error() 

3013 ] 

3014 

3015 @staticmethod 

3016 def _check_errors(result, codes): 

3017 for err in result: 

3018 for code in codes: 

3019 if err.is_error(code): 

3020 return True 

3021 return False 

3022 

3023 @classmethod 

3024 def _controller_is_running(cls): 

3025 return cls._service_is_running('linstor-controller') 

3026 

3027 @classmethod 

3028 def _start_controller(cls, start=True): 

3029 return cls._start_service('linstor-controller', start) 

3030 

3031 @staticmethod 

3032 def _start_service(name, start=True): 

3033 action = 'start' if start else 'stop' 

3034 (ret, out, err) = util.doexec([ 

3035 'systemctl', action, name 

3036 ]) 

3037 if ret != 0: 

3038 raise LinstorVolumeManagerError( 

3039 'Failed to {} {}: {} {}' 

3040 .format(action, name, out, err) 

3041 ) 

3042 

3043 @staticmethod 

3044 def _service_is_running(name): 

3045 (ret, out, err) = util.doexec([ 

3046 'systemctl', 'is-active', '--quiet', name 

3047 ]) 

3048 return not ret 

3049 

3050 @staticmethod 

3051 def _is_mounted(mountpoint): 

3052 (ret, out, err) = util.doexec(['mountpoint', '-q', mountpoint]) 

3053 return ret == 0 

3054 

3055 @classmethod 

3056 def _mount_volume(cls, volume_path, mountpoint, mount=True): 

3057 if mount: 

3058 try: 

3059 util.pread(['mount', volume_path, mountpoint]) 

3060 except Exception as e: 

3061 raise LinstorVolumeManagerError( 

3062 'Failed to mount volume {} on {}: {}' 

3063 .format(volume_path, mountpoint, e) 

3064 ) 

3065 else: 

3066 try: 

3067 if cls._is_mounted(mountpoint): 

3068 util.pread(['umount', mountpoint]) 

3069 except Exception as e: 

3070 raise LinstorVolumeManagerError( 

3071 'Failed to umount volume {} on {}: {}' 

3072 .format(volume_path, mountpoint, e) 

3073 ) 

3074 

3075 

3076# ============================================================================== 

3077 

3078# Check if a path is a DRBD resource and log the process name/pid 

3079# that opened it. 

def log_drbd_openers(path):
    """Log which hosts/processes have the DRBD resource behind `path` open."""
    # Only paths under `/dev/drbd/by-res/` can be DRBD resources.
    if not path.startswith(DRBD_BY_RES_PATH):
        return

    # Expected layout: `<DRBD_BY_RES_PATH><resource>/<volume>`.
    prefix_len = len(DRBD_BY_RES_PATH)
    res_name_end = path.find('/', prefix_len)
    if res_name_end == -1:
        return
    res_name = path[prefix_len:res_name_end]

    volume_end = path.rfind('/')
    if volume_end == res_name_end:
        return
    volume = path[volume_end + 1:]

    try:
        # Ensure path is a DRBD: a block device with major number 147.
        stats = os.stat(os.path.realpath(path))
        is_drbd = stat.S_ISBLK(stats.st_mode) and \
            os.major(stats.st_rdev) == 147
        if not is_drbd:
            return

        # Find where the device is open.
        (ret, stdout, stderr) = util.doexec(['drbdadm', 'status', res_name])
        if ret:
            util.SMlog('Failed to execute `drbdadm status` on `{}`: {}'.format(
                res_name, stderr
            ))
            return

        if stdout.startswith('{} role:Primary'.format(res_name)):
            # Open on the local host.
            util.SMlog(
                'DRBD resource `{}` is open on local host: {}'
                .format(path, get_local_volume_openers(res_name, volume))
            )
        else:
            # Open on one or more remote hosts.
            util.SMlog(
                'DRBD resource `{}` is open on hosts: {}'
                .format(path, get_all_volume_openers(res_name, volume))
            )
    except Exception as e:
        util.SMlog(
            'Got exception while trying to determine where DRBD resource ' +
            '`{}` is open: {}'.format(path, e)
        )