#!/usr/bin/env python3

#
# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#

from sm_typing import override

import errno
import json
import linstor
import os.path
import re
import shutil
import socket
import stat
import time
import util
import uuid

# Persistent prefix to add to RAW persistent volumes.
PERSISTENT_PREFIX = 'xcp-persistent-'

# Contains the data of the "/var/lib/linstor" directory.
DATABASE_VOLUME_NAME = PERSISTENT_PREFIX + 'database'
DATABASE_SIZE = 1 << 30  # 1GB.
DATABASE_PATH = '/var/lib/linstor'
DATABASE_MKFS = 'mkfs.ext4'

REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary")
REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$')
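
# Illustrative (assumed) inputs for these regexes:
# - REG_DRBDADM_PRIMARY matches a `drbdadm status` line such as
#   "node-2 role:Primary" and captures the node name.
# - REG_DRBDSETUP_IP matches a `drbdsetup show` remote host value such as
#   "ipv4 192.168.1.2:7000" and captures the address part.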

DRBD_BY_RES_PATH = '/dev/drbd/by-res/'

PLUGIN = 'linstor-manager'


# ==============================================================================

def get_local_volume_openers(resource_name, volume):
    if not resource_name or volume is None:
        raise Exception('Cannot get DRBD openers without resource name and/or volume.')

    path = '/sys/kernel/debug/drbd/resources/{}/volumes/{}/openers'.format(
        resource_name, volume
    )

    with open(path, 'r') as openers:
        # Not a big cost, so read all lines directly.
        lines = openers.readlines()

    result = {}

    opener_re = re.compile('(.*)\\s+([0-9]+)\\s+([0-9]+)')
    for line in lines:
        match = opener_re.match(line)
        assert match

        groups = match.groups()
        process_name = groups[0]
        pid = groups[1]
        open_duration_ms = groups[2]
        result[pid] = {
            'process-name': process_name,
            'open-duration': open_duration_ms
        }

    return json.dumps(result)
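
# The openers file is expected (an assumption based on the regex above) to
# contain one line per opener, e.g.:
#   qemu-dp 1234 5678
# i.e. process name, PID and open duration in milliseconds.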

def get_all_volume_openers(resource_name, volume):
    PLUGIN_CMD = 'getDrbdOpeners'

    volume = str(volume)
    openers = {}

    # Make sure this call never gets stuck: it can be executed during HA init,
    # and in that case we could wait forever.
    session = util.timeout_call(10, util.get_localAPI_session)

    hosts = session.xenapi.host.get_all_records()
    for host_ref, host_record in hosts.items():
        node_name = host_record['hostname']
        try:
            if not session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )['live']:
                # Ensure we call the plugin on online hosts only.
                continue

            openers[node_name] = json.loads(
                session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {
                    'resourceName': resource_name,
                    'volume': volume
                })
            )
        except Exception as e:
            util.SMlog('Failed to get openers of `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))

    return openers


# ==============================================================================

def round_up(value, divisor):
    assert divisor
    divisor = int(divisor)
    return ((int(value) + divisor - 1) // divisor) * divisor


def round_down(value, divisor):
    assert divisor
    value = int(value)
    return value - (value % int(divisor))
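
# Illustrative arithmetic (the values are arbitrary): with a 4 MiB divisor,
#   round_up(5 * 1024 * 1024, 4 * 1024 * 1024)   == 8388608  (8 MiB)
#   round_down(5 * 1024 * 1024, 4 * 1024 * 1024) == 4194304  (4 MiB)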

# ==============================================================================

def get_remote_host_ip(node_name):
    (ret, stdout, stderr) = util.doexec([
        'drbdsetup', 'show', DATABASE_VOLUME_NAME, '--json'
    ])
    if ret != 0:
        return

    try:
        conf = json.loads(stdout)
        if not conf:
            return

        for connection in conf[0]['connections']:
            if connection['net']['_name'] == node_name:
                value = connection['path']['_remote_host']
                res = REG_DRBDSETUP_IP.match(value)
                if res:
                    return res.groups()[0]
                break
    except Exception:
        pass


def _get_controller_uri():
    PLUGIN_CMD = 'hasControllerRunning'

    # Try to find the controller using drbdadm.
    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])
    if ret == 0:
        # If we are here, the database device exists locally.

        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            # Nice case, we have the controller running on this local host.
            return 'linstor://localhost'

        # Try to find the host using DRBD connections.
        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            node_name = res.groups()[0]
            ip = get_remote_host_ip(node_name)
            if ip:
                return 'linstor://' + ip

    # Worst case: there are many hosts in the pool (>= 4), so we can't find
    # the primary using drbdadm because we don't have a connection to every
    # replica of the volume: `drbdadm status xcp-persistent-database` returns
    # 3 connections by default.
    try:
        session = util.timeout_call(10, util.get_localAPI_session)

        for host_ref, host_record in session.xenapi.host.get_all_records().items():
            node_name = host_record['hostname']
            try:
                if util.strtobool(
                    session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {})
                ):
                    return 'linstor://' + host_record['address']
            except Exception as e:
                # Can throw an exception if a host is offline, so catch it.
                util.SMlog('Unable to search controller on `{}`: {}'.format(
                    node_name, e
                ))
    except Exception:
        # Not found, maybe we are trying to create the SR...
        pass


def get_controller_uri():
    retries = 0
    while True:
        uri = _get_controller_uri()
        if uri:
            return uri

        retries += 1
        if retries >= 10:
            break
        time.sleep(1)
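
# Typical (illustrative) use of get_controller_uri(): open a LINSTOR client
# on whichever node currently runs the controller. `linstor.Linstor` is the
# client class of the python-linstor package; the error handling is elided.
#   uri = get_controller_uri()
#   if uri:
#       lin = linstor.Linstor(uri)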

def get_controller_node_name():
    PLUGIN_CMD = 'hasControllerRunning'

    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])

    if ret == 0:
        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            return 'localhost'

        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            return res.groups()[0]

    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            if not session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )['live']:
                continue

            if util.strtobool(session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {}
            )):
                return node_name
        except Exception as e:
            util.SMlog('Failed to call plugin to get controller on `{}`: {}'.format(
                node_name, e
            ))


def demote_drbd_resource(node_name, resource_name):
    PLUGIN_CMD = 'demoteDrbdResource'

    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        if host_record['hostname'] != node_name:
            continue

        try:
            session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {'resource_name': resource_name}
            )
        except Exception as e:
            util.SMlog('Failed to demote resource `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))
        # Node found and plugin called: return so a matched node doesn't
        # fall through to the "unable to find node" error below.
        return

    raise Exception(
        'Can\'t demote resource `{}`, unable to find node `{}`'
        .format(resource_name, node_name)
    )

# ==============================================================================

class LinstorVolumeManagerError(Exception):
    ERR_GENERIC = 0
    ERR_VOLUME_EXISTS = 1
    ERR_VOLUME_NOT_EXISTS = 2
    ERR_VOLUME_DESTROY = 3
    ERR_GROUP_NOT_EXISTS = 4

    def __init__(self, message, code=ERR_GENERIC):
        super(LinstorVolumeManagerError, self).__init__(message)
        self._code = code

    @property
    def code(self):
        return self._code
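
# Illustrative error handling in a caller of this module (a sketch):
#   try:
#       ...
#   except LinstorVolumeManagerError as e:
#       if e.code == LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS:
#           ...  # Handle the missing volume.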

# ==============================================================================

# Note:
# If a storage pool is not accessible after a network change:
# linstor node interface modify <NODE> default --ip <IP>


class LinstorVolumeManager(object):
    """
    API to manage LINSTOR volumes in XCP-ng.
    A volume in this context is a physical part of the storage layer.
    """

    __slots__ = (
        '_linstor', '_logger', '_redundancy',
        '_base_group_name', '_group_name', '_ha_group_name',
        '_volumes', '_storage_pools', '_storage_pools_time',
        '_kv_cache', '_resource_cache', '_volume_info_cache',
        '_kv_cache_dirty', '_resource_cache_dirty', '_volume_info_cache_dirty'
    )

    DEV_ROOT_PATH = DRBD_BY_RES_PATH

    # Default sector size.
    BLOCK_SIZE = 512

    # List of volume properties.
    PROP_METADATA = 'metadata'
    PROP_NOT_EXISTS = 'not-exists'
    PROP_VOLUME_NAME = 'volume-name'
    PROP_IS_READONLY_TIMESTAMP = 'readonly-timestamp'

    # A volume can only be locked for a limited duration.
    # The goal is to give slaves enough time to execute some actions on
    # a device before a UUID update or a coalesce, for example.
    # Expiration is expressed in seconds.
    LOCKED_EXPIRATION_DELAY = 1 * 60

    # Used when a volume uuid is being updated.
    PROP_UPDATING_UUID_SRC = 'updating-uuid-src'

    # States of property PROP_NOT_EXISTS.
    STATE_EXISTS = '0'
    STATE_NOT_EXISTS = '1'
    STATE_CREATING = '2'

    # Property namespaces.
    NAMESPACE_SR = 'xcp/sr'
    NAMESPACE_VOLUME = 'xcp/volume'

    # Regex to match properties.
    REG_PROP = '^([^/]+)/{}$'

    REG_METADATA = re.compile(REG_PROP.format(PROP_METADATA))
    REG_NOT_EXISTS = re.compile(REG_PROP.format(PROP_NOT_EXISTS))
    REG_VOLUME_NAME = re.compile(REG_PROP.format(PROP_VOLUME_NAME))
    REG_UPDATING_UUID_SRC = re.compile(REG_PROP.format(PROP_UPDATING_UUID_SRC))

    # Prefixes of SR/VOLUME in the LINSTOR DB.
    # A LINSTOR (resource, group, ...) name cannot start with a number.
    # So we add a prefix in front of our SR/VOLUME uuids.
    PREFIX_SR = 'xcp-sr-'
    PREFIX_HA = 'xcp-ha-'
    PREFIX_VOLUME = 'xcp-volume-'

    # To limit the request count when storage pool info is asked for, we only
    # fetch the current pool status again after N seconds have elapsed.
    STORAGE_POOLS_FETCH_INTERVAL = 15

    @staticmethod
    def default_logger(*args):
        print(args)

    # --------------------------------------------------------------------------
    # API.
    # --------------------------------------------------------------------------

    class VolumeInfo(object):
        __slots__ = (
            'name',
            'allocated_size',  # Allocated size, place count is not used.
            'virtual_size',    # Total virtual available size of this volume
                               # (i.e. the user size at creation).
            'diskful'          # Array of nodes that have a diskful volume.
        )

        def __init__(self, name):
            self.name = name
            self.allocated_size = 0
            self.virtual_size = 0
            self.diskful = []

        @override
        def __repr__(self) -> str:
            return 'VolumeInfo("{}", {}, {}, {})'.format(
                self.name, self.allocated_size, self.virtual_size,
                self.diskful
            )

    # --------------------------------------------------------------------------

    def __init__(
        self, uri, group_name, repair=False, logger=default_logger.__func__,
        attempt_count=30
    ):
        """
        Create a new LinstorVolumeManager object.
        :param str uri: URI to communicate with the LINSTOR controller.
        :param str group_name: The SR group name to use.
        :param bool repair: If true we try to remove bad volumes due to a crash
        or unexpected behavior.
        :param function logger: Function to log messages.
        :param int attempt_count: Number of attempts to join the controller.
        """

        self._linstor = self._create_linstor_instance(
            uri, attempt_count=attempt_count
        )
        self._base_group_name = group_name

        # Ensure the group exists.
        group_name = self._build_group_name(group_name)
        groups = self._linstor.resource_group_list_raise([group_name]).resource_groups
        if not groups:
            raise LinstorVolumeManagerError(
                'Unable to find `{}` Linstor SR'.format(group_name)
            )

        # Ok. ;)
        self._logger = logger
        self._redundancy = groups[0].select_filter.place_count
        self._group_name = group_name
        self._ha_group_name = self._build_ha_group_name(self._base_group_name)
        self._volumes = set()
        self._storage_pools_time = 0

        # To increase performance and limit the request count to LINSTOR
        # services, we use caches.
        self._kv_cache = self._create_kv_cache()
        self._resource_cache = None
        self._resource_cache_dirty = True
        self._volume_info_cache = None
        self._volume_info_cache_dirty = True
        self._build_volumes(repair=repair)
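
    # Illustrative construction (the URI and group name are assumptions):
    #   manager = LinstorVolumeManager(
    #       'linstor://localhost', 'linstor_group', logger=util.SMlog
    #   )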

    @property
    def group_name(self):
        """
        Give the used group name.
        :return: The group name.
        :rtype: str
        """
        return self._base_group_name

    @property
    def redundancy(self):
        """
        Give the used redundancy.
        :return: The redundancy.
        :rtype: int
        """
        return self._redundancy

    @property
    def volumes(self):
        """
        Give the volume uuid set.
        :return: The volume uuid set.
        :rtype: set(str)
        """
        return self._volumes

    @property
    def max_volume_size_allowed(self):
        """
        Give the max volume size currently available in B.
        :return: The current size.
        :rtype: int
        """

        candidates = self._find_best_size_candidates()
        if not candidates:
            raise LinstorVolumeManagerError(
                'Failed to get max volume size allowed'
            )

        size = candidates[0].max_volume_size
        if size < 0:
            raise LinstorVolumeManagerError(
                'Invalid max volume size allowed given: {}'.format(size)
            )
        return self.round_down_volume_size(size * 1024)

    @property
    def physical_size(self):
        """
        Give the total physical size of the SR.
        :return: The physical size.
        :rtype: int
        """
        return self._compute_size('total_capacity')

    @property
    def physical_free_size(self):
        """
        Give the total free physical size of the SR.
        :return: The physical free size.
        :rtype: int
        """
        return self._compute_size('free_capacity')

    @property
    def allocated_volume_size(self):
        """
        Give the allocated size for all volumes. The place count is not
        used here. When thick LVM is used, the size of one volume should
        be equal to the virtual volume size. With thin LVM, the size is
        equal to or lower than the volume size.
        :return: The allocated size of all volumes.
        :rtype: int
        """

        # Paths: /res_name/vol_number/size
        sizes = {}

        for resource in self._get_resource_cache().resources:
            if resource.name not in sizes:
                current = sizes[resource.name] = {}
            else:
                current = sizes[resource.name]

            for volume in resource.volumes:
                # We ignore diskless pools of the form "DfltDisklessStorPool".
                if volume.storage_pool_name != self._group_name:
                    continue

                current_size = volume.allocated_size
                if current_size < 0:
                    raise LinstorVolumeManagerError(
                        'Failed to get allocated size of `{}` on `{}`'
                        .format(resource.name, volume.storage_pool_name)
                    )
                current[volume.number] = max(current_size, current.get(volume.number) or 0)

        total_size = 0
        for volumes in sizes.values():
            for size in volumes.values():
                total_size += size

        return total_size * 1024

    def get_min_physical_size(self):
        """
        Give the minimum physical size of the SR,
        i.e. the size of the smallest disk, plus the pool count.
        :return: A tuple (pool count, smallest pool capacity in B).
        :rtype: tuple(int, int)
        """
        size = None
        pool_count = 0
        for pool in self._get_storage_pools(force=True):
            space = pool.free_space
            if space:
                pool_count += 1
                current_size = space.total_capacity
                if current_size < 0:
                    raise LinstorVolumeManagerError(
                        'Failed to get pool total_capacity attr of `{}`'
                        .format(pool.node_name)
                    )
                if size is None or current_size < size:
                    size = current_size
        return (pool_count, (size or 0) * 1024)

    @property
    def metadata(self):
        """
        Get the metadata of the SR.
        :return: Dictionary that contains metadata.
        :rtype: dict(str, dict)
        """

        sr_properties = self._get_sr_properties()
        metadata = sr_properties.get(self.PROP_METADATA)
        if metadata is not None:
            metadata = json.loads(metadata)
            if isinstance(metadata, dict):
                return metadata
            raise LinstorVolumeManagerError(
                'Expected dictionary in SR metadata: {}'.format(
                    self._group_name
                )
            )

        return {}

    @metadata.setter
    def metadata(self, metadata):
        """
        Set the metadata of the SR.
        :param dict metadata: Dictionary that contains metadata.
        """

        assert isinstance(metadata, dict)
        sr_properties = self._get_sr_properties()
        sr_properties[self.PROP_METADATA] = json.dumps(metadata)

    @property
    def disconnected_hosts(self):
        """
        Get the list of disconnected hosts.
        :return: Set that contains disconnected hosts.
        :rtype: set(str)
        """

        disconnected_hosts = set()
        for pool in self._get_storage_pools():
            for report in pool.reports:
                if report.ret_code & linstor.consts.WARN_NOT_CONNECTED == \
                        linstor.consts.WARN_NOT_CONNECTED:
                    disconnected_hosts.add(pool.node_name)
                    break
        return disconnected_hosts

    def check_volume_exists(self, volume_uuid):
        """
        Check if a volume exists in the SR.
        :return: True if volume exists.
        :rtype: bool
        """
        return volume_uuid in self._volumes

    def create_volume(
        self,
        volume_uuid,
        size,
        persistent=True,
        volume_name=None,
        high_availability=False
    ):
        """
        Create a new volume on the SR.
        :param str volume_uuid: The volume uuid to use.
        :param int size: Volume size in B.
        :param bool persistent: If false the volume will be unavailable
        on the next constructor call LinstorSR(...).
        :param str volume_name: If set, this name is used in the LINSTOR
        database instead of a generated name.
        :param bool high_availability: If set, the volume is created in
        the HA group.
        :return: The current device path of the volume.
        :rtype: str
        """

        self._logger('Creating LINSTOR volume {}...'.format(volume_uuid))
        if not volume_name:
            volume_name = self.build_volume_name(util.gen_uuid())
        volume_properties = self._create_volume_with_properties(
            volume_uuid,
            volume_name,
            size,
            True,  # place_resources
            high_availability
        )

        # Volume created! Now try to find the device path.
        try:
            self._logger(
                'Find device path of LINSTOR volume {}...'.format(volume_uuid)
            )
            device_path = self._find_device_path(volume_uuid, volume_name)
            if persistent:
                volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
            self._volumes.add(volume_uuid)
            self._logger(
                'LINSTOR volume {} created!'.format(volume_uuid)
            )
            return device_path
        except Exception:
            # There is an issue to find the path.
            # At this point the volume has just been created, so the force
            # flag can be used.
            self._destroy_volume(volume_uuid, force=True)
            raise
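
    # Illustrative call (the UUID and size are placeholders):
    #   path = manager.create_volume(vdi_uuid, 10 * 1024 ** 3)  # 10 GiB.
    #   # `path` looks like '/dev/drbd/by-res/xcp-volume-.../0'.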

    def mark_volume_as_persistent(self, volume_uuid):
        """
        Mark a volume as persistent if created with persistent=False.
        :param str volume_uuid: The volume uuid to mark.
        """

        self._ensure_volume_exists(volume_uuid)

        # Mark volume as persistent.
        volume_properties = self._get_volume_properties(volume_uuid)
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS

    def destroy_volume(self, volume_uuid):
        """
        Destroy a volume.
        :param str volume_uuid: The volume uuid to destroy.
        """

        self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        # Mark volume as destroyed.
        volume_properties = self._get_volume_properties(volume_uuid)
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS

        try:
            self._volumes.remove(volume_uuid)
            self._destroy_volume(volume_uuid)
        except Exception as e:
            raise LinstorVolumeManagerError(
                str(e),
                LinstorVolumeManagerError.ERR_VOLUME_DESTROY
            )

    def lock_volume(self, volume_uuid, locked=True):
        """
        Prevent modifications of the volume properties during
        "self.LOCKED_EXPIRATION_DELAY" seconds. The SR must be locked
        when used. This method is useful to correctly attach/detach a volume
        on a slave. Without it the GC can rename a volume; in this case the
        old volume path could still be used by a slave...
        :param str volume_uuid: The volume uuid to protect/unprotect.
        :param bool locked: Lock/unlock the volume.
        """

        self._ensure_volume_exists(volume_uuid)

        self._logger(
            '{} volume {} as locked'.format(
                'Mark' if locked else 'Unmark',
                volume_uuid
            )
        )

        volume_properties = self._get_volume_properties(volume_uuid)
        if locked:
            volume_properties[
                self.PROP_IS_READONLY_TIMESTAMP
            ] = str(time.time())
        elif self.PROP_IS_READONLY_TIMESTAMP in volume_properties:
            volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)

    def ensure_volume_is_not_locked(self, volume_uuid, timeout=None):
        """
        Ensure a volume is not locked. Wait if necessary.
        :param str volume_uuid: The volume uuid to check.
        :param int timeout: If the volume is still locked after the timeout
        expires, an exception is thrown.
        """
        return self.ensure_volume_list_is_not_locked([volume_uuid], timeout)

    def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None):
        checked = set()
        for volume_uuid in volume_uuids:
            if volume_uuid in self._volumes:
                checked.add(volume_uuid)

        if not checked:
            return

        waiting = False

        volume_properties = self._get_kv_cache()

        start = time.time()
        while True:
            # Can't delete in a for loop, use a copy of the list.
            remaining = checked.copy()
            for volume_uuid in checked:
                volume_properties.namespace = \
                    self._build_volume_namespace(volume_uuid)
                timestamp = volume_properties.get(
                    self.PROP_IS_READONLY_TIMESTAMP
                )
                if timestamp is None:
                    remaining.remove(volume_uuid)
                    continue

                now = time.time()
                if now - float(timestamp) > self.LOCKED_EXPIRATION_DELAY:
                    self._logger(
                        'Remove readonly timestamp on {}'.format(volume_uuid)
                    )
                    volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
                    remaining.remove(volume_uuid)
                    continue

                if not waiting:
                    self._logger(
                        'Volume {} is locked, waiting...'.format(volume_uuid)
                    )
                    waiting = True
                break

            if not remaining:
                break
            checked = remaining

            if timeout is not None and now - start > timeout:
                raise LinstorVolumeManagerError(
                    'volume `{}` is locked and timeout has been reached'
                    .format(volume_uuid),
                    LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
                )

            # We must wait to use the volume. After that we can modify it
            # ONLY if the SR is locked to avoid bad reads on the slaves.
            time.sleep(1)
            volume_properties = self._create_kv_cache()

        if waiting:
            self._logger('No volume locked now!')

    def remove_volume_if_diskless(self, volume_uuid):
        """
        Remove the diskless path from the local node.
        :param str volume_uuid: The volume uuid to remove.
        """

        self._ensure_volume_exists(volume_uuid)

        volume_properties = self._get_volume_properties(volume_uuid)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        node_name = socket.gethostname()

        for resource in self._get_resource_cache().resources:
            if resource.name == volume_name and resource.node_name == node_name:
                if linstor.consts.FLAG_TIE_BREAKER in resource.flags:
                    return
                break

        result = self._linstor.resource_delete_if_diskless(
            node_name=node_name, rsc_name=volume_name
        )
        if not linstor.Linstor.all_api_responses_no_error(result):
            raise LinstorVolumeManagerError(
                'Unable to delete diskless path of `{}` on node `{}`: {}'
                .format(volume_name, node_name, ', '.join(
                    [str(x) for x in result]))
            )

    def introduce_volume(self, volume_uuid):
        pass  # TODO: Implement me.

    def resize_volume(self, volume_uuid, new_size):
        """
        Resize a volume.
        :param str volume_uuid: The volume uuid to resize.
        :param int new_size: New size in B.
        """

        volume_name = self.get_volume_name(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)
        new_size = self.round_up_volume_size(new_size) // 1024

        retry_count = 30
        while True:
            result = self._linstor.volume_dfn_modify(
                rsc_name=volume_name,
                volume_nr=0,
                size=new_size
            )

            self._mark_resource_cache_as_dirty()

            error_str = self._get_error_str(result)
            if not error_str:
                break

            # After volume creation, the DRBD volume can be unusable for many
            # seconds, so we must retry the definition change if the device
            # is not up to date. Often the case for thick provisioning.
            if retry_count and error_str.find('non-UpToDate DRBD device') >= 0:
                time.sleep(2)
                retry_count -= 1
                continue

            raise LinstorVolumeManagerError(
                'Could not resize volume `{}` from SR `{}`: {}'
                .format(volume_uuid, self._group_name, error_str)
            )

    def get_volume_name(self, volume_uuid):
        """
        Get the name of a particular volume.
        :param str volume_uuid: The volume uuid of the name to get.
        :return: The volume name.
        :rtype: str
        """

        self._ensure_volume_exists(volume_uuid)
        volume_properties = self._get_volume_properties(volume_uuid)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
        if volume_name:
            return volume_name
        raise LinstorVolumeManagerError(
            'Failed to get volume name of {}'.format(volume_uuid)
        )

    def get_volume_size(self, volume_uuid):
        """
        Get the size of a particular volume.
        :param str volume_uuid: The volume uuid of the size to get.
        :return: The volume size.
        :rtype: int
        """

        volume_name = self.get_volume_name(volume_uuid)
        dfns = self._linstor.resource_dfn_list_raise(
            query_volume_definitions=True,
            filter_by_resource_definitions=[volume_name]
        ).resource_definitions

        size = dfns[0].volume_definitions[0].size
        if size < 0:
            raise LinstorVolumeManagerError(
                'Failed to get volume size of: {}'.format(volume_uuid)
            )
        return size * 1024

    def set_auto_promote_timeout(self, volume_uuid, timeout):
        """
        Define the blocking time of open calls when a DRBD device
        is already open on another host.
        :param str volume_uuid: The volume uuid to modify.
        :param int timeout: The auto promote timeout to set.
        """

        volume_name = self.get_volume_name(volume_uuid)
        result = self._linstor.resource_dfn_modify(volume_name, {
            'DrbdOptions/Resource/auto-promote-timeout': timeout
        })
        error_str = self._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not change the auto promote timeout of `{}`: {}'
                .format(volume_uuid, error_str)
            )

    def get_volume_info(self, volume_uuid):
        """
        Get the volume info of a particular volume.
        :param str volume_uuid: The volume uuid of the volume info to get.
        :return: The volume info.
        :rtype: VolumeInfo
        """

        volume_name = self.get_volume_name(volume_uuid)
        return self._get_volumes_info()[volume_name]

    def get_device_path(self, volume_uuid):
        """
        Get the dev path of a volume, create a diskless if necessary.
        :param str volume_uuid: The volume uuid to get the dev path.
        :return: The current device path of the volume.
        :rtype: str
        """

        volume_name = self.get_volume_name(volume_uuid)
        return self._find_device_path(volume_uuid, volume_name)

    def get_volume_uuid_from_device_path(self, device_path):
        """
        Get the volume uuid of a device_path.
        :param str device_path: The dev path to find the volume uuid.
        :return: The volume uuid of the local device path.
        :rtype: str
        """

        expected_volume_name = \
            self.get_volume_name_from_device_path(device_path)

        volume_names = self.get_volumes_with_name()
        for volume_uuid, volume_name in volume_names.items():
            if volume_name == expected_volume_name:
                return volume_uuid

        raise LinstorVolumeManagerError(
            'Unable to find volume uuid from dev path `{}`'.format(device_path)
        )

    def get_volume_name_from_device_path(self, device_path):
        """
        Get the volume name of a device_path.
        :param str device_path: The dev path to find the volume name.
        :return: The volume name of the device path.
        :rtype: str
        """

        # Assume that we have a path like this:
        # - "/dev/drbd/by-res/xcp-volume-<UUID>/0"
        # - "../xcp-volume-<UUID>/0"
        if device_path.startswith(DRBD_BY_RES_PATH):
            prefix_len = len(DRBD_BY_RES_PATH)
        else:
            assert device_path.startswith('../')
            prefix_len = 3

        res_name_end = device_path.find('/', prefix_len)
        assert res_name_end != -1
        return device_path[prefix_len:res_name_end]
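
    # Illustrative parse (the UUID is a placeholder):
    #   manager.get_volume_name_from_device_path(
    #       '/dev/drbd/by-res/xcp-volume-1234/0'
    #   )  # -> 'xcp-volume-1234'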

    def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False):
        """
        Change the uuid of a volume.
        :param str volume_uuid: The volume to modify.
        :param str new_volume_uuid: The new volume uuid to use.
        :param bool force: If true we don't check whether volume_uuid is in
        the volume list. I.e. the volume can be marked as deleted but still
        present in the LINSTOR KV store if the deletion has failed.
        In specific cases, like "undo" after a failed clone, we must rename
        a badly deleted VDI.
        """

        self._logger(
            'Trying to update volume UUID {} to {}...'
            .format(volume_uuid, new_volume_uuid)
        )
        assert volume_uuid != new_volume_uuid, 'can\'t update volume UUID, same value'

        if not force:
            self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        if new_volume_uuid in self._volumes:
            raise LinstorVolumeManagerError(
                'Volume `{}` already exists'.format(new_volume_uuid),
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        volume_properties = self._get_volume_properties(volume_uuid)
        if volume_properties.get(self.PROP_UPDATING_UUID_SRC):
            raise LinstorVolumeManagerError(
                'Cannot update volume uuid {}: invalid state'
                .format(volume_uuid)
            )

        # 1. Copy metadata and volume_name into temp variables.
        metadata = volume_properties.get(self.PROP_METADATA)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        # 2. Switch to the new volume namespace.
        volume_properties.namespace = self._build_volume_namespace(
            new_volume_uuid
        )

        if list(volume_properties.items()):
            raise LinstorVolumeManagerError(
                'Cannot update volume uuid {} to {}: '
                .format(volume_uuid, new_volume_uuid) +
                'the target namespace is not empty'
            )

        try:
            # 3. Mark new volume properties with PROP_UPDATING_UUID_SRC.
            # If we crash after that, the new properties can be removed
            # properly.
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS
            volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid

            # 4. Copy the properties.
            # Note: On new volumes, during clone for example, the metadata
            # may be missing. So we must test it to avoid this error:
            # "None has to be a str/unicode, but is <type 'NoneType'>"
            if metadata:
                volume_properties[self.PROP_METADATA] = metadata
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            # 5. Ok!
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
        except Exception as e:
            try:
                # Clear the new volume properties in case of failure.
                assert volume_properties.namespace == \
                    self._build_volume_namespace(new_volume_uuid)
                volume_properties.clear()
            except Exception as clear_error:  # Don't shadow `e`, it's reused below.
                self._logger(
                    'Failed to clear new volume properties: {} (ignoring...)'
                    .format(clear_error)
                )
            raise LinstorVolumeManagerError(
                'Failed to copy volume properties: {}'.format(e)
            )

        try:
            # 6. After this point it's ok: we can remove the
            # PROP_UPDATING_UUID_SRC property and clear the src properties
            # without problems.

            # 7. Switch to the old volume namespace.
            volume_properties.namespace = self._build_volume_namespace(
                volume_uuid
            )
            volume_properties.clear()

            # 8. Switch one last time to the new volume namespace.
            volume_properties.namespace = self._build_volume_namespace(
                new_volume_uuid
            )
            volume_properties.pop(self.PROP_UPDATING_UUID_SRC)
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to clear volume properties '
                'after volume uuid update: {}'.format(e)
            )

        self._volumes.remove(volume_uuid)
        self._volumes.add(new_volume_uuid)

        self._logger(
            'UUID update succeeded: {} to {}! (properties={})'
            .format(
                volume_uuid, new_volume_uuid,
                self._get_filtered_properties(volume_properties)
            )
        )

    def update_volume_name(self, volume_uuid, volume_name):
        """
        Change the volume name of a volume.
        :param str volume_uuid: The volume to modify.
        :param str volume_name: The volume_name to use.
        """

        self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)
        if not volume_name.startswith(self.PREFIX_VOLUME):
            raise LinstorVolumeManagerError(
                'Volume name `{}` must start with `{}`'
                .format(volume_name, self.PREFIX_VOLUME)
            )

        if volume_name not in self._fetch_resource_names():
            raise LinstorVolumeManagerError(
                'Volume `{}` doesn\'t exist'.format(volume_name)
            )

        volume_properties = self._get_volume_properties(volume_uuid)
        volume_properties[self.PROP_VOLUME_NAME] = volume_name

    def get_usage_states(self, volume_uuid):
        """
        Check if a volume is currently used.
        :param str volume_uuid: The volume uuid to check.
        :return: A dictionary that contains states.
        :rtype: dict(str, bool or None)
        """

        states = {}

        volume_name = self.get_volume_name(volume_uuid)
        for resource_state in self._linstor.resource_list_raise(
            filter_by_resources=[volume_name]
        ).resource_states:
            states[resource_state.node_name] = resource_state.in_use

        return states
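
    # Illustrative return value (node names invented for the example):
    #   {'node-1': True, 'node-2': False, 'node-3': None}
    # Per the :rtype: above, a state may be None when the in-use
    # status is not reported for that node.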

    def get_volume_openers(self, volume_uuid):
        """
        Get the openers of a volume.
        :param str volume_uuid: The volume uuid to monitor.
        :return: A dictionary that contains openers.
        :rtype: dict(str, obj)
        """
        return get_all_volume_openers(self.get_volume_name(volume_uuid), '0')

    def get_volumes_with_name(self):
        """
        Give a volume dictionary that contains the names actually owned.
        :return: A volume/name dict.
        :rtype: dict(str, str)
        """
        return self._get_volumes_by_property(self.REG_VOLUME_NAME)

    def get_volumes_with_info(self):
        """
        Give a volume dictionary that contains VolumeInfos.
        :return: A volume/VolumeInfo dict.
        :rtype: dict(str, VolumeInfo)
        """

        volumes = {}

        all_volume_info = self._get_volumes_info()
        volume_names = self.get_volumes_with_name()
        for volume_uuid, volume_name in volume_names.items():
            if volume_name:
                volume_info = all_volume_info.get(volume_name)
                if volume_info:
                    volumes[volume_uuid] = volume_info
                    continue

            # Well, I suppose if this volume is not available,
            # LINSTOR has been used directly without using this API.
            volumes[volume_uuid] = self.VolumeInfo('')

        return volumes

    def get_volumes_with_metadata(self):
        """
        Give a volume dictionary that contains metadata.
        :return: A volume/metadata dict.
        :rtype: dict(str, dict)
        """

        volumes = {}

        metadata = self._get_volumes_by_property(self.REG_METADATA)
        for volume_uuid, volume_metadata in metadata.items():
            if volume_metadata:
                volume_metadata = json.loads(volume_metadata)
                if isinstance(volume_metadata, dict):
                    volumes[volume_uuid] = volume_metadata
                    continue
                raise LinstorVolumeManagerError(
                    'Expected dictionary in volume metadata: {}'
                    .format(volume_uuid)
                )

            volumes[volume_uuid] = {}

        return volumes

    def get_volume_metadata(self, volume_uuid):
        """
        Get the metadata of a volume.
        :return: Dictionary that contains metadata.
        :rtype: dict
        """

        self._ensure_volume_exists(volume_uuid)
        volume_properties = self._get_volume_properties(volume_uuid)
        metadata = volume_properties.get(self.PROP_METADATA)
        if metadata:
            metadata = json.loads(metadata)
            if isinstance(metadata, dict):
                return metadata
            raise LinstorVolumeManagerError(
                'Expected dictionary in volume metadata: {}'
                .format(volume_uuid)
            )
        return {}

    def set_volume_metadata(self, volume_uuid, metadata):
        """
        Set the metadata of a volume.
        :param dict metadata: Dictionary that contains metadata.
        """

        self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        assert isinstance(metadata, dict)
        volume_properties = self._get_volume_properties(volume_uuid)
        volume_properties[self.PROP_METADATA] = json.dumps(metadata)

    def update_volume_metadata(self, volume_uuid, metadata):
        """
        Update the metadata of a volume. It modifies only the given keys;
        unlike set_volume_metadata, it doesn't remove unreferenced keys.
        :param dict metadata: Dictionary that contains metadata.
        """

        self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        assert isinstance(metadata, dict)
        volume_properties = self._get_volume_properties(volume_uuid)

        current_metadata = json.loads(
            volume_properties.get(self.PROP_METADATA, '{}')
        )
        if not isinstance(current_metadata, dict):
            raise LinstorVolumeManagerError(
                'Expected dictionary in volume metadata: {}'
                .format(volume_uuid)
            )

        for key, value in metadata.items():
            current_metadata[key] = value
        volume_properties[self.PROP_METADATA] = json.dumps(current_metadata)

    def shallow_clone_volume(self, volume_uuid, clone_uuid, persistent=True):
        """
        Clone a volume. This method does not copy the data: it creates a new
        volume with the same size.
        :param str volume_uuid: The volume to clone.
        :param str clone_uuid: The cloned volume.
        :param bool persistent: If false the volume will be unavailable
        on the next constructor call LinstorSR(...).
        :return: The current device path of the cloned volume.
        :rtype: str
        """

        volume_name = self.get_volume_name(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        # 1. Find ideal nodes + size to use.
        ideal_node_names, size = self._get_volume_node_names_and_size(
            volume_name
        )
        if size <= 0:
            raise LinstorVolumeManagerError(
                'Invalid size of {} for volume `{}`'.format(size, volume_name)
            )

        # 2. Create the clone!
        return self.create_volume(clone_uuid, size, persistent)

    def remove_resourceless_volumes(self):
        """
        Remove all volumes without a valid or non-empty name
        (i.e. without a LINSTOR resource). This is different from the
        LinstorVolumeManager constructor, whose `repair` param removes
        volumes with `PROP_NOT_EXISTS` set to 1.
        """

        resource_names = self._fetch_resource_names()
        for volume_uuid, volume_name in self.get_volumes_with_name().items():
            if not volume_name or volume_name not in resource_names:
                # Don't force, we can be sure of what's happening.
                self.destroy_volume(volume_uuid)

    def destroy(self):
        """
        Destroy this SR. The object should not be used after that.
        """

        # 1. Ensure the volume list is empty. No cost.
        if self._volumes:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'some volumes remain'
            )

        # 2. Fetch ALL resource names.
        # This list may therefore contain volumes created outside
        # the scope of the driver.
        resource_names = self._fetch_resource_names(ignore_deleted=False)
        try:
            resource_names.remove(DATABASE_VOLUME_NAME)
        except KeyError:
            # Really strange to reach that point.
            # Normally we always have the database volume in the list.
            pass

        # 3. Ensure the resource name list is entirely empty...
        if resource_names:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'some volumes remain (created externally or being deleted)'
            )

        # 4. Destroying...
        controller_is_running = self._controller_is_running()
        uri = 'linstor://localhost'
        try:
            if controller_is_running:
                self._start_controller(start=False)

            # 4.1. Umount the LINSTOR database.
            self._mount_database_volume(
                self.build_device_path(DATABASE_VOLUME_NAME),
                mount=False,
                force=True
            )

            # 4.2. Refresh instance.
            self._start_controller(start=True)
            self._linstor = self._create_linstor_instance(
                uri, keep_uri_unmodified=True
            )

            # 4.3. Destroy the database volume.
            self._destroy_resource(DATABASE_VOLUME_NAME)

            # 4.4. Refresh the LINSTOR connection.
            # Without it we get this error:
            # "Cannot delete resource group 'xcp-sr-linstor_group_thin_device' because it has existing resource definitions.."
            # This happens because the deletion of the database was not seen by LINSTOR for some reason.
            # A simple refresh of the LINSTOR connection seems to make it aware of the deletion.
            self._linstor.disconnect()
            self._linstor.connect()

            # 4.5. Destroy remaining DRBD nodes on hosts.
            # We check if there is a DRBD node on hosts that could block the destruction of resource groups.
            # It needs to be done locally by each host, so we go through the linstor-manager plugin.
            # If we don't do this, the destroy will sometimes fail when trying to destroy the resource groups with:
            # "linstor-manager:destroy error: Failed to destroy SP `xcp-sr-linstor_group_thin_device` on node `r620-s2`: The specified storage pool 'xcp-sr-linstor_group_thin_device' on node 'r620-s2' can not be deleted as volumes / snapshot-volumes are still using it."
            session = util.timeout_call(5, util.get_localAPI_session)
            for host_ref in session.xenapi.host.get_all():
                try:
                    response = session.xenapi.host.call_plugin(
                        host_ref, 'linstor-manager', 'destroyDrbdVolumes', {'volume_group': self._group_name}
                    )
                except Exception as e:
                    util.SMlog('Calling destroyDrbdVolumes on host {} failed with error {}'.format(host_ref, e))

            # 4.6. Destroy the group and storage pools.
            self._destroy_resource_group(self._linstor, self._group_name)
            self._destroy_resource_group(self._linstor, self._ha_group_name)
            for pool in self._get_storage_pools(force=True):
                self._destroy_storage_pool(
                    self._linstor, pool.name, pool.node_name
                )
        except Exception as e:
            self._start_controller(start=controller_is_running)
            raise e

        try:
            self._start_controller(start=False)
            for file in os.listdir(DATABASE_PATH):
                if file != 'lost+found':
                    os.remove(DATABASE_PATH + '/' + file)
        except Exception as e:
            util.SMlog(
                'Ignoring failure after LINSTOR SR destruction: {}'
                .format(e)
            )

    def find_up_to_date_diskful_nodes(self, volume_uuid):
        """
        Find all nodes that contain a specific volume using diskful disks.
        The disk must be up to date to be used.
        :param str volume_uuid: The volume to use.
        :return: The available nodes.
        :rtype: tuple(set(str), str)
        """

        volume_name = self.get_volume_name(volume_uuid)

        in_use_by = None
        node_names = set()

        resource_states = filter(
            lambda resource_state: resource_state.name == volume_name,
            self._get_resource_cache().resource_states
        )

        for resource_state in resource_states:
            volume_state = resource_state.volume_states[0]
            if volume_state.disk_state == 'UpToDate':
                node_names.add(resource_state.node_name)
            if resource_state.in_use:
                in_use_by = resource_state.node_name

        return (node_names, in_use_by)

    def invalidate_resource_cache(self):
        """
        If resources are impacted by external commands like vhdutil,
        it's necessary to call this function to invalidate the current
        resource cache.
        """
        self._mark_resource_cache_as_dirty()

    def has_node(self, node_name):
        """
        Check if a node exists in the LINSTOR database.
        :rtype: bool
        """
        result = self._linstor.node_list()
        error_str = self._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Failed to list nodes using `{}`: {}'
                .format(node_name, error_str)
            )
        return bool(result[0].node(node_name))

    def create_node(self, node_name, ip):
        """
        Create a new node in the LINSTOR database.
        :param str node_name: Node name to use.
        :param str ip: Host IP to communicate with.
        """
        result = self._linstor.node_create(
            node_name,
            linstor.consts.VAL_NODE_TYPE_CMBD,
            ip
        )
        errors = self._filter_errors(result)
        if errors:
            error_str = self._get_error_str(errors)
            raise LinstorVolumeManagerError(
                'Failed to create node `{}`: {}'.format(node_name, error_str)
            )

    def destroy_node(self, node_name):
        """
        Destroy a node in the LINSTOR database.
        :param str node_name: Node name to remove.
        """
        result = self._linstor.node_delete(node_name)
        errors = self._filter_errors(result)
        if errors:
            error_str = self._get_error_str(errors)
            raise LinstorVolumeManagerError(
                'Failed to destroy node `{}`: {}'.format(node_name, error_str)
            )

    def create_node_interface(self, node_name, name, ip):
        """
        Create a new node interface in the LINSTOR database.
        :param str node_name: Node name of the interface to use.
        :param str name: Interface to create.
        :param str ip: IP of the interface.
        """
        result = self._linstor.netinterface_create(node_name, name, ip)
        errors = self._filter_errors(result)
        if errors:
            error_str = self._get_error_str(errors)
            raise LinstorVolumeManagerError(
                'Failed to create node interface on `{}`: {}'.format(node_name, error_str)
            )

    def destroy_node_interface(self, node_name, name):
        """
        Destroy a node interface in the LINSTOR database.
        :param str node_name: Node name of the interface to remove.
        :param str name: Interface to remove.
        """

        if name == 'default':
            raise LinstorVolumeManagerError(
                'Unable to delete the default interface of a node!'
            )

        result = self._linstor.netinterface_delete(node_name, name)
        errors = self._filter_errors(result)
        if errors:
            error_str = self._get_error_str(errors)
            raise LinstorVolumeManagerError(
                'Failed to destroy node interface on `{}`: {}'.format(node_name, error_str)
            )

    def modify_node_interface(self, node_name, name, ip):
        """
        Modify a node interface in the LINSTOR database. Create it if necessary.
        :param str node_name: Node name of the interface to use.
        :param str name: Interface to modify or create.
        :param str ip: IP of the interface.
        """
        result = self._linstor.netinterface_create(node_name, name, ip)
        errors = self._filter_errors(result)
        if not errors:
            return

        if self._check_errors(errors, [linstor.consts.FAIL_EXISTS_NET_IF]):
            result = self._linstor.netinterface_modify(node_name, name, ip)
            errors = self._filter_errors(result)
            if not errors:
                return

        error_str = self._get_error_str(errors)
        raise LinstorVolumeManagerError(
            'Unable to modify interface on `{}`: {}'.format(node_name, error_str)
        )

    def list_node_interfaces(self, node_name):
        """
        List all node interfaces.
        :param str node_name: Node name to use to list interfaces.
        :return: A dict of interface properties, keyed by interface name.
        :rtype: dict(str, dict)
        """
        result = self._linstor.net_interface_list(node_name)
        if not result:
            raise LinstorVolumeManagerError(
                'Unable to list interfaces on `{}`: no list received'.format(node_name)
            )

        interfaces = {}
        for interface in result:
            interface = interface._rest_data
            interfaces[interface['name']] = {
                'address': interface['address'],
                'active': interface['is_active']
            }
        return interfaces

    def get_node_preferred_interface(self, node_name):
        """
        Get the preferred interface used by a node.
        :param str node_name: Node name of the interface to get.
        :rtype: str
        """
        try:
            nodes = self._linstor.node_list_raise([node_name]).nodes
            if nodes:
                properties = nodes[0].props
                return properties.get('PrefNic', 'default')
            return nodes
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to get preferred interface: `{}`'.format(e)
            )

    def set_node_preferred_interface(self, node_name, name):
        """
        Set the preferred interface to use on a node.
        :param str node_name: Node name of the interface.
        :param str name: Preferred interface to use.
        """
        result = self._linstor.node_modify(node_name, property_dict={'PrefNic': name})
        errors = self._filter_errors(result)
        if errors:
            error_str = self._get_error_str(errors)
            raise LinstorVolumeManagerError(
                'Failed to set preferred node interface on `{}`: {}'.format(node_name, error_str)
            )

    def get_nodes_info(self):
        """
        Get all nodes + statuses, used or not by the pool.
        :rtype: dict(str, dict)
        """
        try:
            nodes = {}
            for node in self._linstor.node_list_raise().nodes:
                nodes[node.name] = node.connection_status
            return nodes
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to get all nodes: `{}`'.format(e)
            )

    def get_storage_pools_info(self):
        """
        Give all storage pools of the current group name.
        :rtype: dict(str, list)
        """
        storage_pools = {}
        for pool in self._get_storage_pools(force=True):
            if pool.node_name not in storage_pools:
                storage_pools[pool.node_name] = []

            size = -1
            capacity = -1

            space = pool.free_space
            if space:
                size = space.free_capacity
                if size < 0:
                    size = -1
                else:
                    size *= 1024
                capacity = space.total_capacity
                if capacity <= 0:
                    capacity = -1
                else:
                    capacity *= 1024

            storage_pools[pool.node_name].append({
                'name': pool.name,
                'linstor-uuid': pool.uuid,
                'free-size': size,
                'capacity': capacity
            })

        return storage_pools

1655 

1656 def get_resources_info(self): 

1657 """ 

1658 Give all resources of current group name. 

1659 :rtype: dict(str, list) 

1660 """ 

1661 resources = {} 

1662 resource_list = self._get_resource_cache() 

1663 volume_names = self.get_volumes_with_name() 

1664 for resource in resource_list.resources: 

1665 if resource.name not in resources: 

1666 resources[resource.name] = { 'nodes': {}, 'uuid': '' } 

1667 resource_nodes = resources[resource.name]['nodes'] 

1668 

1669 resource_nodes[resource.node_name] = { 

1670 'volumes': [], 

1671 'diskful': linstor.consts.FLAG_DISKLESS not in resource.flags, 

1672 'tie-breaker': linstor.consts.FLAG_TIE_BREAKER in resource.flags 

1673 } 

1674 resource_volumes = resource_nodes[resource.node_name]['volumes'] 

1675 

1676 for volume in resource.volumes: 

1677 # We ignore diskless pools of the form "DfltDisklessStorPool". 

1678 if volume.storage_pool_name != self._group_name: 

1679 continue 

1680 

1681 usable_size = volume.usable_size 

1682 if usable_size < 0: 

1683 usable_size = -1 

1684 else: 

1685 usable_size *= 1024 

1686 

1687 allocated_size = volume.allocated_size 

1688 if allocated_size < 0: 

1689 allocated_size = -1 

1690 else: 

1691 allocated_size *= 1024 

1692 

1693 resource_volumes.append({ 

1694 'storage-pool-name': volume.storage_pool_name, 

1695 'linstor-uuid': volume.uuid, 

1696 'number': volume.number, 

1697 'device-path': volume.device_path, 

1698 'usable-size': usable_size, 

1699 'allocated-size': allocated_size 

1700 }) 

1701 

1702 for resource_state in resource_list.resource_states: 

1703 resource = resources[resource_state.rsc_name]['nodes'][resource_state.node_name] 

1704 resource['in-use'] = resource_state.in_use 

1705 

1706 volumes = resource['volumes'] 

1707 for volume_state in resource_state.volume_states: 

1708 volume = next((x for x in volumes if x['number'] == volume_state.number), None) 

1709 if volume: 

1710 volume['disk-state'] = volume_state.disk_state 

1711 

1712 for volume_uuid, volume_name in volume_names.items(): 

1713 resource = resources.get(volume_name) 

1714 if resource: 

1715 resource['uuid'] = volume_uuid 

1716 

1717 return resources 

1718 
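
# Hypothetical sketch: list which hosts hold a diskful replica of each
# resource, using the structure built above.
#
#     for res_name, res in manager.get_resources_info().items():
#         diskful = [n for n, d in res['nodes'].items() if d['diskful']]
#         util.SMlog('{} ({}): {}'.format(res_name, res['uuid'], diskful))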

1719 def get_database_path(self): 

1720 """ 

1721 Get the database path. 

1722 :return: The current database path. 

1723 :rtype: str 

1724 """ 

1725 return self._request_database_path(self._linstor) 

1726 

1727 @classmethod 

1728 def get_all_group_names(cls, base_name): 

1729 """ 

1730 Get all group names, i.e. the current group plus the HA group. 

1731 :param str base_name: The SR group_name to use. 

1732 :return: List of group names. 

1733 :rtype: list 

1734 """ 

1735 return [cls._build_group_name(base_name), cls._build_ha_group_name(base_name)] 

1736 

1737 @classmethod 

1738 def create_sr( 

1739 cls, group_name, ips, redundancy, 

1740 thin_provisioning, auto_quorum, 

1741 logger=default_logger.__func__ 

1742 ): 

1743 """ 

1744 Create a new SR on the given nodes. 

1745 :param str group_name: The SR group_name to use. 

1746 :param dict(str, str) ips: Node names mapped to their IP addresses. 

1747 :param int redundancy: How many copies of each volume to store. 

1748 :param bool thin_provisioning: Use thin or thick provisioning. 

1749 :param bool auto_quorum: DB quorum is monitored by LINSTOR. 

1750 :param function logger: Function to log messages. 

1751 :return: A new LinstorSr instance. 

1752 :rtype: LinstorSr 

1753 """ 

1754 

1755 try: 

1756 cls._start_controller(start=True) 

1757 sr = cls._create_sr( 

1758 group_name, 

1759 ips, 

1760 redundancy, 

1761 thin_provisioning, 

1762 auto_quorum, 

1763 logger 

1764 ) 

1765 finally: 

1766 # The controller must be stopped and the volume unmounted here 

1767 # because it is the drbd-reactor daemon's role to perform these 

1768 # actions. 

1769 cls._start_controller(start=False) 

1770 cls._mount_volume( 

1771 cls.build_device_path(DATABASE_VOLUME_NAME), 

1772 DATABASE_PATH, 

1773 mount=False 

1774 ) 

1775 return sr 

1776 
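
# Hypothetical sketch of an SR creation call. The group name, node names
# and IPs below are illustrative; `ips` maps node names to addresses and
# the 'VG/LV' form is required for thin provisioning.
#
#     sr = LinstorVolumeManager.create_sr(
#         'linstor_group/thin_device',
#         {'host-1': '172.16.0.1', 'host-2': '172.16.0.2'},
#         redundancy=2,
#         thin_provisioning=True,
#         auto_quorum=True
#     )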

1777 @classmethod 

1778 def _create_sr( 

1779 cls, group_name, ips, redundancy, 

1780 thin_provisioning, auto_quorum, 

1781 logger=default_logger.__func__ 

1782 ): 

1783 # 1. Check if SR already exists. 

1784 uri = 'linstor://localhost' 

1785 

1786 lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True) 

1787 

1788 node_names = list(ips.keys()) 

1789 for node_name, ip in ips.items(): 

1790 while True: 

1791 # Try to create node. 

1792 result = lin.node_create( 

1793 node_name, 

1794 linstor.consts.VAL_NODE_TYPE_CMBD, 

1795 ip 

1796 ) 

1797 

1798 errors = cls._filter_errors(result) 

1799 if cls._check_errors( 

1800 errors, [linstor.consts.FAIL_EXISTS_NODE] 

1801 ): 

1802 # If it already exists, remove, then recreate. 

1803 result = lin.node_delete(node_name) 

1804 error_str = cls._get_error_str(result) 

1805 if error_str: 

1806 raise LinstorVolumeManagerError( 

1807 'Failed to remove old node `{}`: {}' 

1808 .format(node_name, error_str) 

1809 ) 

1810 elif not errors: 

1811 break # Created! 

1812 else: 

1813 raise LinstorVolumeManagerError( 

1814 'Failed to create node `{}` with ip `{}`: {}'.format( 

1815 node_name, ip, cls._get_error_str(errors) 

1816 ) 

1817 ) 

1818 

1819 driver_pool_name = group_name 

1820 base_group_name = group_name 

1821 group_name = cls._build_group_name(group_name) 

1822 storage_pool_name = group_name 

1823 pools = lin.storage_pool_list_raise(filter_by_stor_pools=[storage_pool_name]).storage_pools 

1824 if pools: 

1825 existing_node_names = [pool.node_name for pool in pools] 

1826 raise LinstorVolumeManagerError( 

1827 'Unable to create SR `{}`. It already exists on node(s): {}' 

1828 .format(group_name, existing_node_names) 

1829 ) 

1830 

1831 if lin.resource_group_list_raise( 

1832 cls.get_all_group_names(base_group_name) 

1833 ).resource_groups: 

1834 if not lin.resource_dfn_list_raise().resource_definitions: 

1835 backup_path = cls._create_database_backup_path() 

1836 logger( 

1837 'Group name `{}` already exists without LVs. ' 

1838 'Ignoring it and moving the config files to {}'.format(group_name, backup_path) 

1839 ) 

1840 cls._move_files(DATABASE_PATH, backup_path) 

1841 else: 

1842 raise LinstorVolumeManagerError( 

1843 'Unable to create SR `{}`: The group name already exists' 

1844 .format(group_name) 

1845 ) 

1846 

1847 if thin_provisioning: 

1848 driver_pool_parts = driver_pool_name.split('/') 

1849 if len(driver_pool_parts) != 2: 

1850 raise LinstorVolumeManagerError( 

1851 'Invalid group name using thin provisioning. ' 

1852 'Expected format: \'VG/LV\'' 

1853 ) 

1854 

1855 # 2. Create storage pool on each node + resource group. 

1856 reg_volume_group_not_found = re.compile( 

1857 ".*Volume group '.*' not found$" 

1858 ) 

1859 

1860 i = 0 

1861 try: 

1862 # 2.a. Create storage pools. 

1863 storage_pool_count = 0 

1864 while i < len(node_names): 

1865 node_name = node_names[i] 

1866 

1867 result = lin.storage_pool_create( 

1868 node_name=node_name, 

1869 storage_pool_name=storage_pool_name, 

1870 storage_driver='LVM_THIN' if thin_provisioning else 'LVM', 

1871 driver_pool_name=driver_pool_name 

1872 ) 

1873 

1874 errors = linstor.Linstor.filter_api_call_response_errors( 

1875 result 

1876 ) 

1877 if errors: 

1878 if len(errors) == 1 and errors[0].is_error( 

1879 linstor.consts.FAIL_STOR_POOL_CONFIGURATION_ERROR 

1880 ) and reg_volume_group_not_found.match(errors[0].message): 

1881 logger( 

1882 'Volume group `{}` not found on `{}`. Ignoring...' 

1883 .format(group_name, node_name) 

1884 ) 

1885 cls._destroy_storage_pool(lin, storage_pool_name, node_name) 

1886 else: 

1887 error_str = cls._get_error_str(result) 

1888 raise LinstorVolumeManagerError( 

1889 'Could not create SP `{}` on node `{}`: {}' 

1890 .format(group_name, node_name, error_str) 

1891 ) 

1892 else: 

1893 storage_pool_count += 1 

1894 i += 1 

1895 

1896 if not storage_pool_count: 

1897 raise LinstorVolumeManagerError( 

1898 'Unable to create SR `{}`: no volume group found'.format( 

1899 group_name, 

1900 ) 

1901 ) 

1902 

1903 # 2.b. Create resource groups. 

1904 ha_group_name = cls._build_ha_group_name(base_group_name) 

1905 cls._create_resource_group( 

1906 lin, 

1907 group_name, 

1908 storage_pool_name, 

1909 redundancy, 

1910 True 

1911 ) 

1912 cls._create_resource_group( 

1913 lin, 

1914 ha_group_name, 

1915 storage_pool_name, 

1916 3, 

1917 True 

1918 ) 

1919 

1920 # 3. Create the LINSTOR database volume and mount it. 

1921 try: 

1922 logger('Creating database volume...') 

1923 volume_path = cls._create_database_volume( 

1924 lin, ha_group_name, storage_pool_name, node_names, redundancy, auto_quorum 

1925 ) 

1926 except LinstorVolumeManagerError as e: 

1927 if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS: 

1928 logger('Destroying database volume after creation fail...') 

1929 cls._force_destroy_database_volume(lin, group_name) 

1930 raise 

1931 

1932 try: 

1933 logger('Mounting database volume...') 

1934 

1935 # First we must disable the controller to move safely the 

1936 # LINSTOR config. 

1937 cls._start_controller(start=False) 

1938 

1939 cls._mount_database_volume(volume_path) 

1940 except Exception as e: 

1941 # Ensure we are connected because the controller may have been 

1942 # restarted during the mount call. 

1943 logger('Destroying database volume after mount fail...') 

1944 

1945 try: 

1946 cls._start_controller(start=True) 

1947 except Exception: 

1948 pass 

1949 

1950 lin = cls._create_linstor_instance( 

1951 uri, keep_uri_unmodified=True 

1952 ) 

1953 cls._force_destroy_database_volume(lin, group_name) 

1954 raise e 

1955 

1956 cls._start_controller(start=True) 

1957 lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True) 

1958 

1959 # 4. Remove storage pools/resource/volume group in the case of errors. 

1960 except Exception as e: 

1961 logger('Destroying resource group and storage pools after fail...') 

1962 try: 

1963 cls._destroy_resource_group(lin, group_name) 

1964 cls._destroy_resource_group(lin, ha_group_name) 

1965 except Exception as e2: 

1966 logger('Failed to destroy resource group: {}'.format(e2)) 

1967 pass 

1968 j = 0 

1969 i = min(i, len(node_names) - 1) 

1970 while j <= i: 

1971 try: 

1972 cls._destroy_storage_pool(lin, storage_pool_name, node_names[j]) 

1973 except Exception as e2: 

1974 logger('Failed to destroy storage pool: {}'.format(e2)) 

1975 pass 

1976 j += 1 

1977 raise e 

1978 

1979 # 5. Return new instance. 

1980 instance = cls.__new__(cls) 

1981 instance._linstor = lin 

1982 instance._logger = logger 

1983 instance._redundancy = redundancy 

1984 instance._base_group_name = base_group_name 

1985 instance._group_name = group_name 

1986 instance._volumes = set() 

1987 instance._storage_pools_time = 0 

1988 instance._kv_cache = instance._create_kv_cache() 

1989 instance._resource_cache = None 

1990 instance._resource_cache_dirty = True 

1991 instance._volume_info_cache = None 

1992 instance._volume_info_cache_dirty = True 

1993 return instance 

1994 

1995 @classmethod 

1996 def build_device_path(cls, volume_name): 

1997 """ 

1998 Build a device path given a volume name. 

1999 :param str volume_name: The volume name to use. 

2000 :return: The device path (which may or may not exist yet). 

2001 :rtype: str 

2002 """ 

2003 

2004 return '{}{}/0'.format(cls.DEV_ROOT_PATH, volume_name) 

2005 

2006 @classmethod 

2007 def build_volume_name(cls, base_name): 

2008 """ 

2009 Build a volume name given a base name (i.e. a UUID). 

2010 :param str base_name: The base name (i.e. a UUID) to use. 

2011 :return: The volume name. 

2012 :rtype: str 

2013 """ 

2014 return '{}{}'.format(cls.PREFIX_VOLUME, base_name) 

2015 
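
# Hypothetical sketch: derive the LINSTOR resource name and the DRBD
# device path for a VDI UUID (placeholder UUID; the results depend on
# the PREFIX_VOLUME and DEV_ROOT_PATH class constants).
#
#     name = LinstorVolumeManager.build_volume_name('<vdi-uuid>')
#     path = LinstorVolumeManager.build_device_path(name)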

2016 @classmethod 

2017 def round_up_volume_size(cls, volume_size): 

2018 """ 

2019 Align volume size on higher multiple of BLOCK_SIZE. 

2020 :param int volume_size: The volume size to align. 

2021 :return: An aligned volume size. 

2022 :rtype: int 

2023 """ 

2024 return round_up(volume_size, cls.BLOCK_SIZE) 

2025 

2026 @classmethod 

2027 def round_down_volume_size(cls, volume_size): 

2028 """ 

2029 Align volume size on lower multiple of BLOCK_SIZE. 

2030 :param int volume_size: The volume size to align. 

2031 :return: An aligned volume size. 

2032 :rtype: int 

2033 """ 

2034 return round_down(volume_size, cls.BLOCK_SIZE) 

2035 
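
# Worked example, assuming BLOCK_SIZE is 4 MiB and a round_up(value,
# boundary) argument order: a 10 MiB request is aligned to the nearest
# multiple of BLOCK_SIZE.
#
#     round_up(10 << 20, 4 << 20)    # => 12582912 (12 MiB)
#     round_down(10 << 20, 4 << 20)  # => 8388608 (8 MiB)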

2036 # -------------------------------------------------------------------------- 

2037 # Private helpers. 

2038 # -------------------------------------------------------------------------- 

2039 

2040 def _create_kv_cache(self): 

2041 self._kv_cache = self._create_linstor_kv('/') 

2042 self._kv_cache_dirty = False 

2043 return self._kv_cache 

2044 

2045 def _get_kv_cache(self): 

2046 if self._kv_cache_dirty: 

2047 self._kv_cache = self._create_kv_cache() 

2048 return self._kv_cache 

2049 

2050 def _create_resource_cache(self): 

2051 self._resource_cache = self._linstor.resource_list_raise() 

2052 self._resource_cache_dirty = False 

2053 return self._resource_cache 

2054 

2055 def _get_resource_cache(self): 

2056 if self._resource_cache_dirty: 

2057 self._resource_cache = self._create_resource_cache() 

2058 return self._resource_cache 

2059 

2060 def _mark_resource_cache_as_dirty(self): 

2061 self._resource_cache_dirty = True 

2062 self._volume_info_cache_dirty = True 

2063 

2064 # -------------------------------------------------------------------------- 

2065 

2066 def _ensure_volume_exists(self, volume_uuid): 

2067 if volume_uuid not in self._volumes: 

2068 raise LinstorVolumeManagerError( 

2069 'volume `{}` doesn\'t exist'.format(volume_uuid), 

2070 LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS 

2071 ) 

2072 

2073 def _find_best_size_candidates(self): 

2074 result = self._linstor.resource_group_qmvs(self._group_name) 

2075 error_str = self._get_error_str(result) 

2076 if error_str: 

2077 raise LinstorVolumeManagerError( 

2078 'Failed to get max volume size allowed of SR `{}`: {}'.format( 

2079 self._group_name, 

2080 error_str 

2081 ) 

2082 ) 

2083 return result[0].candidates 

2084 

2085 def _fetch_resource_names(self, ignore_deleted=True): 

2086 resource_names = set() 

2087 dfns = self._linstor.resource_dfn_list_raise().resource_definitions 

2088 for dfn in dfns: 

2089 if dfn.resource_group_name in self.get_all_group_names(self._base_group_name) and ( 

2090 ignore_deleted or 

2091 linstor.consts.FLAG_DELETE not in dfn.flags 

2092 ): 

2093 resource_names.add(dfn.name) 

2094 return resource_names 

2095 

2096 def _get_volumes_info(self, volume_name=None): 

2097 all_volume_info = {} 

2098 

2099 if not self._volume_info_cache_dirty: 

2100 return self._volume_info_cache 

2101 

2102 for resource in self._get_resource_cache().resources: 

2103 if resource.name not in all_volume_info: 

2104 current = all_volume_info[resource.name] = self.VolumeInfo( 

2105 resource.name 

2106 ) 

2107 else: 

2108 current = all_volume_info[resource.name] 

2109 

2110 if linstor.consts.FLAG_DISKLESS not in resource.flags: 

2111 current.diskful.append(resource.node_name) 

2112 

2113 for volume in resource.volumes: 

2114 # We ignore diskless pools of the form "DfltDisklessStorPool". 

2115 if volume.storage_pool_name == self._group_name: 

2116 if volume.allocated_size < 0: 

2117 raise LinstorVolumeManagerError( 

2118 'Failed to get allocated size of `{}` on `{}`' 

2119 .format(resource.name, volume.storage_pool_name) 

2120 ) 

2121 allocated_size = volume.allocated_size 

2122 

2123 current.allocated_size = max( 

2124 current.allocated_size, allocated_size 

2125 ) if current.allocated_size else allocated_size 

2126 

2127 usable_size = volume.usable_size 

2128 if usable_size > 0 and ( 

2129 usable_size < current.virtual_size or 

2130 not current.virtual_size 

2131 ): 

2132 current.virtual_size = usable_size 

2133 

2134 if current.virtual_size <= 0: 

2135 raise LinstorVolumeManagerError( 

2136 'Failed to get usable size of `{}` on `{}`' 

2137 .format(resource.name, volume.storage_pool_name) 

2138 ) 

2139 

2140 for current in all_volume_info.values(): 

2141 current.allocated_size *= 1024 

2142 current.virtual_size *= 1024 

2143 

2144 self._volume_info_cache_dirty = False 

2145 self._volume_info_cache = all_volume_info 

2146 

2147 return all_volume_info 

2148 

2149 def _get_volume_node_names_and_size(self, volume_name): 

2150 node_names = set() 

2151 size = -1 

2152 for resource in self._linstor.resource_list_raise( 

2153 filter_by_resources=[volume_name] 

2154 ).resources: 

2155 for volume in resource.volumes: 

2156 # We ignore diskless pools of the form "DfltDisklessStorPool". 

2157 if volume.storage_pool_name == self._group_name: 

2158 node_names.add(resource.node_name) 

2159 

2160 current_size = volume.usable_size 

2161 if current_size < 0: 

2162 raise LinstorVolumeManagerError( 

2163 'Failed to get usable size of `{}` on `{}`' 

2164 .format(resource.name, volume.storage_pool_name) 

2165 ) 

2166 

2167 if size < 0: 

2168 size = current_size 

2169 else: 

2170 size = min(size, current_size) 

2171 

2172 return (node_names, size * 1024) 

2173 

2174 def _compute_size(self, attr): 

2175 capacity = 0 

2176 for pool in self._get_storage_pools(force=True): 

2177 space = pool.free_space 

2178 if space: 

2179 size = getattr(space, attr) 

2180 if size < 0: 

2181 raise LinstorVolumeManagerError( 

2182 'Failed to get pool {} attr of `{}`' 

2183 .format(attr, pool.node_name) 

2184 ) 

2185 capacity += size 

2186 return capacity * 1024 

2187 

2188 def _get_node_names(self): 

2189 node_names = set() 

2190 for pool in self._get_storage_pools(): 

2191 node_names.add(pool.node_name) 

2192 return node_names 

2193 

2194 def _get_storage_pools(self, force=False): 

2195 cur_time = time.time() 

2196 elapsed_time = cur_time - self._storage_pools_time 

2197 

2198 if force or elapsed_time >= self.STORAGE_POOLS_FETCH_INTERVAL: 

2199 self._storage_pools = self._linstor.storage_pool_list_raise( 

2200 filter_by_stor_pools=[self._group_name] 

2201 ).storage_pools 

2202 self._storage_pools_time = time.time() 

2203 

2204 return self._storage_pools 

2205 

2206 def _create_volume( 

2207 self, 

2208 volume_uuid, 

2209 volume_name, 

2210 size, 

2211 place_resources, 

2212 high_availability 

2213 ): 

2214 size = self.round_up_volume_size(size) 

2215 self._mark_resource_cache_as_dirty() 

2216 

2217 group_name = self._ha_group_name if high_availability else self._group_name 

2218 def create_definition(): 

2219 first_attempt = True 

2220 while True: 

2221 try: 

2222 self._check_volume_creation_errors( 

2223 self._linstor.resource_group_spawn( 

2224 rsc_grp_name=group_name, 

2225 rsc_dfn_name=volume_name, 

2226 vlm_sizes=['{}B'.format(size)], 

2227 definitions_only=True 

2228 ), 

2229 volume_uuid, 

2230 self._group_name 

2231 ) 

2232 break 

2233 except LinstorVolumeManagerError as e: 

2234 if ( 

2235 not first_attempt or 

2236 not high_availability or 

2237 e.code != LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS 

2238 ): 

2239 raise 

2240 

2241 first_attempt = False 

2242 self._create_resource_group( 

2243 self._linstor, 

2244 group_name, 

2245 self._group_name, 

2246 3, 

2247 True 

2248 ) 

2249 

2250 self._configure_volume_peer_slots(self._linstor, volume_name) 

2251 

2252 def clean(): 

2253 try: 

2254 self._destroy_volume(volume_uuid, force=True, preserve_properties=True) 

2255 except Exception as e: 

2256 self._logger( 

2257 'Unable to destroy volume {} after creation fail: {}' 

2258 .format(volume_uuid, e) 

2259 ) 

2260 

2261 def create(): 

2262 try: 

2263 create_definition() 

2264 if place_resources: 

2265 # Basic case when we use the default redundancy of the group. 

2266 self._check_volume_creation_errors( 

2267 self._linstor.resource_auto_place( 

2268 rsc_name=volume_name, 

2269 place_count=self._redundancy, 

2270 diskless_on_remaining=False 

2271 ), 

2272 volume_uuid, 

2273 self._group_name 

2274 ) 

2275 except LinstorVolumeManagerError as e: 

2276 if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS: 

2277 clean() 

2278 raise 

2279 except Exception: 

2280 clean() 

2281 raise 

2282 

2283 util.retry(create, maxretry=5) 

2284 

2285 def _create_volume_with_properties( 

2286 self, 

2287 volume_uuid, 

2288 volume_name, 

2289 size, 

2290 place_resources, 

2291 high_availability 

2292 ): 

2293 if self.check_volume_exists(volume_uuid): 

2294 raise LinstorVolumeManagerError( 

2295 'Could not create volume `{}` from SR `{}`, it already exists' 

2296 .format(volume_uuid, self._group_name) + ' in properties', 

2297 LinstorVolumeManagerError.ERR_VOLUME_EXISTS 

2298 ) 

2299 

2300 if volume_name in self._fetch_resource_names(): 

2301 raise LinstorVolumeManagerError( 

2302 'Could not create volume `{}` from SR `{}`, '.format( 

2303 volume_uuid, self._group_name 

2304 ) + 'resource of the same name already exists in LINSTOR' 

2305 ) 

2306 

2307 # Paranoid check: the volume properties must not already exist. 

2308 volume_properties = self._get_volume_properties(volume_uuid) 

2309 if volume_properties.get(self.PROP_NOT_EXISTS) is not None: 

2310 raise LinstorVolumeManagerError( 

2311 'Could not create volume `{}`, '.format(volume_uuid) + 

2312 'properties already exist' 

2313 ) 

2314 

2315 try: 

2316 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_CREATING 

2317 volume_properties[self.PROP_VOLUME_NAME] = volume_name 

2318 

2319 self._create_volume( 

2320 volume_uuid, 

2321 volume_name, 

2322 size, 

2323 place_resources, 

2324 high_availability 

2325 ) 

2326 

2327 assert volume_properties.namespace == \ 

2328 self._build_volume_namespace(volume_uuid) 

2329 return volume_properties 

2330 except LinstorVolumeManagerError as e: 

2331 # Do not destroy existing resource! 

2332 # In theory we can't get this error because we check this case 

2333 # before calling `self._create_volume`. 

2334 # It can only happen if the same volume UUID is used concurrently 

2335 # by the same call on another host. 

2336 if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS: 

2337 self._destroy_volume(volume_uuid, force=True) 

2338 raise 

2339 

2340 def _find_device_path(self, volume_uuid, volume_name): 

2341 current_device_path = self._request_device_path( 

2342 volume_uuid, volume_name, activate=True 

2343 ) 

2344 

2345 # We use realpath here to get the /dev/drbd<id> path instead of 

2346 # /dev/drbd/by-res/<resource_name>. 

2347 expected_device_path = self.build_device_path(volume_name) 

2348 util.wait_for_path(expected_device_path, 5) 

2349 

2350 device_realpath = os.path.realpath(expected_device_path) 

2351 if current_device_path != device_realpath: 

2352 raise LinstorVolumeManagerError( 

2353 'Invalid path, current={}, expected={} (realpath={})' 

2354 .format( 

2355 current_device_path, 

2356 expected_device_path, 

2357 device_realpath 

2358 ) 

2359 ) 

2360 return expected_device_path 

2361 

2362 def _request_device_path(self, volume_uuid, volume_name, activate=False): 

2363 node_name = socket.gethostname() 

2364 

2365 resource = next(filter( 

2366 lambda resource: resource.node_name == node_name and 

2367 resource.name == volume_name, 

2368 self._get_resource_cache().resources 

2369 ), None) 

2370 

2371 if not resource: 

2372 if activate: 

2373 self._mark_resource_cache_as_dirty() 

2374 self._activate_device_path( 

2375 self._linstor, node_name, volume_name 

2376 ) 

2377 return self._request_device_path(volume_uuid, volume_name) 

2378 raise LinstorVolumeManagerError( 

2379 'Empty dev path for `{}`, but definition "seems" to exist' 

2380 .format(volume_uuid) 

2381 ) 

2382 # Contains a path of the /dev/drbd<id> form. 

2383 return resource.volumes[0].device_path 

2384 

2385 def _destroy_resource(self, resource_name, force=False): 

2386 result = self._linstor.resource_dfn_delete(resource_name) 

2387 error_str = self._get_error_str(result) 

2388 if not error_str: 

2389 self._mark_resource_cache_as_dirty() 

2390 return 

2391 

2392 if not force: 

2393 self._mark_resource_cache_as_dirty() 

2394 raise LinstorVolumeManagerError( 

2395 'Could not destroy resource `{}` from SR `{}`: {}' 

2396 .format(resource_name, self._group_name, error_str) 

2397 ) 

2398 

2399 # If force is used, ensure there is no opener. 

2400 all_openers = get_all_volume_openers(resource_name, '0') 

2401 for openers in all_openers.values(): 

2402 if openers: 

2403 self._mark_resource_cache_as_dirty() 

2404 raise LinstorVolumeManagerError( 

2405 'Could not force destroy resource `{}` from SR `{}`: {} (openers=`{}`)' 

2406 .format(resource_name, self._group_name, error_str, all_openers) 

2407 ) 

2408 

2409 # Maybe the resource is blocked in primary mode. DRBD/LINSTOR issue? 

2410 resource_states = filter( 

2411 lambda resource_state: resource_state.name == resource_name, 

2412 self._get_resource_cache().resource_states 

2413 ) 

2414 

2415 # Mark only after computation of states. 

2416 self._mark_resource_cache_as_dirty() 

2417 

2418 for resource_state in resource_states: 

2419 volume_state = resource_state.volume_states[0] 

2420 if resource_state.in_use: 

2421 demote_drbd_resource(resource_state.node_name, resource_name) 

2422 break 

2423 self._destroy_resource(resource_name) 

2424 

2425 def _destroy_volume(self, volume_uuid, force=False, preserve_properties=False): 

2426 volume_properties = self._get_volume_properties(volume_uuid) 

2427 try: 

2428 volume_name = volume_properties.get(self.PROP_VOLUME_NAME) 

2429 if volume_name in self._fetch_resource_names(): 

2430 self._destroy_resource(volume_name, force) 

2431 

2432 # Assume this call is atomic. 

2433 if not preserve_properties: 

2434 volume_properties.clear() 

2435 except Exception as e: 

2436 raise LinstorVolumeManagerError( 

2437 'Cannot destroy volume `{}`: {}'.format(volume_uuid, e) 

2438 ) 

2439 

2440 def _build_volumes(self, repair): 

2441 properties = self._kv_cache 

2442 resource_names = self._fetch_resource_names() 

2443 

2444 self._volumes = set() 

2445 

2446 updating_uuid_volumes = self._get_volumes_by_property( 

2447 self.REG_UPDATING_UUID_SRC, ignore_inexisting_volumes=False 

2448 ) 

2449 if updating_uuid_volumes and not repair: 

2450 raise LinstorVolumeManagerError( 

2451 'Cannot build LINSTOR volume list: ' 

2452 'There are invalid "updating uuid volumes"; repair is required' 

2453 ) 

2454 

2455 existing_volumes = self._get_volumes_by_property( 

2456 self.REG_NOT_EXISTS, ignore_inexisting_volumes=False 

2457 ) 

2458 for volume_uuid, not_exists in existing_volumes.items(): 

2459 properties.namespace = self._build_volume_namespace(volume_uuid) 

2460 

2461 src_uuid = properties.get(self.PROP_UPDATING_UUID_SRC) 

2462 if src_uuid: 

2463 self._logger( 

2464 'Ignoring volume during manager initialization with prop ' 

2465 'PROP_UPDATING_UUID_SRC: {} (properties={})' 

2466 .format( 

2467 volume_uuid, 

2468 self._get_filtered_properties(properties) 

2469 ) 

2470 ) 

2471 continue 

2472 

2473 # Insert volume in list if the volume exists. Or if the volume 

2474 # is being created and a slave wants to use it (repair = False). 

2475 # 

2476 # If we are on the master and if repair is True and state is 

2477 # Creating, it's probably a bug or crash: the creation process has 

2478 # been stopped. 

2479 if not_exists == self.STATE_EXISTS or ( 

2480 not repair and not_exists == self.STATE_CREATING 

2481 ): 

2482 self._volumes.add(volume_uuid) 

2483 continue 

2484 

2485 if not repair: 

2486 self._logger( 

2487 'Ignoring bad volume during manager initialization: {} ' 

2488 '(properties={})'.format( 

2489 volume_uuid, 

2490 self._get_filtered_properties(properties) 

2491 ) 

2492 ) 

2493 continue 

2494 

2495 # Remove bad volume. 

2496 try: 

2497 self._logger( 

2498 'Removing bad volume during manager initialization: {} ' 

2499 '(properties={})'.format( 

2500 volume_uuid, 

2501 self._get_filtered_properties(properties) 

2502 ) 

2503 ) 

2504 volume_name = properties.get(self.PROP_VOLUME_NAME) 

2505 

2506 # Little optimization, don't call `self._destroy_volume`, 

2507 # we already have resource name list. 

2508 if volume_name in resource_names: 

2509 self._destroy_resource(volume_name, force=True) 

2510 

2511 # Assume this call is atomic. 

2512 properties.clear() 

2513 except Exception as e: 

2514 # Do not raise, we don't want to block user action. 

2515 self._logger( 

2516 'Cannot clean volume {}: {}'.format(volume_uuid, e) 

2517 ) 

2518 

2519 # The volume can't be removed; maybe it's still in use. 

2520 # In this case rename it with the "DELETED_" prefix. 

2521 # This prefix is mandatory if there is a snap transaction to 

2522 # roll back, because the original VDI UUID may otherwise be 

2523 # renamed to the UUID we are trying to delete... 

2524 if not volume_uuid.startswith('DELETED_'): 

2525 self.update_volume_uuid( 

2526 volume_uuid, 'DELETED_' + volume_uuid, force=True 

2527 ) 

2528 

2529 for dest_uuid, src_uuid in updating_uuid_volumes.items(): 

2530 dest_namespace = self._build_volume_namespace(dest_uuid) 

2531 

2532 properties.namespace = dest_namespace 

2533 if int(properties.get(self.PROP_NOT_EXISTS)): 

2534 properties.clear() 

2535 continue 

2536 

2537 properties.namespace = self._build_volume_namespace(src_uuid) 

2538 properties.clear() 

2539 

2540 properties.namespace = dest_namespace 

2541 properties.pop(self.PROP_UPDATING_UUID_SRC) 

2542 

2543 if src_uuid in self._volumes: 

2544 self._volumes.remove(src_uuid) 

2545 self._volumes.add(dest_uuid) 

2546 

2547 def _get_sr_properties(self): 

2548 return self._create_linstor_kv(self._build_sr_namespace()) 

2549 

2550 def _get_volumes_by_property( 

2551 self, reg_prop, ignore_inexisting_volumes=True 

2552 ): 

2553 base_properties = self._get_kv_cache() 

2554 base_properties.namespace = self._build_volume_namespace() 

2555 

2556 volume_properties = {} 

2557 for volume_uuid in self._volumes: 

2558 volume_properties[volume_uuid] = '' 

2559 

2560 for key, value in base_properties.items(): 

2561 res = reg_prop.match(key) 

2562 if res: 

2563 volume_uuid = res.groups()[0] 

2564 if not ignore_inexisting_volumes or \ 

2565 volume_uuid in self._volumes: 

2566 volume_properties[volume_uuid] = value 

2567 

2568 return volume_properties 

2569 

2570 def _create_linstor_kv(self, namespace): 

2571 return linstor.KV( 

2572 self._group_name, 

2573 uri=self._linstor.controller_host(), 

2574 namespace=namespace 

2575 ) 

2576 

2577 def _get_volume_properties(self, volume_uuid): 

2578 properties = self._get_kv_cache() 

2579 properties.namespace = self._build_volume_namespace(volume_uuid) 

2580 return properties 

2581 
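
# Hypothetical sketch: the KV store keeps one namespace per volume, so
# reading a property of a given volume looks like this (UUID
# illustrative, `self` is a LinstorVolumeManager):
#
#     props = self._get_volume_properties('<volume-uuid>')
#     volume_name = props.get(self.PROP_VOLUME_NAME)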

2582 @classmethod 

2583 def _build_sr_namespace(cls): 

2584 return '/{}/'.format(cls.NAMESPACE_SR) 

2585 

2586 @classmethod 

2587 def _build_volume_namespace(cls, volume_uuid=None): 

2588 # Return a path to all volumes if `volume_uuid` is not given. 

2589 if volume_uuid is None: 

2590 return '/{}/'.format(cls.NAMESPACE_VOLUME) 

2591 return '/{}/{}/'.format(cls.NAMESPACE_VOLUME, volume_uuid) 

2592 

2593 @classmethod 

2594 def _get_error_str(cls, result): 

2595 return ', '.join([ 

2596 err.message for err in cls._filter_errors(result) 

2597 ]) 

2598 

2599 @classmethod 

2600 def _create_linstor_instance( 

2601 cls, uri, keep_uri_unmodified=False, attempt_count=30 

2602 ): 

2603 retry = False 

2604 

2605 def connect(uri): 

2606 if not uri: 

2607 uri = get_controller_uri() 

2608 if not uri: 

2609 raise LinstorVolumeManagerError( 

2610 'Unable to find controller uri...' 

2611 ) 

2612 instance = linstor.Linstor(uri, keep_alive=True) 

2613 instance.connect() 

2614 return instance 

2615 

2616 try: 

2617 return connect(uri) 

2618 except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError): 

2619 pass 

2620 

2621 if not keep_uri_unmodified: 

2622 uri = None 

2623 

2624 return util.retry( 

2625 lambda: connect(uri), 

2626 maxretry=attempt_count, 

2627 period=1, 

2628 exceptions=[ 

2629 linstor.errors.LinstorNetworkError, 

2630 LinstorVolumeManagerError 

2631 ] 

2632 ) 

2633 

2634 @classmethod 

2635 def _configure_volume_peer_slots(cls, lin, volume_name): 

2636 result = lin.resource_dfn_modify(volume_name, {}, peer_slots=3) 

2637 error_str = cls._get_error_str(result) 

2638 if error_str: 

2639 raise LinstorVolumeManagerError( 

2640 'Could not configure volume peer slots of {}: {}' 

2641 .format(volume_name, error_str) 

2642 ) 

2643 

2644 @classmethod 

2645 def _activate_device_path(cls, lin, node_name, volume_name): 

2646 result = lin.resource_make_available(node_name, volume_name, diskful=False) 

2647 if linstor.Linstor.all_api_responses_no_error(result): 

2648 return 

2649 errors = linstor.Linstor.filter_api_call_response_errors(result) 

2650 if len(errors) == 1 and errors[0].is_error( 

2651 linstor.consts.FAIL_EXISTS_RSC 

2652 ): 

2653 return 

2654 

2655 raise LinstorVolumeManagerError( 

2656 'Unable to activate device path of `{}` on node `{}`: {}' 

2657 .format(volume_name, node_name, ', '.join( 

2658 [str(x) for x in result])) 

2659 ) 

2660 

2661 @classmethod 

2662 def _request_database_path(cls, lin, activate=False): 

2663 node_name = socket.gethostname() 

2664 

2665 try: 

2666 resource = next(filter( 

2667 lambda resource: resource.node_name == node_name and 

2668 resource.name == DATABASE_VOLUME_NAME, 

2669 lin.resource_list_raise().resources 

2670 ), None) 

2671 except Exception as e: 

2672 raise LinstorVolumeManagerError( 

2673 'Unable to fetch database resource: {}' 

2674 .format(e) 

2675 ) 

2676 

2677 if not resource: 

2678 if activate: 

2679 cls._activate_device_path( 

2680 lin, node_name, DATABASE_VOLUME_NAME 

2681 ) 

2682 return cls._request_database_path( 

2683 lin 

2684 ) 

2685 raise LinstorVolumeManagerError( 

2686 'Empty dev path for `{}`, but definition "seems" to exist' 

2687 .format(DATABASE_PATH) 

2688 ) 

2689 # Contains a path of the /dev/drbd<id> form. 

2690 return resource.volumes[0].device_path 

2691 

2692 @classmethod 

2693 def _create_database_volume( 

2694 cls, lin, group_name, storage_pool_name, node_names, redundancy, auto_quorum 

2695 ): 

2696 try: 

2697 dfns = lin.resource_dfn_list_raise().resource_definitions 

2698 except Exception as e: 

2699 raise LinstorVolumeManagerError( 

2700 'Unable to get definitions during database creation: {}' 

2701 .format(e) 

2702 ) 

2703 

2704 if dfns: 

2705 raise LinstorVolumeManagerError( 

2706 'Could not create volume `{}` from SR `{}`, '.format( 

2707 DATABASE_VOLUME_NAME, group_name 

2708 ) + 'LINSTOR volume list must be empty.' 

2709 ) 

2710 

2711 # Workaround to use thin LVM. Without this call an error is returned: 

2712 # "Not enough available nodes" 

2713 # I don't understand why, but this command protects against this bug. 

2714 try: 

2715 pools = lin.storage_pool_list_raise( 

2716 filter_by_stor_pools=[storage_pool_name] 

2717 ) 

2718 except Exception as e: 

2719 raise LinstorVolumeManagerError( 

2720 'Failed to get storage pool list before database creation: {}' 

2721 .format(e) 

2722 ) 

2723 

2724 # Ensure we have a correct list of storage pools. 

2725 assert pools.storage_pools # We must have at least one storage pool! 

2726 nodes_with_pool = list(map(lambda pool: pool.node_name, pools.storage_pools)) 

2727 for node_name in nodes_with_pool: 

2728 assert node_name in node_names 

2729 util.SMlog('Nodes with storage pool: {}'.format(nodes_with_pool)) 

2730 

2731 # Create the database definition. 

2732 size = cls.round_up_volume_size(DATABASE_SIZE) 

2733 cls._check_volume_creation_errors(lin.resource_group_spawn( 

2734 rsc_grp_name=group_name, 

2735 rsc_dfn_name=DATABASE_VOLUME_NAME, 

2736 vlm_sizes=['{}B'.format(size)], 

2737 definitions_only=True 

2738 ), DATABASE_VOLUME_NAME, group_name) 

2739 cls._configure_volume_peer_slots(lin, DATABASE_VOLUME_NAME) 

2740 

2741 # Create real resources on the first nodes. 

2742 resources = [] 

2743 

2744 diskful_nodes = [] 

2745 diskless_nodes = [] 

2746 for node_name in node_names: 

2747 if node_name in nodes_with_pool: 

2748 diskful_nodes.append(node_name) 

2749 else: 

2750 diskless_nodes.append(node_name) 

2751 

2752 assert diskful_nodes 

2753 for node_name in diskful_nodes[:redundancy]: 

2754 util.SMlog('Create database diskful on {}'.format(node_name)) 

2755 resources.append(linstor.ResourceData( 

2756 node_name=node_name, 

2757 rsc_name=DATABASE_VOLUME_NAME, 

2758 storage_pool=storage_pool_name 

2759 )) 

2760 # Create diskless resources on the remaining set. 

2761 for node_name in diskful_nodes[redundancy:] + diskless_nodes: 

2762 util.SMlog('Create database diskless on {}'.format(node_name)) 

2763 resources.append(linstor.ResourceData( 

2764 node_name=node_name, 

2765 rsc_name=DATABASE_VOLUME_NAME, 

2766 diskless=True 

2767 )) 

2768 

2769 result = lin.resource_create(resources) 

2770 error_str = cls._get_error_str(result) 

2771 if error_str: 

2772 raise LinstorVolumeManagerError( 

2773 'Could not create database volume from SR `{}`: {}'.format( 

2774 group_name, error_str 

2775 ) 

2776 ) 

2777 

2778 # We must modify the quorum. Otherwise we can't correctly use the 

2779 # drbd-reactor daemon. 

2780 if auto_quorum: 

2781 result = lin.resource_dfn_modify(DATABASE_VOLUME_NAME, { 

2782 'DrbdOptions/auto-quorum': 'disabled', 

2783 'DrbdOptions/Resource/quorum': 'majority' 

2784 }) 

2785 error_str = cls._get_error_str(result) 

2786 if error_str: 

2787 raise LinstorVolumeManagerError( 

2788 'Could not activate quorum on database volume: {}' 

2789 .format(error_str) 

2790 ) 

2791 

2792 # Create database and ensure path exists locally and 

2793 # on replicated devices. 

2794 current_device_path = cls._request_database_path(lin, activate=True) 

2795 

2796 # Ensure diskless paths exist on other hosts. Otherwise PBDs can't be 

2797 # plugged. 

2798 for node_name in node_names: 

2799 cls._activate_device_path(lin, node_name, DATABASE_VOLUME_NAME) 

2800 

2801 # We use realpath here to get the /dev/drbd<id> path instead of 

2802 # /dev/drbd/by-res/<resource_name>. 

2803 expected_device_path = cls.build_device_path(DATABASE_VOLUME_NAME) 

2804 util.wait_for_path(expected_device_path, 5) 

2805 

2806 device_realpath = os.path.realpath(expected_device_path) 

2807 if current_device_path != device_realpath: 

2808 raise LinstorVolumeManagerError( 

2809 'Invalid path, current={}, expected={} (realpath={})' 

2810 .format( 

2811 current_device_path, 

2812 expected_device_path, 

2813 device_realpath 

2814 ) 

2815 ) 

2816 

2817 try: 

2818 util.retry( 

2819 lambda: util.pread2([DATABASE_MKFS, expected_device_path]), 

2820 maxretry=5 

2821 ) 

2822 except Exception as e: 

2823 raise LinstorVolumeManagerError( 

2824 'Failed to execute {} on database volume: {}' 

2825 .format(DATABASE_MKFS, e) 

2826 ) 

2827 

2828 return expected_device_path 

2829 

2830 @classmethod 

2831 def _destroy_database_volume(cls, lin, group_name): 

2832 error_str = cls._get_error_str( 

2833 lin.resource_dfn_delete(DATABASE_VOLUME_NAME) 

2834 ) 

2835 if error_str: 

2836 raise LinstorVolumeManagerError( 

2837 'Could not destroy resource `{}` from SR `{}`: {}' 

2838 .format(DATABASE_VOLUME_NAME, group_name, error_str) 

2839 ) 

2840 

2841 @classmethod 

2842 def _mount_database_volume(cls, volume_path, mount=True, force=False): 

2843 try: 

2844 # 1. Create a backup config folder. 

2845 database_not_empty = bool(os.listdir(DATABASE_PATH)) 

2846 backup_path = cls._create_database_backup_path() 

2847 

2848 # 2. Move the config in the mounted volume. 

2849 if database_not_empty: 

2850 cls._move_files(DATABASE_PATH, backup_path) 

2851 

2852 cls._mount_volume(volume_path, DATABASE_PATH, mount) 

2853 

2854 if database_not_empty: 

2855 cls._move_files(backup_path, DATABASE_PATH, force) 

2856 

2857 # 3. Remove useless backup directory. 

2858 try: 

2859 os.rmdir(backup_path) 

2860 except Exception as e: 

2861 raise LinstorVolumeManagerError( 

2862 'Failed to remove backup path {} of LINSTOR config: {}' 

2863 .format(backup_path, e) 

2864 ) 

2865 except Exception as e: 

2866 def force_exec(fn): 

2867 try: 

2868 fn() 

2869 except Exception: 

2870 pass 

2871 

2872 if mount == cls._is_mounted(DATABASE_PATH): 

2873 force_exec(lambda: cls._move_files( 

2874 DATABASE_PATH, backup_path 

2875 )) 

2876 force_exec(lambda: cls._mount_volume( 

2877 volume_path, DATABASE_PATH, not mount 

2878 )) 

2879 

2880 if mount != cls._is_mounted(DATABASE_PATH): 

2881 force_exec(lambda: cls._move_files( 

2882 backup_path, DATABASE_PATH 

2883 )) 

2884 

2885 force_exec(lambda: os.rmdir(backup_path)) 

2886 raise e 

2887 

2888 @classmethod 

2889 def _force_destroy_database_volume(cls, lin, group_name): 

2890 try: 

2891 cls._destroy_database_volume(lin, group_name) 

2892 except Exception: 

2893 pass 

2894 

2895 @classmethod 

2896 def _destroy_storage_pool(cls, lin, group_name, node_name): 

2897 def destroy(): 

2898 result = lin.storage_pool_delete(node_name, group_name) 

2899 errors = cls._filter_errors(result) 

2900 if cls._check_errors(errors, [ 

2901 linstor.consts.FAIL_NOT_FOUND_STOR_POOL, 

2902 linstor.consts.FAIL_NOT_FOUND_STOR_POOL_DFN 

2903 ]): 

2904 return 

2905 

2906 if errors: 

2907 raise LinstorVolumeManagerError( 

2908 'Failed to destroy SP `{}` on node `{}`: {}'.format( 

2909 group_name, 

2910 node_name, 

2911 cls._get_error_str(errors) 

2912 ) 

2913 ) 

2914 

2915 # We must retry to avoid errors like: 

2916 # "can not be deleted as volumes / snapshot-volumes are still using it" 

2917 # after LINSTOR database volume destruction. 

2918 return util.retry(destroy, maxretry=10) 

2919 

2920 @classmethod 

2921 def _create_resource_group( 

2922 cls, 

2923 lin, 

2924 group_name, 

2925 storage_pool_name, 

2926 redundancy, 

2927 destroy_old_group 

2928 ): 

2929 rg_creation_attempt = 0 

2930 while True: 

2931 result = lin.resource_group_create( 

2932 name=group_name, 

2933 place_count=redundancy, 

2934 storage_pool=storage_pool_name, 

2935 diskless_on_remaining=False 

2936 ) 

2937 error_str = cls._get_error_str(result) 

2938 if not error_str: 

2939 break 

2940 

2941 errors = cls._filter_errors(result) 

2942 if destroy_old_group and cls._check_errors(errors, [ 

2943 linstor.consts.FAIL_EXISTS_RSC_GRP 

2944 ]): 

2945 rg_creation_attempt += 1 

2946 if rg_creation_attempt < 2: 

2947 try: 

2948 cls._destroy_resource_group(lin, group_name) 

2949 except Exception as e: 

2950 error_str = 'Failed to destroy old and empty RG: {}'.format(e) 

2951 else: 

2952 continue 

2953 

2954 raise LinstorVolumeManagerError( 

2955 'Could not create RG `{}`: {}'.format( 

2956 group_name, error_str 

2957 ) 

2958 ) 

2959 

2960 result = lin.volume_group_create(group_name) 

2961 error_str = cls._get_error_str(result) 

2962 if error_str: 

2963 raise LinstorVolumeManagerError( 

2964 'Could not create VG `{}`: {}'.format( 

2965 group_name, error_str 

2966 ) 

2967 ) 

2968 

2969 @classmethod 

2970 def _destroy_resource_group(cls, lin, group_name): 

2971 def destroy(): 

2972 result = lin.resource_group_delete(group_name) 

2973 errors = cls._filter_errors(result) 

2974 if cls._check_errors(errors, [ 

2975 linstor.consts.FAIL_NOT_FOUND_RSC_GRP 

2976 ]): 

2977 return 

2978 

2979 if errors: 

2980 raise LinstorVolumeManagerError( 

2981 'Failed to destroy RG `{}`: {}' 

2982 .format(group_name, cls._get_error_str(errors)) 

2983 ) 

2984 

2985 return util.retry(destroy, maxretry=10) 

2986 

2987 @classmethod 

2988 def _build_group_name(cls, base_name): 

2989 # If thin provisioning is used we have a path like this: 

2990 # `VG/LV`. "/" is not accepted by LINSTOR. 

2991 return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_')) 

2992 

2993 # Used to store important data in a HA context, 

2994 # i.e. a replication count of 3. 

2995 @classmethod 

2996 def _build_ha_group_name(cls, base_name): 

2997 return '{}{}'.format(cls.PREFIX_HA, base_name.replace('/', '_')) 

2998 

2999 @classmethod 

3000 def _check_volume_creation_errors(cls, result, volume_uuid, group_name): 

3001 errors = cls._filter_errors(result) 

3002 if cls._check_errors(errors, [ 

3003 linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN 

3004 ]): 

3005 raise LinstorVolumeManagerError( 

3006 'Failed to create volume `{}` from SR `{}`, it already exists' 

3007 .format(volume_uuid, group_name), 

3008 LinstorVolumeManagerError.ERR_VOLUME_EXISTS 

3009 ) 

3010 

3011 if cls._check_errors(errors, [linstor.consts.FAIL_NOT_FOUND_RSC_GRP]): 

3012 raise LinstorVolumeManagerError( 

3013 'Failed to create volume `{}` from SR `{}`, resource group doesn\'t exist' 

3014 .format(volume_uuid, group_name), 

3015 LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS 

3016 ) 

3017 

3018 if errors: 

3019 raise LinstorVolumeManagerError( 

3020 'Failed to create volume `{}` from SR `{}`: {}'.format( 

3021 volume_uuid, 

3022 group_name, 

3023 cls._get_error_str(errors) 

3024 ) 

3025 ) 

3026 

3027 @classmethod 

3028 def _move_files(cls, src_dir, dest_dir, force=False): 

3029 def listdir(dir): 

3030 ignored = ['lost+found'] 

3031 return [file for file in os.listdir(dir) if file not in ignored] 

3032 

3033 try: 

3034 if not force: 

3035 files = listdir(dest_dir) 

3036 if files: 

3037 raise LinstorVolumeManagerError( 

3038 'Cannot move files from {} to {} because destination ' 

3039 'contains: {}'.format(src_dir, dest_dir, files) 

3040 ) 

3041 except LinstorVolumeManagerError: 

3042 raise 

3043 except Exception as e: 

3044 raise LinstorVolumeManagerError( 

3045 'Cannot list dir {}: {}'.format(dest_dir, e) 

3046 ) 

3047 

3048 try: 

3049 for file in listdir(src_dir): 

3050 try: 

3051 dest_file = os.path.join(dest_dir, file) 

3052 if not force and os.path.exists(dest_file): 

3053 raise LinstorVolumeManagerError( 

3054 'Cannot move {} because it already exists in the ' 

3055 'destination'.format(file) 

3056 ) 

3057 shutil.move(os.path.join(src_dir, file), dest_file) 

3058 except LinstorVolumeManagerError: 

3059 raise 

3060 except Exception as e: 

3061 raise LinstorVolumeManagerError( 

3062 'Cannot move {}: {}'.format(file, e) 

3063 ) 

3064 except Exception as e: 

3065 if not force: 

3066 try: 

3067 cls._move_files(dest_dir, src_dir, force=True) 

3068 except Exception: 

3069 pass 

3070 

3071 raise LinstorVolumeManagerError( 

3072 'Failed to move files from {} to {}: {}'.format( 

3073 src_dir, dest_dir, e 

3074 ) 

3075 ) 

3076 

3077 @staticmethod 

3078 def _create_database_backup_path(): 

3079 path = DATABASE_PATH + '-' + str(uuid.uuid4()) 

3080 try: 

3081 os.mkdir(path) 

3082 return path 

3083 except Exception as e: 

3084 raise LinstorVolumeManagerError( 

3085 'Failed to create backup path {} of LINSTOR config: {}' 

3086 .format(path, e) 

3087 ) 

3088 

3089 @staticmethod 

3090 def _get_filtered_properties(properties): 

3091 return dict(properties.items()) 

3092 

3093 @staticmethod 

3094 def _filter_errors(result): 

3095 return [ 

3096 err for err in result 

3097 if hasattr(err, 'is_error') and err.is_error() 

3098 ] 

3099 

3100 @staticmethod 

3101 def _check_errors(result, codes): 

3102 for err in result: 

3103 for code in codes: 

3104 if err.is_error(code): 

3105 return True 

3106 return False 

3107 

3108 @classmethod 

3109 def _controller_is_running(cls): 

3110 return cls._service_is_running('linstor-controller') 

3111 

3112 @classmethod 

3113 def _start_controller(cls, start=True): 

3114 return cls._start_service('linstor-controller', start) 

3115 

3116 @staticmethod 

3117 def _start_service(name, start=True): 

3118 action = 'start' if start else 'stop' 

3119 (ret, out, err) = util.doexec([ 

3120 'systemctl', action, name 

3121 ]) 

3122 if ret != 0: 

3123 raise LinstorVolumeManagerError( 

3124 'Failed to {} {}: {} {}' 

3125 .format(action, name, out, err) 

3126 ) 

3127 

3128 @staticmethod 

3129 def _service_is_running(name): 

3130 (ret, out, err) = util.doexec([ 

3131 'systemctl', 'is-active', '--quiet', name 

3132 ]) 

3133 return not ret 

3134 

3135 @staticmethod 

3136 def _is_mounted(mountpoint): 

3137 (ret, out, err) = util.doexec(['mountpoint', '-q', mountpoint]) 

3138 return ret == 0 

3139 

3140 @classmethod 

3141 def _mount_volume(cls, volume_path, mountpoint, mount=True): 

3142 if mount: 

3143 try: 

3144 util.pread(['mount', volume_path, mountpoint]) 

3145 except Exception as e: 

3146 raise LinstorVolumeManagerError( 

3147 'Failed to mount volume {} on {}: {}' 

3148 .format(volume_path, mountpoint, e) 

3149 ) 

3150 else: 

3151 try: 

3152 if cls._is_mounted(mountpoint): 

3153 util.pread(['umount', mountpoint]) 

3154 except Exception as e: 

3155 raise LinstorVolumeManagerError( 

3156 'Failed to umount volume {} on {}: {}' 

3157 .format(volume_path, mountpoint, e) 

3158 ) 

3159 

3160 

3161# ============================================================================== 

3162 

3163# Check if a path is a DRBD resource and log the process name/pid 

3164# that opened it. 

3165def log_drbd_openers(path): 

3166 # Ignore if it's not a symlink to a DRBD resource. 

3167 if not path.startswith(DRBD_BY_RES_PATH): 

3168 return 

3169 

3170 # Compute resource name. 

3171 res_name_end = path.find('/', len(DRBD_BY_RES_PATH)) 

3172 if res_name_end == -1: 

3173 return 

3174 res_name = path[len(DRBD_BY_RES_PATH):res_name_end] 

3175 

3176 volume_end = path.rfind('/') 

3177 if volume_end == res_name_end: 

3178 return 

3179 volume = path[volume_end + 1:] 

3180 

3181 try: 

3182 # Ensure path is a DRBD. 

3183 drbd_path = os.path.realpath(path) 

3184 stats = os.stat(drbd_path) 
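# 147 is the block device major number registered for DRBD.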

3185 if not stat.S_ISBLK(stats.st_mode) or os.major(stats.st_rdev) != 147: 

3186 return 

3187 

3188 # Find where the device is open. 

3189 (ret, stdout, stderr) = util.doexec(['drbdadm', 'status', res_name]) 

3190 if ret != 0: 

3191 util.SMlog('Failed to execute `drbdadm status` on `{}`: {}'.format( 

3192 res_name, stderr 

3193 )) 

3194 return 

3195 

3196 # Is it a local device? 

3197 if stdout.startswith('{} role:Primary'.format(res_name)): 

3198 util.SMlog( 

3199 'DRBD resource `{}` is open on local host: {}' 

3200 .format(path, get_local_volume_openers(res_name, volume)) 

3201 ) 

3202 return 

3203 

3204 # Is it a remote device? 

3205 util.SMlog( 

3206 'DRBD resource `{}` is open on hosts: {}' 

3207 .format(path, get_all_volume_openers(res_name, volume)) 

3208 ) 

3209 except Exception as e: 

3210 util.SMlog( 

3211 'Got exception while trying to determine where DRBD resource ' + 

3212 '`{}` is open: {}'.format(path, e) 

3213 )
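

# Hypothetical usage sketch of the helper above: given a path under
# /dev/drbd/by-res/, log which processes hold the device open (the
# resource name below is illustrative).
#
#     log_drbd_openers('/dev/drbd/by-res/<resource-name>/0')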