Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/python3 

2# 

3# Copyright (C) Citrix Systems Inc. 

4# 

5# This program is free software; you can redistribute it and/or modify 

6# it under the terms of the GNU Lesser General Public License as published 

7# by the Free Software Foundation; version 2.1 only. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program; if not, write to the Free Software Foundation, Inc., 

16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 

17# 

18# Script to coalesce and garbage collect VHD-based SR's in the background 

19# 

20 

21from sm_typing import Optional, override 

22 

23import os 

24import os.path 

25import sys 

26import time 

27import signal 

28import subprocess 

29import getopt 

30import datetime 

31import traceback 

32import base64 

33import zlib 

34import errno 

35import stat 

36 

37import XenAPI # pylint: disable=import-error 

38import util 

39import lvutil 

40import vhdutil 

41import lvhdutil 

42import lvmcache 

43import journaler 

44import fjournaler 

45import lock 

46import blktap2 

47import xs_errors 

48from refcounter import RefCounter 

49from ipc import IPCFlag 

50from lvmanager import LVActivator 

51from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG 

52from functools import reduce 

53from time import monotonic as _time 

54 

55try: 

56 from linstorjournaler import LinstorJournaler 

57 from linstorvhdutil import LinstorVhdUtil 

58 from linstorvolumemanager import get_controller_uri 

59 from linstorvolumemanager import LinstorVolumeManager 

60 from linstorvolumemanager import LinstorVolumeManagerError 

61 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX 

62 

63 LINSTOR_AVAILABLE = True 

64except ImportError: 

65 LINSTOR_AVAILABLE = False 

66 

67# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not 

68# possible due to lvhd_stop_using_() not working correctly. However, we leave 

69# this option available through the explicit LEAFCLSC_FORCE flag in the VDI 

70# record for use by the offline tool (which makes the operation safe by pausing 

71# the VM first) 

72AUTO_ONLINE_LEAF_COALESCE_ENABLED = True 

73 

74FLAG_TYPE_ABORT = "abort" # flag to request aborting of GC/coalesce 

75 

76# process "lock", used simply as an indicator that a process already exists 

77# that is doing GC/coalesce on this SR (such a process holds the lock, and we 

78# check for the fact by trying the lock). 

79lockGCRunning = None 

80 

81# process "lock" to indicate that the GC process has been activated but may not 

82# yet be running, stops a second process from being started. 

83LOCK_TYPE_GC_ACTIVE = "gc_active" 

84lockGCActive = None 

85 

86# Default coalesce error rate limit, in messages per minute. A zero value 

87# disables throttling, and a negative value disables error reporting. 

88DEFAULT_COALESCE_ERR_RATE = 1.0 / 60 

89 

90COALESCE_LAST_ERR_TAG = 'last-coalesce-error' 

91COALESCE_ERR_RATE_TAG = 'coalesce-error-rate' 

92VAR_RUN = "/var/run/" 

93SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log" 

94 

95N_RUNNING_AVERAGE = 10 

96 

97NON_PERSISTENT_DIR = '/run/nonpersistent/sm' 

98 

99 

100class AbortException(util.SMException): 

101 pass 

102 

103 

104################################################################################ 

105# 

106# Util 

107# 

108class Util: 

109 RET_RC = 1 

110 RET_STDOUT = 2 

111 RET_STDERR = 4 

112 

113 UUID_LEN = 36 

114 

115 PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024} 

116 

117 @staticmethod 

118 def log(text) -> None: 

119 util.SMlog(text, ident="SMGC") 

120 

121 @staticmethod 

122 def logException(tag): 

123 info = sys.exc_info() 

124 if info[0] == SystemExit: 124 ↛ 126line 124 didn't jump to line 126, because the condition on line 124 was never true

125 # this should not be happening when catching "Exception", but it is 

126 sys.exit(0) 

127 tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2])) 

128 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

129 Util.log(" ***********************") 

130 Util.log(" * E X C E P T I O N *") 

131 Util.log(" ***********************") 

132 Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1])) 

133 Util.log(tb) 

134 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

135 

136 @staticmethod 

137 def doexec(args, expectedRC, inputtext=None, ret=None, log=True): 

138 "Execute a subprocess, then return its return code, stdout, stderr" 

139 proc = subprocess.Popen(args, 

140 stdin=subprocess.PIPE, \ 

141 stdout=subprocess.PIPE, \ 

142 stderr=subprocess.PIPE, \ 

143 shell=True, \ 

144 close_fds=True) 

145 (stdout, stderr) = proc.communicate(inputtext) 

146 stdout = str(stdout) 

147 stderr = str(stderr) 

148 rc = proc.returncode 

149 if log: 

150 Util.log("`%s`: %s" % (args, rc)) 

151 if type(expectedRC) != type([]): 

152 expectedRC = [expectedRC] 

153 if not rc in expectedRC: 

154 reason = stderr.strip() 

155 if stdout.strip(): 

156 reason = "%s (stdout: %s)" % (reason, stdout.strip()) 

157 Util.log("Failed: %s" % reason) 

158 raise util.CommandException(rc, args, reason) 

159 

160 if ret == Util.RET_RC: 

161 return rc 

162 if ret == Util.RET_STDERR: 

163 return stderr 

164 return stdout 

165 

166 @staticmethod 

167 def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut): 

168 """execute func in a separate thread and kill it if abortTest signals 

169 so""" 

170 abortSignaled = abortTest() # check now before we clear resultFlag 

171 resultFlag = IPCFlag(ns) 

172 resultFlag.clearAll() 

173 pid = os.fork() 

174 if pid: 

175 startTime = _time() 

176 try: 

177 while True: 

178 if resultFlag.test("success"): 

179 Util.log(" Child process completed successfully") 

180 resultFlag.clear("success") 

181 return 

182 if resultFlag.test("failure"): 

183 resultFlag.clear("failure") 

184 raise util.SMException("Child process exited with error") 

185 if abortTest() or abortSignaled: 

186 os.killpg(pid, signal.SIGKILL) 

187 raise AbortException("Aborting due to signal") 

188 if timeOut and _time() - startTime > timeOut: 

189 os.killpg(pid, signal.SIGKILL) 

190 resultFlag.clearAll() 

191 raise util.SMException("Timed out") 

192 time.sleep(pollInterval) 

193 finally: 

194 wait_pid = 0 

195 rc = -1 

196 count = 0 

197 while wait_pid == 0 and count < 10: 

198 wait_pid, rc = os.waitpid(pid, os.WNOHANG) 

199 if wait_pid == 0: 

200 time.sleep(2) 

201 count += 1 

202 

203 if wait_pid == 0: 

204 Util.log("runAbortable: wait for process completion timed out") 

205 else: 

206 os.setpgrp() 

207 try: 

208 if func() == ret: 

209 resultFlag.set("success") 

210 else: 

211 resultFlag.set("failure") 

212 except Exception as e: 

213 Util.log("Child process failed with : (%s)" % e) 

214 resultFlag.set("failure") 

215 Util.logException("This exception has occured") 

216 os._exit(0) 

217 

218 @staticmethod 

219 def num2str(number): 

220 for prefix in ("G", "M", "K"): 220 ↛ 223line 220 didn't jump to line 223, because the loop on line 220 didn't complete

221 if number >= Util.PREFIX[prefix]: 

222 return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix) 

223 return "%s" % number 

224 

225 @staticmethod 

226 def numBits(val): 

227 count = 0 

228 while val: 

229 count += val & 1 

230 val = val >> 1 

231 return count 

232 

233 @staticmethod 

234 def countBits(bitmap1, bitmap2): 

235 """return bit count in the bitmap produced by ORing the two bitmaps""" 

236 len1 = len(bitmap1) 

237 len2 = len(bitmap2) 

238 lenLong = len1 

239 lenShort = len2 

240 bitmapLong = bitmap1 

241 if len2 > len1: 

242 lenLong = len2 

243 lenShort = len1 

244 bitmapLong = bitmap2 

245 

246 count = 0 

247 for i in range(lenShort): 

248 val = bitmap1[i] | bitmap2[i] 

249 count += Util.numBits(val) 

250 

251 for i in range(i + 1, lenLong): 

252 val = bitmapLong[i] 

253 count += Util.numBits(val) 

254 return count 

255 

256 @staticmethod 

257 def getThisScript(): 

258 thisScript = util.get_real_path(__file__) 

259 if thisScript.endswith(".pyc"): 

260 thisScript = thisScript[:-1] 

261 return thisScript 

262 

263 

264################################################################################ 

265# 

266# XAPI 

267# 

268class XAPI: 

269 USER = "root" 

270 PLUGIN_ON_SLAVE = "on-slave" 

271 

272 CONFIG_SM = 0 

273 CONFIG_OTHER = 1 

274 CONFIG_ON_BOOT = 2 

275 CONFIG_ALLOW_CACHING = 3 

276 

277 CONFIG_NAME = { 

278 CONFIG_SM: "sm-config", 

279 CONFIG_OTHER: "other-config", 

280 CONFIG_ON_BOOT: "on-boot", 

281 CONFIG_ALLOW_CACHING: "allow_caching" 

282 } 

283 

284 class LookupError(util.SMException): 

285 pass 

286 

287 @staticmethod 

288 def getSession(): 

289 session = XenAPI.xapi_local() 

290 session.xenapi.login_with_password(XAPI.USER, '', '', 'SM') 

291 return session 

292 

293 def __init__(self, session, srUuid): 

294 self.sessionPrivate = False 

295 self.session = session 

296 if self.session is None: 

297 self.session = self.getSession() 

298 self.sessionPrivate = True 

299 self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid) 

300 self.srRecord = self.session.xenapi.SR.get_record(self._srRef) 

301 self.hostUuid = util.get_this_host() 

302 self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid) 

303 self.task = None 

304 self.task_progress = {"coalescable": 0, "done": 0} 

305 

306 def __del__(self): 

307 if self.sessionPrivate: 

308 self.session.xenapi.session.logout() 

309 

310 def isPluggedHere(self): 

311 pbds = self.getAttachedPBDs() 

312 for pbdRec in pbds: 

313 if pbdRec["host"] == self._hostRef: 

314 return True 

315 return False 

316 

317 def poolOK(self): 

318 host_recs = self.session.xenapi.host.get_all_records() 

319 for host_ref, host_rec in host_recs.items(): 

320 if not host_rec["enabled"]: 

321 Util.log("Host %s not enabled" % host_rec["uuid"]) 

322 return False 

323 return True 

324 

325 def isMaster(self): 

326 if self.srRecord["shared"]: 

327 pool = list(self.session.xenapi.pool.get_all_records().values())[0] 

328 return pool["master"] == self._hostRef 

329 else: 

330 pbds = self.getAttachedPBDs() 

331 if len(pbds) < 1: 

332 raise util.SMException("Local SR not attached") 

333 elif len(pbds) > 1: 

334 raise util.SMException("Local SR multiply attached") 

335 return pbds[0]["host"] == self._hostRef 

336 

337 def getAttachedPBDs(self): 

338 """Return PBD records for all PBDs of this SR that are currently 

339 attached""" 

340 attachedPBDs = [] 

341 pbds = self.session.xenapi.PBD.get_all_records() 

342 for pbdRec in pbds.values(): 

343 if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]: 

344 attachedPBDs.append(pbdRec) 

345 return attachedPBDs 

346 

347 def getOnlineHosts(self): 

348 return util.get_online_hosts(self.session) 

349 

350 def ensureInactive(self, hostRef, args): 

351 text = self.session.xenapi.host.call_plugin( \ 

352 hostRef, self.PLUGIN_ON_SLAVE, "multi", args) 

353 Util.log("call-plugin returned: '%s'" % text) 

354 

355 def getRecordHost(self, hostRef): 

356 return self.session.xenapi.host.get_record(hostRef) 

357 

358 def _getRefVDI(self, uuid): 

359 return self.session.xenapi.VDI.get_by_uuid(uuid) 

360 

361 def getRefVDI(self, vdi): 

362 return self._getRefVDI(vdi.uuid) 

363 

364 def getRecordVDI(self, uuid): 

365 try: 

366 ref = self._getRefVDI(uuid) 

367 return self.session.xenapi.VDI.get_record(ref) 

368 except XenAPI.Failure: 

369 return None 

370 

371 def singleSnapshotVDI(self, vdi): 

372 return self.session.xenapi.VDI.snapshot(vdi.getRef(), 

373 {"type": "internal"}) 

374 

375 def forgetVDI(self, srUuid, vdiUuid): 

376 """Forget the VDI, but handle the case where the VDI has already been 

377 forgotten (i.e. ignore errors)""" 

378 try: 

379 vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid) 

380 self.session.xenapi.VDI.forget(vdiRef) 

381 except XenAPI.Failure: 

382 pass 

383 

384 def getConfigVDI(self, vdi, key): 

385 kind = vdi.CONFIG_TYPE[key] 

386 if kind == self.CONFIG_SM: 

387 cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef()) 

388 elif kind == self.CONFIG_OTHER: 

389 cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef()) 

390 elif kind == self.CONFIG_ON_BOOT: 

391 cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef()) 

392 elif kind == self.CONFIG_ALLOW_CACHING: 

393 cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef()) 

394 else: 

395 assert(False) 

396 Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg))) 

397 return cfg 

398 

399 def removeFromConfigVDI(self, vdi, key): 

400 kind = vdi.CONFIG_TYPE[key] 

401 if kind == self.CONFIG_SM: 

402 self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key) 

403 elif kind == self.CONFIG_OTHER: 

404 self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key) 

405 else: 

406 assert(False) 

407 

408 def addToConfigVDI(self, vdi, key, val): 

409 kind = vdi.CONFIG_TYPE[key] 

410 if kind == self.CONFIG_SM: 

411 self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val) 

412 elif kind == self.CONFIG_OTHER: 

413 self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val) 

414 else: 

415 assert(False) 

416 

417 def isSnapshot(self, vdi): 

418 return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef()) 

419 

420 def markCacheSRsDirty(self): 

421 sr_refs = self.session.xenapi.SR.get_all_records_where( \ 

422 'field "local_cache_enabled" = "true"') 

423 for sr_ref in sr_refs: 

424 Util.log("Marking SR %s dirty" % sr_ref) 

425 util.set_dirty(self.session, sr_ref) 

426 

427 def srUpdate(self): 

428 Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"]) 

429 abortFlag = IPCFlag(self.srRecord["uuid"]) 

430 task = self.session.xenapi.Async.SR.update(self._srRef) 

431 cancelTask = True 

432 try: 

433 for i in range(60): 

434 status = self.session.xenapi.task.get_status(task) 

435 if not status == "pending": 

436 Util.log("SR.update_asynch status changed to [%s]" % status) 

437 cancelTask = False 

438 return 

439 if abortFlag.test(FLAG_TYPE_ABORT): 

440 Util.log("Abort signalled during srUpdate, cancelling task...") 

441 try: 

442 self.session.xenapi.task.cancel(task) 

443 cancelTask = False 

444 Util.log("Task cancelled") 

445 except: 

446 pass 

447 return 

448 time.sleep(1) 

449 finally: 

450 if cancelTask: 

451 self.session.xenapi.task.cancel(task) 

452 self.session.xenapi.task.destroy(task) 

453 Util.log("Asynch srUpdate still running, but timeout exceeded.") 

454 

455 def update_task(self): 

456 self.session.xenapi.task.set_other_config( 

457 self.task, 

458 { 

459 "applies_to": self._srRef 

460 }) 

461 total = self.task_progress['coalescable'] + self.task_progress['done'] 

462 if (total > 0): 

463 self.session.xenapi.task.set_progress( 

464 self.task, float(self.task_progress['done']) / total) 

465 

466 def create_task(self, label, description): 

467 self.task = self.session.xenapi.task.create(label, description) 

468 self.update_task() 

469 

470 def update_task_progress(self, key, value): 

471 self.task_progress[key] = value 

472 if self.task: 

473 self.update_task() 

474 

475 def set_task_status(self, status): 

476 if self.task: 

477 self.session.xenapi.task.set_status(self.task, status) 

478 

479 

480################################################################################ 

481# 

482# VDI 

483# 

484class VDI(object): 

485 """Object representing a VDI of a VHD-based SR""" 

486 

487 POLL_INTERVAL = 1 

488 POLL_TIMEOUT = 30 

489 DEVICE_MAJOR = 202 

490 DRIVER_NAME_VHD = "vhd" 

491 

492 # config keys & values 

493 DB_VHD_PARENT = "vhd-parent" 

494 DB_VDI_TYPE = "vdi_type" 

495 DB_VHD_BLOCKS = "vhd-blocks" 

496 DB_VDI_PAUSED = "paused" 

497 DB_VDI_RELINKING = "relinking" 

498 DB_VDI_ACTIVATING = "activating" 

499 DB_GC = "gc" 

500 DB_COALESCE = "coalesce" 

501 DB_LEAFCLSC = "leaf-coalesce" # config key 

502 LEAFCLSC_DISABLED = "false" # set by user; means do not leaf-coalesce 

503 LEAFCLSC_FORCE = "force" # set by user; means skip snap-coalesce 

504 LEAFCLSC_OFFLINE = "offline" # set here for informational purposes: means 

505 # no space to snap-coalesce or unable to keep 

506 # up with VDI. This is not used by the SM, it 

507 # might be used by external components. 

508 DB_ONBOOT = "on-boot" 

509 ONBOOT_RESET = "reset" 

510 DB_ALLOW_CACHING = "allow_caching" 

511 

512 CONFIG_TYPE = { 

513 DB_VHD_PARENT: XAPI.CONFIG_SM, 

514 DB_VDI_TYPE: XAPI.CONFIG_SM, 

515 DB_VHD_BLOCKS: XAPI.CONFIG_SM, 

516 DB_VDI_PAUSED: XAPI.CONFIG_SM, 

517 DB_VDI_RELINKING: XAPI.CONFIG_SM, 

518 DB_VDI_ACTIVATING: XAPI.CONFIG_SM, 

519 DB_GC: XAPI.CONFIG_OTHER, 

520 DB_COALESCE: XAPI.CONFIG_OTHER, 

521 DB_LEAFCLSC: XAPI.CONFIG_OTHER, 

522 DB_ONBOOT: XAPI.CONFIG_ON_BOOT, 

523 DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING, 

524 } 

525 

526 LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024 # bytes 

527 LIVE_LEAF_COALESCE_TIMEOUT = 10 # seconds 

528 TIMEOUT_SAFETY_MARGIN = 0.5 # extra margin when calculating 

529 # feasibility of leaf coalesce 

530 

531 JRN_RELINK = "relink" # journal entry type for relinking children 

532 JRN_COALESCE = "coalesce" # to communicate which VDI is being coalesced 

533 JRN_LEAF = "leaf" # used in coalesce-leaf 

534 

535 STR_TREE_INDENT = 4 

536 

537 def __init__(self, sr, uuid, raw): 

538 self.sr = sr 

539 self.scanError = True 

540 self.uuid = uuid 

541 self.raw = raw 

542 self.fileName = "" 

543 self.parentUuid = "" 

544 self.sizeVirt = -1 

545 self._sizeVHD = -1 

546 self._sizeAllocated = -1 

547 self.hidden = False 

548 self.parent = None 

549 self.children = [] 

550 self._vdiRef = None 

551 self._clearRef() 

552 

553 @staticmethod 

554 def extractUuid(path): 

555 raise NotImplementedError("Implement in sub class") 

556 

557 def load(self, info=None) -> None: 

558 """Load VDI info""" 

559 pass 

560 

561 def getDriverName(self) -> str: 

562 return self.DRIVER_NAME_VHD 

563 

564 def getRef(self): 

565 if self._vdiRef is None: 

566 self._vdiRef = self.sr.xapi.getRefVDI(self) 

567 return self._vdiRef 

568 

569 def getConfig(self, key, default=None): 

570 config = self.sr.xapi.getConfigVDI(self, key) 

571 if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING: 571 ↛ 572line 571 didn't jump to line 572, because the condition on line 571 was never true

572 val = config 

573 else: 

574 val = config.get(key) 

575 if val: 

576 return val 

577 return default 

578 

579 def setConfig(self, key, val): 

580 self.sr.xapi.removeFromConfigVDI(self, key) 

581 self.sr.xapi.addToConfigVDI(self, key, val) 

582 Util.log("Set %s = %s for %s" % (key, val, self)) 

583 

584 def delConfig(self, key): 

585 self.sr.xapi.removeFromConfigVDI(self, key) 

586 Util.log("Removed %s from %s" % (key, self)) 

587 

588 def ensureUnpaused(self): 

589 if self.getConfig(self.DB_VDI_PAUSED) == "true": 

590 Util.log("Unpausing VDI %s" % self) 

591 self.unpause() 

592 

593 def pause(self, failfast=False) -> None: 

594 if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid, 

595 self.uuid, failfast): 

596 raise util.SMException("Failed to pause VDI %s" % self) 

597 

598 def _report_tapdisk_unpause_error(self): 

599 try: 

600 xapi = self.sr.xapi.session.xenapi 

601 sr_ref = xapi.SR.get_by_uuid(self.sr.uuid) 

602 msg_name = "failed to unpause tapdisk" 

603 msg_body = "Failed to unpause tapdisk for VDI %s, " \ 

604 "VMs using this tapdisk have lost access " \ 

605 "to the corresponding disk(s)" % self.uuid 

606 xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body) 

607 except Exception as e: 

608 util.SMlog("failed to generate message: %s" % e) 

609 

610 def unpause(self): 

611 if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid, 

612 self.uuid): 

613 self._report_tapdisk_unpause_error() 

614 raise util.SMException("Failed to unpause VDI %s" % self) 

615 

616 def refresh(self, ignoreNonexistent=True): 

617 """Pause-unpause in one step""" 

618 self.sr.lock() 

619 try: 

620 try: 

621 if not blktap2.VDI.tap_refresh(self.sr.xapi.session, 621 ↛ 623line 621 didn't jump to line 623, because the condition on line 621 was never true

622 self.sr.uuid, self.uuid): 

623 self._report_tapdisk_unpause_error() 

624 raise util.SMException("Failed to refresh %s" % self) 

625 except XenAPI.Failure as e: 

626 if util.isInvalidVDI(e) and ignoreNonexistent: 

627 Util.log("VDI %s not found, ignoring" % self) 

628 return 

629 raise 

630 finally: 

631 self.sr.unlock() 

632 

633 def isSnapshot(self): 

634 return self.sr.xapi.isSnapshot(self) 

635 

636 def isAttachedRW(self): 

637 return util.is_attached_rw( 

638 self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef())) 

639 

640 def getVHDBlocks(self): 

641 val = self.updateBlockInfo() 

642 bitmap = zlib.decompress(base64.b64decode(val)) 

643 return bitmap 

644 

645 def isCoalesceable(self): 

646 """A VDI is coalesceable if it has no siblings and is not a leaf""" 

647 return not self.scanError and \ 

648 self.parent and \ 

649 len(self.parent.children) == 1 and \ 

650 self.hidden and \ 

651 len(self.children) > 0 

652 

653 def isLeafCoalesceable(self): 

654 """A VDI is leaf-coalesceable if it has no siblings and is a leaf""" 

655 return not self.scanError and \ 

656 self.parent and \ 

657 len(self.parent.children) == 1 and \ 

658 not self.hidden and \ 

659 len(self.children) == 0 

660 

661 def canLiveCoalesce(self, speed): 

662 """Can we stop-and-leaf-coalesce this VDI? The VDI must be 

663 isLeafCoalesceable() already""" 

664 feasibleSize = False 

665 allowedDownTime = \ 

666 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT 

667 vhd_size = self.getAllocatedSize() 

668 if speed: 

669 feasibleSize = \ 

670 vhd_size // speed < allowedDownTime 

671 else: 

672 feasibleSize = \ 

673 vhd_size < self.LIVE_LEAF_COALESCE_MAX_SIZE 

674 

675 return (feasibleSize or 

676 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE) 

677 

678 def getAllPrunable(self): 

679 if len(self.children) == 0: # base case 

680 # it is possible to have a hidden leaf that was recently coalesced 

681 # onto its parent, its children already relinked but not yet 

682 # reloaded - in which case it may not be garbage collected yet: 

683 # some tapdisks could still be using the file. 

684 if self.sr.journaler.get(self.JRN_RELINK, self.uuid): 

685 return [] 

686 if not self.scanError and self.hidden: 

687 return [self] 

688 return [] 

689 

690 thisPrunable = True 

691 vdiList = [] 

692 for child in self.children: 

693 childList = child.getAllPrunable() 

694 vdiList.extend(childList) 

695 if child not in childList: 

696 thisPrunable = False 

697 

698 # We can destroy the current VDI if all childs are hidden BUT the 

699 # current VDI must be hidden too to do that! 

700 # Example in this case (after a failed live leaf coalesce): 

701 # 

702 # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees): 

703 # SMGC: [32436] b5458d61(1.000G/4.127M) 

704 # SMGC: [32436] *OLD_b545(1.000G/4.129M) 

705 # 

706 # OLD_b545 is hidden and must be removed, but b5458d61 not. 

707 # Normally we are not in this function when the delete action is 

708 # executed but in `_liveLeafCoalesce`. 

709 

710 if not self.scanError and not self.hidden and thisPrunable: 

711 vdiList.append(self) 

712 return vdiList 

713 

714 def getSizeVHD(self) -> int: 

715 return self._sizeVHD 

716 

717 def getAllocatedSize(self) -> int: 

718 return self._sizeAllocated 

719 

720 def getTreeRoot(self): 

721 "Get the root of the tree that self belongs to" 

722 root = self 

723 while root.parent: 

724 root = root.parent 

725 return root 

726 

727 def getTreeHeight(self): 

728 "Get the height of the subtree rooted at self" 

729 if len(self.children) == 0: 

730 return 1 

731 

732 maxChildHeight = 0 

733 for child in self.children: 

734 childHeight = child.getTreeHeight() 

735 if childHeight > maxChildHeight: 

736 maxChildHeight = childHeight 

737 

738 return maxChildHeight + 1 

739 

740 def getAllLeaves(self): 

741 "Get all leaf nodes in the subtree rooted at self" 

742 if len(self.children) == 0: 

743 return [self] 

744 

745 leaves = [] 

746 for child in self.children: 

747 leaves.extend(child.getAllLeaves()) 

748 return leaves 

749 

750 def updateBlockInfo(self) -> Optional[str]: 

751 val = base64.b64encode(self._queryVHDBlocks()).decode() 

752 self.setConfig(VDI.DB_VHD_BLOCKS, val) 

753 return val 

754 

755 def rename(self, uuid) -> None: 

756 "Rename the VDI file" 

757 assert(not self.sr.vdis.get(uuid)) 

758 self._clearRef() 

759 oldUuid = self.uuid 

760 self.uuid = uuid 

761 self.children = [] 

762 # updating the children themselves is the responsibility of the caller 

763 del self.sr.vdis[oldUuid] 

764 self.sr.vdis[self.uuid] = self 

765 

766 def delete(self) -> None: 

767 "Physically delete the VDI" 

768 lock.Lock.cleanup(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) 

769 lock.Lock.cleanupAll(self.uuid) 

770 self._clear() 

771 

772 def getParent(self) -> str: 

773 return vhdutil.getParent(self.path, lambda x: x.strip()) 773 ↛ exitline 773 didn't run the lambda on line 773

774 

775 def repair(self, parent) -> None: 

776 vhdutil.repair(parent) 

777 

778 @override 

779 def __str__(self) -> str: 

780 strHidden = "" 

781 if self.hidden: 781 ↛ 782line 781 didn't jump to line 782, because the condition on line 781 was never true

782 strHidden = "*" 

783 strSizeVirt = "?" 

784 if self.sizeVirt > 0: 784 ↛ 785line 784 didn't jump to line 785, because the condition on line 784 was never true

785 strSizeVirt = Util.num2str(self.sizeVirt) 

786 strSizeVHD = "?" 

787 if self._sizeVHD > 0: 787 ↛ 788line 787 didn't jump to line 788, because the condition on line 787 was never true

788 strSizeVHD = "/%s" % Util.num2str(self._sizeVHD) 

789 strSizeAllocated = "?" 

790 if self._sizeAllocated >= 0: 

791 strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated) 

792 strType = "" 

793 if self.raw: 

794 strType = "[RAW]" 

795 strSizeVHD = "" 

796 

797 return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt, 

798 strSizeVHD, strSizeAllocated, strType) 

799 

800 def validate(self, fast=False) -> None: 

801 if not vhdutil.check(self.path, fast=fast): 801 ↛ 802line 801 didn't jump to line 802, because the condition on line 801 was never true

802 raise util.SMException("VHD %s corrupted" % self) 

803 

804 def _clear(self): 

805 self.uuid = "" 

806 self.path = "" 

807 self.parentUuid = "" 

808 self.parent = None 

809 self._clearRef() 

810 

811 def _clearRef(self): 

812 self._vdiRef = None 

813 

814 def _doCoalesce(self) -> None: 

815 """Coalesce self onto parent. Only perform the actual coalescing of 

816 VHD, but not the subsequent relinking. We'll do that as the next step, 

817 after reloading the entire SR in case things have changed while we 

818 were coalescing""" 

819 self.validate() 

820 self.parent.validate(True) 

821 self.parent._increaseSizeVirt(self.sizeVirt) 

822 self.sr._updateSlavesOnResize(self.parent) 

823 self._coalesceVHD(0) 

824 self.parent.validate(True) 

825 #self._verifyContents(0) 

826 self.parent.updateBlockInfo() 

827 

828 def _verifyContents(self, timeOut): 

829 Util.log(" Coalesce verification on %s" % self) 

830 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

831 Util.runAbortable(lambda: self._runTapdiskDiff(), True, 

832 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

833 Util.log(" Coalesce verification succeeded") 

834 

835 def _runTapdiskDiff(self): 

836 cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \ 

837 (self.getDriverName(), self.path, \ 

838 self.parent.getDriverName(), self.parent.path) 

839 Util.doexec(cmd, 0) 

840 return True 

841 

842 @staticmethod 

843 def _reportCoalesceError(vdi, ce): 

844 """Reports a coalesce error to XenCenter. 

845 

846 vdi: the VDI object on which the coalesce error occured 

847 ce: the CommandException that was raised""" 

848 

849 msg_name = os.strerror(ce.code) 

850 if ce.code == errno.ENOSPC: 

851 # TODO We could add more information here, e.g. exactly how much 

852 # space is required for the particular coalesce, as well as actions 

853 # to be taken by the user and consequences of not taking these 

854 # actions. 

855 msg_body = 'Run out of space while coalescing.' 

856 elif ce.code == errno.EIO: 

857 msg_body = 'I/O error while coalescing.' 

858 else: 

859 msg_body = '' 

860 util.SMlog('Coalesce failed on SR %s: %s (%s)' 

861 % (vdi.sr.uuid, msg_name, msg_body)) 

862 

863 # Create a XenCenter message, but don't spam. 

864 xapi = vdi.sr.xapi.session.xenapi 

865 sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid) 

866 oth_cfg = xapi.SR.get_other_config(sr_ref) 

867 if COALESCE_ERR_RATE_TAG in oth_cfg: 

868 coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG]) 

869 else: 

870 coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE 

871 

872 xcmsg = False 

873 if coalesce_err_rate == 0: 

874 xcmsg = True 

875 elif coalesce_err_rate > 0: 

876 now = datetime.datetime.now() 

877 sm_cfg = xapi.SR.get_sm_config(sr_ref) 

878 if COALESCE_LAST_ERR_TAG in sm_cfg: 

879 # seconds per message (minimum distance in time between two 

880 # messages in seconds) 

881 spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60) 

882 last = datetime.datetime.fromtimestamp( 

883 float(sm_cfg[COALESCE_LAST_ERR_TAG])) 

884 if now - last >= spm: 

885 xapi.SR.remove_from_sm_config(sr_ref, 

886 COALESCE_LAST_ERR_TAG) 

887 xcmsg = True 

888 else: 

889 xcmsg = True 

890 if xcmsg: 

891 xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG, 

892 str(now.strftime('%s'))) 

893 if xcmsg: 

894 xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body) 

895 

896 def coalesce(self) -> int: 

897 # size is returned in sectors 

898 return vhdutil.coalesce(self.path) * 512 

899 

900 @staticmethod 

901 def _doCoalesceVHD(vdi): 

902 try: 

903 startTime = time.time() 

904 vhdSize = vdi.getAllocatedSize() 

905 coalesced_size = vdi.coalesce() 

906 endTime = time.time() 

907 vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size) 

908 except util.CommandException as ce: 

909 # We use try/except for the following piece of code because it runs 

910 # in a separate process context and errors will not be caught and 

911 # reported by anyone. 

912 try: 

913 # Report coalesce errors back to user via XC 

914 VDI._reportCoalesceError(vdi, ce) 

915 except Exception as e: 

916 util.SMlog('failed to create XenCenter message: %s' % e) 

917 raise ce 

918 except: 

919 raise 

920 

921 def _vdi_is_raw(self, vdi_path): 

922 """ 

923 Given path to vdi determine if it is raw 

924 """ 

925 uuid = self.extractUuid(vdi_path) 

926 return self.sr.vdis[uuid].raw 

927 

928 def _coalesceVHD(self, timeOut): 

929 Util.log(" Running VHD coalesce on %s" % self) 

930 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 930 ↛ exitline 930 didn't run the lambda on line 930

931 try: 

932 util.fistpoint.activate_custom_fn( 

933 "cleanup_coalesceVHD_inject_failure", 

934 util.inject_failure) 

935 Util.runAbortable(lambda: VDI._doCoalesceVHD(self), None, 

936 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

937 except: 

938 #exception at this phase could indicate a failure in vhd coalesce 

939 # or a kill of vhd coalesce by runAbortable due to timeOut 

940 # Try a repair and reraise the exception 

941 parent = "" 

942 try: 

943 parent = self.getParent() 

944 if not self._vdi_is_raw(parent): 

945 # Repair error is logged and ignored. Error reraised later 

946 util.SMlog('Coalesce failed on %s, attempting repair on ' \ 

947 'parent %s' % (self.uuid, parent)) 

948 self.repair(parent) 

949 except Exception as e: 

950 util.SMlog('(error ignored) Failed to repair parent %s ' \ 

951 'after failed coalesce on %s, err: %s' % 

952 (parent, self.path, e)) 

953 raise 

954 

955 util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid) 

956 

957 def _relinkSkip(self) -> None: 

958 """Relink children of this VDI to point to the parent of this VDI""" 

959 abortFlag = IPCFlag(self.sr.uuid) 

960 for child in self.children: 

961 if abortFlag.test(FLAG_TYPE_ABORT): 961 ↛ 962line 961 didn't jump to line 962, because the condition on line 961 was never true

962 raise AbortException("Aborting due to signal") 

963 Util.log(" Relinking %s from %s to %s" % \ 

964 (child, self, self.parent)) 

965 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid) 

966 child._setParent(self.parent) 

967 self.children = [] 

968 

969 def _reloadChildren(self, vdiSkip): 

970 """Pause & unpause all VDIs in the subtree to cause blktap to reload 

971 the VHD metadata for this file in any online VDI""" 

972 abortFlag = IPCFlag(self.sr.uuid) 

973 for child in self.children: 

974 if child == vdiSkip: 

975 continue 

976 if abortFlag.test(FLAG_TYPE_ABORT): 976 ↛ 977line 976 didn't jump to line 977, because the condition on line 976 was never true

977 raise AbortException("Aborting due to signal") 

978 Util.log(" Reloading VDI %s" % child) 

979 child._reload() 

980 

981 def _reload(self): 

982 """Pause & unpause to cause blktap to reload the VHD metadata""" 

983 for child in self.children: 983 ↛ 984line 983 didn't jump to line 984, because the loop on line 983 never started

984 child._reload() 

985 

986 # only leaves can be attached 

987 if len(self.children) == 0: 987 ↛ exitline 987 didn't return from function '_reload', because the condition on line 987 was never false

988 try: 

989 self.delConfig(VDI.DB_VDI_RELINKING) 

990 except XenAPI.Failure as e: 

991 if not util.isInvalidVDI(e): 

992 raise 

993 self.refresh() 

994 

995 def _tagChildrenForRelink(self): 

996 if len(self.children) == 0: 

997 retries = 0 

998 try: 

999 while retries < 15: 

1000 retries += 1 

1001 if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None: 

1002 Util.log("VDI %s is activating, wait to relink" % 

1003 self.uuid) 

1004 else: 

1005 self.setConfig(VDI.DB_VDI_RELINKING, "True") 

1006 

1007 if self.getConfig(VDI.DB_VDI_ACTIVATING): 

1008 self.delConfig(VDI.DB_VDI_RELINKING) 

1009 Util.log("VDI %s started activating while tagging" % 

1010 self.uuid) 

1011 else: 

1012 return 

1013 time.sleep(2) 

1014 

1015 raise util.SMException("Failed to tag vdi %s for relink" % self) 

1016 except XenAPI.Failure as e: 

1017 if not util.isInvalidVDI(e): 

1018 raise 

1019 

1020 for child in self.children: 

1021 child._tagChildrenForRelink() 

1022 

1023 def _loadInfoParent(self): 

1024 ret = vhdutil.getParent(self.path, lvhdutil.extractUuid) 

1025 if ret: 

1026 self.parentUuid = ret 

1027 

1028 def _setParent(self, parent) -> None: 

1029 vhdutil.setParent(self.path, parent.path, False) 

1030 self.parent = parent 

1031 self.parentUuid = parent.uuid 

1032 parent.children.append(self) 

1033 try: 

1034 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1035 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1036 (self.uuid, self.parentUuid)) 

1037 except: 

1038 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1039 (self.uuid, self.parentUuid)) 

1040 

1041 def _loadInfoHidden(self) -> None: 

1042 hidden = vhdutil.getHidden(self.path) 

1043 self.hidden = (hidden != 0) 

1044 

1045 def _setHidden(self, hidden=True) -> None: 

1046 vhdutil.setHidden(self.path, hidden) 

1047 self.hidden = hidden 

1048 

1049 def _increaseSizeVirt(self, size, atomic=True) -> None: 

1050 """ensure the virtual size of 'self' is at least 'size'. Note that 

1051 resizing a VHD must always be offline and atomically: the file must 

1052 not be open by anyone and no concurrent operations may take place. 

1053 Thus we use the Agent API call for performing paused atomic 

1054 operations. If the caller is already in the atomic context, it must 

1055 call with atomic = False""" 

1056 if self.sizeVirt >= size: 1056 ↛ 1058line 1056 didn't jump to line 1058, because the condition on line 1056 was never false

1057 return 

1058 Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \ 

1059 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1060 

1061 msize = vhdutil.getMaxResizeSize(self.path) * 1024 * 1024 

1062 if (size <= msize): 

1063 vhdutil.setSizeVirtFast(self.path, size) 

1064 else: 

1065 if atomic: 

1066 vdiList = self._getAllSubtree() 

1067 self.sr.lock() 

1068 try: 

1069 self.sr.pauseVDIs(vdiList) 

1070 try: 

1071 self._setSizeVirt(size) 

1072 finally: 

1073 self.sr.unpauseVDIs(vdiList) 

1074 finally: 

1075 self.sr.unlock() 

1076 else: 

1077 self._setSizeVirt(size) 

1078 

1079 self.sizeVirt = vhdutil.getSizeVirt(self.path) 

1080 

1081 def _setSizeVirt(self, size) -> None: 

1082 """WARNING: do not call this method directly unless all VDIs in the 

1083 subtree are guaranteed to be unplugged (and remain so for the duration 

1084 of the operation): this operation is only safe for offline VHDs""" 

1085 jFile = os.path.join(self.sr.path, self.uuid) 

1086 vhdutil.setSizeVirt(self.path, size, jFile) 

1087 

1088 def _queryVHDBlocks(self) -> bytes: 

1089 return vhdutil.getBlockBitmap(self.path) 

1090 

1091 def _getCoalescedSizeData(self): 

1092 """Get the data size of the resulting VHD if we coalesce self onto 

1093 parent. We calculate the actual size by using the VHD block allocation 

1094 information (as opposed to just adding up the two VHD sizes to get an 

1095 upper bound)""" 

1096 # make sure we don't use stale BAT info from vdi_rec since the child 

1097 # was writable all this time 

1098 self.delConfig(VDI.DB_VHD_BLOCKS) 

1099 blocksChild = self.getVHDBlocks() 

1100 blocksParent = self.parent.getVHDBlocks() 

1101 numBlocks = Util.countBits(blocksChild, blocksParent) 

1102 Util.log("Num combined blocks = %d" % numBlocks) 

1103 sizeData = numBlocks * vhdutil.VHD_BLOCK_SIZE 

1104 assert(sizeData <= self.sizeVirt) 

1105 return sizeData 

1106 

1107 def _calcExtraSpaceForCoalescing(self) -> int: 

1108 sizeData = self._getCoalescedSizeData() 

1109 sizeCoalesced = sizeData + vhdutil.calcOverheadBitmap(sizeData) + \ 

1110 vhdutil.calcOverheadEmpty(self.sizeVirt) 

1111 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1112 return sizeCoalesced - self.parent.getSizeVHD() 

1113 

1114 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1115 """How much extra space in the SR will be required to 

1116 [live-]leaf-coalesce this VDI""" 

1117 # the space requirements are the same as for inline coalesce 

1118 return self._calcExtraSpaceForCoalescing() 

1119 

1120 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1121 """How much extra space in the SR will be required to 

1122 snapshot-coalesce this VDI""" 

1123 return self._calcExtraSpaceForCoalescing() + \ 

1124 vhdutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf 

1125 

1126 def _getAllSubtree(self): 

1127 """Get self and all VDIs in the subtree of self as a flat list""" 

1128 vdiList = [self] 

1129 for child in self.children: 

1130 vdiList.extend(child._getAllSubtree()) 

1131 return vdiList 

1132 

1133 

1134class FileVDI(VDI): 

1135 """Object representing a VDI in a file-based SR (EXT or NFS)""" 

1136 

1137 @staticmethod 

1138 def extractUuid(path): 

1139 path = os.path.basename(path.strip()) 

1140 if not (path.endswith(vhdutil.FILE_EXTN_VHD) or \ 1140 ↛ 1142line 1140 didn't jump to line 1142, because the condition on line 1140 was never true

1141 path.endswith(vhdutil.FILE_EXTN_RAW)): 

1142 return None 

1143 uuid = path.replace(vhdutil.FILE_EXTN_VHD, "").replace( \ 

1144 vhdutil.FILE_EXTN_RAW, "") 

1145 # TODO: validate UUID format 

1146 return uuid 

1147 

1148 def __init__(self, sr, uuid, raw): 

1149 VDI.__init__(self, sr, uuid, raw) 

1150 if self.raw: 1150 ↛ 1151line 1150 didn't jump to line 1151, because the condition on line 1150 was never true

1151 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_RAW) 

1152 else: 

1153 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) 

1154 

1155 @override 

1156 def load(self, info=None) -> None: 

1157 if not info: 

1158 if not util.pathexists(self.path): 

1159 raise util.SMException("%s not found" % self.path) 

1160 try: 

1161 info = vhdutil.getVHDInfo(self.path, self.extractUuid) 

1162 except util.SMException: 

1163 Util.log(" [VDI %s: failed to read VHD metadata]" % self.uuid) 

1164 return 

1165 self.parent = None 

1166 self.children = [] 

1167 self.parentUuid = info.parentUuid 

1168 self.sizeVirt = info.sizeVirt 

1169 self._sizeVHD = info.sizePhys 

1170 self._sizeAllocated = info.sizeAllocated 

1171 self.hidden = info.hidden 

1172 self.scanError = False 

1173 self.path = os.path.join(self.sr.path, "%s%s" % \ 

1174 (self.uuid, vhdutil.FILE_EXTN_VHD)) 

1175 

1176 @override 

1177 def rename(self, uuid) -> None: 

1178 oldPath = self.path 

1179 VDI.rename(self, uuid) 

1180 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) 

1181 self.path = os.path.join(self.sr.path, self.fileName) 

1182 assert(not util.pathexists(self.path)) 

1183 Util.log("Renaming %s -> %s" % (oldPath, self.path)) 

1184 os.rename(oldPath, self.path) 

1185 

1186 @override 

1187 def delete(self) -> None: 

1188 if len(self.children) > 0: 1188 ↛ 1189line 1188 didn't jump to line 1189, because the condition on line 1188 was never true

1189 raise util.SMException("VDI %s has children, can't delete" % \ 

1190 self.uuid) 

1191 try: 

1192 self.sr.lock() 

1193 try: 

1194 os.unlink(self.path) 

1195 self.sr.forgetVDI(self.uuid) 

1196 finally: 

1197 self.sr.unlock() 

1198 except OSError: 

1199 raise util.SMException("os.unlink(%s) failed" % self.path) 

1200 VDI.delete(self) 

1201 

1202 @override 

1203 def getAllocatedSize(self) -> int: 

1204 if self._sizeAllocated == -1: 1204 ↛ 1205line 1204 didn't jump to line 1205, because the condition on line 1204 was never true

1205 self._sizeAllocated = vhdutil.getAllocatedSize(self.path) 

1206 return self._sizeAllocated 

1207 

1208 

1209class LVHDVDI(VDI): 

1210 """Object representing a VDI in an LVHD SR""" 

1211 

1212 JRN_ZERO = "zero" # journal entry type for zeroing out end of parent 

1213 DRIVER_NAME_RAW = "aio" 

1214 

1215 @override 

1216 def load(self, info=None) -> None: 

1217 # `info` is always set. `None` default value is only here to match parent method. 

1218 assert info, "No info given to LVHDVDI.load" 

1219 self.parent = None 

1220 self.children = [] 

1221 self._sizeVHD = -1 

1222 self._sizeAllocated = -1 

1223 self.scanError = info.scanError 

1224 self.sizeLV = info.sizeLV 

1225 self.sizeVirt = info.sizeVirt 

1226 self.fileName = info.lvName 

1227 self.lvActive = info.lvActive 

1228 self.lvOpen = info.lvOpen 

1229 self.lvReadonly = info.lvReadonly 

1230 self.hidden = info.hidden 

1231 self.parentUuid = info.parentUuid 

1232 self.path = os.path.join(self.sr.path, self.fileName) 

1233 

1234 @staticmethod 

1235 def extractUuid(path): 

1236 return lvhdutil.extractUuid(path) 

1237 

1238 @override 

1239 def getDriverName(self) -> str: 

1240 if self.raw: 

1241 return self.DRIVER_NAME_RAW 

1242 return self.DRIVER_NAME_VHD 

1243 

1244 def inflate(self, size): 

1245 """inflate the LV containing the VHD to 'size'""" 

1246 if self.raw: 

1247 return 

1248 self._activate() 

1249 self.sr.lock() 

1250 try: 

1251 lvhdutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, size) 

1252 util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid) 

1253 finally: 

1254 self.sr.unlock() 

1255 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1256 self._sizeVHD = -1 

1257 self._sizeAllocated = -1 

1258 

1259 def deflate(self): 

1260 """deflate the LV containing the VHD to minimum""" 

1261 if self.raw: 

1262 return 

1263 self._activate() 

1264 self.sr.lock() 

1265 try: 

1266 lvhdutil.deflate(self.sr.lvmCache, self.fileName, self.getSizeVHD()) 

1267 finally: 

1268 self.sr.unlock() 

1269 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1270 self._sizeVHD = -1 

1271 self._sizeAllocated = -1 

1272 

1273 def inflateFully(self): 

1274 self.inflate(lvhdutil.calcSizeVHDLV(self.sizeVirt)) 

1275 

1276 def inflateParentForCoalesce(self): 

1277 """Inflate the parent only as much as needed for the purposes of 

1278 coalescing""" 

1279 if self.parent.raw: 

1280 return 

1281 inc = self._calcExtraSpaceForCoalescing() 

1282 if inc > 0: 

1283 util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid) 

1284 self.parent.inflate(self.parent.sizeLV + inc) 

1285 

1286 @override 

1287 def updateBlockInfo(self) -> Optional[str]: 

1288 if not self.raw: 

1289 return VDI.updateBlockInfo(self) 

1290 return None 

1291 

1292 @override 

1293 def rename(self, uuid) -> None: 

1294 oldUuid = self.uuid 

1295 oldLVName = self.fileName 

1296 VDI.rename(self, uuid) 

1297 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + self.uuid 

1298 if self.raw: 

1299 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + self.uuid 

1300 self.path = os.path.join(self.sr.path, self.fileName) 

1301 assert(not self.sr.lvmCache.checkLV(self.fileName)) 

1302 

1303 self.sr.lvmCache.rename(oldLVName, self.fileName) 

1304 if self.sr.lvActivator.get(oldUuid, False): 

1305 self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False) 

1306 

1307 ns = lvhdutil.NS_PREFIX_LVM + self.sr.uuid 

1308 (cnt, bcnt) = RefCounter.check(oldUuid, ns) 

1309 RefCounter.set(self.uuid, cnt, bcnt, ns) 

1310 RefCounter.reset(oldUuid, ns) 

1311 

1312 @override 

1313 def delete(self) -> None: 

1314 if len(self.children) > 0: 

1315 raise util.SMException("VDI %s has children, can't delete" % \ 

1316 self.uuid) 

1317 self.sr.lock() 

1318 try: 

1319 self.sr.lvmCache.remove(self.fileName) 

1320 self.sr.forgetVDI(self.uuid) 

1321 finally: 

1322 self.sr.unlock() 

1323 RefCounter.reset(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) 

1324 VDI.delete(self) 

1325 

1326 @override 

1327 def getSizeVHD(self) -> int: 

1328 if self._sizeVHD == -1: 

1329 self._loadInfoSizeVHD() 

1330 return self._sizeVHD 

1331 

1332 def _loadInfoSizeVHD(self): 

1333 """Get the physical utilization of the VHD file. We do it individually 

1334 (and not using the VHD batch scanner) as an optimization: this info is 

1335 relatively expensive and we need it only for VDI's involved in 

1336 coalescing.""" 

1337 if self.raw: 

1338 return 

1339 self._activate() 

1340 self._sizeVHD = vhdutil.getSizePhys(self.path) 

1341 if self._sizeVHD <= 0: 

1342 raise util.SMException("phys size of %s = %d" % \ 

1343 (self, self._sizeVHD)) 

1344 

1345 @override 

1346 def getAllocatedSize(self) -> int: 

1347 if self._sizeAllocated == -1: 

1348 self._loadInfoSizeAllocated() 

1349 return self._sizeAllocated 

1350 

1351 def _loadInfoSizeAllocated(self): 

1352 """ 

1353 Get the allocated size of the VHD volume. 

1354 """ 

1355 if self.raw: 

1356 return 

1357 self._activate() 

1358 self._sizeAllocated = vhdutil.getAllocatedSize(self.path) 

1359 

1360 @override 

1361 def _loadInfoHidden(self) -> None: 

1362 if self.raw: 

1363 self.hidden = self.sr.lvmCache.getHidden(self.fileName) 

1364 else: 

1365 VDI._loadInfoHidden(self) 

1366 

1367 @override 

1368 def _setHidden(self, hidden=True) -> None: 

1369 if self.raw: 

1370 self.sr.lvmCache.setHidden(self.fileName, hidden) 

1371 self.hidden = hidden 

1372 else: 

1373 VDI._setHidden(self, hidden) 

1374 

1375 @override 

1376 def __str__(self) -> str: 

1377 strType = "VHD" 

1378 if self.raw: 

1379 strType = "RAW" 

1380 strHidden = "" 

1381 if self.hidden: 

1382 strHidden = "*" 

1383 strSizeVHD = "" 

1384 if self._sizeVHD > 0: 

1385 strSizeVHD = Util.num2str(self._sizeVHD) 

1386 strSizeAllocated = "" 

1387 if self._sizeAllocated >= 0: 

1388 strSizeAllocated = Util.num2str(self._sizeAllocated) 

1389 strActive = "n" 

1390 if self.lvActive: 

1391 strActive = "a" 

1392 if self.lvOpen: 

1393 strActive += "o" 

1394 return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType, 

1395 Util.num2str(self.sizeVirt), strSizeVHD, strSizeAllocated, 

1396 Util.num2str(self.sizeLV), strActive) 

1397 

1398 @override 

1399 def validate(self, fast=False) -> None: 

1400 if not self.raw: 

1401 VDI.validate(self, fast) 

1402 

1403 @override 

1404 def _doCoalesce(self) -> None: 

1405 """LVHD parents must first be activated, inflated, and made writable""" 

1406 try: 

1407 self._activateChain() 

1408 self.sr.lvmCache.setReadonly(self.parent.fileName, False) 

1409 self.parent.validate() 

1410 self.inflateParentForCoalesce() 

1411 VDI._doCoalesce(self) 

1412 finally: 

1413 self.parent._loadInfoSizeVHD() 

1414 self.parent.deflate() 

1415 self.sr.lvmCache.setReadonly(self.parent.fileName, True) 

1416 

1417 @override 

1418 def _setParent(self, parent) -> None: 

1419 self._activate() 

1420 if self.lvReadonly: 

1421 self.sr.lvmCache.setReadonly(self.fileName, False) 

1422 

1423 try: 

1424 vhdutil.setParent(self.path, parent.path, parent.raw) 

1425 finally: 

1426 if self.lvReadonly: 

1427 self.sr.lvmCache.setReadonly(self.fileName, True) 

1428 self._deactivate() 

1429 self.parent = parent 

1430 self.parentUuid = parent.uuid 

1431 parent.children.append(self) 

1432 try: 

1433 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1434 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1435 (self.uuid, self.parentUuid)) 

1436 except: 

1437 Util.log("Failed to update the vhd-parent with %s for child %s" % \ 

1438 (self.parentUuid, self.uuid)) 

1439 

1440 def _activate(self): 

1441 self.sr.lvActivator.activate(self.uuid, self.fileName, False) 

1442 

1443 def _activateChain(self): 

1444 vdi = self 

1445 while vdi: 

1446 vdi._activate() 

1447 vdi = vdi.parent 

1448 

1449 def _deactivate(self): 

1450 self.sr.lvActivator.deactivate(self.uuid, False) 

1451 

1452 @override 

1453 def _increaseSizeVirt(self, size, atomic=True) -> None: 

1454 "ensure the virtual size of 'self' is at least 'size'" 

1455 self._activate() 

1456 if not self.raw: 

1457 VDI._increaseSizeVirt(self, size, atomic) 

1458 return 

1459 

1460 # raw VDI case 

1461 offset = self.sizeLV 

1462 if self.sizeVirt < size: 

1463 oldSize = self.sizeLV 

1464 self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size) 

1465 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV)) 

1466 self.sr.lvmCache.setSize(self.fileName, self.sizeLV) 

1467 offset = oldSize 

1468 unfinishedZero = False 

1469 jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid) 

1470 if jval: 

1471 unfinishedZero = True 

1472 offset = int(jval) 

1473 length = self.sizeLV - offset 

1474 if not length: 

1475 return 

1476 

1477 if unfinishedZero: 

1478 Util.log(" ==> Redoing unfinished zeroing out") 

1479 else: 

1480 self.sr.journaler.create(self.JRN_ZERO, self.uuid, \ 

1481 str(offset)) 

1482 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1483 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1484 func = lambda: util.zeroOut(self.path, offset, length) 

1485 Util.runAbortable(func, True, self.sr.uuid, abortTest, 

1486 VDI.POLL_INTERVAL, 0) 

1487 self.sr.journaler.remove(self.JRN_ZERO, self.uuid) 

1488 

1489 @override 

1490 def _setSizeVirt(self, size) -> None: 

1491 """WARNING: do not call this method directly unless all VDIs in the 

1492 subtree are guaranteed to be unplugged (and remain so for the duration 

1493 of the operation): this operation is only safe for offline VHDs""" 

1494 self._activate() 

1495 jFile = lvhdutil.createVHDJournalLV(self.sr.lvmCache, self.uuid, 

1496 vhdutil.MAX_VHD_JOURNAL_SIZE) 

1497 try: 

1498 lvhdutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, 

1499 size, jFile) 

1500 finally: 

1501 lvhdutil.deleteVHDJournalLV(self.sr.lvmCache, self.uuid) 

1502 

1503 @override 

1504 def _queryVHDBlocks(self) -> bytes: 

1505 self._activate() 

1506 return VDI._queryVHDBlocks(self) 

1507 

1508 @override 

1509 def _calcExtraSpaceForCoalescing(self) -> int: 

1510 if self.parent.raw: 

1511 return 0 # raw parents are never deflated in the first place 

1512 sizeCoalesced = lvhdutil.calcSizeVHDLV(self._getCoalescedSizeData()) 

1513 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1514 return sizeCoalesced - self.parent.sizeLV 

1515 

1516 @override 

1517 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1518 """How much extra space in the SR will be required to 

1519 [live-]leaf-coalesce this VDI""" 

1520 # we can deflate the leaf to minimize the space requirements 

1521 deflateDiff = self.sizeLV - lvhdutil.calcSizeLV(self.getSizeVHD()) 

1522 return self._calcExtraSpaceForCoalescing() - deflateDiff 

1523 

1524 @override 

1525 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1526 return self._calcExtraSpaceForCoalescing() + \ 

1527 lvhdutil.calcSizeLV(self.getSizeVHD()) 

1528 

1529 

1530class LinstorVDI(VDI): 

1531 """Object representing a VDI in a LINSTOR SR""" 

1532 

1533 VOLUME_LOCK_TIMEOUT = 30 

1534 

1535 @override 

1536 def load(self, info=None) -> None: 

1537 self.parentUuid = info.parentUuid 

1538 self.scanError = True 

1539 self.parent = None 

1540 self.children = [] 

1541 

1542 self.fileName = self.sr._linstor.get_volume_name(self.uuid) 

1543 self.path = self.sr._linstor.build_device_path(self.fileName) 

1544 

1545 if not info: 

1546 try: 

1547 info = self.sr._vhdutil.get_vhd_info(self.uuid) 

1548 except util.SMException: 

1549 Util.log( 

1550 ' [VDI {}: failed to read VHD metadata]'.format(self.uuid) 

1551 ) 

1552 return 

1553 

1554 self.parentUuid = info.parentUuid 

1555 self.sizeVirt = info.sizeVirt 

1556 self._sizeVHD = -1 

1557 self._sizeAllocated = -1 

1558 self.drbd_size = -1 

1559 self.hidden = info.hidden 

1560 self.scanError = False 

1561 self.vdi_type = vhdutil.VDI_TYPE_VHD 

1562 

1563 @override 

1564 def getSizeVHD(self, fetch=False) -> int: 

1565 if self._sizeVHD < 0 or fetch: 

1566 self._sizeVHD = self.sr._vhdutil.get_size_phys(self.uuid) 

1567 return self._sizeVHD 

1568 

1569 def getDrbdSize(self, fetch=False): 

1570 if self.drbd_size < 0 or fetch: 

1571 self.drbd_size = self.sr._vhdutil.get_drbd_size(self.uuid) 

1572 return self.drbd_size 

1573 

1574 @override 

1575 def getAllocatedSize(self) -> int: 

1576 if self._sizeAllocated == -1: 

1577 if not self.raw: 

1578 self._sizeAllocated = self.sr._vhdutil.get_allocated_size(self.uuid) 

1579 return self._sizeAllocated 

1580 

1581 def inflate(self, size): 

1582 if self.raw: 

1583 return 

1584 self.sr.lock() 

1585 try: 

1586 # Ensure we use the real DRBD size and not the cached one. 

1587 # Why? Because this attribute can be changed if volume is resized by user. 

1588 self.drbd_size = self.getDrbdSize(fetch=True) 

1589 self.sr._vhdutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size) 

1590 finally: 

1591 self.sr.unlock() 

1592 self.drbd_size = -1 

1593 self._sizeVHD = -1 

1594 self._sizeAllocated = -1 

1595 

1596 def deflate(self): 

1597 if self.raw: 

1598 return 

1599 self.sr.lock() 

1600 try: 

1601 # Ensure we use the real sizes and not the cached info. 

1602 self.drbd_size = self.getDrbdSize(fetch=True) 

1603 self._sizeVHD = self.getSizeVHD(fetch=True) 

1604 self.sr._vhdutil.force_deflate(self.path, self._sizeVHD, self.drbd_size, zeroize=False) 

1605 finally: 

1606 self.sr.unlock() 

1607 self.drbd_size = -1 

1608 self._sizeVHD = -1 

1609 self._sizeAllocated = -1 

1610 

1611 def inflateFully(self): 

1612 if not self.raw: 

1613 self.inflate(LinstorVhdUtil.compute_volume_size(self.sizeVirt, self.vdi_type)) 

1614 

1615 @override 

1616 def rename(self, uuid) -> None: 

1617 Util.log('Renaming {} -> {} (path={})'.format( 

1618 self.uuid, uuid, self.path 

1619 )) 

1620 self.sr._linstor.update_volume_uuid(self.uuid, uuid) 

1621 VDI.rename(self, uuid) 

1622 

1623 @override 

1624 def delete(self) -> None: 

1625 if len(self.children) > 0: 

1626 raise util.SMException( 

1627 'VDI {} has children, can\'t delete'.format(self.uuid) 

1628 ) 

1629 self.sr.lock() 

1630 try: 

1631 self.sr._linstor.destroy_volume(self.uuid) 

1632 self.sr.forgetVDI(self.uuid) 

1633 finally: 

1634 self.sr.unlock() 

1635 VDI.delete(self) 

1636 

1637 @override 

1638 def validate(self, fast=False) -> None: 

1639 if not self.raw and not self.sr._vhdutil.check(self.uuid, fast=fast): 

1640 raise util.SMException('VHD {} corrupted'.format(self)) 

1641 

1642 @override 

1643 def pause(self, failfast=False) -> None: 

1644 self.sr._linstor.ensure_volume_is_not_locked( 

1645 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1646 ) 

1647 return super(LinstorVDI, self).pause(failfast) 

1648 

1649 @override 

1650 def coalesce(self) -> int: 

1651 # Note: We raise `SMException` here to skip the current coalesce in case of failure. 

1652 # Using another exception we can't execute the next coalesce calls. 

1653 return self.sr._vhdutil.force_coalesce(self.path) * 512 

1654 

1655 @override 

1656 def getParent(self) -> str: 

1657 return self.sr._vhdutil.get_parent( 

1658 self.sr._linstor.get_volume_uuid_from_device_path(self.path) 

1659 ) 

1660 

1661 @override 

1662 def repair(self, parent_uuid) -> None: 

1663 self.sr._vhdutil.force_repair( 

1664 self.sr._linstor.get_device_path(parent_uuid) 

1665 ) 

1666 

1667 @override 

1668 def _relinkSkip(self) -> None: 

1669 abortFlag = IPCFlag(self.sr.uuid) 

1670 for child in self.children: 

1671 if abortFlag.test(FLAG_TYPE_ABORT): 

1672 raise AbortException('Aborting due to signal') 

1673 Util.log( 

1674 ' Relinking {} from {} to {}'.format( 

1675 child, self, self.parent 

1676 ) 

1677 ) 

1678 

1679 session = child.sr.xapi.session 

1680 sr_uuid = child.sr.uuid 

1681 vdi_uuid = child.uuid 

1682 try: 

1683 self.sr._linstor.ensure_volume_is_not_locked( 

1684 vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1685 ) 

1686 blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid) 

1687 child._setParent(self.parent) 

1688 finally: 

1689 blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) 

1690 self.children = [] 

1691 

1692 @override 

1693 def _setParent(self, parent) -> None: 

1694 self.sr._linstor.get_device_path(self.uuid) 

1695 self.sr._vhdutil.force_parent(self.path, parent.path) 

1696 self.parent = parent 

1697 self.parentUuid = parent.uuid 

1698 parent.children.append(self) 

1699 try: 

1700 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1701 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1702 (self.uuid, self.parentUuid)) 

1703 except: 

1704 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1705 (self.uuid, self.parentUuid)) 

1706 

1707 @override 

1708 def _doCoalesce(self) -> None: 

1709 try: 

1710 self._activateChain() 

1711 self.parent.validate() 

1712 self._inflateParentForCoalesce() 

1713 VDI._doCoalesce(self) 

1714 finally: 

1715 self.parent.deflate() 

1716 

1717 def _activateChain(self): 

1718 vdi = self 

1719 while vdi: 

1720 try: 

1721 p = self.sr._linstor.get_device_path(vdi.uuid) 

1722 except Exception as e: 

1723 # Use SMException to skip coalesce. 

1724 # Otherwise the GC is stopped... 

1725 raise util.SMException(str(e)) 

1726 vdi = vdi.parent 

1727 

1728 @override 

1729 def _setHidden(self, hidden=True) -> None: 

1730 HIDDEN_TAG = 'hidden' 

1731 

1732 if self.raw: 

1733 self.sr._linstor.update_volume_metadata(self.uuid, { 

1734 HIDDEN_TAG: hidden 

1735 }) 

1736 self.hidden = hidden 

1737 else: 

1738 VDI._setHidden(self, hidden) 

1739 

1740 @override 

1741 def _increaseSizeVirt(self, size, atomic=True): 

1742 if self.raw: 

1743 offset = self.drbd_size 

1744 if self.sizeVirt < size: 

1745 oldSize = self.drbd_size 

1746 self.drbd_size = LinstorVolumeManager.round_up_volume_size(size) 

1747 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size)) 

1748 self.sr._linstor.resize_volume(self.uuid, self.drbd_size) 

1749 offset = oldSize 

1750 unfinishedZero = False 

1751 jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid) 

1752 if jval: 

1753 unfinishedZero = True 

1754 offset = int(jval) 

1755 length = self.drbd_size - offset 

1756 if not length: 

1757 return 

1758 

1759 if unfinishedZero: 

1760 Util.log(" ==> Redoing unfinished zeroing out") 

1761 else: 

1762 self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset)) 

1763 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1764 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1765 func = lambda: util.zeroOut(self.path, offset, length) 

1766 Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0) 

1767 self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid) 

1768 return 

1769 

1770 if self.sizeVirt >= size: 

1771 return 

1772 Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \ 

1773 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1774 

1775 msize = self.sr._vhdutil.get_max_resize_size(self.uuid) * 1024 * 1024 

1776 if (size <= msize): 

1777 self.sr._vhdutil.set_size_virt_fast(self.path, size) 

1778 else: 

1779 if atomic: 

1780 vdiList = self._getAllSubtree() 

1781 self.sr.lock() 

1782 try: 

1783 self.sr.pauseVDIs(vdiList) 

1784 try: 

1785 self._setSizeVirt(size) 

1786 finally: 

1787 self.sr.unpauseVDIs(vdiList) 

1788 finally: 

1789 self.sr.unlock() 

1790 else: 

1791 self._setSizeVirt(size) 

1792 

1793 self.sizeVirt = self.sr._vhdutil.get_size_virt(self.uuid) 

1794 

1795 @override 

1796 def _setSizeVirt(self, size) -> None: 

1797 jfile = self.uuid + '-jvhd' 

1798 self.sr._linstor.create_volume( 

1799 jfile, vhdutil.MAX_VHD_JOURNAL_SIZE, persistent=False, volume_name=jfile 

1800 ) 

1801 try: 

1802 self.inflate(LinstorVhdUtil.compute_volume_size(size, self.vdi_type)) 

1803 self.sr._vhdutil.set_size_virt(size, jfile) 

1804 finally: 

1805 try: 

1806 self.sr._linstor.destroy_volume(jfile) 

1807 except Exception: 

1808 # We can ignore it, in any case this volume is not persistent. 

1809 pass 

1810 

1811 @override 

1812 def _queryVHDBlocks(self) -> bytes: 

1813 return self.sr._vhdutil.get_block_bitmap(self.uuid) 

1814 

1815 def _inflateParentForCoalesce(self): 

1816 if self.parent.raw: 

1817 return 

1818 inc = self._calcExtraSpaceForCoalescing() 

1819 if inc > 0: 

1820 self.parent.inflate(self.parent.getDrbdSize() + inc) 

1821 

1822 @override 

1823 def _calcExtraSpaceForCoalescing(self) -> int: 

1824 if self.parent.raw: 

1825 return 0 

1826 size_coalesced = LinstorVhdUtil.compute_volume_size( 

1827 self._getCoalescedSizeData(), self.vdi_type 

1828 ) 

1829 Util.log("Coalesced size = %s" % Util.num2str(size_coalesced)) 

1830 return size_coalesced - self.parent.getDrbdSize() 

1831 

1832 @override 

1833 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1834 assert self.getDrbdSize() > 0 

1835 assert self.getSizeVHD() > 0 

1836 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizeVHD()) 

1837 assert deflate_diff >= 0 

1838 return self._calcExtraSpaceForCoalescing() - deflate_diff 

1839 

1840 @override 

1841 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1842 assert self.getSizeVHD() > 0 

1843 return self._calcExtraSpaceForCoalescing() + \ 

1844 LinstorVolumeManager.round_up_volume_size(self.getSizeVHD()) 

1845 

1846################################################################################ 

1847# 

1848# SR 

1849# 

1850class SR(object): 

1851 class LogFilter: 

1852 def __init__(self, sr): 

1853 self.sr = sr 

1854 self.stateLogged = False 

1855 self.prevState = {} 

1856 self.currState = {} 

1857 

1858 def logState(self): 

1859 changes = "" 

1860 self.currState.clear() 

1861 for vdi in self.sr.vdiTrees: 

1862 self.currState[vdi.uuid] = self._getTreeStr(vdi) 

1863 if not self.prevState.get(vdi.uuid) or \ 

1864 self.prevState[vdi.uuid] != self.currState[vdi.uuid]: 

1865 changes += self.currState[vdi.uuid] 

1866 

1867 for uuid in self.prevState: 

1868 if not self.currState.get(uuid): 

1869 changes += "Tree %s gone\n" % uuid 

1870 

1871 result = "SR %s (%d VDIs in %d VHD trees): " % \ 

1872 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees)) 

1873 

1874 if len(changes) > 0: 

1875 if self.stateLogged: 

1876 result += "showing only VHD trees that changed:" 

1877 result += "\n%s" % changes 

1878 else: 

1879 result += "no changes" 

1880 

1881 for line in result.split("\n"): 

1882 Util.log("%s" % line) 

1883 self.prevState.clear() 

1884 for key, val in self.currState.items(): 

1885 self.prevState[key] = val 

1886 self.stateLogged = True 

1887 

1888 def logNewVDI(self, uuid): 

1889 if self.stateLogged: 

1890 Util.log("Found new VDI when scanning: %s" % uuid) 

1891 

1892 def _getTreeStr(self, vdi, indent=8): 

1893 treeStr = "%s%s\n" % (" " * indent, vdi) 

1894 for child in vdi.children: 

1895 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT) 

1896 return treeStr 

1897 

1898 TYPE_FILE = "file" 

1899 TYPE_LVHD = "lvhd" 

1900 TYPE_LINSTOR = "linstor" 

1901 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR] 

1902 

1903 LOCK_RETRY_INTERVAL = 3 

1904 LOCK_RETRY_ATTEMPTS = 20 

1905 LOCK_RETRY_ATTEMPTS_LOCK = 100 

1906 

1907 SCAN_RETRY_ATTEMPTS = 3 

1908 

1909 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM) 

1910 TMP_RENAME_PREFIX = "OLD_" 

1911 

1912 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline" 

1913 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override" 

1914 

1915 @staticmethod 

1916 def getInstance(uuid, xapiSession, createLock=True, force=False): 

1917 xapi = XAPI(xapiSession, uuid) 

1918 type = normalizeType(xapi.srRecord["type"]) 

1919 if type == SR.TYPE_FILE: 

1920 return FileSR(uuid, xapi, createLock, force) 

1921 elif type == SR.TYPE_LVHD: 

1922 return LVHDSR(uuid, xapi, createLock, force) 

1923 elif type == SR.TYPE_LINSTOR: 

1924 return LinstorSR(uuid, xapi, createLock, force) 

1925 raise util.SMException("SR type %s not recognized" % type) 

1926 

1927 def __init__(self, uuid, xapi, createLock, force): 

1928 self.logFilter = self.LogFilter(self) 

1929 self.uuid = uuid 

1930 self.path = "" 

1931 self.name = "" 

1932 self.vdis = {} 

1933 self.vdiTrees = [] 

1934 self.journaler = None 

1935 self.xapi = xapi 

1936 self._locked = 0 

1937 self._srLock = None 

1938 if createLock: 1938 ↛ 1939line 1938 didn't jump to line 1939, because the condition on line 1938 was never true

1939 self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, self.uuid) 

1940 else: 

1941 Util.log("Requested no SR locking") 

1942 self.name = self.xapi.srRecord["name_label"] 

1943 self._failedCoalesceTargets = [] 

1944 

1945 if not self.xapi.isPluggedHere(): 

1946 if force: 1946 ↛ 1947line 1946 didn't jump to line 1947, because the condition on line 1946 was never true

1947 Util.log("SR %s not attached on this host, ignoring" % uuid) 

1948 else: 

1949 if not self.wait_for_plug(): 

1950 raise util.SMException("SR %s not attached on this host" % uuid) 

1951 

1952 if force: 1952 ↛ 1953line 1952 didn't jump to line 1953, because the condition on line 1952 was never true

1953 Util.log("Not checking if we are Master (SR %s)" % uuid) 

1954 elif not self.xapi.isMaster(): 1954 ↛ 1955line 1954 didn't jump to line 1955, because the condition on line 1954 was never true

1955 raise util.SMException("This host is NOT master, will not run") 

1956 

1957 def wait_for_plug(self): 

1958 for _ in range(1, 10): 

1959 time.sleep(2) 

1960 if self.xapi.isPluggedHere(): 

1961 return True 

1962 return False 

1963 

1964 def gcEnabled(self, refresh=True): 

1965 if refresh: 

1966 self.xapi.srRecord = \ 

1967 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) 

1968 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": 

1969 Util.log("GC is disabled for this SR, abort") 

1970 return False 

1971 return True 

1972 

1973 def scan(self, force=False) -> None: 

1974 """Scan the SR and load VDI info for each VDI. If called repeatedly, 

1975 update VDI objects if they already exist""" 

1976 pass 

1977 

1978 def scanLocked(self, force=False): 

1979 self.lock() 

1980 try: 

1981 self.scan(force) 

1982 finally: 

1983 self.unlock() 

1984 

1985 def getVDI(self, uuid): 

1986 return self.vdis.get(uuid) 

1987 

1988 def hasWork(self): 

1989 if len(self.findGarbage()) > 0: 

1990 return True 

1991 if self.findCoalesceable(): 

1992 return True 

1993 if self.findLeafCoalesceable(): 

1994 return True 

1995 if self.needUpdateBlockInfo(): 

1996 return True 

1997 return False 

1998 

1999 def findCoalesceable(self): 

2000 """Find a coalesceable VDI. Return a vdi that should be coalesced 

2001 (choosing one among all coalesceable candidates according to some 

2002 criteria) or None if there is no VDI that could be coalesced""" 

2003 

2004 candidates = [] 

2005 

2006 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE) 

2007 if srSwitch == "false": 

2008 Util.log("Coalesce disabled for this SR") 

2009 return candidates 

2010 

2011 # finish any VDI for which a relink journal entry exists first 

2012 journals = self.journaler.getAll(VDI.JRN_RELINK) 

2013 for uuid in journals: 

2014 vdi = self.getVDI(uuid) 

2015 if vdi and vdi not in self._failedCoalesceTargets: 

2016 return vdi 

2017 

2018 for vdi in self.vdis.values(): 

2019 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets: 

2020 candidates.append(vdi) 

2021 Util.log("%s is coalescable" % vdi.uuid) 

2022 

2023 self.xapi.update_task_progress("coalescable", len(candidates)) 

2024 

2025 # pick one in the tallest tree 

2026 treeHeight = dict() 

2027 for c in candidates: 

2028 height = c.getTreeRoot().getTreeHeight() 

2029 if treeHeight.get(height): 

2030 treeHeight[height].append(c) 

2031 else: 

2032 treeHeight[height] = [c] 

2033 

2034 freeSpace = self.getFreeSpace() 

2035 heights = list(treeHeight.keys()) 

2036 heights.sort(reverse=True) 

2037 for h in heights: 

2038 for c in treeHeight[h]: 

2039 spaceNeeded = c._calcExtraSpaceForCoalescing() 

2040 if spaceNeeded <= freeSpace: 

2041 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h)) 

2042 return c 

2043 else: 

2044 Util.log("No space to coalesce %s (free space: %d)" % \ 

2045 (c, freeSpace)) 

2046 return None 

2047 

2048 def getSwitch(self, key): 

2049 return self.xapi.srRecord["other_config"].get(key) 

2050 

2051 def forbiddenBySwitch(self, switch, condition, fail_msg): 

2052 srSwitch = self.getSwitch(switch) 

2053 ret = False 

2054 if srSwitch: 

2055 ret = srSwitch == condition 

2056 

2057 if ret: 

2058 Util.log(fail_msg) 

2059 

2060 return ret 

2061 

2062 def leafCoalesceForbidden(self): 

2063 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

2064 "false", 

2065 "Coalesce disabled for this SR") or 

2066 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

2067 VDI.LEAFCLSC_DISABLED, 

2068 "Leaf-coalesce disabled for this SR")) 

2069 

2070 def findLeafCoalesceable(self): 

2071 """Find leaf-coalesceable VDIs in each VHD tree""" 

2072 

2073 candidates = [] 

2074 if self.leafCoalesceForbidden(): 

2075 return candidates 

2076 

2077 self.gatherLeafCoalesceable(candidates) 

2078 

2079 self.xapi.update_task_progress("coalescable", len(candidates)) 

2080 

2081 freeSpace = self.getFreeSpace() 

2082 for candidate in candidates: 

2083 # check the space constraints to see if leaf-coalesce is actually 

2084 # feasible for this candidate 

2085 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing() 

2086 spaceNeededLive = spaceNeeded 

2087 if spaceNeeded > freeSpace: 

2088 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing() 

2089 if candidate.canLiveCoalesce(self.getStorageSpeed()): 

2090 spaceNeeded = spaceNeededLive 

2091 

2092 if spaceNeeded <= freeSpace: 

2093 Util.log("Leaf-coalesce candidate: %s" % candidate) 

2094 return candidate 

2095 else: 

2096 Util.log("No space to leaf-coalesce %s (free space: %d)" % \ 

2097 (candidate, freeSpace)) 

2098 if spaceNeededLive <= freeSpace: 

2099 Util.log("...but enough space if skip snap-coalesce") 

2100 candidate.setConfig(VDI.DB_LEAFCLSC, 

2101 VDI.LEAFCLSC_OFFLINE) 

2102 

2103 return None 

2104 

2105 def gatherLeafCoalesceable(self, candidates): 

2106 for vdi in self.vdis.values(): 

2107 if not vdi.isLeafCoalesceable(): 

2108 continue 

2109 if vdi in self._failedCoalesceTargets: 

2110 continue 

2111 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET: 

2112 Util.log("Skipping reset-on-boot %s" % vdi) 

2113 continue 

2114 if vdi.getConfig(vdi.DB_ALLOW_CACHING): 

2115 Util.log("Skipping allow_caching=true %s" % vdi) 

2116 continue 

2117 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED: 

2118 Util.log("Leaf-coalesce disabled for %s" % vdi) 

2119 continue 

2120 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or 

2121 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE): 

2122 continue 

2123 candidates.append(vdi) 

2124 

2125 def coalesce(self, vdi, dryRun=False): 

2126 """Coalesce vdi onto parent""" 

2127 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent)) 

2128 if dryRun: 2128 ↛ 2129line 2128 didn't jump to line 2129, because the condition on line 2128 was never true

2129 return 

2130 

2131 try: 

2132 self._coalesce(vdi) 

2133 except util.SMException as e: 

2134 if isinstance(e, AbortException): 2134 ↛ 2135line 2134 didn't jump to line 2135, because the condition on line 2134 was never true

2135 self.cleanup() 

2136 raise 

2137 else: 

2138 self._failedCoalesceTargets.append(vdi) 

2139 Util.logException("coalesce") 

2140 Util.log("Coalesce failed, skipping") 

2141 self.cleanup() 

2142 

2143 def coalesceLeaf(self, vdi, dryRun=False): 

2144 """Leaf-coalesce vdi onto parent""" 

2145 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent)) 

2146 if dryRun: 

2147 return 

2148 

2149 try: 

2150 uuid = vdi.uuid 

2151 try: 

2152 # "vdi" object will no longer be valid after this call 

2153 self._coalesceLeaf(vdi) 

2154 finally: 

2155 vdi = self.getVDI(uuid) 

2156 if vdi: 

2157 vdi.delConfig(vdi.DB_LEAFCLSC) 

2158 except AbortException: 

2159 self.cleanup() 

2160 raise 

2161 except (util.SMException, XenAPI.Failure) as e: 

2162 self._failedCoalesceTargets.append(vdi) 

2163 Util.logException("leaf-coalesce") 

2164 Util.log("Leaf-coalesce failed on %s, skipping" % vdi) 

2165 self.cleanup() 

2166 

2167 def garbageCollect(self, dryRun=False): 

2168 vdiList = self.findGarbage() 

2169 Util.log("Found %d VDIs for deletion:" % len(vdiList)) 

2170 for vdi in vdiList: 

2171 Util.log(" %s" % vdi) 

2172 if not dryRun: 

2173 self.deleteVDIs(vdiList) 

2174 self.cleanupJournals(dryRun) 

2175 

2176 def findGarbage(self): 

2177 vdiList = [] 

2178 for vdi in self.vdiTrees: 

2179 vdiList.extend(vdi.getAllPrunable()) 

2180 return vdiList 

2181 

2182 def deleteVDIs(self, vdiList) -> None: 

2183 for vdi in vdiList: 

2184 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT): 

2185 raise AbortException("Aborting due to signal") 

2186 Util.log("Deleting unlinked VDI %s" % vdi) 

2187 self.deleteVDI(vdi) 

2188 

2189 def deleteVDI(self, vdi) -> None: 

2190 assert(len(vdi.children) == 0) 

2191 del self.vdis[vdi.uuid] 

2192 if vdi.parent: 2192 ↛ 2194line 2192 didn't jump to line 2194, because the condition on line 2192 was never false

2193 vdi.parent.children.remove(vdi) 

2194 if vdi in self.vdiTrees: 2194 ↛ 2195line 2194 didn't jump to line 2195, because the condition on line 2194 was never true

2195 self.vdiTrees.remove(vdi) 

2196 vdi.delete() 

2197 

2198 def forgetVDI(self, vdiUuid) -> None: 

2199 self.xapi.forgetVDI(self.uuid, vdiUuid) 

2200 

2201 def pauseVDIs(self, vdiList) -> None: 

2202 paused = [] 

2203 failed = False 

2204 for vdi in vdiList: 

2205 try: 

2206 vdi.pause() 

2207 paused.append(vdi) 

2208 except: 

2209 Util.logException("pauseVDIs") 

2210 failed = True 

2211 break 

2212 

2213 if failed: 

2214 self.unpauseVDIs(paused) 

2215 raise util.SMException("Failed to pause VDIs") 

2216 

2217 def unpauseVDIs(self, vdiList): 

2218 failed = False 

2219 for vdi in vdiList: 

2220 try: 

2221 vdi.unpause() 

2222 except: 

2223 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

2224 failed = True 

2225 if failed: 

2226 raise util.SMException("Failed to unpause VDIs") 

2227 

2228 def getFreeSpace(self) -> int: 

2229 return 0 

2230 

2231 def cleanup(self): 

2232 Util.log("In cleanup") 

2233 return 

2234 

2235 @override 

2236 def __str__(self) -> str: 

2237 if self.name: 

2238 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

2239 else: 

2240 ret = "%s" % self.uuid 

2241 return ret 

2242 

2243 def lock(self): 

2244 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort 

2245 signal to avoid deadlocking (trying to acquire the SR lock while the 

2246 lock is held by a process that is trying to abort us)""" 

2247 if not self._srLock: 

2248 return 

2249 

2250 if self._locked == 0: 

2251 abortFlag = IPCFlag(self.uuid) 

2252 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK): 

2253 if self._srLock.acquireNoblock(): 

2254 self._locked += 1 

2255 return 

2256 if abortFlag.test(FLAG_TYPE_ABORT): 

2257 raise AbortException("Abort requested") 

2258 time.sleep(SR.LOCK_RETRY_INTERVAL) 

2259 raise util.SMException("Unable to acquire the SR lock") 

2260 

2261 self._locked += 1 

2262 

2263 def unlock(self): 

2264 if not self._srLock: 2264 ↛ 2266line 2264 didn't jump to line 2266, because the condition on line 2264 was never false

2265 return 

2266 assert(self._locked > 0) 

2267 self._locked -= 1 

2268 if self._locked == 0: 

2269 self._srLock.release() 

2270 

2271 def needUpdateBlockInfo(self) -> bool: 

2272 for vdi in self.vdis.values(): 

2273 if vdi.scanError or len(vdi.children) == 0: 

2274 continue 

2275 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2276 return True 

2277 return False 

2278 

2279 def updateBlockInfo(self) -> None: 

2280 for vdi in self.vdis.values(): 

2281 if vdi.scanError or len(vdi.children) == 0: 

2282 continue 

2283 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2284 vdi.updateBlockInfo() 

2285 

2286 def cleanupCoalesceJournals(self): 

2287 """Remove stale coalesce VDI indicators""" 

2288 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

2289 for uuid, jval in entries.items(): 

2290 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

2291 

2292 def cleanupJournals(self, dryRun=False): 

2293 """delete journal entries for non-existing VDIs""" 

2294 for t in [LVHDVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]: 

2295 entries = self.journaler.getAll(t) 

2296 for uuid, jval in entries.items(): 

2297 if self.getVDI(uuid): 

2298 continue 

2299 if t == SR.JRN_CLONE: 

2300 baseUuid, clonUuid = jval.split("_") 

2301 if self.getVDI(baseUuid): 

2302 continue 

2303 Util.log(" Deleting stale '%s' journal entry for %s " 

2304 "(%s)" % (t, uuid, jval)) 

2305 if not dryRun: 

2306 self.journaler.remove(t, uuid) 

2307 

2308 def cleanupCache(self, maxAge=-1) -> int: 

2309 return 0 

2310 

2311 def _coalesce(self, vdi): 

2312 if self.journaler.get(vdi.JRN_RELINK, vdi.uuid): 2312 ↛ 2315line 2312 didn't jump to line 2315, because the condition on line 2312 was never true

2313 # this means we had done the actual coalescing already and just 

2314 # need to finish relinking and/or refreshing the children 

2315 Util.log("==> Coalesce apparently already done: skipping") 

2316 else: 

2317 # JRN_COALESCE is used to check which VDI is being coalesced in 

2318 # order to decide whether to abort the coalesce. We remove the 

2319 # journal as soon as the VHD coalesce step is done, because we 

2320 # don't expect the rest of the process to take long 

2321 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1") 

2322 vdi._doCoalesce() 

2323 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid) 

2324 

2325 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid) 

2326 

2327 # we now need to relink the children: lock the SR to prevent ops 

2328 # like SM.clone from manipulating the VDIs we'll be relinking and 

2329 # rescan the SR first in case the children changed since the last 

2330 # scan 

2331 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1") 

2332 

2333 self.lock() 

2334 try: 

2335 vdi.parent._tagChildrenForRelink() 

2336 self.scan() 

2337 vdi._relinkSkip() 

2338 finally: 

2339 self.unlock() 

2340 # Reload the children to leave things consistent 

2341 vdi.parent._reloadChildren(vdi) 

2342 

2343 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid) 

2344 self.deleteVDI(vdi) 

2345 

2346 class CoalesceTracker: 

2347 GRACE_ITERATIONS = 1 

2348 MAX_ITERATIONS_NO_PROGRESS = 3 

2349 MAX_ITERATIONS = 10 

2350 MAX_INCREASE_FROM_MINIMUM = 1.2 

2351 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \ 

2352 " --> Final size {finSize}" 

2353 

2354 def __init__(self, sr): 

2355 self.itsNoProgress = 0 

2356 self.its = 0 

2357 self.minSize = float("inf") 

2358 self.history = [] 

2359 self.reason = "" 

2360 self.startSize = None 

2361 self.finishSize = None 

2362 self.sr = sr 

2363 

2364 def abortCoalesce(self, prevSize, curSize): 

2365 res = False 

2366 

2367 self.its += 1 

2368 self.history.append(self.HISTORY_STRING.format(its=self.its, 

2369 initSize=prevSize, 

2370 finSize=curSize)) 

2371 

2372 self.finishSize = curSize 

2373 

2374 if self.startSize is None: 

2375 self.startSize = prevSize 

2376 

2377 if curSize < self.minSize: 

2378 self.minSize = curSize 

2379 

2380 if prevSize < self.minSize: 

2381 self.minSize = prevSize 

2382 

2383 if prevSize < curSize: 

2384 self.itsNoProgress += 1 

2385 Util.log("No progress, attempt:" 

2386 " {attempt}".format(attempt=self.itsNoProgress)) 

2387 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid) 

2388 

2389 if (not res) and (self.its > self.MAX_ITERATIONS): 

2390 max = self.MAX_ITERATIONS 

2391 self.reason = \ 

2392 "Max iterations ({max}) exceeded".format(max=max) 

2393 res = True 

2394 

2395 if (not res) and (self.itsNoProgress > 

2396 self.MAX_ITERATIONS_NO_PROGRESS): 

2397 max = self.MAX_ITERATIONS_NO_PROGRESS 

2398 self.reason = \ 

2399 "No progress made for {max} iterations".format(max=max) 

2400 res = True 

2401 

2402 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize 

2403 if (self.its > self.GRACE_ITERATIONS and 

2404 (not res) and (curSize > maxSizeFromMin)): 

2405 self.reason = "Unexpected bump in size," \ 

2406 " compared to minimum acheived" 

2407 res = True 

2408 

2409 return res 

2410 

2411 def printReasoning(self): 

2412 Util.log("Aborted coalesce") 

2413 for hist in self.history: 

2414 Util.log(hist) 

2415 Util.log(self.reason) 

2416 Util.log("Starting size was {size}" 

2417 .format(size=self.startSize)) 

2418 Util.log("Final size was {size}" 

2419 .format(size=self.finishSize)) 

2420 Util.log("Minimum size acheived was {size}" 

2421 .format(size=self.minSize)) 

2422 

2423 def _coalesceLeaf(self, vdi): 

2424 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot 

2425 complete due to external changes, namely vdi_delete and vdi_snapshot 

2426 that alter leaf-coalescibility of vdi""" 

2427 tracker = self.CoalesceTracker(self) 

2428 while not vdi.canLiveCoalesce(self.getStorageSpeed()): 

2429 prevSizeVHD = vdi.getSizeVHD() 

2430 if not self._snapshotCoalesce(vdi): 2430 ↛ 2431line 2430 didn't jump to line 2431, because the condition on line 2430 was never true

2431 return False 

2432 if tracker.abortCoalesce(prevSizeVHD, vdi.getSizeVHD()): 

2433 tracker.printReasoning() 

2434 raise util.SMException("VDI {uuid} could not be coalesced" 

2435 .format(uuid=vdi.uuid)) 

2436 return self._liveLeafCoalesce(vdi) 

2437 

2438 def calcStorageSpeed(self, startTime, endTime, vhdSize): 

2439 speed = None 

2440 total_time = endTime - startTime 

2441 if total_time > 0: 

2442 speed = float(vhdSize) / float(total_time) 

2443 return speed 

2444 

2445 def writeSpeedToFile(self, speed): 

2446 content = [] 

2447 speedFile = None 

2448 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2449 self.lock() 

2450 try: 

2451 Util.log("Writing to file: {myfile}".format(myfile=path)) 

2452 lines = "" 

2453 if not os.path.isfile(path): 

2454 lines = str(speed) + "\n" 

2455 else: 

2456 speedFile = open(path, "r+") 

2457 content = speedFile.readlines() 

2458 content.append(str(speed) + "\n") 

2459 if len(content) > N_RUNNING_AVERAGE: 

2460 del content[0] 

2461 lines = "".join(content) 

2462 

2463 util.atomicFileWrite(path, VAR_RUN, lines) 

2464 finally: 

2465 if speedFile is not None: 

2466 speedFile.close() 

2467 Util.log("Closing file: {myfile}".format(myfile=path)) 

2468 self.unlock() 

2469 

2470 def recordStorageSpeed(self, startTime, endTime, vhdSize): 

2471 speed = self.calcStorageSpeed(startTime, endTime, vhdSize) 

2472 if speed is None: 

2473 return 

2474 

2475 self.writeSpeedToFile(speed) 

2476 

2477 def getStorageSpeed(self): 

2478 speedFile = None 

2479 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2480 self.lock() 

2481 try: 

2482 speed = None 

2483 if os.path.isfile(path): 

2484 speedFile = open(path) 

2485 content = speedFile.readlines() 

2486 try: 

2487 content = [float(i) for i in content] 

2488 except ValueError: 

2489 Util.log("Something bad in the speed log:{log}". 

2490 format(log=speedFile.readlines())) 

2491 return speed 

2492 

2493 if len(content): 

2494 speed = sum(content) / float(len(content)) 

2495 if speed <= 0: 2495 ↛ 2497line 2495 didn't jump to line 2497, because the condition on line 2495 was never true

2496 # Defensive, should be impossible. 

2497 Util.log("Bad speed: {speed} calculated for SR: {uuid}". 

2498 format(speed=speed, uuid=self.uuid)) 

2499 speed = None 

2500 else: 

2501 Util.log("Speed file empty for SR: {uuid}". 

2502 format(uuid=self.uuid)) 

2503 else: 

2504 Util.log("Speed log missing for SR: {uuid}". 

2505 format(uuid=self.uuid)) 

2506 return speed 

2507 finally: 

2508 if not (speedFile is None): 

2509 speedFile.close() 

2510 self.unlock() 

2511 

2512 def _snapshotCoalesce(self, vdi): 

2513 # Note that because we are not holding any locks here, concurrent SM 

2514 # operations may change this tree under our feet. In particular, vdi 

2515 # can be deleted, or it can be snapshotted. 

2516 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED) 

2517 Util.log("Single-snapshotting %s" % vdi) 

2518 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid) 

2519 try: 

2520 ret = self.xapi.singleSnapshotVDI(vdi) 

2521 Util.log("Single-snapshot returned: %s" % ret) 

2522 except XenAPI.Failure as e: 

2523 if util.isInvalidVDI(e): 

2524 Util.log("The VDI appears to have been concurrently deleted") 

2525 return False 

2526 raise 

2527 self.scanLocked() 

2528 tempSnap = vdi.parent 

2529 if not tempSnap.isCoalesceable(): 

2530 Util.log("The VDI appears to have been concurrently snapshotted") 

2531 return False 

2532 Util.log("Coalescing parent %s" % tempSnap) 

2533 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid) 

2534 vhdSize = vdi.getSizeVHD() 

2535 self._coalesce(tempSnap) 

2536 if not vdi.isLeafCoalesceable(): 

2537 Util.log("The VDI tree appears to have been altered since") 

2538 return False 

2539 return True 

2540 

2541 def _liveLeafCoalesce(self, vdi) -> bool: 

2542 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid) 

2543 self.lock() 

2544 try: 

2545 self.scan() 

2546 if not self.getVDI(vdi.uuid): 

2547 Util.log("The VDI appears to have been deleted meanwhile") 

2548 return False 

2549 if not vdi.isLeafCoalesceable(): 

2550 Util.log("The VDI is no longer leaf-coalesceable") 

2551 return False 

2552 

2553 uuid = vdi.uuid 

2554 vdi.pause(failfast=True) 

2555 try: 

2556 try: 

2557 # "vdi" object will no longer be valid after this call 

2558 self._doCoalesceLeaf(vdi) 

2559 except: 

2560 Util.logException("_doCoalesceLeaf") 

2561 self._handleInterruptedCoalesceLeaf() 

2562 raise 

2563 finally: 

2564 vdi = self.getVDI(uuid) 

2565 if vdi: 

2566 vdi.ensureUnpaused() 

2567 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid) 

2568 if vdiOld: 

2569 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2570 self.deleteVDI(vdiOld) 

2571 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2572 finally: 

2573 self.cleanup() 

2574 self.unlock() 

2575 self.logFilter.logState() 

2576 return True 

2577 

2578 def _doCoalesceLeaf(self, vdi): 

2579 """Actual coalescing of a leaf VDI onto parent. Must be called in an 

2580 offline/atomic context""" 

2581 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid) 

2582 self._prepareCoalesceLeaf(vdi) 

2583 vdi.parent._setHidden(False) 

2584 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False) 

2585 vdi.validate(True) 

2586 vdi.parent.validate(True) 

2587 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid) 

2588 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT 

2589 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE: 

2590 Util.log("Leaf-coalesce forced, will not use timeout") 

2591 timeout = 0 

2592 vdi._coalesceVHD(timeout) 

2593 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid) 

2594 vdi.parent.validate(True) 

2595 #vdi._verifyContents(timeout / 2) 

2596 

2597 # rename 

2598 vdiUuid = vdi.uuid 

2599 oldName = vdi.fileName 

2600 origParentUuid = vdi.parent.uuid 

2601 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid) 

2602 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid) 

2603 vdi.parent.rename(vdiUuid) 

2604 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid) 

2605 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid) 

2606 

2607 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is 

2608 # garbage 

2609 

2610 # update the VDI record 

2611 vdi.parent.delConfig(VDI.DB_VHD_PARENT) 

2612 if vdi.parent.raw: 

2613 vdi.parent.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_RAW) 

2614 vdi.parent.delConfig(VDI.DB_VHD_BLOCKS) 

2615 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid) 

2616 

2617 self._updateNode(vdi) 

2618 

2619 # delete the obsolete leaf & inflate the parent (in that order, to 

2620 # minimize free space requirements) 

2621 parent = vdi.parent 

2622 vdi._setHidden(True) 

2623 vdi.parent.children = [] 

2624 vdi.parent = None 

2625 

2626 extraSpace = self._calcExtraSpaceNeeded(vdi, parent) 

2627 freeSpace = self.getFreeSpace() 

2628 if freeSpace < extraSpace: 

2629 # don't delete unless we need the space: deletion is time-consuming 

2630 # because it requires contacting the slaves, and we're paused here 

2631 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2632 self.deleteVDI(vdi) 

2633 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2634 

2635 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid) 

2636 self.journaler.remove(VDI.JRN_LEAF, vdiUuid) 

2637 

2638 self.forgetVDI(origParentUuid) 

2639 self._finishCoalesceLeaf(parent) 

2640 self._updateSlavesOnResize(parent) 

2641 

2642 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

2643 assert(not parent.raw) # raw parents not supported 

2644 extra = child.getSizeVHD() - parent.getSizeVHD() 

2645 if extra < 0: 

2646 extra = 0 

2647 return extra 

2648 

2649 def _prepareCoalesceLeaf(self, vdi) -> None: 

2650 pass 

2651 

2652 def _updateNode(self, vdi) -> None: 

2653 pass 

2654 

2655 def _finishCoalesceLeaf(self, parent) -> None: 

2656 pass 

2657 

2658 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: 

2659 pass 

2660 

2661 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None: 

2662 pass 

2663 

2664 def _updateSlavesOnResize(self, vdi) -> None: 

2665 pass 

2666 

2667 def _removeStaleVDIs(self, uuidsPresent) -> None: 

2668 for uuid in list(self.vdis.keys()): 

2669 if not uuid in uuidsPresent: 

2670 Util.log("VDI %s disappeared since last scan" % \ 

2671 self.vdis[uuid]) 

2672 del self.vdis[uuid] 

2673 

2674 def _handleInterruptedCoalesceLeaf(self) -> None: 

2675 """An interrupted leaf-coalesce operation may leave the VHD tree in an 

2676 inconsistent state. If the old-leaf VDI is still present, we revert the 

2677 operation (in case the original error is persistent); otherwise we must 

2678 finish the operation""" 

2679 pass 

2680 

2681 def _buildTree(self, force): 

2682 self.vdiTrees = [] 

2683 for vdi in self.vdis.values(): 

2684 if vdi.parentUuid: 

2685 parent = self.getVDI(vdi.parentUuid) 

2686 if not parent: 

2687 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX): 

2688 self.vdiTrees.append(vdi) 

2689 continue 

2690 if force: 

2691 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \ 

2692 (vdi.parentUuid, vdi.uuid)) 

2693 self.vdiTrees.append(vdi) 

2694 continue 

2695 else: 

2696 raise util.SMException("Parent VDI %s of %s not " \ 

2697 "found" % (vdi.parentUuid, vdi.uuid)) 

2698 vdi.parent = parent 

2699 parent.children.append(vdi) 

2700 else: 

2701 self.vdiTrees.append(vdi) 

2702 

2703 

2704class FileSR(SR): 

2705 TYPE = SR.TYPE_FILE 

2706 CACHE_FILE_EXT = ".vhdcache" 

2707 # cache cleanup actions 

2708 CACHE_ACTION_KEEP = 0 

2709 CACHE_ACTION_REMOVE = 1 

2710 CACHE_ACTION_REMOVE_IF_INACTIVE = 2 

2711 

2712 def __init__(self, uuid, xapi, createLock, force): 

2713 SR.__init__(self, uuid, xapi, createLock, force) 

2714 self.path = "/var/run/sr-mount/%s" % self.uuid 

2715 self.journaler = fjournaler.Journaler(self.path) 

2716 

2717 @override 

2718 def scan(self, force=False) -> None: 

2719 if not util.pathexists(self.path): 

2720 raise util.SMException("directory %s not found!" % self.uuid) 

2721 vhds = self._scan(force) 

2722 for uuid, vhdInfo in vhds.items(): 

2723 vdi = self.getVDI(uuid) 

2724 if not vdi: 

2725 self.logFilter.logNewVDI(uuid) 

2726 vdi = FileVDI(self, uuid, False) 

2727 self.vdis[uuid] = vdi 

2728 vdi.load(vhdInfo) 

2729 uuidsPresent = list(vhds.keys()) 

2730 rawList = [x for x in os.listdir(self.path) if x.endswith(vhdutil.FILE_EXTN_RAW)] 

2731 for rawName in rawList: 

2732 uuid = FileVDI.extractUuid(rawName) 

2733 uuidsPresent.append(uuid) 

2734 vdi = self.getVDI(uuid) 

2735 if not vdi: 

2736 self.logFilter.logNewVDI(uuid) 

2737 vdi = FileVDI(self, uuid, True) 

2738 self.vdis[uuid] = vdi 

2739 self._removeStaleVDIs(uuidsPresent) 

2740 self._buildTree(force) 

2741 self.logFilter.logState() 

2742 self._handleInterruptedCoalesceLeaf() 

2743 

2744 @override 

2745 def getFreeSpace(self) -> int: 

2746 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path) 

2747 

2748 @override 

2749 def deleteVDIs(self, vdiList) -> None: 

2750 rootDeleted = False 

2751 for vdi in vdiList: 

2752 if not vdi.parent: 

2753 rootDeleted = True 

2754 break 

2755 SR.deleteVDIs(self, vdiList) 

2756 if self.xapi.srRecord["type"] == "nfs" and rootDeleted: 

2757 self.xapi.markCacheSRsDirty() 

2758 

2759 @override 

2760 def cleanupCache(self, maxAge=-1) -> int: 

2761 """Clean up IntelliCache cache files. Caches for leaf nodes are 

2762 removed when the leaf node no longer exists or its allow-caching 

2763 attribute is not set. Caches for parent nodes are removed when the 

2764 parent node no longer exists or it hasn't been used in more than 

2765 <maxAge> hours. 

2766 Return number of caches removed. 

2767 """ 

2768 numRemoved = 0 

2769 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)] 

2770 Util.log("Found %d cache files" % len(cacheFiles)) 

2771 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge) 

2772 for cacheFile in cacheFiles: 

2773 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)] 

2774 action = self.CACHE_ACTION_KEEP 

2775 rec = self.xapi.getRecordVDI(uuid) 

2776 if not rec: 

2777 Util.log("Cache %s: VDI doesn't exist" % uuid) 

2778 action = self.CACHE_ACTION_REMOVE 

2779 elif rec["managed"] and not rec["allow_caching"]: 

2780 Util.log("Cache %s: caching disabled" % uuid) 

2781 action = self.CACHE_ACTION_REMOVE 

2782 elif not rec["managed"] and maxAge >= 0: 

2783 lastAccess = datetime.datetime.fromtimestamp( \ 

2784 os.path.getatime(os.path.join(self.path, cacheFile))) 

2785 if lastAccess < cutoff: 

2786 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge)) 

2787 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE 

2788 

2789 if action == self.CACHE_ACTION_KEEP: 

2790 Util.log("Keeping cache %s" % uuid) 

2791 continue 

2792 

2793 lockId = uuid 

2794 parentUuid = None 

2795 if rec and rec["managed"]: 

2796 parentUuid = rec["sm_config"].get("vhd-parent") 

2797 if parentUuid: 

2798 lockId = parentUuid 

2799 

2800 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId) 

2801 cacheLock.acquire() 

2802 try: 

2803 if self._cleanupCache(uuid, action): 

2804 numRemoved += 1 

2805 finally: 

2806 cacheLock.release() 

2807 return numRemoved 

2808 

2809 def _cleanupCache(self, uuid, action): 

2810 assert(action != self.CACHE_ACTION_KEEP) 

2811 rec = self.xapi.getRecordVDI(uuid) 

2812 if rec and rec["allow_caching"]: 

2813 Util.log("Cache %s appears to have become valid" % uuid) 

2814 return False 

2815 

2816 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT) 

2817 tapdisk = blktap2.Tapdisk.find_by_path(fullPath) 

2818 if tapdisk: 

2819 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE: 

2820 Util.log("Cache %s still in use" % uuid) 

2821 return False 

2822 Util.log("Shutting down tapdisk for %s" % fullPath) 

2823 tapdisk.shutdown() 

2824 

2825 Util.log("Deleting file %s" % fullPath) 

2826 os.unlink(fullPath) 

2827 return True 

2828 

2829 def _isCacheFileName(self, name): 

2830 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \ 

2831 name.endswith(self.CACHE_FILE_EXT) 

2832 

2833 def _scan(self, force): 

2834 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

2835 error = False 

2836 pattern = os.path.join(self.path, "*%s" % vhdutil.FILE_EXTN_VHD) 

2837 vhds = vhdutil.getAllVHDs(pattern, FileVDI.extractUuid) 

2838 for uuid, vhdInfo in vhds.items(): 

2839 if vhdInfo.error: 

2840 error = True 

2841 break 

2842 if not error: 

2843 return vhds 

2844 Util.log("Scan error on attempt %d" % i) 

2845 if force: 

2846 return vhds 

2847 raise util.SMException("Scan error") 

2848 

2849 @override 

2850 def deleteVDI(self, vdi) -> None: 

2851 self._checkSlaves(vdi) 

2852 SR.deleteVDI(self, vdi) 

2853 

2854 def _checkSlaves(self, vdi): 

2855 onlineHosts = self.xapi.getOnlineHosts() 

2856 abortFlag = IPCFlag(self.uuid) 

2857 for pbdRecord in self.xapi.getAttachedPBDs(): 

2858 hostRef = pbdRecord["host"] 

2859 if hostRef == self.xapi._hostRef: 

2860 continue 

2861 if abortFlag.test(FLAG_TYPE_ABORT): 

2862 raise AbortException("Aborting due to signal") 

2863 try: 

2864 self._checkSlave(hostRef, vdi) 

2865 except util.CommandException: 

2866 if hostRef in onlineHosts: 

2867 raise 

2868 

2869 def _checkSlave(self, hostRef, vdi): 

2870 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path}) 

2871 Util.log("Checking with slave: %s" % repr(call)) 

2872 _host = self.xapi.session.xenapi.host 

2873 text = _host.call_plugin( * call) 

2874 

2875 @override 

2876 def _handleInterruptedCoalesceLeaf(self) -> None: 

2877 entries = self.journaler.getAll(VDI.JRN_LEAF) 

2878 for uuid, parentUuid in entries.items(): 

2879 fileList = os.listdir(self.path) 

2880 childName = uuid + vhdutil.FILE_EXTN_VHD 

2881 tmpChildName = self.TMP_RENAME_PREFIX + uuid + vhdutil.FILE_EXTN_VHD 

2882 parentName1 = parentUuid + vhdutil.FILE_EXTN_VHD 

2883 parentName2 = parentUuid + vhdutil.FILE_EXTN_RAW 

2884 parentPresent = (parentName1 in fileList or parentName2 in fileList) 

2885 if parentPresent or tmpChildName in fileList: 

2886 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

2887 else: 

2888 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

2889 self.journaler.remove(VDI.JRN_LEAF, uuid) 

2890 vdi = self.getVDI(uuid) 

2891 if vdi: 

2892 vdi.ensureUnpaused() 

2893 

2894 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2895 Util.log("*** UNDO LEAF-COALESCE") 

2896 parent = self.getVDI(parentUuid) 

2897 if not parent: 

2898 parent = self.getVDI(childUuid) 

2899 if not parent: 

2900 raise util.SMException("Neither %s nor %s found" % \ 

2901 (parentUuid, childUuid)) 

2902 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

2903 parent.rename(parentUuid) 

2904 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

2905 

2906 child = self.getVDI(childUuid) 

2907 if not child: 

2908 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

2909 if not child: 

2910 raise util.SMException("Neither %s nor %s found" % \ 

2911 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

2912 Util.log("Renaming child back to %s" % childUuid) 

2913 child.rename(childUuid) 

2914 Util.log("Updating the VDI record") 

2915 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

2916 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

2917 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

2918 

2919 if child.hidden: 

2920 child._setHidden(False) 

2921 if not parent.hidden: 

2922 parent._setHidden(True) 

2923 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

2924 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

2925 Util.log("*** leaf-coalesce undo successful") 

2926 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

2927 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

2928 

2929 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2930 Util.log("*** FINISH LEAF-COALESCE") 

2931 vdi = self.getVDI(childUuid) 

2932 if not vdi: 

2933 raise util.SMException("VDI %s not found" % childUuid) 

2934 try: 

2935 self.forgetVDI(parentUuid) 

2936 except XenAPI.Failure: 

2937 pass 

2938 self._updateSlavesOnResize(vdi) 

2939 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

2940 Util.log("*** finished leaf-coalesce successfully") 

2941 

2942 

2943class LVHDSR(SR): 

2944 TYPE = SR.TYPE_LVHD 

2945 SUBTYPES = ["lvhdoiscsi", "lvhdohba"] 

2946 

2947 def __init__(self, uuid, xapi, createLock, force): 

2948 SR.__init__(self, uuid, xapi, createLock, force) 

2949 self.vgName = "%s%s" % (lvhdutil.VG_PREFIX, self.uuid) 

2950 self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgName) 

2951 

2952 sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid) 

2953 other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref) 

2954 lvm_conf = other_conf.get('lvm-conf') if other_conf else None 

2955 self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf) 

2956 

2957 self.lvActivator = LVActivator(self.uuid, self.lvmCache) 

2958 self.journaler = journaler.Journaler(self.lvmCache) 

2959 

2960 @override 

2961 def deleteVDI(self, vdi) -> None: 

2962 if self.lvActivator.get(vdi.uuid, False): 

2963 self.lvActivator.deactivate(vdi.uuid, False) 

2964 self._checkSlaves(vdi) 

2965 SR.deleteVDI(self, vdi) 

2966 

2967 @override 

2968 def forgetVDI(self, vdiUuid) -> None: 

2969 SR.forgetVDI(self, vdiUuid) 

2970 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME) 

2971 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid) 

2972 

2973 @override 

2974 def getFreeSpace(self) -> int: 

2975 stats = lvutil._getVGstats(self.vgName) 

2976 return stats['physical_size'] - stats['physical_utilisation'] 

2977 

2978 @override 

2979 def cleanup(self): 

2980 if not self.lvActivator.deactivateAll(): 

2981 Util.log("ERROR deactivating LVs while cleaning up") 

2982 

2983 @override 

2984 def needUpdateBlockInfo(self) -> bool: 

2985 for vdi in self.vdis.values(): 

2986 if vdi.scanError or vdi.raw or len(vdi.children) == 0: 

2987 continue 

2988 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2989 return True 

2990 return False 

2991 

2992 @override 

2993 def updateBlockInfo(self) -> None: 

2994 numUpdated = 0 

2995 for vdi in self.vdis.values(): 

2996 if vdi.scanError or vdi.raw or len(vdi.children) == 0: 

2997 continue 

2998 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2999 vdi.updateBlockInfo() 

3000 numUpdated += 1 

3001 if numUpdated: 

3002 # deactivate the LVs back sooner rather than later. If we don't 

3003 # now, by the time this thread gets to deactivations, another one 

3004 # might have leaf-coalesced a node and deleted it, making the child 

3005 # inherit the refcount value and preventing the correct decrement 

3006 self.cleanup() 

3007 

3008 @override 

3009 def scan(self, force=False) -> None: 

3010 vdis = self._scan(force) 

3011 for uuid, vdiInfo in vdis.items(): 

3012 vdi = self.getVDI(uuid) 

3013 if not vdi: 

3014 self.logFilter.logNewVDI(uuid) 

3015 vdi = LVHDVDI(self, uuid, 

3016 vdiInfo.vdiType == vhdutil.VDI_TYPE_RAW) 

3017 self.vdis[uuid] = vdi 

3018 vdi.load(vdiInfo) 

3019 self._removeStaleVDIs(vdis.keys()) 

3020 self._buildTree(force) 

3021 self.logFilter.logState() 

3022 self._handleInterruptedCoalesceLeaf() 

3023 

3024 def _scan(self, force): 

3025 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3026 error = False 

3027 self.lvmCache.refresh() 

3028 vdis = lvhdutil.getVDIInfo(self.lvmCache) 

3029 for uuid, vdiInfo in vdis.items(): 

3030 if vdiInfo.scanError: 

3031 error = True 

3032 break 

3033 if not error: 

3034 return vdis 

3035 Util.log("Scan error, retrying (%d)" % i) 

3036 if force: 

3037 return vdis 

3038 raise util.SMException("Scan error") 

3039 

3040 @override 

3041 def _removeStaleVDIs(self, uuidsPresent) -> None: 

3042 for uuid in list(self.vdis.keys()): 

3043 if not uuid in uuidsPresent: 

3044 Util.log("VDI %s disappeared since last scan" % \ 

3045 self.vdis[uuid]) 

3046 del self.vdis[uuid] 

3047 if self.lvActivator.get(uuid, False): 

3048 self.lvActivator.remove(uuid, False) 

3049 

3050 @override 

3051 def _liveLeafCoalesce(self, vdi) -> bool: 

3052 """If the parent is raw and the child was resized (virt. size), then 

3053 we'll need to resize the parent, which can take a while due to zeroing 

3054 out of the extended portion of the LV. Do it before pausing the child 

3055 to avoid a protracted downtime""" 

3056 if vdi.parent.raw and vdi.sizeVirt > vdi.parent.sizeVirt: 

3057 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

3058 vdi.parent._increaseSizeVirt(vdi.sizeVirt) 

3059 

3060 return SR._liveLeafCoalesce(self, vdi) 

3061 

3062 @override 

3063 def _prepareCoalesceLeaf(self, vdi) -> None: 

3064 vdi._activateChain() 

3065 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

3066 vdi.deflate() 

3067 vdi.inflateParentForCoalesce() 

3068 

3069 @override 

3070 def _updateNode(self, vdi) -> None: 

3071 # fix the refcounts: the remaining node should inherit the binary 

3072 # refcount from the leaf (because if it was online, it should remain 

3073 # refcounted as such), but the normal refcount from the parent (because 

3074 # this node is really the parent node) - minus 1 if it is online (since 

3075 # non-leaf nodes increment their normal counts when they are online and 

3076 # we are now a leaf, storing that 1 in the binary refcount). 

3077 ns = lvhdutil.NS_PREFIX_LVM + self.uuid 

3078 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns) 

3079 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns) 

3080 pCnt = pCnt - cBcnt 

3081 assert(pCnt >= 0) 

3082 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns) 

3083 

3084 @override 

3085 def _finishCoalesceLeaf(self, parent) -> None: 

3086 if not parent.isSnapshot() or parent.isAttachedRW(): 

3087 parent.inflateFully() 

3088 else: 

3089 parent.deflate() 

3090 

3091 @override 

3092 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3093 return lvhdutil.calcSizeVHDLV(parent.sizeVirt) - parent.sizeLV 

3094 

3095 @override 

3096 def _handleInterruptedCoalesceLeaf(self) -> None: 

3097 entries = self.journaler.getAll(VDI.JRN_LEAF) 

3098 for uuid, parentUuid in entries.items(): 

3099 childLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + uuid 

3100 tmpChildLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ 

3101 self.TMP_RENAME_PREFIX + uuid 

3102 parentLV1 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + parentUuid 

3103 parentLV2 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + parentUuid 

3104 parentPresent = (self.lvmCache.checkLV(parentLV1) or \ 

3105 self.lvmCache.checkLV(parentLV2)) 

3106 if parentPresent or self.lvmCache.checkLV(tmpChildLV): 

3107 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3108 else: 

3109 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3110 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3111 vdi = self.getVDI(uuid) 

3112 if vdi: 

3113 vdi.ensureUnpaused() 

3114 

3115 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3116 Util.log("*** UNDO LEAF-COALESCE") 

3117 parent = self.getVDI(parentUuid) 

3118 if not parent: 

3119 parent = self.getVDI(childUuid) 

3120 if not parent: 

3121 raise util.SMException("Neither %s nor %s found" % \ 

3122 (parentUuid, childUuid)) 

3123 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

3124 parent.rename(parentUuid) 

3125 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

3126 

3127 child = self.getVDI(childUuid) 

3128 if not child: 

3129 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3130 if not child: 

3131 raise util.SMException("Neither %s nor %s found" % \ 

3132 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

3133 Util.log("Renaming child back to %s" % childUuid) 

3134 child.rename(childUuid) 

3135 Util.log("Updating the VDI record") 

3136 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

3137 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

3138 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

3139 

3140 # refcount (best effort - assume that it had succeeded if the 

3141 # second rename succeeded; if not, this adjustment will be wrong, 

3142 # leading to a non-deactivation of the LV) 

3143 ns = lvhdutil.NS_PREFIX_LVM + self.uuid 

3144 cCnt, cBcnt = RefCounter.check(child.uuid, ns) 

3145 pCnt, pBcnt = RefCounter.check(parent.uuid, ns) 

3146 pCnt = pCnt + cBcnt 

3147 RefCounter.set(parent.uuid, pCnt, 0, ns) 

3148 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid) 

3149 

3150 parent.deflate() 

3151 child.inflateFully() 

3152 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid) 

3153 if child.hidden: 

3154 child._setHidden(False) 

3155 if not parent.hidden: 

3156 parent._setHidden(True) 

3157 if not parent.lvReadonly: 

3158 self.lvmCache.setReadonly(parent.fileName, True) 

3159 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3160 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

3161 Util.log("*** leaf-coalesce undo successful") 

3162 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

3163 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

3164 

3165 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3166 Util.log("*** FINISH LEAF-COALESCE") 

3167 vdi = self.getVDI(childUuid) 

3168 if not vdi: 

3169 raise util.SMException("VDI %s not found" % childUuid) 

3170 vdi.inflateFully() 

3171 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid) 

3172 try: 

3173 self.forgetVDI(parentUuid) 

3174 except XenAPI.Failure: 

3175 pass 

3176 self._updateSlavesOnResize(vdi) 

3177 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

3178 Util.log("*** finished leaf-coalesce successfully") 

3179 

3180 def _checkSlaves(self, vdi): 

3181 """Confirm with all slaves in the pool that 'vdi' is not in use. We 

3182 try to check all slaves, including those that the Agent believes are 

3183 offline, but ignore failures for offline hosts. This is to avoid cases 

3184 where the Agent thinks a host is offline but the host is up.""" 

3185 args = {"vgName": self.vgName, 

3186 "action1": "deactivateNoRefcount", 

3187 "lvName1": vdi.fileName, 

3188 "action2": "cleanupLockAndRefcount", 

3189 "uuid2": vdi.uuid, 

3190 "ns2": lvhdutil.NS_PREFIX_LVM + self.uuid} 

3191 onlineHosts = self.xapi.getOnlineHosts() 

3192 abortFlag = IPCFlag(self.uuid) 

3193 for pbdRecord in self.xapi.getAttachedPBDs(): 

3194 hostRef = pbdRecord["host"] 

3195 if hostRef == self.xapi._hostRef: 

3196 continue 

3197 if abortFlag.test(FLAG_TYPE_ABORT): 

3198 raise AbortException("Aborting due to signal") 

3199 Util.log("Checking with slave %s (path %s)" % ( 

3200 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path)) 

3201 try: 

3202 self.xapi.ensureInactive(hostRef, args) 

3203 except XenAPI.Failure: 

3204 if hostRef in onlineHosts: 

3205 raise 

3206 

3207 @override 

3208 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: 

3209 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid]) 

3210 if not slaves: 

3211 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \ 

3212 child) 

3213 return 

3214 

3215 tmpName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ 

3216 self.TMP_RENAME_PREFIX + child.uuid 

3217 args = {"vgName": self.vgName, 

3218 "action1": "deactivateNoRefcount", 

3219 "lvName1": tmpName, 

3220 "action2": "deactivateNoRefcount", 

3221 "lvName2": child.fileName, 

3222 "action3": "refresh", 

3223 "lvName3": child.fileName, 

3224 "action4": "refresh", 

3225 "lvName4": parent.fileName} 

3226 for slave in slaves: 

3227 Util.log("Updating %s, %s, %s on slave %s" % \ 

3228 (tmpName, child.fileName, parent.fileName, 

3229 self.xapi.getRecordHost(slave)['hostname'])) 

3230 text = self.xapi.session.xenapi.host.call_plugin( \ 

3231 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3232 Util.log("call-plugin returned: '%s'" % text) 

3233 

3234 @override 

3235 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None: 

3236 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid]) 

3237 if not slaves: 

3238 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi) 

3239 return 

3240 

3241 args = {"vgName": self.vgName, 

3242 "action1": "deactivateNoRefcount", 

3243 "lvName1": oldNameLV, 

3244 "action2": "refresh", 

3245 "lvName2": vdi.fileName, 

3246 "action3": "cleanupLockAndRefcount", 

3247 "uuid3": origParentUuid, 

3248 "ns3": lvhdutil.NS_PREFIX_LVM + self.uuid} 

3249 for slave in slaves: 

3250 Util.log("Updating %s to %s on slave %s" % \ 

3251 (oldNameLV, vdi.fileName, 

3252 self.xapi.getRecordHost(slave)['hostname'])) 

3253 text = self.xapi.session.xenapi.host.call_plugin( \ 

3254 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3255 Util.log("call-plugin returned: '%s'" % text) 

3256 

3257 @override 

3258 def _updateSlavesOnResize(self, vdi) -> None: 

3259 uuids = [x.uuid for x in vdi.getAllLeaves()] 

3260 slaves = util.get_slaves_attached_on(self.xapi.session, uuids) 

3261 if not slaves: 

3262 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi) 

3263 return 

3264 lvhdutil.lvRefreshOnSlaves(self.xapi.session, self.uuid, self.vgName, 

3265 vdi.fileName, vdi.uuid, slaves) 

3266 

3267 

3268class LinstorSR(SR): 

3269 TYPE = SR.TYPE_LINSTOR 

3270 

3271 def __init__(self, uuid, xapi, createLock, force): 

3272 if not LINSTOR_AVAILABLE: 

3273 raise util.SMException( 

3274 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing' 

3275 ) 

3276 

3277 SR.__init__(self, uuid, xapi, createLock, force) 

3278 self.path = LinstorVolumeManager.DEV_ROOT_PATH 

3279 self._reloadLinstor(journaler_only=True) 

3280 

3281 @override 

3282 def deleteVDI(self, vdi) -> None: 

3283 self._checkSlaves(vdi) 

3284 SR.deleteVDI(self, vdi) 

3285 

3286 @override 

3287 def getFreeSpace(self) -> int: 

3288 return self._linstor.max_volume_size_allowed 

3289 

3290 @override 

3291 def scan(self, force=False) -> None: 

3292 all_vdi_info = self._scan(force) 

3293 for uuid, vdiInfo in all_vdi_info.items(): 

3294 # When vdiInfo is None, the VDI is RAW. 

3295 vdi = self.getVDI(uuid) 

3296 if not vdi: 

3297 self.logFilter.logNewVDI(uuid) 

3298 vdi = LinstorVDI(self, uuid, not vdiInfo) 

3299 self.vdis[uuid] = vdi 

3300 if vdiInfo: 

3301 vdi.load(vdiInfo) 

3302 self._removeStaleVDIs(all_vdi_info.keys()) 

3303 self._buildTree(force) 

3304 self.logFilter.logState() 

3305 self._handleInterruptedCoalesceLeaf() 

3306 

3307 @override 

3308 def pauseVDIs(self, vdiList) -> None: 

3309 self._linstor.ensure_volume_list_is_not_locked( 

3310 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3311 ) 

3312 return super(LinstorSR, self).pauseVDIs(vdiList) 

3313 

3314 def _reloadLinstor(self, journaler_only=False): 

3315 session = self.xapi.session 

3316 host_ref = util.get_this_host_ref(session) 

3317 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) 

3318 

3319 pbd = util.find_my_pbd(session, host_ref, sr_ref) 

3320 if pbd is None: 

3321 raise util.SMException('Failed to find PBD') 

3322 

3323 dconf = session.xenapi.PBD.get_device_config(pbd) 

3324 group_name = dconf['group-name'] 

3325 

3326 controller_uri = get_controller_uri() 

3327 self.journaler = LinstorJournaler( 

3328 controller_uri, group_name, logger=util.SMlog 

3329 ) 

3330 

3331 if journaler_only: 

3332 return 

3333 

3334 self._linstor = LinstorVolumeManager( 

3335 controller_uri, 

3336 group_name, 

3337 repair=True, 

3338 logger=util.SMlog 

3339 ) 

3340 self._vhdutil = LinstorVhdUtil(session, self._linstor) 

3341 

3342 def _scan(self, force): 

3343 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3344 self._reloadLinstor() 

3345 error = False 

3346 try: 

3347 all_vdi_info = self._load_vdi_info() 

3348 for uuid, vdiInfo in all_vdi_info.items(): 

3349 if vdiInfo and vdiInfo.error: 

3350 error = True 

3351 break 

3352 if not error: 

3353 return all_vdi_info 

3354 Util.log('Scan error, retrying ({})'.format(i)) 

3355 except Exception as e: 

3356 Util.log('Scan exception, retrying ({}): {}'.format(i, e)) 

3357 Util.log(traceback.format_exc()) 

3358 

3359 if force: 

3360 return all_vdi_info 

3361 raise util.SMException('Scan error') 

3362 

3363 def _load_vdi_info(self): 

3364 all_vdi_info = {} 

3365 

3366 # TODO: Ensure metadata contains the right info. 

3367 

3368 all_volume_info = self._linstor.get_volumes_with_info() 

3369 volumes_metadata = self._linstor.get_volumes_with_metadata() 

3370 for vdi_uuid, volume_info in all_volume_info.items(): 

3371 try: 

3372 volume_metadata = volumes_metadata[vdi_uuid] 

3373 if not volume_info.name and not list(volume_metadata.items()): 

3374 continue # Ignore it, probably deleted. 

3375 

3376 if vdi_uuid.startswith('DELETED_'): 

3377 # Assume it's really a RAW volume of a failed snap without VHD header/footer. 

3378 # We must remove this VDI now without adding it in the VDI list. 

3379 # Otherwise `Relinking` calls and other actions can be launched on it. 

3380 # We don't want that... 

3381 Util.log('Deleting bad VDI {}'.format(vdi_uuid)) 

3382 

3383 self.lock() 

3384 try: 

3385 self._linstor.destroy_volume(vdi_uuid) 

3386 try: 

3387 self.forgetVDI(vdi_uuid) 

3388 except: 

3389 pass 

3390 except Exception as e: 

3391 Util.log('Cannot delete bad VDI: {}'.format(e)) 

3392 finally: 

3393 self.unlock() 

3394 continue 

3395 

3396 vdi_type = volume_metadata.get(VDI_TYPE_TAG) 

3397 volume_name = self._linstor.get_volume_name(vdi_uuid) 

3398 if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX): 

3399 # Always RAW! 

3400 info = None 

3401 elif vdi_type == vhdutil.VDI_TYPE_VHD: 

3402 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3403 else: 

3404 # Ensure it's not a VHD... 

3405 try: 

3406 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3407 except: 

3408 try: 

3409 self._vhdutil.force_repair( 

3410 self._linstor.get_device_path(vdi_uuid) 

3411 ) 

3412 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3413 except: 

3414 info = None 

3415 

3416 except Exception as e: 

3417 Util.log( 

3418 ' [VDI {}: failed to load VDI info]: {}' 

3419 .format(vdi_uuid, e) 

3420 ) 

3421 info = vhdutil.VHDInfo(vdi_uuid) 

3422 info.error = 1 

3423 

3424 all_vdi_info[vdi_uuid] = info 

3425 

3426 return all_vdi_info 

3427 

3428 @override 

3429 def _prepareCoalesceLeaf(self, vdi) -> None: 

3430 vdi._activateChain() 

3431 vdi.deflate() 

3432 vdi._inflateParentForCoalesce() 

3433 

3434 @override 

3435 def _finishCoalesceLeaf(self, parent) -> None: 

3436 if not parent.isSnapshot() or parent.isAttachedRW(): 

3437 parent.inflateFully() 

3438 else: 

3439 parent.deflate() 

3440 

3441 @override 

3442 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3443 return LinstorVhdUtil.compute_volume_size(parent.sizeVirt, parent.vdi_type) - parent.getDrbdSize() 

3444 

3445 def _hasValidDevicePath(self, uuid): 

3446 try: 

3447 self._linstor.get_device_path(uuid) 

3448 except Exception: 

3449 # TODO: Maybe log exception. 

3450 return False 

3451 return True 

3452 

3453 @override 

3454 def _liveLeafCoalesce(self, vdi) -> bool: 

3455 self.lock() 

3456 try: 

3457 self._linstor.ensure_volume_is_not_locked( 

3458 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3459 ) 

3460 return super(LinstorSR, self)._liveLeafCoalesce(vdi) 

3461 finally: 

3462 self.unlock() 

3463 

3464 @override 

3465 def _handleInterruptedCoalesceLeaf(self) -> None: 

3466 entries = self.journaler.get_all(VDI.JRN_LEAF) 

3467 for uuid, parentUuid in entries.items(): 

3468 if self._hasValidDevicePath(parentUuid) or \ 

3469 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): 

3470 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3471 else: 

3472 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3473 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3474 vdi = self.getVDI(uuid) 

3475 if vdi: 

3476 vdi.ensureUnpaused() 

3477 

3478 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3479 Util.log('*** UNDO LEAF-COALESCE') 

3480 parent = self.getVDI(parentUuid) 

3481 if not parent: 

3482 parent = self.getVDI(childUuid) 

3483 if not parent: 

3484 raise util.SMException( 

3485 'Neither {} nor {} found'.format(parentUuid, childUuid) 

3486 ) 

3487 Util.log( 

3488 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) 

3489 ) 

3490 parent.rename(parentUuid) 

3491 

3492 child = self.getVDI(childUuid) 

3493 if not child: 

3494 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3495 if not child: 

3496 raise util.SMException( 

3497 'Neither {} nor {} found'.format( 

3498 childUuid, self.TMP_RENAME_PREFIX + childUuid 

3499 ) 

3500 ) 

3501 Util.log('Renaming child back to {}'.format(childUuid)) 

3502 child.rename(childUuid) 

3503 Util.log('Updating the VDI record') 

3504 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

3505 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

3506 

3507 # TODO: Maybe deflate here. 

3508 

3509 if child.hidden: 

3510 child._setHidden(False) 

3511 if not parent.hidden: 

3512 parent._setHidden(True) 

3513 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3514 Util.log('*** leaf-coalesce undo successful') 

3515 

3516 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3517 Util.log('*** FINISH LEAF-COALESCE') 

3518 vdi = self.getVDI(childUuid) 

3519 if not vdi: 

3520 raise util.SMException('VDI {} not found'.format(childUuid)) 

3521 # TODO: Maybe inflate. 

3522 try: 

3523 self.forgetVDI(parentUuid) 

3524 except XenAPI.Failure: 

3525 pass 

3526 self._updateSlavesOnResize(vdi) 

3527 Util.log('*** finished leaf-coalesce successfully') 

3528 

3529 def _checkSlaves(self, vdi): 

3530 try: 

3531 all_openers = self._linstor.get_volume_openers(vdi.uuid) 

3532 for openers in all_openers.values(): 

3533 for opener in openers.values(): 

3534 if opener['process-name'] != 'tapdisk': 

3535 raise util.SMException( 

3536 'VDI {} is in use: {}'.format(vdi.uuid, all_openers) 

3537 ) 

3538 except LinstorVolumeManagerError as e: 

3539 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: 

3540 raise 

3541 

3542 

3543################################################################################ 

3544# 

3545# Helpers 

3546# 

3547def daemonize(): 

3548 pid = os.fork() 

3549 if pid: 

3550 os.waitpid(pid, 0) 

3551 Util.log("New PID [%d]" % pid) 

3552 return False 

3553 os.chdir("/") 

3554 os.setsid() 

3555 pid = os.fork() 

3556 if pid: 

3557 Util.log("Will finish as PID [%d]" % pid) 

3558 os._exit(0) 

3559 for fd in [0, 1, 2]: 

3560 try: 

3561 os.close(fd) 

3562 except OSError: 

3563 pass 

3564 # we need to fill those special fd numbers or pread won't work 

3565 sys.stdin = open("/dev/null", 'r') 

3566 sys.stderr = open("/dev/null", 'w') 

3567 sys.stdout = open("/dev/null", 'w') 

3568 # As we're a new process we need to clear the lock objects 

3569 lock.Lock.clearAll() 

3570 return True 

3571 

3572 

3573def normalizeType(type): 

3574 if type in LVHDSR.SUBTYPES: 

3575 type = SR.TYPE_LVHD 

3576 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]: 

3577 # temporary while LVHD is symlinked as LVM 

3578 type = SR.TYPE_LVHD 

3579 if type in [ 

3580 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", 

3581 "moosefs", "xfs", "zfs", "largeblock" 

3582 ]: 

3583 type = SR.TYPE_FILE 

3584 if type in ["linstor"]: 

3585 type = SR.TYPE_LINSTOR 

3586 if type not in SR.TYPES: 

3587 raise util.SMException("Unsupported SR type: %s" % type) 

3588 return type 

3589 

3590GCPAUSE_DEFAULT_SLEEP = 5 * 60 

3591 

3592 

3593def _gc_init_file(sr_uuid): 

3594 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init') 

3595 

3596 

3597def _create_init_file(sr_uuid): 

3598 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))) 

3599 with open(os.path.join( 

3600 NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init'), 'w+') as f: 

3601 f.write('1') 

3602 

3603 

3604def _gcLoopPause(sr, dryRun=False, immediate=False): 

3605 if immediate: 

3606 return 

3607 

3608 # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist 

3609 # point will just return. Otherwise, fall back on an abortable sleep. 

3610 

3611 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT): 

3612 

3613 util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT, 3613 ↛ exitline 3613 didn't jump to the function exit

3614 lambda *args: None) 

3615 elif os.path.exists(_gc_init_file(sr.uuid)): 

3616 def abortTest(): 

3617 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT) 

3618 

3619 # If time.sleep hangs we are in deep trouble, however for 

3620 # completeness we set the timeout of the abort thread to 

3621 # 110% of GCPAUSE_DEFAULT_SLEEP. 

3622 Util.log("GC active, about to go quiet") 

3623 Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP), 3623 ↛ exitline 3623 didn't run the lambda on line 3623

3624 None, sr.uuid, abortTest, VDI.POLL_INTERVAL, 

3625 GCPAUSE_DEFAULT_SLEEP * 1.1) 

3626 Util.log("GC active, quiet period ended") 

3627 

3628 

3629def _gcLoop(sr, dryRun=False, immediate=False): 

3630 if not lockGCActive.acquireNoblock(): 3630 ↛ 3631line 3630 didn't jump to line 3631, because the condition on line 3630 was never true

3631 Util.log("Another GC instance already active, exiting") 

3632 return 

3633 

3634 # Check we're still attached after acquiring locks 

3635 if not sr.xapi.isPluggedHere(): 

3636 Util.log("SR no longer attached, exiting") 

3637 return 

3638 

3639 # Clean up Intellicache files 

3640 sr.cleanupCache() 

3641 

3642 # Track how many we do 

3643 coalesced = 0 

3644 task_status = "success" 

3645 try: 

3646 # Check if any work needs to be done 

3647 if not sr.xapi.isPluggedHere(): 3647 ↛ 3648line 3647 didn't jump to line 3648, because the condition on line 3647 was never true

3648 Util.log("SR no longer attached, exiting") 

3649 return 

3650 sr.scanLocked() 

3651 if not sr.hasWork(): 

3652 Util.log("No work, exiting") 

3653 return 

3654 sr.xapi.create_task( 

3655 "Garbage Collection", 

3656 "Garbage collection for SR %s" % sr.uuid) 

3657 _gcLoopPause(sr, dryRun, immediate=immediate) 

3658 while True: 

3659 if not sr.xapi.isPluggedHere(): 3659 ↛ 3660line 3659 didn't jump to line 3660, because the condition on line 3659 was never true

3660 Util.log("SR no longer attached, exiting") 

3661 break 

3662 sr.scanLocked() 

3663 if not sr.hasWork(): 

3664 Util.log("No work, exiting") 

3665 break 

3666 

3667 if not lockGCRunning.acquireNoblock(): 3667 ↛ 3668line 3667 didn't jump to line 3668, because the condition on line 3667 was never true

3668 Util.log("Unable to acquire GC running lock.") 

3669 return 

3670 try: 

3671 if not sr.gcEnabled(): 3671 ↛ 3672line 3671 didn't jump to line 3672, because the condition on line 3671 was never true

3672 break 

3673 

3674 sr.xapi.update_task_progress("done", coalesced) 

3675 

3676 sr.cleanupCoalesceJournals() 

3677 # Create the init file here in case startup is waiting on it 

3678 _create_init_file(sr.uuid) 

3679 sr.scanLocked() 

3680 sr.updateBlockInfo() 

3681 

3682 howmany = len(sr.findGarbage()) 

3683 if howmany > 0: 

3684 Util.log("Found %d orphaned vdis" % howmany) 

3685 sr.lock() 

3686 try: 

3687 sr.garbageCollect(dryRun) 

3688 finally: 

3689 sr.unlock() 

3690 sr.xapi.srUpdate() 

3691 

3692 candidate = sr.findCoalesceable() 

3693 if candidate: 

3694 util.fistpoint.activate( 

3695 "LVHDRT_finding_a_suitable_pair", sr.uuid) 

3696 sr.coalesce(candidate, dryRun) 

3697 sr.xapi.srUpdate() 

3698 coalesced += 1 

3699 continue 

3700 

3701 candidate = sr.findLeafCoalesceable() 

3702 if candidate: 3702 ↛ 3709line 3702 didn't jump to line 3709, because the condition on line 3702 was never false

3703 sr.coalesceLeaf(candidate, dryRun) 

3704 sr.xapi.srUpdate() 

3705 coalesced += 1 

3706 continue 

3707 

3708 finally: 

3709 lockGCRunning.release() 3709 ↛ 3714line 3709 didn't jump to line 3714, because the break on line 3672 wasn't executed

3710 except: 

3711 task_status = "failure" 

3712 raise 

3713 finally: 

3714 sr.xapi.set_task_status(task_status) 

3715 Util.log("GC process exiting, no work left") 

3716 _create_init_file(sr.uuid) 

3717 lockGCActive.release() 

3718 

3719 

3720def _xapi_enabled(session, hostref): 

3721 host = session.xenapi.host.get_record(hostref) 

3722 return host['enabled'] 

3723 

3724 

3725def _ensure_xapi_initialised(session): 

3726 """ 

3727 Don't want to start GC until Xapi is fully initialised 

3728 """ 

3729 local_session = None 

3730 if session is None: 

3731 local_session = util.get_localAPI_session() 

3732 session = local_session 

3733 

3734 try: 

3735 hostref = session.xenapi.host.get_by_uuid(util.get_this_host()) 

3736 while not _xapi_enabled(session, hostref): 

3737 util.SMlog("Xapi not ready, GC waiting") 

3738 time.sleep(15) 

3739 finally: 

3740 if local_session is not None: 

3741 local_session.xenapi.session.logout() 

3742 

3743def _gc(session, srUuid, dryRun=False, immediate=False): 

3744 init(srUuid) 

3745 _ensure_xapi_initialised(session) 

3746 sr = SR.getInstance(srUuid, session) 

3747 if not sr.gcEnabled(False): 3747 ↛ 3748line 3747 didn't jump to line 3748, because the condition on line 3747 was never true

3748 return 

3749 

3750 try: 

3751 _gcLoop(sr, dryRun, immediate=immediate) 

3752 finally: 

3753 sr.cleanup() 

3754 sr.logFilter.logState() 

3755 del sr.xapi 

3756 

3757 

3758def _abort(srUuid, soft=False): 

3759 """Aborts an GC/coalesce. 

3760 

3761 srUuid: the UUID of the SR whose GC/coalesce must be aborted 

3762 soft: If set to True and there is a pending abort signal, the function 

3763 doesn't do anything. If set to False, a new abort signal is issued. 

3764 

3765 returns: If soft is set to False, we return True holding lockGCActive. If 

3766 soft is set to False and an abort signal is pending, we return False 

3767 without holding lockGCActive. An exception is raised in case of error.""" 

3768 Util.log("=== SR %s: abort ===" % (srUuid)) 

3769 init(srUuid) 

3770 if not lockGCActive.acquireNoblock(): 

3771 gotLock = False 

3772 Util.log("Aborting currently-running instance (SR %s)" % srUuid) 

3773 abortFlag = IPCFlag(srUuid) 

3774 if not abortFlag.set(FLAG_TYPE_ABORT, soft): 

3775 return False 

3776 for i in range(SR.LOCK_RETRY_ATTEMPTS): 

3777 gotLock = lockGCActive.acquireNoblock() 

3778 if gotLock: 

3779 break 

3780 time.sleep(SR.LOCK_RETRY_INTERVAL) 

3781 abortFlag.clear(FLAG_TYPE_ABORT) 

3782 if not gotLock: 

3783 raise util.CommandException(code=errno.ETIMEDOUT, 

3784 reason="SR %s: error aborting existing process" % srUuid) 

3785 return True 

3786 

3787 

3788def init(srUuid): 

3789 global lockGCRunning 

3790 if not lockGCRunning: 3790 ↛ 3791line 3790 didn't jump to line 3791, because the condition on line 3790 was never true

3791 lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid) 

3792 global lockGCActive 

3793 if not lockGCActive: 3793 ↛ 3794line 3793 didn't jump to line 3794, because the condition on line 3793 was never true

3794 lockGCActive = LockActive(srUuid) 

3795 

3796 

3797class LockActive: 

3798 """ 

3799 Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired 

3800 if another process holds the SR lock. 

3801 """ 

3802 def __init__(self, srUuid): 

3803 self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid) 

3804 self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, srUuid) 

3805 

3806 def acquireNoblock(self): 

3807 self._srLock.acquire() 

3808 

3809 try: 

3810 return self._lock.acquireNoblock() 

3811 finally: 

3812 self._srLock.release() 

3813 

3814 def release(self): 

3815 self._lock.release() 

3816 

3817 

3818def usage(): 

3819 output = """Garbage collect and/or coalesce VHDs in a VHD-based SR 

3820 

3821Parameters: 

3822 -u --uuid UUID SR UUID 

3823 and one of: 

3824 -g --gc garbage collect, coalesce, and repeat while there is work 

3825 -G --gc_force garbage collect once, aborting any current operations 

3826 -c --cache-clean <max_age> clean up IntelliCache cache files older than 

3827 max_age hours 

3828 -a --abort abort any currently running operation (GC or coalesce) 

3829 -q --query query the current state (GC'ing, coalescing or not running) 

3830 -x --disable disable GC/coalesce (will be in effect until you exit) 

3831 -t --debug see Debug below 

3832 

3833Options: 

3834 -b --background run in background (return immediately) (valid for -g only) 

3835 -f --force continue in the presence of VHDs with errors (when doing 

3836 GC, this might cause removal of any such VHDs) (only valid 

3837 for -G) (DANGEROUS) 

3838 

3839Debug: 

3840 The --debug parameter enables manipulation of LVHD VDIs for debugging 

3841 purposes. ** NEVER USE IT ON A LIVE VM ** 

3842 The following parameters are required: 

3843 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate", 

3844 "deflate". 

3845 -v --vdi_uuid VDI UUID 

3846 """ 

3847 #-d --dry-run don't actually perform any SR-modifying operations 

3848 print(output) 

3849 Util.log("(Invalid usage)") 

3850 sys.exit(1) 

3851 

3852 

3853############################################################################## 

3854# 

3855# API 

3856# 

3857def abort(srUuid, soft=False): 

3858 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair. 

3859 """ 

3860 if _abort(srUuid, soft): 

3861 Util.log("abort: releasing the process lock") 

3862 lockGCActive.release() 

3863 return True 

3864 else: 

3865 return False 

3866 

3867 

3868def gc(session, srUuid, inBackground, dryRun=False): 

3869 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return 

3870 immediately if inBackground=True. 

3871 

3872 The following algorithm is used: 

3873 1. If we are already GC'ing in this SR, return 

3874 2. If we are already coalescing a VDI pair: 

3875 a. Scan the SR and determine if the VDI pair is GC'able 

3876 b. If the pair is not GC'able, return 

3877 c. If the pair is GC'able, abort coalesce 

3878 3. Scan the SR 

3879 4. If there is nothing to collect, nor to coalesce, return 

3880 5. If there is something to collect, GC all, then goto 3 

3881 6. If there is something to coalesce, coalesce one pair, then goto 3 

3882 """ 

3883 Util.log("=== SR %s: gc ===" % srUuid) 

3884 if inBackground: 

3885 if daemonize(): 3885 ↛ exitline 3885 didn't return from function 'gc', because the condition on line 3885 was never false

3886 # we are now running in the background. Catch & log any errors 

3887 # because there is no other way to propagate them back at this 

3888 # point 

3889 

3890 try: 

3891 _gc(None, srUuid, dryRun) 

3892 except AbortException: 

3893 Util.log("Aborted") 

3894 except Exception: 

3895 Util.logException("gc") 

3896 Util.log("* * * * * SR %s: ERROR\n" % srUuid) 

3897 os._exit(0) 

3898 else: 

3899 _gc(session, srUuid, dryRun, immediate=True) 

3900 

3901 

3902def start_gc(session, sr_uuid): 

3903 """ 

3904 This function is used to try to start a backgrounded GC session by forking 

3905 the current process. If using the systemd version, call start_gc_service() instead. 

3906 """ 

3907 # don't bother if an instance already running (this is just an 

3908 # optimization to reduce the overhead of forking a new process if we 

3909 # don't have to, but the process will check the lock anyways) 

3910 lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid) 

3911 if not lockRunning.acquireNoblock(): 

3912 if should_preempt(session, sr_uuid): 

3913 util.SMlog("Aborting currently-running coalesce of garbage VDI") 

3914 try: 

3915 if not abort(sr_uuid, soft=True): 

3916 util.SMlog("The GC has already been scheduled to re-start") 

3917 except util.CommandException as e: 

3918 if e.code != errno.ETIMEDOUT: 

3919 raise 

3920 util.SMlog('failed to abort the GC') 

3921 else: 

3922 util.SMlog("A GC instance already running, not kicking") 

3923 return 

3924 else: 

3925 lockRunning.release() 

3926 

3927 util.SMlog(f"Starting GC file is {__file__}") 

3928 subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'], 

3929 stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

3930 

3931def start_gc_service(sr_uuid, wait=False): 

3932 """ 

3933 This starts the templated systemd service which runs GC on the given SR UUID. 

3934 If the service was already started, this is a no-op. 

3935 

3936 Because the service is a one-shot with RemainAfterExit=no, when called with 

3937 wait=True this will run the service synchronously and will not return until the 

3938 run has finished. This is used to force a run of the GC instead of just kicking it 

3939 in the background. 

3940 """ 

3941 sr_uuid_esc = sr_uuid.replace("-", "\\x2d") 

3942 util.SMlog(f"Kicking SMGC@{sr_uuid}...") 

3943 cmd=[ "/usr/bin/systemctl", "--quiet" ] 

3944 if not wait: 3944 ↛ 3946line 3944 didn't jump to line 3946, because the condition on line 3944 was never false

3945 cmd.append("--no-block") 

3946 cmd += ["start", f"SMGC@{sr_uuid_esc}"] 

3947 subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

3948 

3949 

3950def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False): 

3951 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure 

3952 the SR lock is held. 

3953 The following algorithm is used: 

3954 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce 

3955 2. Scan the SR 

3956 3. GC 

3957 4. return 

3958 """ 

3959 Util.log("=== SR %s: gc_force ===" % srUuid) 

3960 init(srUuid) 

3961 sr = SR.getInstance(srUuid, session, lockSR, True) 

3962 if not lockGCActive.acquireNoblock(): 

3963 abort(srUuid) 

3964 else: 

3965 Util.log("Nothing was running, clear to proceed") 

3966 

3967 if force: 

3968 Util.log("FORCED: will continue even if there are VHD errors") 

3969 sr.scanLocked(force) 

3970 sr.cleanupCoalesceJournals() 

3971 

3972 try: 

3973 sr.cleanupCache() 

3974 sr.garbageCollect(dryRun) 

3975 finally: 

3976 sr.cleanup() 

3977 sr.logFilter.logState() 

3978 lockGCActive.release() 

3979 

3980 

3981def get_state(srUuid): 

3982 """Return whether GC/coalesce is currently running or not. This asks systemd for 

3983 the state of the templated SMGC service and will return True if it is "activating" 

3984 or "running" (for completeness, as in practice it will never achieve the latter state) 

3985 """ 

3986 sr_uuid_esc = srUuid.replace("-", "\\x2d") 

3987 cmd=[ "/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"] 

3988 result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

3989 state = result.stdout.decode('utf-8').rstrip() 

3990 if state == "activating" or state == "running": 

3991 return True 

3992 return False 

3993 

3994 

3995def should_preempt(session, srUuid): 

3996 sr = SR.getInstance(srUuid, session) 

3997 entries = sr.journaler.getAll(VDI.JRN_COALESCE) 

3998 if len(entries) == 0: 

3999 return False 

4000 elif len(entries) > 1: 

4001 raise util.SMException("More than one coalesce entry: " + str(entries)) 

4002 sr.scanLocked() 

4003 coalescedUuid = entries.popitem()[0] 

4004 garbage = sr.findGarbage() 

4005 for vdi in garbage: 

4006 if vdi.uuid == coalescedUuid: 

4007 return True 

4008 return False 

4009 

4010 

4011def get_coalesceable_leaves(session, srUuid, vdiUuids): 

4012 coalesceable = [] 

4013 sr = SR.getInstance(srUuid, session) 

4014 sr.scanLocked() 

4015 for uuid in vdiUuids: 

4016 vdi = sr.getVDI(uuid) 

4017 if not vdi: 

4018 raise util.SMException("VDI %s not found" % uuid) 

4019 if vdi.isLeafCoalesceable(): 

4020 coalesceable.append(uuid) 

4021 return coalesceable 

4022 

4023 

4024def cache_cleanup(session, srUuid, maxAge): 

4025 sr = SR.getInstance(srUuid, session) 

4026 return sr.cleanupCache(maxAge) 

4027 

4028 

4029def debug(sr_uuid, cmd, vdi_uuid): 

4030 Util.log("Debug command: %s" % cmd) 

4031 sr = SR.getInstance(sr_uuid, None) 

4032 if not isinstance(sr, LVHDSR): 

4033 print("Error: not an LVHD SR") 

4034 return 

4035 sr.scanLocked() 

4036 vdi = sr.getVDI(vdi_uuid) 

4037 if not vdi: 

4038 print("Error: VDI %s not found") 

4039 return 

4040 print("Running %s on SR %s" % (cmd, sr)) 

4041 print("VDI before: %s" % vdi) 

4042 if cmd == "activate": 

4043 vdi._activate() 

4044 print("VDI file: %s" % vdi.path) 

4045 if cmd == "deactivate": 

4046 ns = lvhdutil.NS_PREFIX_LVM + sr.uuid 

4047 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False) 

4048 if cmd == "inflate": 

4049 vdi.inflateFully() 

4050 sr.cleanup() 

4051 if cmd == "deflate": 

4052 vdi.deflate() 

4053 sr.cleanup() 

4054 sr.scanLocked() 

4055 print("VDI after: %s" % vdi) 

4056 

4057 

4058def abort_optional_reenable(uuid): 

4059 print("Disabling GC/coalesce for %s" % uuid) 

4060 ret = _abort(uuid) 

4061 input("Press enter to re-enable...") 

4062 print("GC/coalesce re-enabled") 

4063 lockGCRunning.release() 

4064 if ret: 

4065 lockGCActive.release() 

4066 

4067 

4068############################################################################## 

4069# 

4070# CLI 

4071# 

4072def main(): 

4073 action = "" 

4074 uuid = "" 

4075 background = False 

4076 force = False 

4077 dryRun = False 

4078 debug_cmd = "" 

4079 vdi_uuid = "" 

4080 shortArgs = "gGc:aqxu:bfdt:v:" 

4081 longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable", 

4082 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="] 

4083 

4084 try: 

4085 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs) 

4086 except getopt.GetoptError: 

4087 usage() 

4088 for o, a in opts: 

4089 if o in ("-g", "--gc"): 

4090 action = "gc" 

4091 if o in ("-G", "--gc_force"): 

4092 action = "gc_force" 

4093 if o in ("-c", "--clean_cache"): 

4094 action = "clean_cache" 

4095 maxAge = int(a) 

4096 if o in ("-a", "--abort"): 

4097 action = "abort" 

4098 if o in ("-q", "--query"): 

4099 action = "query" 

4100 if o in ("-x", "--disable"): 

4101 action = "disable" 

4102 if o in ("-u", "--uuid"): 

4103 uuid = a 

4104 if o in ("-b", "--background"): 

4105 background = True 

4106 if o in ("-f", "--force"): 

4107 force = True 

4108 if o in ("-d", "--dry-run"): 

4109 Util.log("Dry run mode") 

4110 dryRun = True 

4111 if o in ("-t", "--debug"): 

4112 action = "debug" 

4113 debug_cmd = a 

4114 if o in ("-v", "--vdi_uuid"): 

4115 vdi_uuid = a 

4116 

4117 if not action or not uuid: 

4118 usage() 

4119 if action == "debug" and not (debug_cmd and vdi_uuid) or \ 

4120 action != "debug" and (debug_cmd or vdi_uuid): 

4121 usage() 

4122 

4123 if action != "query" and action != "debug": 

4124 print("All output goes to log") 

4125 

4126 if action == "gc": 

4127 gc(None, uuid, background, dryRun) 

4128 elif action == "gc_force": 

4129 gc_force(None, uuid, force, dryRun, True) 

4130 elif action == "clean_cache": 

4131 cache_cleanup(None, uuid, maxAge) 

4132 elif action == "abort": 

4133 abort(uuid) 

4134 elif action == "query": 

4135 print("Currently running: %s" % get_state(uuid)) 

4136 elif action == "disable": 

4137 abort_optional_reenable(uuid) 

4138 elif action == "debug": 

4139 debug(uuid, debug_cmd, vdi_uuid) 

4140 

4141 

4142if __name__ == '__main__': 4142 ↛ 4143line 4142 didn't jump to line 4143, because the condition on line 4142 was never true

4143 main()