1#!/usr/bin/python3 

2# 

3# Copyright (C) Citrix Systems Inc. 

4# 

5# This program is free software; you can redistribute it and/or modify 

6# it under the terms of the GNU Lesser General Public License as published 

7# by the Free Software Foundation; version 2.1 only. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program; if not, write to the Free Software Foundation, Inc., 

16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 

17# 

18# Script to coalesce and garbage collect VHD-based SRs in the background 

19# 

20 

21from sm_typing import Optional, override 

22 

23import os 

24import os.path 

25import sys 

26import time 

27import signal 

28import subprocess 

29import getopt 

30import datetime 

31import traceback 

32import base64 

33import zlib 

34import errno 

35import stat 

36 

37import XenAPI # pylint: disable=import-error 

38import util 

39import lvutil 

40import vhdutil 

41import lvhdutil 

42import lvmcache 

43import journaler 

44import fjournaler 

45import lock 

46import blktap2 

47import xs_errors 

48from refcounter import RefCounter 

49from ipc import IPCFlag 

50from lvmanager import LVActivator 

51from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG 

52from functools import reduce 

53from time import monotonic as _time 

54 

55try: 

56 from linstorjournaler import LinstorJournaler 

57 from linstorvhdutil import LinstorVhdUtil 

58 from linstorvolumemanager import get_controller_uri 

59 from linstorvolumemanager import LinstorVolumeManager 

60 from linstorvolumemanager import LinstorVolumeManagerError 

61 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX 

62 

63 LINSTOR_AVAILABLE = True 

64except ImportError: 

65 LINSTOR_AVAILABLE = False 

66 

67# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not 

68# possible due to lvhd_stop_using_() not working correctly. However, we leave 

69# this option available through the explicit LEAFCLSC_FORCE flag in the VDI 

70# record for use by the offline tool (which makes the operation safe by pausing 

71# the VM first) 

72AUTO_ONLINE_LEAF_COALESCE_ENABLED = True 

73 

74FLAG_TYPE_ABORT = "abort" # flag to request aborting of GC/coalesce 

75 

76# process "lock", used simply as an indicator that a process already exists 

77# that is doing GC/coalesce on this SR (such a process holds the lock, and we 

78# check for the fact by trying the lock). 

79lockGCRunning = None 

80 

81# process "lock" to indicate that the GC process has been activated but may not 

82# yet be running, stops a second process from being started. 

83LOCK_TYPE_GC_ACTIVE = "gc_active" 

84lockGCActive = None 
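# A minimal sketch of how these two locks are meant to be used (assuming the
# imported lock module provides Lock(name, ns) with a non-blocking
# acquireNoblock(); the actual lock setup happens outside this excerpt):
#
#   lockGCActive = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid)
#   if not lockGCActive.acquireNoblock():
#       return  # another GC process is already being activated for this SR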

85 

86# Default coalesce error rate limit, in messages per minute. A zero value 

87# disables throttling, and a negative value disables error reporting. 

88DEFAULT_COALESCE_ERR_RATE = 1.0 / 60 
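# For example, the default of 1.0 / 60 messages per minute allows at most one
# coalesce error message per hour: the throttle in _reportCoalesceError spaces
# messages by (1 / rate) * 60 seconds, i.e. 3600 seconds here.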

89 

90COALESCE_LAST_ERR_TAG = 'last-coalesce-error' 

91COALESCE_ERR_RATE_TAG = 'coalesce-error-rate' 

92VAR_RUN = "/var/run/" 

93SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log" 

94 

95N_RUNNING_AVERAGE = 10 

96 

97NON_PERSISTENT_DIR = '/run/nonpersistent/sm' 

98 

99# Signal Handler 

100SIGTERM = False 

101 

102 

103class AbortException(util.SMException): 

104 pass 

105 

106 

107def receiveSignal(signalNumber, frame): 

108 global SIGTERM 

109 

110    util.SMlog("GC: received SIGTERM") 

111 SIGTERM = True 

112 return 
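# Note: the handler above only takes effect once installed via the standard
# library call signal.signal(signal.SIGTERM, receiveSignal); that registration
# presumably happens in the script's entry point, outside this excerpt.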

113 

114 

115################################################################################ 

116# 

117# Util 

118# 

119class Util: 

120 RET_RC = 1 

121 RET_STDOUT = 2 

122 RET_STDERR = 4 

123 

124 UUID_LEN = 36 

125 

126 PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024} 

127 

128 @staticmethod 

129 def log(text) -> None: 

130 util.SMlog(text, ident="SMGC") 

131 

132 @staticmethod 

133 def logException(tag): 

134 info = sys.exc_info() 

135        if info[0] == SystemExit:  # coverage: 135 ↛ 137 (condition never true) 

136 # this should not be happening when catching "Exception", but it is 

137 sys.exit(0) 

138 tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2])) 

139 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

140 Util.log(" ***********************") 

141 Util.log(" * E X C E P T I O N *") 

142 Util.log(" ***********************") 

143 Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1])) 

144 Util.log(tb) 

145 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

146 
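    # Hedged usage sketch of doexec below (the command strings are hypothetical;
    # doexec runs them with shell=True and raises util.CommandException on an
    # unexpected return code):
    #
    #   out = Util.doexec("vhd-util --help", 0)                # returns stdout
    #   rc = Util.doexec("ls /tmp", [0, 2], ret=Util.RET_RC)   # returns the exit code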

147 @staticmethod 

148 def doexec(args, expectedRC, inputtext=None, ret=None, log=True): 

149        "Execute a subprocess and return its exit code, stdout, or stderr, depending on 'ret'" 

150 proc = subprocess.Popen(args, 

151 stdin=subprocess.PIPE, \ 

152 stdout=subprocess.PIPE, \ 

153 stderr=subprocess.PIPE, \ 

154 shell=True, \ 

155 close_fds=True) 

156 (stdout, stderr) = proc.communicate(inputtext) 

157 stdout = str(stdout) 

158 stderr = str(stderr) 

159 rc = proc.returncode 

160 if log: 

161 Util.log("`%s`: %s" % (args, rc)) 

162 if type(expectedRC) != type([]): 

163 expectedRC = [expectedRC] 

164 if not rc in expectedRC: 

165 reason = stderr.strip() 

166 if stdout.strip(): 

167 reason = "%s (stdout: %s)" % (reason, stdout.strip()) 

168 Util.log("Failed: %s" % reason) 

169 raise util.CommandException(rc, args, reason) 

170 

171 if ret == Util.RET_RC: 

172 return rc 

173 if ret == Util.RET_STDERR: 

174 return stderr 

175 return stdout 

176 
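    # How runAbortable below works (descriptive summary of the code): the parent
    # forks a child that runs func() and reports the outcome through an IPCFlag
    # ("success"/"failure"); the parent polls that flag every pollInterval
    # seconds and kills the child's process group on an abort request, SIGTERM,
    # or timeout, then reaps the child with os.waitpid.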

177 @staticmethod 

178 def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut): 

179        """execute func in a separate process and kill it if abortTest signals 

180        so""" 

181 abortSignaled = abortTest() # check now before we clear resultFlag 

182 resultFlag = IPCFlag(ns) 

183 resultFlag.clearAll() 

184 pid = os.fork() 

185 if pid: 

186 startTime = _time() 

187 try: 

188 while True: 

189 if resultFlag.test("success"): 

190 Util.log(" Child process completed successfully") 

191 resultFlag.clear("success") 

192 return 

193 if resultFlag.test("failure"): 

194 resultFlag.clear("failure") 

195 raise util.SMException("Child process exited with error") 

196 if abortTest() or abortSignaled or SIGTERM: 

197 os.killpg(pid, signal.SIGKILL) 

198 raise AbortException("Aborting due to signal") 

199 if timeOut and _time() - startTime > timeOut: 

200 os.killpg(pid, signal.SIGKILL) 

201 resultFlag.clearAll() 

202 raise util.SMException("Timed out") 

203 time.sleep(pollInterval) 

204 finally: 

205 wait_pid = 0 

206 rc = -1 

207 count = 0 

208 while wait_pid == 0 and count < 10: 

209 wait_pid, rc = os.waitpid(pid, os.WNOHANG) 

210 if wait_pid == 0: 

211 time.sleep(2) 

212 count += 1 

213 

214 if wait_pid == 0: 

215 Util.log("runAbortable: wait for process completion timed out") 

216 else: 

217 os.setpgrp() 

218 try: 

219 if func() == ret: 

220 resultFlag.set("success") 

221 else: 

222 resultFlag.set("failure") 

223 except Exception as e: 

224 Util.log("Child process failed with : (%s)" % e) 

225 resultFlag.set("failure") 

226                Util.logException("This exception has occurred") 

227 os._exit(0) 

228 
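    # Worked example for num2str below: Util.num2str(3 * 1024 ** 3) returns
    # "3.000G", while values below 1024 are returned unscaled, e.g.
    # Util.num2str(512) returns "512".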

229 @staticmethod 

230 def num2str(number): 

231        for prefix in ("G", "M", "K"):  # coverage: 231 ↛ 234 (loop never completed) 

232 if number >= Util.PREFIX[prefix]: 

233 return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix) 

234 return "%s" % number 

235 

236 @staticmethod 

237 def numBits(val): 

238 count = 0 

239 while val: 

240 count += val & 1 

241 val = val >> 1 

242 return count 

243 
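    # Worked example for countBits below: Util.countBits(b'\x03', b'\x01\x80')
    # ORs the overlapping byte (0x03 | 0x01 = 0x03, two bits set) and then counts
    # the tail of the longer bitmap (0x80, one bit set), giving 3.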

244 @staticmethod 

245 def countBits(bitmap1, bitmap2): 

246 """return bit count in the bitmap produced by ORing the two bitmaps""" 

247 len1 = len(bitmap1) 

248 len2 = len(bitmap2) 

249 lenLong = len1 

250 lenShort = len2 

251 bitmapLong = bitmap1 

252 if len2 > len1: 

253 lenLong = len2 

254 lenShort = len1 

255 bitmapLong = bitmap2 

256 

257 count = 0 

258 for i in range(lenShort): 

259 val = bitmap1[i] | bitmap2[i] 

260 count += Util.numBits(val) 

261 

262 for i in range(i + 1, lenLong): 

263 val = bitmapLong[i] 

264 count += Util.numBits(val) 

265 return count 

266 

267 @staticmethod 

268 def getThisScript(): 

269 thisScript = util.get_real_path(__file__) 

270 if thisScript.endswith(".pyc"): 

271 thisScript = thisScript[:-1] 

272 return thisScript 

273 

274 

275################################################################################ 

276# 

277# XAPI 

278# 

279class XAPI: 

280 USER = "root" 

281 PLUGIN_ON_SLAVE = "on-slave" 

282 

283 CONFIG_SM = 0 

284 CONFIG_OTHER = 1 

285 CONFIG_ON_BOOT = 2 

286 CONFIG_ALLOW_CACHING = 3 

287 

288 CONFIG_NAME = { 

289 CONFIG_SM: "sm-config", 

290 CONFIG_OTHER: "other-config", 

291 CONFIG_ON_BOOT: "on-boot", 

292 CONFIG_ALLOW_CACHING: "allow_caching" 

293 } 

294 

295 class LookupError(util.SMException): 

296 pass 

297 

298 @staticmethod 

299 def getSession(): 

300 session = XenAPI.xapi_local() 

301 session.xenapi.login_with_password(XAPI.USER, '', '', 'SM') 

302 return session 

303 

304 def __init__(self, session, srUuid): 

305 self.sessionPrivate = False 

306 self.session = session 

307 if self.session is None: 

308 self.session = self.getSession() 

309 self.sessionPrivate = True 

310 self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid) 

311 self.srRecord = self.session.xenapi.SR.get_record(self._srRef) 

312 self.hostUuid = util.get_this_host() 

313 self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid) 

314 self.task = None 

315 self.task_progress = {"coalescable": 0, "done": 0} 

316 

317 def __del__(self): 

318 if self.sessionPrivate: 

319 self.session.xenapi.session.logout() 

320 

321 @property 

322 def srRef(self): 

323 return self._srRef 

324 

325 def isPluggedHere(self): 

326 pbds = self.getAttachedPBDs() 

327 for pbdRec in pbds: 

328 if pbdRec["host"] == self._hostRef: 

329 return True 

330 return False 

331 

332 def poolOK(self): 

333 host_recs = self.session.xenapi.host.get_all_records() 

334 for host_ref, host_rec in host_recs.items(): 

335 if not host_rec["enabled"]: 

336 Util.log("Host %s not enabled" % host_rec["uuid"]) 

337 return False 

338 return True 

339 

340 def isMaster(self): 

341 if self.srRecord["shared"]: 

342 pool = list(self.session.xenapi.pool.get_all_records().values())[0] 

343 return pool["master"] == self._hostRef 

344 else: 

345 pbds = self.getAttachedPBDs() 

346 if len(pbds) < 1: 

347 raise util.SMException("Local SR not attached") 

348 elif len(pbds) > 1: 

349 raise util.SMException("Local SR multiply attached") 

350 return pbds[0]["host"] == self._hostRef 

351 

352 def getAttachedPBDs(self): 

353 """Return PBD records for all PBDs of this SR that are currently 

354 attached""" 

355 attachedPBDs = [] 

356 pbds = self.session.xenapi.PBD.get_all_records() 

357 for pbdRec in pbds.values(): 

358 if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]: 

359 attachedPBDs.append(pbdRec) 

360 return attachedPBDs 

361 

362 def getOnlineHosts(self): 

363 return util.get_online_hosts(self.session) 

364 

365 def ensureInactive(self, hostRef, args): 

366 text = self.session.xenapi.host.call_plugin( \ 

367 hostRef, self.PLUGIN_ON_SLAVE, "multi", args) 

368 Util.log("call-plugin returned: '%s'" % text) 

369 

370 def getRecordHost(self, hostRef): 

371 return self.session.xenapi.host.get_record(hostRef) 

372 

373 def _getRefVDI(self, uuid): 

374 return self.session.xenapi.VDI.get_by_uuid(uuid) 

375 

376 def getRefVDI(self, vdi): 

377 return self._getRefVDI(vdi.uuid) 

378 

379 def getRecordVDI(self, uuid): 

380 try: 

381 ref = self._getRefVDI(uuid) 

382 return self.session.xenapi.VDI.get_record(ref) 

383 except XenAPI.Failure: 

384 return None 

385 

386 def singleSnapshotVDI(self, vdi): 

387 return self.session.xenapi.VDI.snapshot(vdi.getRef(), 

388 {"type": "internal"}) 

389 

390 def forgetVDI(self, srUuid, vdiUuid): 

391 """Forget the VDI, but handle the case where the VDI has already been 

392 forgotten (i.e. ignore errors)""" 

393 try: 

394 vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid) 

395 self.session.xenapi.VDI.forget(vdiRef) 

396 except XenAPI.Failure: 

397 pass 

398 

399 def getConfigVDI(self, vdi, key): 

400 kind = vdi.CONFIG_TYPE[key] 

401 if kind == self.CONFIG_SM: 

402 cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef()) 

403 elif kind == self.CONFIG_OTHER: 

404 cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef()) 

405 elif kind == self.CONFIG_ON_BOOT: 

406 cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef()) 

407 elif kind == self.CONFIG_ALLOW_CACHING: 

408 cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef()) 

409 else: 

410 assert(False) 

411 Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg))) 

412 return cfg 

413 

414 def removeFromConfigVDI(self, vdi, key): 

415 kind = vdi.CONFIG_TYPE[key] 

416 if kind == self.CONFIG_SM: 

417 self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key) 

418 elif kind == self.CONFIG_OTHER: 

419 self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key) 

420 else: 

421 assert(False) 

422 

423 def addToConfigVDI(self, vdi, key, val): 

424 kind = vdi.CONFIG_TYPE[key] 

425 if kind == self.CONFIG_SM: 

426 self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val) 

427 elif kind == self.CONFIG_OTHER: 

428 self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val) 

429 else: 

430 assert(False) 

431 

432 def isSnapshot(self, vdi): 

433 return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef()) 

434 

435 def markCacheSRsDirty(self): 

436 sr_refs = self.session.xenapi.SR.get_all_records_where( \ 

437 'field "local_cache_enabled" = "true"') 

438 for sr_ref in sr_refs: 

439 Util.log("Marking SR %s dirty" % sr_ref) 

440 util.set_dirty(self.session, sr_ref) 

441 

442 def srUpdate(self): 

443 Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"]) 

444 abortFlag = IPCFlag(self.srRecord["uuid"]) 

445 task = self.session.xenapi.Async.SR.update(self._srRef) 

446 cancelTask = True 

447 try: 

448 for i in range(60): 

449 status = self.session.xenapi.task.get_status(task) 

450 if not status == "pending": 

451 Util.log("SR.update_asynch status changed to [%s]" % status) 

452 cancelTask = False 

453 return 

454 if abortFlag.test(FLAG_TYPE_ABORT): 

455 Util.log("Abort signalled during srUpdate, cancelling task...") 

456 try: 

457 self.session.xenapi.task.cancel(task) 

458 cancelTask = False 

459 Util.log("Task cancelled") 

460 except: 

461 pass 

462 return 

463 time.sleep(1) 

464 finally: 

465 if cancelTask: 

466 self.session.xenapi.task.cancel(task) 

467 self.session.xenapi.task.destroy(task) 

468 Util.log("Asynch srUpdate still running, but timeout exceeded.") 

469 

470 def update_task(self): 

471 self.session.xenapi.task.set_other_config( 

472 self.task, 

473 { 

474 "applies_to": self._srRef 

475 }) 

476 total = self.task_progress['coalescable'] + self.task_progress['done'] 

477 if (total > 0): 

478 self.session.xenapi.task.set_progress( 

479 self.task, float(self.task_progress['done']) / total) 

480 

481 def create_task(self, label, description): 

482 self.task = self.session.xenapi.task.create(label, description) 

483 self.update_task() 

484 

485 def update_task_progress(self, key, value): 

486 self.task_progress[key] = value 

487 if self.task: 

488 self.update_task() 

489 

490 def set_task_status(self, status): 

491 if self.task: 

492 self.session.xenapi.task.set_status(self.task, status) 

493 

494 

495################################################################################ 

496# 

497# VDI 

498# 

499class VDI(object): 

500 """Object representing a VDI of a VHD-based SR""" 

501 

502 POLL_INTERVAL = 1 

503 POLL_TIMEOUT = 30 

504 DEVICE_MAJOR = 202 

505 DRIVER_NAME_VHD = "vhd" 

506 

507 # config keys & values 

508 DB_VHD_PARENT = "vhd-parent" 

509 DB_VDI_TYPE = "vdi_type" 

510 DB_VHD_BLOCKS = "vhd-blocks" 

511 DB_VDI_PAUSED = "paused" 

512 DB_VDI_RELINKING = "relinking" 

513 DB_VDI_ACTIVATING = "activating" 

514 DB_GC = "gc" 

515 DB_COALESCE = "coalesce" 

516 DB_LEAFCLSC = "leaf-coalesce" # config key 

517 DB_GC_NO_SPACE = "gc_no_space" 

518 LEAFCLSC_DISABLED = "false" # set by user; means do not leaf-coalesce 

519 LEAFCLSC_FORCE = "force" # set by user; means skip snap-coalesce 

520 LEAFCLSC_OFFLINE = "offline" # set here for informational purposes: means 

521 # no space to snap-coalesce or unable to keep 

522                                  # up with the VDI. This is not used by the SM; it 

523 # might be used by external components. 

524 DB_ONBOOT = "on-boot" 

525 ONBOOT_RESET = "reset" 

526 DB_ALLOW_CACHING = "allow_caching" 

527 

528 CONFIG_TYPE = { 

529 DB_VHD_PARENT: XAPI.CONFIG_SM, 

530 DB_VDI_TYPE: XAPI.CONFIG_SM, 

531 DB_VHD_BLOCKS: XAPI.CONFIG_SM, 

532 DB_VDI_PAUSED: XAPI.CONFIG_SM, 

533 DB_VDI_RELINKING: XAPI.CONFIG_SM, 

534 DB_VDI_ACTIVATING: XAPI.CONFIG_SM, 

535 DB_GC: XAPI.CONFIG_OTHER, 

536 DB_COALESCE: XAPI.CONFIG_OTHER, 

537 DB_LEAFCLSC: XAPI.CONFIG_OTHER, 

538 DB_ONBOOT: XAPI.CONFIG_ON_BOOT, 

539 DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING, 

540 DB_GC_NO_SPACE: XAPI.CONFIG_SM 

541 } 

542 

543 LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024 # bytes 

544 LIVE_LEAF_COALESCE_TIMEOUT = 10 # seconds 

545 TIMEOUT_SAFETY_MARGIN = 0.5 # extra margin when calculating 

546 # feasibility of leaf coalesce 

547 

548 JRN_RELINK = "relink" # journal entry type for relinking children 

549 JRN_COALESCE = "coalesce" # to communicate which VDI is being coalesced 

550 JRN_LEAF = "leaf" # used in coalesce-leaf 

551 

552 STR_TREE_INDENT = 4 

553 

554 def __init__(self, sr, uuid, raw): 

555 self.sr = sr 

556 self.scanError = True 

557 self.uuid = uuid 

558 self.raw = raw 

559 self.fileName = "" 

560 self.parentUuid = "" 

561 self.sizeVirt = -1 

562 self._sizeVHD = -1 

563 self._sizeAllocated = -1 

564 self._hidden = False 

565 self.parent = None 

566 self.children = [] 

567 self._vdiRef = None 

568 self._clearRef() 

569 

570 @staticmethod 

571 def extractUuid(path): 

572 raise NotImplementedError("Implement in sub class") 

573 

574 def load(self, info=None) -> None: 

575 """Load VDI info""" 

576 pass 

577 

578 def getDriverName(self) -> str: 

579 return self.DRIVER_NAME_VHD 

580 

581 def getRef(self): 

582 if self._vdiRef is None: 

583 self._vdiRef = self.sr.xapi.getRefVDI(self) 

584 return self._vdiRef 

585 

586 def getConfig(self, key, default=None): 

587 config = self.sr.xapi.getConfigVDI(self, key) 

588        if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING:  # coverage: 588 ↛ 589 (condition never true) 

589 val = config 

590 else: 

591 val = config.get(key) 

592 if val: 

593 return val 

594 return default 

595 

596 def setConfig(self, key, val): 

597 self.sr.xapi.removeFromConfigVDI(self, key) 

598 self.sr.xapi.addToConfigVDI(self, key, val) 

599 Util.log("Set %s = %s for %s" % (key, val, self)) 

600 

601 def delConfig(self, key): 

602 self.sr.xapi.removeFromConfigVDI(self, key) 

603 Util.log("Removed %s from %s" % (key, self)) 

604 

605 def ensureUnpaused(self): 

606 if self.getConfig(self.DB_VDI_PAUSED) == "true": 

607 Util.log("Unpausing VDI %s" % self) 

608 self.unpause() 

609 

610 def pause(self, failfast=False) -> None: 

611 if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid, 

612 self.uuid, failfast): 

613 raise util.SMException("Failed to pause VDI %s" % self) 

614 

615 def _report_tapdisk_unpause_error(self): 

616 try: 

617 xapi = self.sr.xapi.session.xenapi 

618 sr_ref = xapi.SR.get_by_uuid(self.sr.uuid) 

619 msg_name = "failed to unpause tapdisk" 

620 msg_body = "Failed to unpause tapdisk for VDI %s, " \ 

621 "VMs using this tapdisk have lost access " \ 

622 "to the corresponding disk(s)" % self.uuid 

623 xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body) 

624 except Exception as e: 

625 util.SMlog("failed to generate message: %s" % e) 

626 

627 def unpause(self): 

628 if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid, 

629 self.uuid): 

630 self._report_tapdisk_unpause_error() 

631 raise util.SMException("Failed to unpause VDI %s" % self) 

632 

633 def refresh(self, ignoreNonexistent=True): 

634 """Pause-unpause in one step""" 

635 self.sr.lock() 

636 try: 

637 try: 

638                if not blktap2.VDI.tap_refresh(self.sr.xapi.session,  # coverage: 638 ↛ 640 (condition never true) 

639 self.sr.uuid, self.uuid): 

640 self._report_tapdisk_unpause_error() 

641 raise util.SMException("Failed to refresh %s" % self) 

642 except XenAPI.Failure as e: 

643 if util.isInvalidVDI(e) and ignoreNonexistent: 

644 Util.log("VDI %s not found, ignoring" % self) 

645 return 

646 raise 

647 finally: 

648 self.sr.unlock() 

649 

650 def isSnapshot(self): 

651 return self.sr.xapi.isSnapshot(self) 

652 

653 def isAttachedRW(self): 

654 return util.is_attached_rw( 

655 self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef())) 

656 
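    # The value cached in sm-config under DB_VHD_BLOCKS is the VHD block bitmap,
    # zlib-compressed and then base64-encoded; getVHDBlocks below refreshes that
    # cache via updateBlockInfo() and decodes it back to the raw bitmap bytes.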

657 def getVHDBlocks(self): 

658 val = self.updateBlockInfo() 

659 bitmap = zlib.decompress(base64.b64decode(val)) 

660 return bitmap 

661 

662 def isCoalesceable(self): 

663 """A VDI is coalesceable if it has no siblings and is not a leaf""" 

664 return not self.scanError and \ 

665 self.parent and \ 

666 len(self.parent.children) == 1 and \ 

667 self.isHidden() and \ 

668 len(self.children) > 0 

669 

670 def isLeafCoalesceable(self): 

671 """A VDI is leaf-coalesceable if it has no siblings and is a leaf""" 

672 return not self.scanError and \ 

673 self.parent and \ 

674 len(self.parent.children) == 1 and \ 

675 not self.isHidden() and \ 

676 len(self.children) == 0 

677 
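    # Worked example for the feasibility check below: allowedDownTime is
    # 0.5 * 10 = 5 seconds, so with a measured speed of, say, 100 MiB/s (a
    # hypothetical figure) up to ~500 MiB of allocated data qualifies; without a
    # speed estimate the fallback is the 20 MiB LIVE_LEAF_COALESCE_MAX_SIZE cap.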

678 def canLiveCoalesce(self, speed): 

679 """Can we stop-and-leaf-coalesce this VDI? The VDI must be 

680 isLeafCoalesceable() already""" 

681 feasibleSize = False 

682 allowedDownTime = \ 

683 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT 

684 vhd_size = self.getAllocatedSize() 

685 if speed: 

686 feasibleSize = \ 

687 vhd_size // speed < allowedDownTime 

688 else: 

689 feasibleSize = \ 

690 vhd_size < self.LIVE_LEAF_COALESCE_MAX_SIZE 

691 

692 return (feasibleSize or 

693 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE) 

694 

695 def getAllPrunable(self): 

696 if len(self.children) == 0: # base case 

697 # it is possible to have a hidden leaf that was recently coalesced 

698 # onto its parent, its children already relinked but not yet 

699 # reloaded - in which case it may not be garbage collected yet: 

700 # some tapdisks could still be using the file. 

701 if self.sr.journaler.get(self.JRN_RELINK, self.uuid): 

702 return [] 

703 if not self.scanError and self.isHidden(): 

704 return [self] 

705 return [] 

706 

707 thisPrunable = True 

708 vdiList = [] 

709 for child in self.children: 

710 childList = child.getAllPrunable() 

711 vdiList.extend(childList) 

712 if child not in childList: 

713 thisPrunable = False 

714 

715        # We can destroy the current VDI if all children are hidden BUT the 

716 # current VDI must be hidden too to do that! 

717 # Example in this case (after a failed live leaf coalesce): 

718 # 

719 # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees): 

720 # SMGC: [32436] b5458d61(1.000G/4.127M) 

721 # SMGC: [32436] *OLD_b545(1.000G/4.129M) 

722 # 

723        # OLD_b545 is hidden and must be removed, but b5458d61 is not. 

724 # Normally we are not in this function when the delete action is 

725 # executed but in `_liveLeafCoalesce`. 

726 

727 if not self.scanError and not self.isHidden() and thisPrunable: 

728 vdiList.append(self) 

729 return vdiList 

730 

731 def getSizeVHD(self) -> int: 

732 return self._sizeVHD 

733 

734 def getAllocatedSize(self) -> int: 

735 return self._sizeAllocated 

736 

737 def getTreeRoot(self): 

738 "Get the root of the tree that self belongs to" 

739 root = self 

740 while root.parent: 

741 root = root.parent 

742 return root 

743 

744 def getTreeHeight(self): 

745 "Get the height of the subtree rooted at self" 

746 if len(self.children) == 0: 

747 return 1 

748 

749 maxChildHeight = 0 

750 for child in self.children: 

751 childHeight = child.getTreeHeight() 

752 if childHeight > maxChildHeight: 

753 maxChildHeight = childHeight 

754 

755 return maxChildHeight + 1 

756 

757 def getAllLeaves(self): 

758 "Get all leaf nodes in the subtree rooted at self" 

759 if len(self.children) == 0: 

760 return [self] 

761 

762 leaves = [] 

763 for child in self.children: 

764 leaves.extend(child.getAllLeaves()) 

765 return leaves 

766 

767 def updateBlockInfo(self) -> Optional[str]: 

768 val = base64.b64encode(self._queryVHDBlocks()).decode() 

769 self.setConfig(VDI.DB_VHD_BLOCKS, val) 

770 return val 

771 

772 def rename(self, uuid) -> None: 

773 "Rename the VDI file" 

774 assert(not self.sr.vdis.get(uuid)) 

775 self._clearRef() 

776 oldUuid = self.uuid 

777 self.uuid = uuid 

778 self.children = [] 

779 # updating the children themselves is the responsibility of the caller 

780 del self.sr.vdis[oldUuid] 

781 self.sr.vdis[self.uuid] = self 

782 

783 def delete(self) -> None: 

784 "Physically delete the VDI" 

785 lock.Lock.cleanup(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) 

786 lock.Lock.cleanupAll(self.uuid) 

787 self._clear() 

788 

789 def getParent(self) -> str: 

790        return vhdutil.getParent(self.path, lambda x: x.strip())  # coverage: 790 ↛ exit (lambda never ran) 

791 

792 def repair(self, parent) -> None: 

793 vhdutil.repair(parent) 

794 

795 @override 

796 def __str__(self) -> str: 

797 strHidden = "" 

798        if self.isHidden():  # coverage: 798 ↛ 799 (condition never true) 

799 strHidden = "*" 

800 strSizeVirt = "?" 

801        if self.sizeVirt > 0:  # coverage: 801 ↛ 802 (condition never true) 

802 strSizeVirt = Util.num2str(self.sizeVirt) 

803 strSizeVHD = "?" 

804        if self._sizeVHD > 0:  # coverage: 804 ↛ 805 (condition never true) 

805 strSizeVHD = "/%s" % Util.num2str(self._sizeVHD) 

806 strSizeAllocated = "?" 

807 if self._sizeAllocated >= 0: 

808 strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated) 

809 strType = "" 

810 if self.raw: 

811 strType = "[RAW]" 

812 strSizeVHD = "" 

813 

814 return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt, 

815 strSizeVHD, strSizeAllocated, strType) 

816 

817 def validate(self, fast=False) -> None: 

818        if not vhdutil.check(self.path, fast=fast):  # coverage: 818 ↛ 819 (condition never true) 

819 raise util.SMException("VHD %s corrupted" % self) 

820 

821 def _clear(self): 

822 self.uuid = "" 

823 self.path = "" 

824 self.parentUuid = "" 

825 self.parent = None 

826 self._clearRef() 

827 

828 def _clearRef(self): 

829 self._vdiRef = None 

830 

831 def _doCoalesce(self) -> None: 

832 """Coalesce self onto parent. Only perform the actual coalescing of 

833 VHD, but not the subsequent relinking. We'll do that as the next step, 

834 after reloading the entire SR in case things have changed while we 

835 were coalescing""" 

836 self.validate() 

837 self.parent.validate(True) 

838 self.parent._increaseSizeVirt(self.sizeVirt) 

839 self.sr._updateSlavesOnResize(self.parent) 

840 self._coalesceVHD(0) 

841 self.parent.validate(True) 

842 #self._verifyContents(0) 

843 self.parent.updateBlockInfo() 

844 

845 def _verifyContents(self, timeOut): 

846 Util.log(" Coalesce verification on %s" % self) 

847 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

848 Util.runAbortable(lambda: self._runTapdiskDiff(), True, 

849 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

850 Util.log(" Coalesce verification succeeded") 

851 

852 def _runTapdiskDiff(self): 

853 cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \ 

854 (self.getDriverName(), self.path, \ 

855 self.parent.getDriverName(), self.parent.path) 

856 Util.doexec(cmd, 0) 

857 return True 

858 

859 @staticmethod 

860 def _reportCoalesceError(vdi, ce): 

861 """Reports a coalesce error to XenCenter. 

862 

863        vdi: the VDI object on which the coalesce error occurred 

864 ce: the CommandException that was raised""" 

865 

866 msg_name = os.strerror(ce.code) 

867 if ce.code == errno.ENOSPC: 

868 # TODO We could add more information here, e.g. exactly how much 

869 # space is required for the particular coalesce, as well as actions 

870 # to be taken by the user and consequences of not taking these 

871 # actions. 

872            msg_body = 'Ran out of space while coalescing.' 

873 elif ce.code == errno.EIO: 

874 msg_body = 'I/O error while coalescing.' 

875 else: 

876 msg_body = '' 

877 util.SMlog('Coalesce failed on SR %s: %s (%s)' 

878 % (vdi.sr.uuid, msg_name, msg_body)) 

879 

880 # Create a XenCenter message, but don't spam. 

881 xapi = vdi.sr.xapi.session.xenapi 

882 sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid) 

883 oth_cfg = xapi.SR.get_other_config(sr_ref) 

884 if COALESCE_ERR_RATE_TAG in oth_cfg: 

885 coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG]) 

886 else: 

887 coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE 

888 

889 xcmsg = False 

890 if coalesce_err_rate == 0: 

891 xcmsg = True 

892 elif coalesce_err_rate > 0: 

893 now = datetime.datetime.now() 

894 sm_cfg = xapi.SR.get_sm_config(sr_ref) 

895 if COALESCE_LAST_ERR_TAG in sm_cfg: 

896 # seconds per message (minimum distance in time between two 

897 # messages in seconds) 

898 spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60) 

899 last = datetime.datetime.fromtimestamp( 

900 float(sm_cfg[COALESCE_LAST_ERR_TAG])) 

901 if now - last >= spm: 

902 xapi.SR.remove_from_sm_config(sr_ref, 

903 COALESCE_LAST_ERR_TAG) 

904 xcmsg = True 

905 else: 

906 xcmsg = True 

907 if xcmsg: 

908 xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG, 

909 str(now.strftime('%s'))) 

910 if xcmsg: 

911 xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body) 

912 

913 def coalesce(self) -> int: 

914 # size is returned in sectors 

915 return vhdutil.coalesce(self.path) * 512 

916 

917 @staticmethod 

918 def _doCoalesceVHD(vdi): 

919 try: 

920 startTime = time.time() 

921 vhdSize = vdi.getAllocatedSize() 

922 coalesced_size = vdi.coalesce() 

923 endTime = time.time() 

924 vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size) 

925 except util.CommandException as ce: 

926 # We use try/except for the following piece of code because it runs 

927 # in a separate process context and errors will not be caught and 

928 # reported by anyone. 

929 try: 

930 # Report coalesce errors back to user via XC 

931 VDI._reportCoalesceError(vdi, ce) 

932 except Exception as e: 

933 util.SMlog('failed to create XenCenter message: %s' % e) 

934 raise ce 

935 except: 

936 raise 

937 

938 def _vdi_is_raw(self, vdi_path): 

939 """ 

940 Given path to vdi determine if it is raw 

941 """ 

942 uuid = self.extractUuid(vdi_path) 

943 return self.sr.vdis[uuid].raw 

944 

945 def _coalesceVHD(self, timeOut): 

946 Util.log(" Running VHD coalesce on %s" % self) 

947        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)  # coverage: 947 ↛ exit (lambda never ran) 

948 try: 

949 util.fistpoint.activate_custom_fn( 

950 "cleanup_coalesceVHD_inject_failure", 

951 util.inject_failure) 

952 Util.runAbortable(lambda: VDI._doCoalesceVHD(self), None, 

953 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

954 except: 

955 #exception at this phase could indicate a failure in vhd coalesce 

956 # or a kill of vhd coalesce by runAbortable due to timeOut 

957 # Try a repair and reraise the exception 

958 parent = "" 

959 try: 

960 parent = self.getParent() 

961 if not self._vdi_is_raw(parent): 

962 # Repair error is logged and ignored. Error reraised later 

963 util.SMlog('Coalesce failed on %s, attempting repair on ' \ 

964 'parent %s' % (self.uuid, parent)) 

965 self.repair(parent) 

966 except Exception as e: 

967 util.SMlog('(error ignored) Failed to repair parent %s ' \ 

968 'after failed coalesce on %s, err: %s' % 

969 (parent, self.path, e)) 

970 raise 

971 

972 util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid) 

973 

974 def _relinkSkip(self) -> None: 

975 """Relink children of this VDI to point to the parent of this VDI""" 

976 abortFlag = IPCFlag(self.sr.uuid) 

977 for child in self.children: 

978            if abortFlag.test(FLAG_TYPE_ABORT):  # coverage: 978 ↛ 979 (condition never true) 

979 raise AbortException("Aborting due to signal") 

980 Util.log(" Relinking %s from %s to %s" % \ 

981 (child, self, self.parent)) 

982 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid) 

983 child._setParent(self.parent) 

984 self.children = [] 

985 

986 def _reloadChildren(self, vdiSkip): 

987 """Pause & unpause all VDIs in the subtree to cause blktap to reload 

988 the VHD metadata for this file in any online VDI""" 

989 abortFlag = IPCFlag(self.sr.uuid) 

990 for child in self.children: 

991 if child == vdiSkip: 

992 continue 

993            if abortFlag.test(FLAG_TYPE_ABORT):  # coverage: 993 ↛ 994 (condition never true) 

994 raise AbortException("Aborting due to signal") 

995 Util.log(" Reloading VDI %s" % child) 

996 child._reload() 

997 

998 def _reload(self): 

999 """Pause & unpause to cause blktap to reload the VHD metadata""" 

1000        for child in self.children:  # coverage: 1000 ↛ 1001 (loop never started) 

1001 child._reload() 

1002 

1003 # only leaves can be attached 

1004        if len(self.children) == 0:  # coverage: 1004 ↛ exit (condition never false) 

1005 try: 

1006 self.delConfig(VDI.DB_VDI_RELINKING) 

1007 except XenAPI.Failure as e: 

1008 if not util.isInvalidVDI(e): 

1009 raise 

1010 self.refresh() 

1011 

1012 def _tagChildrenForRelink(self): 

1013 if len(self.children) == 0: 

1014 retries = 0 

1015 try: 

1016 while retries < 15: 

1017 retries += 1 

1018 if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None: 

1019 Util.log("VDI %s is activating, wait to relink" % 

1020 self.uuid) 

1021 else: 

1022 self.setConfig(VDI.DB_VDI_RELINKING, "True") 

1023 

1024 if self.getConfig(VDI.DB_VDI_ACTIVATING): 

1025 self.delConfig(VDI.DB_VDI_RELINKING) 

1026 Util.log("VDI %s started activating while tagging" % 

1027 self.uuid) 

1028 else: 

1029 return 

1030 time.sleep(2) 

1031 

1032 raise util.SMException("Failed to tag vdi %s for relink" % self) 

1033 except XenAPI.Failure as e: 

1034 if not util.isInvalidVDI(e): 

1035 raise 

1036 

1037 for child in self.children: 

1038 child._tagChildrenForRelink() 

1039 

1040 def _loadInfoParent(self): 

1041 ret = vhdutil.getParent(self.path, lvhdutil.extractUuid) 

1042 if ret: 

1043 self.parentUuid = ret 

1044 

1045 def _setParent(self, parent) -> None: 

1046 vhdutil.setParent(self.path, parent.path, False) 

1047 self.parent = parent 

1048 self.parentUuid = parent.uuid 

1049 parent.children.append(self) 

1050 try: 

1051 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1052 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1053 (self.uuid, self.parentUuid)) 

1054 except: 

1055 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1056 (self.uuid, self.parentUuid)) 

1057 

1058 def isHidden(self) -> bool: 

1059        if self._hidden is None:  # coverage: 1059 ↛ 1060 (condition never true) 

1060 self._loadInfoHidden() 

1061 return self._hidden 

1062 

1063 def _loadInfoHidden(self) -> None: 

1064 hidden = vhdutil.getHidden(self.path) 

1065 self._hidden = (hidden != 0) 

1066 

1067 def _setHidden(self, hidden=True) -> None: 

1068 self._hidden = None 

1069 vhdutil.setHidden(self.path, hidden) 

1070 self._hidden = hidden 

1071 

1072 def _increaseSizeVirt(self, size, atomic=True) -> None: 

1073 """ensure the virtual size of 'self' is at least 'size'. Note that 

1074 resizing a VHD must always be offline and atomically: the file must 

1075 not be open by anyone and no concurrent operations may take place. 

1076 Thus we use the Agent API call for performing paused atomic 

1077 operations. If the caller is already in the atomic context, it must 

1078 call with atomic = False""" 

1079        if self.sizeVirt >= size:  # coverage: 1079 ↛ 1081 (condition never false) 

1080 return 

1081 Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \ 

1082 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1083 

1084 msize = vhdutil.getMaxResizeSize(self.path) * 1024 * 1024 

1085 if (size <= msize): 

1086 vhdutil.setSizeVirtFast(self.path, size) 

1087 else: 

1088 if atomic: 

1089 vdiList = self._getAllSubtree() 

1090 self.sr.lock() 

1091 try: 

1092 self.sr.pauseVDIs(vdiList) 

1093 try: 

1094 self._setSizeVirt(size) 

1095 finally: 

1096 self.sr.unpauseVDIs(vdiList) 

1097 finally: 

1098 self.sr.unlock() 

1099 else: 

1100 self._setSizeVirt(size) 

1101 

1102 self.sizeVirt = vhdutil.getSizeVirt(self.path) 

1103 

1104 def _setSizeVirt(self, size) -> None: 

1105 """WARNING: do not call this method directly unless all VDIs in the 

1106 subtree are guaranteed to be unplugged (and remain so for the duration 

1107 of the operation): this operation is only safe for offline VHDs""" 

1108 jFile = os.path.join(self.sr.path, self.uuid) 

1109 vhdutil.setSizeVirt(self.path, size, jFile) 

1110 

1111 def _queryVHDBlocks(self) -> bytes: 

1112 return vhdutil.getBlockBitmap(self.path) 

1113 
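    # Worked example for the estimate below: if the child and parent bitmaps OR
    # to 1000 allocated blocks and vhdutil.VHD_BLOCK_SIZE is 2 MiB (the usual VHD
    # block size, assumed here), sizeData comes to roughly 2 GiB.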

1114 def _getCoalescedSizeData(self): 

1115 """Get the data size of the resulting VHD if we coalesce self onto 

1116 parent. We calculate the actual size by using the VHD block allocation 

1117 information (as opposed to just adding up the two VHD sizes to get an 

1118 upper bound)""" 

1119 # make sure we don't use stale BAT info from vdi_rec since the child 

1120 # was writable all this time 

1121 self.delConfig(VDI.DB_VHD_BLOCKS) 

1122 blocksChild = self.getVHDBlocks() 

1123 blocksParent = self.parent.getVHDBlocks() 

1124 numBlocks = Util.countBits(blocksChild, blocksParent) 

1125 Util.log("Num combined blocks = %d" % numBlocks) 

1126 sizeData = numBlocks * vhdutil.VHD_BLOCK_SIZE 

1127 assert(sizeData <= self.sizeVirt) 

1128 return sizeData 

1129 

1130 def _calcExtraSpaceForCoalescing(self) -> int: 

1131 sizeData = self._getCoalescedSizeData() 

1132 sizeCoalesced = sizeData + vhdutil.calcOverheadBitmap(sizeData) + \ 

1133 vhdutil.calcOverheadEmpty(self.sizeVirt) 

1134 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1135 return sizeCoalesced - self.parent.getSizeVHD() 

1136 

1137 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1138 """How much extra space in the SR will be required to 

1139 [live-]leaf-coalesce this VDI""" 

1140 # the space requirements are the same as for inline coalesce 

1141 return self._calcExtraSpaceForCoalescing() 

1142 

1143 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1144 """How much extra space in the SR will be required to 

1145 snapshot-coalesce this VDI""" 

1146 return self._calcExtraSpaceForCoalescing() + \ 

1147 vhdutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf 

1148 

1149 def _getAllSubtree(self): 

1150 """Get self and all VDIs in the subtree of self as a flat list""" 

1151 vdiList = [self] 

1152 for child in self.children: 

1153 vdiList.extend(child._getAllSubtree()) 

1154 return vdiList 

1155 

1156 

1157class FileVDI(VDI): 

1158 """Object representing a VDI in a file-based SR (EXT or NFS)""" 

1159 
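    # Example for extractUuid below: a path such as
    # "/var/run/sr-mount/<sr-uuid>/<vdi-uuid>.vhd" (placeholder path) is reduced
    # to its basename and stripped of the .vhd/.raw extension, returning
    # "<vdi-uuid>".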

1160 @override 

1161 @staticmethod 

1162 def extractUuid(path): 

1163 path = os.path.basename(path.strip()) 

1164        if not (path.endswith(vhdutil.FILE_EXTN_VHD) or  # coverage: 1164 ↛ 1166 (condition never true) 

1165 path.endswith(vhdutil.FILE_EXTN_RAW)): 

1166 return None 

1167 uuid = path.replace(vhdutil.FILE_EXTN_VHD, "").replace( \ 

1168 vhdutil.FILE_EXTN_RAW, "") 

1169 # TODO: validate UUID format 

1170 return uuid 

1171 

1172 def __init__(self, sr, uuid, raw): 

1173 VDI.__init__(self, sr, uuid, raw) 

1174        if self.raw:  # coverage: 1174 ↛ 1175 (condition never true) 

1175 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_RAW) 

1176 else: 

1177 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) 

1178 

1179 @override 

1180 def load(self, info=None) -> None: 

1181 if not info: 

1182 if not util.pathexists(self.path): 

1183 raise util.SMException("%s not found" % self.path) 

1184 try: 

1185 info = vhdutil.getVHDInfo(self.path, self.extractUuid) 

1186 except util.SMException: 

1187 Util.log(" [VDI %s: failed to read VHD metadata]" % self.uuid) 

1188 return 

1189 self.parent = None 

1190 self.children = [] 

1191 self.parentUuid = info.parentUuid 

1192 self.sizeVirt = info.sizeVirt 

1193 self._sizeVHD = info.sizePhys 

1194 self._sizeAllocated = info.sizeAllocated 

1195 self._hidden = info.hidden 

1196 self.scanError = False 

1197 self.path = os.path.join(self.sr.path, "%s%s" % \ 

1198 (self.uuid, vhdutil.FILE_EXTN_VHD)) 

1199 

1200 @override 

1201 def rename(self, uuid) -> None: 

1202 oldPath = self.path 

1203 VDI.rename(self, uuid) 

1204 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) 

1205 self.path = os.path.join(self.sr.path, self.fileName) 

1206 assert(not util.pathexists(self.path)) 

1207 Util.log("Renaming %s -> %s" % (oldPath, self.path)) 

1208 os.rename(oldPath, self.path) 

1209 

1210 @override 

1211 def delete(self) -> None: 

1212        if len(self.children) > 0:  # coverage: 1212 ↛ 1213 (condition never true) 

1213 raise util.SMException("VDI %s has children, can't delete" % \ 

1214 self.uuid) 

1215 try: 

1216 self.sr.lock() 

1217 try: 

1218 os.unlink(self.path) 

1219 self.sr.forgetVDI(self.uuid) 

1220 finally: 

1221 self.sr.unlock() 

1222 except OSError: 

1223 raise util.SMException("os.unlink(%s) failed" % self.path) 

1224 VDI.delete(self) 

1225 

1226 @override 

1227 def getAllocatedSize(self) -> int: 

1228        if self._sizeAllocated == -1:  # coverage: 1228 ↛ 1229 (condition never true) 

1229 self._sizeAllocated = vhdutil.getAllocatedSize(self.path) 

1230 return self._sizeAllocated 

1231 

1232 

1233class LVHDVDI(VDI): 

1234 """Object representing a VDI in an LVHD SR""" 

1235 

1236 JRN_ZERO = "zero" # journal entry type for zeroing out end of parent 

1237 DRIVER_NAME_RAW = "aio" 

1238 

1239 @override 

1240 def load(self, info=None) -> None: 

1241 # `info` is always set. `None` default value is only here to match parent method. 

1242 assert info, "No info given to LVHDVDI.load" 

1243 self.parent = None 

1244 self.children = [] 

1245 self._sizeVHD = -1 

1246 self._sizeAllocated = -1 

1247 self.scanError = info.scanError 

1248 self.sizeLV = info.sizeLV 

1249 self.sizeVirt = info.sizeVirt 

1250 self.fileName = info.lvName 

1251 self.lvActive = info.lvActive 

1252 self.lvOpen = info.lvOpen 

1253 self.lvReadonly = info.lvReadonly 

1254 self._hidden = info.hidden 

1255 self.parentUuid = info.parentUuid 

1256 self.path = os.path.join(self.sr.path, self.fileName) 

1257 

1258 @override 

1259 @staticmethod 

1260 def extractUuid(path): 

1261 return lvhdutil.extractUuid(path) 

1262 

1263 @override 

1264 def getDriverName(self) -> str: 

1265 if self.raw: 

1266 return self.DRIVER_NAME_RAW 

1267 return self.DRIVER_NAME_VHD 

1268 
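    # In an LVHD SR the VHD sits inside an LV, so before a coalesce the parent
    # LV is inflated to make room for the data being folded in (see
    # inflateParentForCoalesce and _doCoalesce below) and deflated back to the
    # minimum afterwards.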

1269 def inflate(self, size): 

1270 """inflate the LV containing the VHD to 'size'""" 

1271 if self.raw: 

1272 return 

1273 self._activate() 

1274 self.sr.lock() 

1275 try: 

1276 lvhdutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, size) 

1277 util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid) 

1278 finally: 

1279 self.sr.unlock() 

1280 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1281 self._sizeVHD = -1 

1282 self._sizeAllocated = -1 

1283 

1284 def deflate(self): 

1285 """deflate the LV containing the VHD to minimum""" 

1286 if self.raw: 

1287 return 

1288 self._activate() 

1289 self.sr.lock() 

1290 try: 

1291 lvhdutil.deflate(self.sr.lvmCache, self.fileName, self.getSizeVHD()) 

1292 finally: 

1293 self.sr.unlock() 

1294 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1295 self._sizeVHD = -1 

1296 self._sizeAllocated = -1 

1297 

1298 def inflateFully(self): 

1299 self.inflate(lvhdutil.calcSizeVHDLV(self.sizeVirt)) 

1300 

1301 def inflateParentForCoalesce(self): 

1302 """Inflate the parent only as much as needed for the purposes of 

1303 coalescing""" 

1304 if self.parent.raw: 

1305 return 

1306 inc = self._calcExtraSpaceForCoalescing() 

1307 if inc > 0: 

1308 util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid) 

1309 self.parent.inflate(self.parent.sizeLV + inc) 

1310 

1311 @override 

1312 def updateBlockInfo(self) -> Optional[str]: 

1313 if not self.raw: 

1314 return VDI.updateBlockInfo(self) 

1315 return None 

1316 

1317 @override 

1318 def rename(self, uuid) -> None: 

1319 oldUuid = self.uuid 

1320 oldLVName = self.fileName 

1321 VDI.rename(self, uuid) 

1322 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + self.uuid 

1323 if self.raw: 

1324 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + self.uuid 

1325 self.path = os.path.join(self.sr.path, self.fileName) 

1326 assert(not self.sr.lvmCache.checkLV(self.fileName)) 

1327 

1328 self.sr.lvmCache.rename(oldLVName, self.fileName) 

1329 if self.sr.lvActivator.get(oldUuid, False): 

1330 self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False) 

1331 

1332 ns = lvhdutil.NS_PREFIX_LVM + self.sr.uuid 

1333 (cnt, bcnt) = RefCounter.check(oldUuid, ns) 

1334 RefCounter.set(self.uuid, cnt, bcnt, ns) 

1335 RefCounter.reset(oldUuid, ns) 

1336 

1337 @override 

1338 def delete(self) -> None: 

1339 if len(self.children) > 0: 

1340 raise util.SMException("VDI %s has children, can't delete" % \ 

1341 self.uuid) 

1342 self.sr.lock() 

1343 try: 

1344 self.sr.lvmCache.remove(self.fileName) 

1345 self.sr.forgetVDI(self.uuid) 

1346 finally: 

1347 self.sr.unlock() 

1348 RefCounter.reset(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) 

1349 VDI.delete(self) 

1350 

1351 @override 

1352 def getSizeVHD(self) -> int: 

1353 if self._sizeVHD == -1: 

1354 self._loadInfoSizeVHD() 

1355 return self._sizeVHD 

1356 

1357 def _loadInfoSizeVHD(self): 

1358 """Get the physical utilization of the VHD file. We do it individually 

1359 (and not using the VHD batch scanner) as an optimization: this info is 

1360 relatively expensive and we need it only for VDI's involved in 

1361 coalescing.""" 

1362 if self.raw: 

1363 return 

1364 self._activate() 

1365 self._sizeVHD = vhdutil.getSizePhys(self.path) 

1366 if self._sizeVHD <= 0: 

1367 raise util.SMException("phys size of %s = %d" % \ 

1368 (self, self._sizeVHD)) 

1369 

1370 @override 

1371 def getAllocatedSize(self) -> int: 

1372 if self._sizeAllocated == -1: 

1373 self._loadInfoSizeAllocated() 

1374 return self._sizeAllocated 

1375 

1376 def _loadInfoSizeAllocated(self): 

1377 """ 

1378 Get the allocated size of the VHD volume. 

1379 """ 

1380 if self.raw: 

1381 return 

1382 self._activate() 

1383 self._sizeAllocated = vhdutil.getAllocatedSize(self.path) 

1384 

1385 @override 

1386 def _loadInfoHidden(self) -> None: 

1387 if self.raw: 

1388 self._hidden = self.sr.lvmCache.getHidden(self.fileName) 

1389 else: 

1390 VDI._loadInfoHidden(self) 

1391 

1392 @override 

1393 def _setHidden(self, hidden=True) -> None: 

1394 if self.raw: 

1395 self._hidden = None 

1396 self.sr.lvmCache.setHidden(self.fileName, hidden) 

1397 self._hidden = hidden 

1398 else: 

1399 VDI._setHidden(self, hidden) 

1400 

1401 @override 

1402 def __str__(self) -> str: 

1403 strType = "VHD" 

1404 if self.raw: 

1405 strType = "RAW" 

1406 strHidden = "" 

1407 if self.isHidden(): 

1408 strHidden = "*" 

1409 strSizeVHD = "" 

1410 if self._sizeVHD > 0: 

1411 strSizeVHD = Util.num2str(self._sizeVHD) 

1412 strSizeAllocated = "" 

1413 if self._sizeAllocated >= 0: 

1414 strSizeAllocated = Util.num2str(self._sizeAllocated) 

1415 strActive = "n" 

1416 if self.lvActive: 

1417 strActive = "a" 

1418 if self.lvOpen: 

1419 strActive += "o" 

1420 return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType, 

1421 Util.num2str(self.sizeVirt), strSizeVHD, strSizeAllocated, 

1422 Util.num2str(self.sizeLV), strActive) 

1423 

1424 @override 

1425 def validate(self, fast=False) -> None: 

1426 if not self.raw: 

1427 VDI.validate(self, fast) 

1428 

1429 @override 

1430 def _doCoalesce(self) -> None: 

1431 """LVHD parents must first be activated, inflated, and made writable""" 

1432 try: 

1433 self._activateChain() 

1434 self.sr.lvmCache.setReadonly(self.parent.fileName, False) 

1435 self.parent.validate() 

1436 self.inflateParentForCoalesce() 

1437 VDI._doCoalesce(self) 

1438 finally: 

1439 self.parent._loadInfoSizeVHD() 

1440 self.parent.deflate() 

1441 self.sr.lvmCache.setReadonly(self.parent.fileName, True) 

1442 

1443 @override 

1444 def _setParent(self, parent) -> None: 

1445 self._activate() 

1446 if self.lvReadonly: 

1447 self.sr.lvmCache.setReadonly(self.fileName, False) 

1448 

1449 try: 

1450 vhdutil.setParent(self.path, parent.path, parent.raw) 

1451 finally: 

1452 if self.lvReadonly: 

1453 self.sr.lvmCache.setReadonly(self.fileName, True) 

1454 self._deactivate() 

1455 self.parent = parent 

1456 self.parentUuid = parent.uuid 

1457 parent.children.append(self) 

1458 try: 

1459 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1460 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1461 (self.uuid, self.parentUuid)) 

1462 except: 

1463 Util.log("Failed to update the vhd-parent with %s for child %s" % \ 

1464 (self.parentUuid, self.uuid)) 

1465 

1466 def _activate(self): 

1467 self.sr.lvActivator.activate(self.uuid, self.fileName, False) 

1468 

1469 def _activateChain(self): 

1470 vdi = self 

1471 while vdi: 

1472 vdi._activate() 

1473 vdi = vdi.parent 

1474 

1475 def _deactivate(self): 

1476 self.sr.lvActivator.deactivate(self.uuid, False) 

1477 

1478 @override 

1479 def _increaseSizeVirt(self, size, atomic=True) -> None: 

1480 "ensure the virtual size of 'self' is at least 'size'" 

1481 self._activate() 

1482 if not self.raw: 

1483 VDI._increaseSizeVirt(self, size, atomic) 

1484 return 

1485 

1486 # raw VDI case 

1487 offset = self.sizeLV 

1488 if self.sizeVirt < size: 

1489 oldSize = self.sizeLV 

1490 self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size) 

1491 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV)) 

1492 self.sr.lvmCache.setSize(self.fileName, self.sizeLV) 

1493 offset = oldSize 

1494 unfinishedZero = False 

1495 jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid) 

1496 if jval: 

1497 unfinishedZero = True 

1498 offset = int(jval) 

1499 length = self.sizeLV - offset 

1500 if not length: 

1501 return 

1502 

1503 if unfinishedZero: 

1504 Util.log(" ==> Redoing unfinished zeroing out") 

1505 else: 

1506 self.sr.journaler.create(self.JRN_ZERO, self.uuid, \ 

1507 str(offset)) 

1508 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1509 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1510 func = lambda: util.zeroOut(self.path, offset, length) 

1511 Util.runAbortable(func, True, self.sr.uuid, abortTest, 

1512 VDI.POLL_INTERVAL, 0) 

1513 self.sr.journaler.remove(self.JRN_ZERO, self.uuid) 

1514 

1515 @override 

1516 def _setSizeVirt(self, size) -> None: 

1517 """WARNING: do not call this method directly unless all VDIs in the 

1518 subtree are guaranteed to be unplugged (and remain so for the duration 

1519 of the operation): this operation is only safe for offline VHDs""" 

1520 self._activate() 

1521 jFile = lvhdutil.createVHDJournalLV(self.sr.lvmCache, self.uuid, 

1522 vhdutil.MAX_VHD_JOURNAL_SIZE) 

1523 try: 

1524 lvhdutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, 

1525 size, jFile) 

1526 finally: 

1527 lvhdutil.deleteVHDJournalLV(self.sr.lvmCache, self.uuid) 

1528 

1529 @override 

1530 def _queryVHDBlocks(self) -> bytes: 

1531 self._activate() 

1532 return VDI._queryVHDBlocks(self) 

1533 

1534 @override 

1535 def _calcExtraSpaceForCoalescing(self) -> int: 

1536 if self.parent.raw: 

1537 return 0 # raw parents are never deflated in the first place 

1538 sizeCoalesced = lvhdutil.calcSizeVHDLV(self._getCoalescedSizeData()) 

1539 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1540 return sizeCoalesced - self.parent.sizeLV 

1541 

1542 @override 

1543 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1544 """How much extra space in the SR will be required to 

1545 [live-]leaf-coalesce this VDI""" 

1546 # we can deflate the leaf to minimize the space requirements 

1547 deflateDiff = self.sizeLV - lvhdutil.calcSizeLV(self.getSizeVHD()) 

1548 return self._calcExtraSpaceForCoalescing() - deflateDiff 

1549 

1550 @override 

1551 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1552 return self._calcExtraSpaceForCoalescing() + \ 

1553 lvhdutil.calcSizeLV(self.getSizeVHD()) 

1554 

1555 

1556class LinstorVDI(VDI): 

1557 """Object representing a VDI in a LINSTOR SR""" 

1558 

1559 VOLUME_LOCK_TIMEOUT = 30 

1560 

1561 @override 

1562 def load(self, info=None) -> None: 

1563 self.parentUuid = info.parentUuid 

1564 self.scanError = True 

1565 self.parent = None 

1566 self.children = [] 

1567 

1568 self.fileName = self.sr._linstor.get_volume_name(self.uuid) 

1569 self.path = self.sr._linstor.build_device_path(self.fileName) 

1570 

1571 if not info: 

1572 try: 

1573 info = self.sr._vhdutil.get_vhd_info(self.uuid) 

1574 except util.SMException: 

1575 Util.log( 

1576 ' [VDI {}: failed to read VHD metadata]'.format(self.uuid) 

1577 ) 

1578 return 

1579 

1580 self.parentUuid = info.parentUuid 

1581 self.sizeVirt = info.sizeVirt 

1582 self._sizeVHD = -1 

1583 self._sizeAllocated = -1 

1584 self.drbd_size = -1 

1585 self._hidden = info.hidden 

1586 self.scanError = False 

1587 self.vdi_type = vhdutil.VDI_TYPE_VHD 

1588 

1589 @override 

1590 def getSizeVHD(self, fetch=False) -> int: 

1591 if self._sizeVHD < 0 or fetch: 

1592 self._sizeVHD = self.sr._vhdutil.get_size_phys(self.uuid) 

1593 return self._sizeVHD 

1594 

1595 def getDrbdSize(self, fetch=False): 

1596 if self.drbd_size < 0 or fetch: 

1597 self.drbd_size = self.sr._vhdutil.get_drbd_size(self.uuid) 

1598 return self.drbd_size 

1599 

1600 @override 

1601 def getAllocatedSize(self) -> int: 

1602 if self._sizeAllocated == -1: 

1603 if not self.raw: 

1604 self._sizeAllocated = self.sr._vhdutil.get_allocated_size(self.uuid) 

1605 return self._sizeAllocated 

1606 

1607 def inflate(self, size): 

1608 if self.raw: 

1609 return 

1610 self.sr.lock() 

1611 try: 

1612 # Ensure we use the real DRBD size and not the cached one. 

1613            # Why? Because this attribute can change if the volume is resized by the user. 

1614 self.drbd_size = self.getDrbdSize(fetch=True) 

1615 self.sr._vhdutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size) 

1616 finally: 

1617 self.sr.unlock() 

1618 self.drbd_size = -1 

1619 self._sizeVHD = -1 

1620 self._sizeAllocated = -1 

1621 

1622 def deflate(self): 

1623 if self.raw: 

1624 return 

1625 self.sr.lock() 

1626 try: 

1627 # Ensure we use the real sizes and not the cached info. 

1628 self.drbd_size = self.getDrbdSize(fetch=True) 

1629 self._sizeVHD = self.getSizeVHD(fetch=True) 

1630 self.sr._vhdutil.force_deflate(self.path, self._sizeVHD, self.drbd_size, zeroize=False) 

1631 finally: 

1632 self.sr.unlock() 

1633 self.drbd_size = -1 

1634 self._sizeVHD = -1 

1635 self._sizeAllocated = -1 

1636 

1637 def inflateFully(self): 

1638 if not self.raw: 

1639 self.inflate(LinstorVhdUtil.compute_volume_size(self.sizeVirt, self.vdi_type)) 

1640 

1641 @override 

1642 def rename(self, uuid) -> None: 

1643 Util.log('Renaming {} -> {} (path={})'.format( 

1644 self.uuid, uuid, self.path 

1645 )) 

1646 self.sr._linstor.update_volume_uuid(self.uuid, uuid) 

1647 VDI.rename(self, uuid) 

1648 

1649 @override 

1650 def delete(self) -> None: 

1651 if len(self.children) > 0: 

1652 raise util.SMException( 

1653 'VDI {} has children, can\'t delete'.format(self.uuid) 

1654 ) 

1655 self.sr.lock() 

1656 try: 

1657 self.sr._linstor.destroy_volume(self.uuid) 

1658 self.sr.forgetVDI(self.uuid) 

1659 finally: 

1660 self.sr.unlock() 

1661 VDI.delete(self) 

1662 

1663 @override 

1664 def validate(self, fast=False) -> None: 

1665 if not self.raw and not self.sr._vhdutil.check(self.uuid, fast=fast): 

1666 raise util.SMException('VHD {} corrupted'.format(self)) 

1667 

1668 @override 

1669 def pause(self, failfast=False) -> None: 

1670 self.sr._linstor.ensure_volume_is_not_locked( 

1671 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1672 ) 

1673 return super(LinstorVDI, self).pause(failfast) 

1674 

1675 @override 

1676 def coalesce(self) -> int: 

1677 # Note: `SMException` is raised here so that only the current coalesce is skipped on failure.

1678 # With any other exception type the remaining coalesce calls would not be executed.

1679 return self.sr._vhdutil.force_coalesce(self.path) * 512 

1680 

1681 @override 

1682 def getParent(self) -> str: 

1683 return self.sr._vhdutil.get_parent( 

1684 self.sr._linstor.get_volume_uuid_from_device_path(self.path) 

1685 ) 

1686 

1687 @override 

1688 def repair(self, parent_uuid) -> None: 

1689 self.sr._vhdutil.force_repair( 

1690 self.sr._linstor.get_device_path(parent_uuid) 

1691 ) 

1692 

1693 @override 

1694 def _relinkSkip(self) -> None: 

1695 abortFlag = IPCFlag(self.sr.uuid) 

1696 for child in self.children: 

1697 if abortFlag.test(FLAG_TYPE_ABORT): 

1698 raise AbortException('Aborting due to signal') 

1699 Util.log( 

1700 ' Relinking {} from {} to {}'.format( 

1701 child, self, self.parent 

1702 ) 

1703 ) 

1704 

1705 session = child.sr.xapi.session 

1706 sr_uuid = child.sr.uuid 

1707 vdi_uuid = child.uuid 

1708 try: 

1709 self.sr._linstor.ensure_volume_is_not_locked( 

1710 vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1711 ) 

1712 blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid) 

1713 child._setParent(self.parent) 

1714 finally: 

1715 blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) 

1716 self.children = [] 

1717 

1718 @override 

1719 def _setParent(self, parent) -> None: 

1720 self.sr._linstor.get_device_path(self.uuid) 

1721 self.sr._vhdutil.force_parent(self.path, parent.path) 

1722 self.parent = parent 

1723 self.parentUuid = parent.uuid 

1724 parent.children.append(self) 

1725 try: 

1726 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1727 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1728 (self.uuid, self.parentUuid)) 

1729 except: 

1730 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1731 (self.uuid, self.parentUuid)) 

1732 

1733 @override 

1734 def _doCoalesce(self) -> None: 

1735 try: 

1736 self._activateChain() 

1737 self.parent.validate() 

1738 self._inflateParentForCoalesce() 

1739 VDI._doCoalesce(self) 

1740 finally: 

1741 self.parent.deflate() 

1742 

1743 def _activateChain(self): 

1744 vdi = self 

1745 while vdi: 

1746 try: 

1747 p = self.sr._linstor.get_device_path(vdi.uuid) 

1748 except Exception as e: 

1749 # Raise SMException so that only this coalesce is skipped.

1750 # Any other exception type would stop the GC entirely.

1751 raise util.SMException(str(e)) 

1752 vdi = vdi.parent 

1753 

1754 @override 

1755 def _setHidden(self, hidden=True) -> None: 

1756 HIDDEN_TAG = 'hidden' 

1757 

1758 if self.raw: 

1759 self._hidden = None 

1760 self.sr._linstor.update_volume_metadata(self.uuid, { 

1761 HIDDEN_TAG: hidden 

1762 }) 

1763 self._hidden = hidden 

1764 else: 

1765 VDI._setHidden(self, hidden) 

1766 

1767 @override 

1768 def _increaseSizeVirt(self, size, atomic=True): 
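# For raw volumes this grows the DRBD device and zeroes the newly added
# region, recording the starting offset in a ZERO journal entry so an
# interrupted zeroing pass can be redone; VHD volumes go through the
# virtual-size resize path further below.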

1769 if self.raw: 

1770 offset = self.drbd_size 

1771 if self.sizeVirt < size: 

1772 oldSize = self.drbd_size 

1773 self.drbd_size = LinstorVolumeManager.round_up_volume_size(size) 

1774 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size)) 

1775 self.sr._linstor.resize_volume(self.uuid, self.drbd_size) 

1776 offset = oldSize 

1777 unfinishedZero = False 

1778 jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid) 

1779 if jval: 

1780 unfinishedZero = True 

1781 offset = int(jval) 

1782 length = self.drbd_size - offset 

1783 if not length: 

1784 return 

1785 

1786 if unfinishedZero: 

1787 Util.log(" ==> Redoing unfinished zeroing out") 

1788 else: 

1789 self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset)) 

1790 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1791 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1792 func = lambda: util.zeroOut(self.path, offset, length) 

1793 Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0) 

1794 self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid) 

1795 return 

1796 

1797 if self.sizeVirt >= size: 

1798 return 

1799 Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \ 

1800 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1801 

1802 msize = self.sr._vhdutil.get_max_resize_size(self.uuid) * 1024 * 1024 

1803 if (size <= msize): 

1804 self.sr._vhdutil.set_size_virt_fast(self.path, size) 

1805 else: 

1806 if atomic: 

1807 vdiList = self._getAllSubtree() 

1808 self.sr.lock() 

1809 try: 

1810 self.sr.pauseVDIs(vdiList) 

1811 try: 

1812 self._setSizeVirt(size) 

1813 finally: 

1814 self.sr.unpauseVDIs(vdiList) 

1815 finally: 

1816 self.sr.unlock() 

1817 else: 

1818 self._setSizeVirt(size) 

1819 

1820 self.sizeVirt = self.sr._vhdutil.get_size_virt(self.uuid) 

1821 

1822 @override 

1823 def _setSizeVirt(self, size) -> None: 

1824 jfile = self.uuid + '-jvhd' 

1825 self.sr._linstor.create_volume( 

1826 jfile, vhdutil.MAX_VHD_JOURNAL_SIZE, persistent=False, volume_name=jfile 

1827 ) 

1828 try: 

1829 self.inflate(LinstorVhdUtil.compute_volume_size(size, self.vdi_type)) 

1830 self.sr._vhdutil.set_size_virt(size, jfile) 

1831 finally: 

1832 try: 

1833 self.sr._linstor.destroy_volume(jfile) 

1834 except Exception: 

1835 # We can ignore this failure; the volume is not persistent anyway.

1836 pass 

1837 

1838 @override 

1839 def _queryVHDBlocks(self) -> bytes: 

1840 return self.sr._vhdutil.get_block_bitmap(self.uuid) 

1841 

1842 def _inflateParentForCoalesce(self): 

1843 if self.parent.raw: 

1844 return 

1845 inc = self._calcExtraSpaceForCoalescing() 

1846 if inc > 0: 

1847 self.parent.inflate(self.parent.getDrbdSize() + inc) 

1848 

1849 @override 

1850 def _calcExtraSpaceForCoalescing(self) -> int: 

1851 if self.parent.raw: 

1852 return 0 

1853 size_coalesced = LinstorVhdUtil.compute_volume_size( 

1854 self._getCoalescedSizeData(), self.vdi_type 

1855 ) 

1856 Util.log("Coalesced size = %s" % Util.num2str(size_coalesced)) 

1857 return size_coalesced - self.parent.getDrbdSize() 

1858 

1859 @override 

1860 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1861 assert self.getDrbdSize() > 0 

1862 assert self.getSizeVHD() > 0 

1863 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizeVHD()) 

1864 assert deflate_diff >= 0 

1865 return self._calcExtraSpaceForCoalescing() - deflate_diff 

1866 

1867 @override 

1868 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1869 assert self.getSizeVHD() > 0 

1870 return self._calcExtraSpaceForCoalescing() + \ 

1871 LinstorVolumeManager.round_up_volume_size(self.getSizeVHD()) 

1872 

1873################################################################################ 

1874# 

1875# SR 

1876# 

1877class SR(object): 

1878 class LogFilter: 

1879 def __init__(self, sr): 

1880 self.sr = sr 

1881 self.stateLogged = False 

1882 self.prevState = {} 

1883 self.currState = {} 

1884 

1885 def logState(self): 

1886 changes = "" 

1887 self.currState.clear() 

1888 for vdi in self.sr.vdiTrees: 

1889 self.currState[vdi.uuid] = self._getTreeStr(vdi) 

1890 if not self.prevState.get(vdi.uuid) or \ 

1891 self.prevState[vdi.uuid] != self.currState[vdi.uuid]: 

1892 changes += self.currState[vdi.uuid] 

1893 

1894 for uuid in self.prevState: 

1895 if not self.currState.get(uuid): 

1896 changes += "Tree %s gone\n" % uuid 

1897 

1898 result = "SR %s (%d VDIs in %d VHD trees): " % \ 

1899 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees)) 

1900 

1901 if len(changes) > 0: 

1902 if self.stateLogged: 

1903 result += "showing only VHD trees that changed:" 

1904 result += "\n%s" % changes 

1905 else: 

1906 result += "no changes" 

1907 

1908 for line in result.split("\n"): 

1909 Util.log("%s" % line) 

1910 self.prevState.clear() 

1911 for key, val in self.currState.items(): 

1912 self.prevState[key] = val 

1913 self.stateLogged = True 

1914 

1915 def logNewVDI(self, uuid): 

1916 if self.stateLogged: 

1917 Util.log("Found new VDI when scanning: %s" % uuid) 

1918 

1919 def _getTreeStr(self, vdi, indent=8): 

1920 treeStr = "%s%s\n" % (" " * indent, vdi) 

1921 for child in vdi.children: 

1922 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT) 

1923 return treeStr 

1924 

1925 TYPE_FILE = "file" 

1926 TYPE_LVHD = "lvhd" 

1927 TYPE_LINSTOR = "linstor" 

1928 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR] 

1929 

1930 LOCK_RETRY_INTERVAL = 3 

1931 LOCK_RETRY_ATTEMPTS = 20 

1932 LOCK_RETRY_ATTEMPTS_LOCK = 100 

1933 

1934 SCAN_RETRY_ATTEMPTS = 3 

1935 

1936 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM) 

1937 TMP_RENAME_PREFIX = "OLD_" 

1938 

1939 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline" 

1940 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override" 

1941 

1942 @staticmethod 

1943 def getInstance(uuid, xapiSession, createLock=True, force=False): 

1944 xapi = XAPI(xapiSession, uuid) 

1945 type = normalizeType(xapi.srRecord["type"]) 

1946 if type == SR.TYPE_FILE: 

1947 return FileSR(uuid, xapi, createLock, force) 

1948 elif type == SR.TYPE_LVHD: 

1949 return LVHDSR(uuid, xapi, createLock, force) 

1950 elif type == SR.TYPE_LINSTOR: 

1951 return LinstorSR(uuid, xapi, createLock, force) 

1952 raise util.SMException("SR type %s not recognized" % type) 

1953 

1954 def __init__(self, uuid, xapi, createLock, force): 

1955 self.logFilter = self.LogFilter(self) 

1956 self.uuid = uuid 

1957 self.path = "" 

1958 self.name = "" 

1959 self.vdis = {} 

1960 self.vdiTrees = [] 

1961 self.journaler = None 

1962 self.xapi = xapi 

1963 self._locked = 0 

1964 self._srLock = None 

1965 if createLock: 

1966 self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, self.uuid) 

1967 else: 

1968 Util.log("Requested no SR locking") 

1969 self.name = self.xapi.srRecord["name_label"] 

1970 self._failedCoalesceTargets = [] 

1971 

1972 if not self.xapi.isPluggedHere(): 

1973 if force: 

1974 Util.log("SR %s not attached on this host, ignoring" % uuid) 

1975 else: 

1976 if not self.wait_for_plug(): 

1977 raise util.SMException("SR %s not attached on this host" % uuid) 

1978 

1979 if force: 

1980 Util.log("Not checking if we are Master (SR %s)" % uuid) 

1981 elif not self.xapi.isMaster(): 

1982 raise util.SMException("This host is NOT master, will not run") 

1983 

1984 self.no_space_candidates = {} 

1985 

1986 def msg_cleared(self, xapi_session, msg_ref): 

1987 try: 

1988 msg = xapi_session.xenapi.message.get_record(msg_ref) 

1989 except XenAPI.Failure: 

1990 return True 

1991 

1992 return msg is None 

1993 

1994 def check_no_space_candidates(self): 

1995 xapi_session = self.xapi.getSession() 

1996 

1997 msg_id = self.xapi.srRecord["sm_config"].get(VDI.DB_GC_NO_SPACE) 

1998 if self.no_space_candidates: 

1999 if msg_id is None or self.msg_cleared(xapi_session, msg_id): 

2000 util.SMlog("Could not coalesce due to a lack of space " 

2001 f"in SR {self.uuid}") 

2002 msg_body = ("Unable to perform data coalesce due to a lack " 

2003 f"of space in SR {self.uuid}") 

2004 msg_id = xapi_session.xenapi.message.create( 

2005 'SM_GC_NO_SPACE', 

2006 3, 

2007 "SR", 

2008 self.uuid, 

2009 msg_body) 

2010 xapi_session.xenapi.SR.remove_from_sm_config( 

2011 self.xapi.srRef, VDI.DB_GC_NO_SPACE) 

2012 xapi_session.xenapi.SR.add_to_sm_config( 

2013 self.xapi.srRef, VDI.DB_GC_NO_SPACE, msg_id) 

2014 

2015 for candidate in self.no_space_candidates.values(): 

2016 candidate.setConfig(VDI.DB_GC_NO_SPACE, msg_id) 

2017 elif msg_id is not None: 

2018 # Everything was coalescable, remove the message 

2019 xapi_session.xenapi.message.destroy(msg_id) 

2020 

2021 def clear_no_space_msg(self, vdi): 

2022 msg_id = None 

2023 try: 

2024 msg_id = vdi.getConfig(VDI.DB_GC_NO_SPACE) 

2025 except XenAPI.Failure: 

2026 pass 

2027 

2028 self.no_space_candidates.pop(vdi.uuid, None) 

2029 if msg_id is not None: 

2030 vdi.delConfig(VDI.DB_GC_NO_SPACE) 

2031 

2032 

2033 def wait_for_plug(self): 

2034 for _ in range(1, 10): 

2035 time.sleep(2) 

2036 if self.xapi.isPluggedHere(): 

2037 return True 

2038 return False 

2039 

2040 def gcEnabled(self, refresh=True): 

2041 if refresh: 

2042 self.xapi.srRecord = \ 

2043 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) 

2044 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": 

2045 Util.log("GC is disabled for this SR, abort") 

2046 return False 

2047 return True 

2048 

2049 def scan(self, force=False) -> None: 

2050 """Scan the SR and load VDI info for each VDI. If called repeatedly, 

2051 update VDI objects if they already exist""" 

2052 pass 

2053 

2054 def scanLocked(self, force=False): 

2055 self.lock() 

2056 try: 

2057 self.scan(force) 

2058 finally: 

2059 self.unlock() 

2060 

2061 def getVDI(self, uuid): 

2062 return self.vdis.get(uuid) 

2063 

2064 def hasWork(self): 

2065 if len(self.findGarbage()) > 0: 

2066 return True 

2067 if self.findCoalesceable(): 

2068 return True 

2069 if self.findLeafCoalesceable(): 

2070 return True 

2071 if self.needUpdateBlockInfo(): 

2072 return True 

2073 return False 

2074 

2075 def findCoalesceable(self): 

2076 """Find a coalesceable VDI. Return a vdi that should be coalesced 

2077 (choosing one among all coalesceable candidates according to some 

2078 criteria) or None if there is no VDI that could be coalesced""" 

2079 

2080 candidates = [] 

2081 

2082 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE) 

2083 if srSwitch == "false": 

2084 Util.log("Coalesce disabled for this SR") 

2085 return candidates 

2086 

2087 # finish any VDI for which a relink journal entry exists first 

2088 journals = self.journaler.getAll(VDI.JRN_RELINK) 

2089 for uuid in journals: 

2090 vdi = self.getVDI(uuid) 

2091 if vdi and vdi not in self._failedCoalesceTargets: 

2092 return vdi 

2093 

2094 for vdi in self.vdis.values(): 

2095 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets: 

2096 candidates.append(vdi) 

2097 Util.log("%s is coalescable" % vdi.uuid) 

2098 

2099 self.xapi.update_task_progress("coalescable", len(candidates)) 

2100 

2101 # pick one in the tallest tree 

2102 treeHeight = dict() 

2103 for c in candidates: 

2104 height = c.getTreeRoot().getTreeHeight() 

2105 if treeHeight.get(height): 

2106 treeHeight[height].append(c) 

2107 else: 

2108 treeHeight[height] = [c] 

2109 

2110 freeSpace = self.getFreeSpace() 

2111 heights = list(treeHeight.keys()) 

2112 heights.sort(reverse=True) 

2113 for h in heights: 

2114 for c in treeHeight[h]: 

2115 spaceNeeded = c._calcExtraSpaceForCoalescing() 

2116 if spaceNeeded <= freeSpace: 

2117 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h)) 

2118 self.clear_no_space_msg(c) 

2119 return c 

2120 else: 

2121 self.no_space_candidates[c.uuid] = c 

2122 Util.log("No space to coalesce %s (free space: %d)" % \ 

2123 (c, freeSpace)) 

2124 return None 

2125 

2126 def getSwitch(self, key): 

2127 return self.xapi.srRecord["other_config"].get(key) 

2128 

2129 def forbiddenBySwitch(self, switch, condition, fail_msg): 

2130 srSwitch = self.getSwitch(switch) 

2131 ret = False 

2132 if srSwitch: 

2133 ret = srSwitch == condition 

2134 

2135 if ret: 

2136 Util.log(fail_msg) 

2137 

2138 return ret 

2139 

2140 def leafCoalesceForbidden(self): 

2141 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

2142 "false", 

2143 "Coalesce disabled for this SR") or 

2144 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

2145 VDI.LEAFCLSC_DISABLED, 

2146 "Leaf-coalesce disabled for this SR")) 

2147 

2148 def findLeafCoalesceable(self): 

2149 """Find leaf-coalesceable VDIs in each VHD tree""" 

2150 

2151 candidates = [] 

2152 if self.leafCoalesceForbidden(): 

2153 return candidates 

2154 

2155 self.gatherLeafCoalesceable(candidates) 

2156 

2157 self.xapi.update_task_progress("coalescable", len(candidates)) 

2158 

2159 freeSpace = self.getFreeSpace() 

2160 for candidate in candidates: 

2161 # check the space constraints to see if leaf-coalesce is actually 

2162 # feasible for this candidate 
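# (A snapshot-coalesce needs room for an extra copy of the leaf on top of
# the coalesce itself, while a live leaf-coalesce can deflate the leaf
# first, so the smaller "live" requirement is used when the candidate
# qualifies for live coalescing.)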

2163 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing() 

2164 spaceNeededLive = spaceNeeded 

2165 if spaceNeeded > freeSpace: 

2166 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing() 

2167 if candidate.canLiveCoalesce(self.getStorageSpeed()): 

2168 spaceNeeded = spaceNeededLive 

2169 

2170 if spaceNeeded <= freeSpace: 

2171 Util.log("Leaf-coalesce candidate: %s" % candidate) 

2172 self.clear_no_space_msg(candidate) 

2173 return candidate 

2174 else: 

2175 Util.log("No space to leaf-coalesce %s (free space: %d)" % \ 

2176 (candidate, freeSpace)) 

2177 if spaceNeededLive <= freeSpace: 

2178 Util.log("...but enough space if skip snap-coalesce") 

2179 candidate.setConfig(VDI.DB_LEAFCLSC, 

2180 VDI.LEAFCLSC_OFFLINE) 

2181 self.no_space_candidates[candidate.uuid] = candidate 

2182 

2183 return None 

2184 

2185 def gatherLeafCoalesceable(self, candidates): 

2186 for vdi in self.vdis.values(): 

2187 if not vdi.isLeafCoalesceable(): 

2188 continue 

2189 if vdi in self._failedCoalesceTargets: 

2190 continue 

2191 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET: 

2192 Util.log("Skipping reset-on-boot %s" % vdi) 

2193 continue 

2194 if vdi.getConfig(vdi.DB_ALLOW_CACHING): 

2195 Util.log("Skipping allow_caching=true %s" % vdi) 

2196 continue 

2197 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED: 

2198 Util.log("Leaf-coalesce disabled for %s" % vdi) 

2199 continue 

2200 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or 

2201 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE): 

2202 continue 

2203 candidates.append(vdi) 

2204 

2205 def coalesce(self, vdi, dryRun=False): 

2206 """Coalesce vdi onto parent""" 

2207 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent)) 

2208 if dryRun: 

2209 return 

2210 

2211 try: 

2212 self._coalesce(vdi) 

2213 except util.SMException as e: 

2214 if isinstance(e, AbortException): 

2215 self.cleanup() 

2216 raise 

2217 else: 

2218 self._failedCoalesceTargets.append(vdi) 

2219 Util.logException("coalesce") 

2220 Util.log("Coalesce failed, skipping") 

2221 self.cleanup() 

2222 

2223 def coalesceLeaf(self, vdi, dryRun=False): 

2224 """Leaf-coalesce vdi onto parent""" 

2225 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent)) 

2226 if dryRun: 

2227 return 

2228 

2229 try: 

2230 uuid = vdi.uuid 

2231 try: 

2232 # "vdi" object will no longer be valid after this call 

2233 self._coalesceLeaf(vdi) 

2234 finally: 

2235 vdi = self.getVDI(uuid) 

2236 if vdi: 

2237 vdi.delConfig(vdi.DB_LEAFCLSC) 

2238 except AbortException: 

2239 self.cleanup() 

2240 raise 

2241 except (util.SMException, XenAPI.Failure) as e: 

2242 self._failedCoalesceTargets.append(vdi) 

2243 Util.logException("leaf-coalesce") 

2244 Util.log("Leaf-coalesce failed on %s, skipping" % vdi) 

2245 self.cleanup() 

2246 

2247 def garbageCollect(self, dryRun=False): 

2248 vdiList = self.findGarbage() 

2249 Util.log("Found %d VDIs for deletion:" % len(vdiList)) 

2250 for vdi in vdiList: 

2251 Util.log(" %s" % vdi) 

2252 if not dryRun: 

2253 self.deleteVDIs(vdiList) 

2254 self.cleanupJournals(dryRun) 

2255 

2256 def findGarbage(self): 

2257 vdiList = [] 

2258 for vdi in self.vdiTrees: 

2259 vdiList.extend(vdi.getAllPrunable()) 

2260 return vdiList 

2261 

2262 def deleteVDIs(self, vdiList) -> None: 

2263 for vdi in vdiList: 

2264 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT): 

2265 raise AbortException("Aborting due to signal") 

2266 Util.log("Deleting unlinked VDI %s" % vdi) 

2267 self.deleteVDI(vdi) 

2268 

2269 def deleteVDI(self, vdi) -> None: 

2270 assert(len(vdi.children) == 0) 

2271 del self.vdis[vdi.uuid] 

2272 if vdi.parent: 

2273 vdi.parent.children.remove(vdi) 

2274 if vdi in self.vdiTrees: 

2275 self.vdiTrees.remove(vdi) 

2276 vdi.delete() 

2277 

2278 def forgetVDI(self, vdiUuid) -> None: 

2279 self.xapi.forgetVDI(self.uuid, vdiUuid) 

2280 

2281 def pauseVDIs(self, vdiList) -> None: 

2282 paused = [] 

2283 failed = False 

2284 for vdi in vdiList: 

2285 try: 

2286 vdi.pause() 

2287 paused.append(vdi) 

2288 except: 

2289 Util.logException("pauseVDIs") 

2290 failed = True 

2291 break 

2292 

2293 if failed: 

2294 self.unpauseVDIs(paused) 

2295 raise util.SMException("Failed to pause VDIs") 

2296 

2297 def unpauseVDIs(self, vdiList): 

2298 failed = False 

2299 for vdi in vdiList: 

2300 try: 

2301 vdi.unpause() 

2302 except: 

2303 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

2304 failed = True 

2305 if failed: 

2306 raise util.SMException("Failed to unpause VDIs") 

2307 

2308 def getFreeSpace(self) -> int: 

2309 return 0 

2310 

2311 def cleanup(self): 

2312 Util.log("In cleanup") 

2313 return 

2314 

2315 @override 

2316 def __str__(self) -> str: 

2317 if self.name: 

2318 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

2319 else: 

2320 ret = "%s" % self.uuid 

2321 return ret 

2322 

2323 def lock(self): 

2324 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort 

2325 signal to avoid deadlocking (trying to acquire the SR lock while the 

2326 lock is held by a process that is trying to abort us)""" 
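# (With LOCK_RETRY_ATTEMPTS_LOCK = 100 and LOCK_RETRY_INTERVAL = 3 defined
# above, the retry loop below waits up to roughly 300 seconds for the SR
# lock before raising.)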

2327 if not self._srLock: 

2328 return 

2329 

2330 if self._locked == 0: 

2331 abortFlag = IPCFlag(self.uuid) 

2332 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK): 

2333 if self._srLock.acquireNoblock(): 

2334 self._locked += 1 

2335 return 

2336 if abortFlag.test(FLAG_TYPE_ABORT): 

2337 raise AbortException("Abort requested") 

2338 time.sleep(SR.LOCK_RETRY_INTERVAL) 

2339 raise util.SMException("Unable to acquire the SR lock") 

2340 

2341 self._locked += 1 

2342 

2343 def unlock(self): 

2344 if not self._srLock: 

2345 return 

2346 assert(self._locked > 0) 

2347 self._locked -= 1 

2348 if self._locked == 0: 

2349 self._srLock.release() 

2350 

2351 def needUpdateBlockInfo(self) -> bool: 

2352 for vdi in self.vdis.values(): 

2353 if vdi.scanError or len(vdi.children) == 0: 

2354 continue 

2355 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2356 return True 

2357 return False 

2358 

2359 def updateBlockInfo(self) -> None: 

2360 for vdi in self.vdis.values(): 

2361 if vdi.scanError or len(vdi.children) == 0: 

2362 continue 

2363 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2364 vdi.updateBlockInfo() 

2365 

2366 def cleanupCoalesceJournals(self): 

2367 """Remove stale coalesce VDI indicators""" 

2368 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

2369 for uuid, jval in entries.items(): 

2370 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

2371 

2372 def cleanupJournals(self, dryRun=False): 

2373 """delete journal entries for non-existing VDIs""" 

2374 for t in [LVHDVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]: 

2375 entries = self.journaler.getAll(t) 

2376 for uuid, jval in entries.items(): 

2377 if self.getVDI(uuid): 

2378 continue 

2379 if t == SR.JRN_CLONE: 

2380 baseUuid, clonUuid = jval.split("_") 

2381 if self.getVDI(baseUuid): 

2382 continue 

2383 Util.log(" Deleting stale '%s' journal entry for %s " 

2384 "(%s)" % (t, uuid, jval)) 

2385 if not dryRun: 

2386 self.journaler.remove(t, uuid) 

2387 

2388 def cleanupCache(self, maxAge=-1) -> int: 

2389 return 0 

2390 

2391 def _coalesce(self, vdi): 

2392 if self.journaler.get(vdi.JRN_RELINK, vdi.uuid): 

2393 # this means we had done the actual coalescing already and just 

2394 # need to finish relinking and/or refreshing the children 

2395 Util.log("==> Coalesce apparently already done: skipping") 

2396 else: 

2397 # JRN_COALESCE is used to check which VDI is being coalesced in 

2398 # order to decide whether to abort the coalesce. We remove the 

2399 # journal as soon as the VHD coalesce step is done, because we 

2400 # don't expect the rest of the process to take long 

2401 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1") 

2402 vdi._doCoalesce() 

2403 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid) 

2404 

2405 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid) 

2406 

2407 # we now need to relink the children: lock the SR to prevent ops 

2408 # like SM.clone from manipulating the VDIs we'll be relinking and 

2409 # rescan the SR first in case the children changed since the last 

2410 # scan 

2411 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1") 

2412 

2413 self.lock() 

2414 try: 

2415 vdi.parent._tagChildrenForRelink() 

2416 self.scan() 

2417 vdi._relinkSkip() 

2418 finally: 

2419 self.unlock() 

2420 # Reload the children to leave things consistent 

2421 vdi.parent._reloadChildren(vdi) 

2422 

2423 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid) 

2424 self.deleteVDI(vdi) 

2425 

2426 class CoalesceTracker: 

2427 GRACE_ITERATIONS = 2 

2428 MAX_ITERATIONS_NO_PROGRESS = 3 

2429 MAX_ITERATIONS = 10 

2430 MAX_INCREASE_FROM_MINIMUM = 1.2 

2431 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \ 

2432 " --> Final size {finSize}" 

2433 
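# abortCoalesce() only evaluates its limits on a pass where the size grew:
# it gives up after more than MAX_ITERATIONS total passes, after more than
# MAX_ITERATIONS_NO_PROGRESS growing passes, or once the size exceeds the
# minimum seen by more than 20% and the GRACE_ITERATIONS budget is spent.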

2434 def __init__(self, sr): 

2435 self.itsNoProgress = 0 

2436 self.its = 0 

2437 self.minSize = float("inf") 

2438 self.history = [] 

2439 self.reason = "" 

2440 self.startSize = None 

2441 self.finishSize = None 

2442 self.sr = sr 

2443 self.grace_remaining = self.GRACE_ITERATIONS 

2444 

2445 def abortCoalesce(self, prevSize, curSize): 

2446 self.its += 1 

2447 self.history.append(self.HISTORY_STRING.format(its=self.its, 

2448 initSize=prevSize, 

2449 finSize=curSize)) 

2450 

2451 self.finishSize = curSize 

2452 

2453 if self.startSize is None: 

2454 self.startSize = prevSize 

2455 

2456 if curSize < self.minSize: 

2457 self.minSize = curSize 

2458 

2459 if prevSize < self.minSize: 

2460 self.minSize = prevSize 

2461 

2462 if self.its == 1: 

2463 # Skip evaluating conditions on first iteration 

2464 return False 

2465 

2466 if prevSize < curSize: 

2467 self.itsNoProgress += 1 

2468 Util.log("No progress, attempt:" 

2469 " {attempt}".format(attempt=self.itsNoProgress)) 

2470 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid) 

2471 else: 

2472 # We made progress 

2473 return False 

2474 

2475 if self.its > self.MAX_ITERATIONS: 

2476 max = self.MAX_ITERATIONS 

2477 self.reason = \ 

2478 "Max iterations ({max}) exceeded".format(max=max) 

2479 return True 

2480 

2481 if self.itsNoProgress > self.MAX_ITERATIONS_NO_PROGRESS: 

2482 max = self.MAX_ITERATIONS_NO_PROGRESS 

2483 self.reason = \ 

2484 "No progress made for {max} iterations".format(max=max) 

2485 return True 

2486 

2487 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize 

2488 if curSize > maxSizeFromMin: 

2489 self.grace_remaining -= 1 

2490 if self.grace_remaining == 0: 

2491 self.reason = "Unexpected bump in size," \ 

2492 " compared to minimum achieved" 

2493 

2494 return True 

2495 

2496 return False 

2497 

2498 def printSizes(self): 

2499 Util.log("Starting size was {size}" 

2500 .format(size=self.startSize)) 

2501 Util.log("Final size was {size}" 

2502 .format(size=self.finishSize)) 

2503 Util.log("Minimum size achieved was {size}" 

2504 .format(size=self.minSize)) 

2505 

2506 def printReasoning(self): 

2507 Util.log("Aborted coalesce") 

2508 for hist in self.history: 

2509 Util.log(hist) 

2510 Util.log(self.reason) 

2511 self.printSizes() 

2512 

2513 def printSummary(self): 

2514 if self.its == 0: 

2515 return 

2516 

2517 if self.reason: 

2518 Util.log("Aborted coalesce") 

2519 Util.log(self.reason) 

2520 else: 

2521 Util.log("Coalesce summary") 

2522 

2523 Util.log(f"Performed {self.its} iterations") 

2524 self.printSizes() 

2525 

2526 

2527 def _coalesceLeaf(self, vdi): 

2528 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot 

2529 complete due to external changes, namely vdi_delete and vdi_snapshot 

2530 that alter leaf-coalescibility of vdi""" 
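# The approach: repeatedly snapshot-coalesce to shrink the leaf, with
# CoalesceTracker aborting if no progress is made, until the leaf is small
# enough for canLiveCoalesce(), then finish with a single live leaf-coalesce.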

2531 tracker = self.CoalesceTracker(self) 

2532 while not vdi.canLiveCoalesce(self.getStorageSpeed()): 

2533 prevSizeVHD = vdi.getSizeVHD() 

2534 if not self._snapshotCoalesce(vdi): 

2535 return False 

2536 if tracker.abortCoalesce(prevSizeVHD, vdi.getSizeVHD()): 

2537 tracker.printReasoning() 

2538 raise util.SMException("VDI {uuid} could not be coalesced" 

2539 .format(uuid=vdi.uuid)) 

2540 tracker.printSummary() 

2541 return self._liveLeafCoalesce(vdi) 

2542 

2543 def calcStorageSpeed(self, startTime, endTime, vhdSize): 

2544 speed = None 

2545 total_time = endTime - startTime 

2546 if total_time > 0: 

2547 speed = float(vhdSize) / float(total_time) 

2548 return speed 
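# (The recorded speed is VHD bytes coalesced per unit of elapsed time;
# getStorageSpeed() averages the stored samples and _coalesceLeaf() passes
# the result to canLiveCoalesce() when deciding whether to coalesce live.)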

2549 

2550 def writeSpeedToFile(self, speed): 

2551 content = [] 

2552 speedFile = None 

2553 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2554 self.lock() 

2555 try: 

2556 Util.log("Writing to file: {myfile}".format(myfile=path)) 

2557 lines = "" 

2558 if not os.path.isfile(path): 

2559 lines = str(speed) + "\n" 

2560 else: 

2561 speedFile = open(path, "r+") 

2562 content = speedFile.readlines() 

2563 content.append(str(speed) + "\n") 

2564 if len(content) > N_RUNNING_AVERAGE: 

2565 del content[0] 

2566 lines = "".join(content) 

2567 

2568 util.atomicFileWrite(path, VAR_RUN, lines) 

2569 finally: 

2570 if speedFile is not None: 

2571 speedFile.close() 

2572 Util.log("Closing file: {myfile}".format(myfile=path)) 

2573 self.unlock() 

2574 

2575 def recordStorageSpeed(self, startTime, endTime, vhdSize): 

2576 speed = self.calcStorageSpeed(startTime, endTime, vhdSize) 

2577 if speed is None: 

2578 return 

2579 

2580 self.writeSpeedToFile(speed) 

2581 

2582 def getStorageSpeed(self): 

2583 speedFile = None 

2584 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2585 self.lock() 

2586 try: 

2587 speed = None 

2588 if os.path.isfile(path): 

2589 speedFile = open(path) 

2590 content = speedFile.readlines() 

2591 try: 

2592 content = [float(i) for i in content] 

2593 except ValueError: 

2594 Util.log("Something bad in the speed log:{log}". 

2595 format(log=speedFile.readlines())) 

2596 return speed 

2597 

2598 if len(content): 

2599 speed = sum(content) / float(len(content)) 

2600 if speed <= 0: 

2601 # Defensive, should be impossible. 

2602 Util.log("Bad speed: {speed} calculated for SR: {uuid}". 

2603 format(speed=speed, uuid=self.uuid)) 

2604 speed = None 

2605 else: 

2606 Util.log("Speed file empty for SR: {uuid}". 

2607 format(uuid=self.uuid)) 

2608 else: 

2609 Util.log("Speed log missing for SR: {uuid}". 

2610 format(uuid=self.uuid)) 

2611 return speed 

2612 finally: 

2613 if not (speedFile is None): 

2614 speedFile.close() 

2615 self.unlock() 

2616 

2617 def _snapshotCoalesce(self, vdi): 

2618 # Note that because we are not holding any locks here, concurrent SM 

2619 # operations may change this tree under our feet. In particular, vdi 

2620 # can be deleted, or it can be snapshotted. 
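# Each step below therefore re-checks the tree (isCoalesceable /
# isLeafCoalesceable) and returns False if it has changed in the meantime.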

2621 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED) 

2622 Util.log("Single-snapshotting %s" % vdi) 

2623 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid) 

2624 try: 

2625 ret = self.xapi.singleSnapshotVDI(vdi) 

2626 Util.log("Single-snapshot returned: %s" % ret) 

2627 except XenAPI.Failure as e: 

2628 if util.isInvalidVDI(e): 

2629 Util.log("The VDI appears to have been concurrently deleted") 

2630 return False 

2631 raise 

2632 self.scanLocked() 

2633 tempSnap = vdi.parent 

2634 if not tempSnap.isCoalesceable(): 

2635 Util.log("The VDI appears to have been concurrently snapshotted") 

2636 return False 

2637 Util.log("Coalescing parent %s" % tempSnap) 

2638 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid) 

2639 vhdSize = vdi.getSizeVHD() 

2640 self._coalesce(tempSnap) 

2641 if not vdi.isLeafCoalesceable(): 

2642 Util.log("The VDI tree appears to have been altered since") 

2643 return False 

2644 return True 

2645 

2646 def _liveLeafCoalesce(self, vdi) -> bool: 

2647 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid) 

2648 self.lock() 

2649 try: 

2650 self.scan() 

2651 if not self.getVDI(vdi.uuid): 

2652 Util.log("The VDI appears to have been deleted meanwhile") 

2653 return False 

2654 if not vdi.isLeafCoalesceable(): 

2655 Util.log("The VDI is no longer leaf-coalesceable") 

2656 return False 

2657 

2658 uuid = vdi.uuid 

2659 vdi.pause(failfast=True) 

2660 try: 

2661 try: 

2662 # "vdi" object will no longer be valid after this call 

2663 self._doCoalesceLeaf(vdi) 

2664 except: 

2665 Util.logException("_doCoalesceLeaf") 

2666 self._handleInterruptedCoalesceLeaf() 

2667 raise 

2668 finally: 

2669 vdi = self.getVDI(uuid) 

2670 if vdi: 

2671 vdi.ensureUnpaused() 

2672 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid) 

2673 if vdiOld: 

2674 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2675 self.deleteVDI(vdiOld) 

2676 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2677 finally: 

2678 self.cleanup() 

2679 self.unlock() 

2680 self.logFilter.logState() 

2681 return True 

2682 

2683 def _doCoalesceLeaf(self, vdi): 

2684 """Actual coalescing of a leaf VDI onto parent. Must be called in an 

2685 offline/atomic context""" 

2686 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid) 

2687 self._prepareCoalesceLeaf(vdi) 

2688 vdi.parent._setHidden(False) 

2689 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False) 

2690 vdi.validate(True) 

2691 vdi.parent.validate(True) 

2692 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid) 

2693 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT 

2694 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE: 

2695 Util.log("Leaf-coalesce forced, will not use timeout") 

2696 timeout = 0 

2697 vdi._coalesceVHD(timeout) 

2698 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid) 

2699 vdi.parent.validate(True) 

2700 #vdi._verifyContents(timeout / 2) 

2701 

2702 # rename 

2703 vdiUuid = vdi.uuid 

2704 oldName = vdi.fileName 

2705 origParentUuid = vdi.parent.uuid 

2706 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid) 

2707 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid) 

2708 vdi.parent.rename(vdiUuid) 

2709 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid) 

2710 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid) 

2711 

2712 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is 

2713 # garbage 

2714 

2715 # update the VDI record 

2716 vdi.parent.delConfig(VDI.DB_VHD_PARENT) 

2717 if vdi.parent.raw: 

2718 vdi.parent.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_RAW) 

2719 vdi.parent.delConfig(VDI.DB_VHD_BLOCKS) 

2720 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid) 

2721 

2722 self._updateNode(vdi) 

2723 

2724 # delete the obsolete leaf & inflate the parent (in that order, to 

2725 # minimize free space requirements) 

2726 parent = vdi.parent 

2727 vdi._setHidden(True) 

2728 vdi.parent.children = [] 

2729 vdi.parent = None 

2730 

2731 extraSpace = self._calcExtraSpaceNeeded(vdi, parent) 

2732 freeSpace = self.getFreeSpace() 

2733 if freeSpace < extraSpace: 

2734 # don't delete unless we need the space: deletion is time-consuming 

2735 # because it requires contacting the slaves, and we're paused here 

2736 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2737 self.deleteVDI(vdi) 

2738 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2739 

2740 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid) 

2741 self.journaler.remove(VDI.JRN_LEAF, vdiUuid) 

2742 

2743 self.forgetVDI(origParentUuid) 

2744 self._finishCoalesceLeaf(parent) 

2745 self._updateSlavesOnResize(parent) 

2746 

2747 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

2748 assert(not parent.raw) # raw parents not supported 

2749 extra = child.getSizeVHD() - parent.getSizeVHD() 

2750 if extra < 0: 

2751 extra = 0 

2752 return extra 

2753 

2754 def _prepareCoalesceLeaf(self, vdi) -> None: 

2755 pass 

2756 

2757 def _updateNode(self, vdi) -> None: 

2758 pass 

2759 

2760 def _finishCoalesceLeaf(self, parent) -> None: 

2761 pass 

2762 

2763 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: 

2764 pass 

2765 

2766 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None: 

2767 pass 

2768 

2769 def _updateSlavesOnResize(self, vdi) -> None: 

2770 pass 

2771 

2772 def _removeStaleVDIs(self, uuidsPresent) -> None: 

2773 for uuid in list(self.vdis.keys()): 

2774 if not uuid in uuidsPresent: 

2775 Util.log("VDI %s disappeared since last scan" % \ 

2776 self.vdis[uuid]) 

2777 del self.vdis[uuid] 

2778 

2779 def _handleInterruptedCoalesceLeaf(self) -> None: 

2780 """An interrupted leaf-coalesce operation may leave the VHD tree in an 

2781 inconsistent state. If the old-leaf VDI is still present, we revert the 

2782 operation (in case the original error is persistent); otherwise we must 

2783 finish the operation""" 

2784 pass 

2785 

2786 def _buildTree(self, force): 

2787 self.vdiTrees = [] 

2788 for vdi in self.vdis.values(): 

2789 if vdi.parentUuid: 

2790 parent = self.getVDI(vdi.parentUuid) 

2791 if not parent: 

2792 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX): 

2793 self.vdiTrees.append(vdi) 

2794 continue 

2795 if force: 

2796 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \ 

2797 (vdi.parentUuid, vdi.uuid)) 

2798 self.vdiTrees.append(vdi) 

2799 continue 

2800 else: 

2801 raise util.SMException("Parent VDI %s of %s not " \ 

2802 "found" % (vdi.parentUuid, vdi.uuid)) 

2803 vdi.parent = parent 

2804 parent.children.append(vdi) 

2805 else: 

2806 self.vdiTrees.append(vdi) 

2807 

2808 

2809class FileSR(SR): 

2810 TYPE = SR.TYPE_FILE 

2811 CACHE_FILE_EXT = ".vhdcache" 

2812 # cache cleanup actions 

2813 CACHE_ACTION_KEEP = 0 

2814 CACHE_ACTION_REMOVE = 1 

2815 CACHE_ACTION_REMOVE_IF_INACTIVE = 2 

2816 

2817 def __init__(self, uuid, xapi, createLock, force): 

2818 SR.__init__(self, uuid, xapi, createLock, force) 

2819 self.path = "/var/run/sr-mount/%s" % self.uuid 

2820 self.journaler = fjournaler.Journaler(self.path) 

2821 

2822 @override 

2823 def scan(self, force=False) -> None: 

2824 if not util.pathexists(self.path): 

2825 raise util.SMException("directory %s not found!" % self.uuid) 

2826 vhds = self._scan(force) 

2827 for uuid, vhdInfo in vhds.items(): 

2828 vdi = self.getVDI(uuid) 

2829 if not vdi: 

2830 self.logFilter.logNewVDI(uuid) 

2831 vdi = FileVDI(self, uuid, False) 

2832 self.vdis[uuid] = vdi 

2833 vdi.load(vhdInfo) 

2834 uuidsPresent = list(vhds.keys()) 

2835 rawList = [x for x in os.listdir(self.path) if x.endswith(vhdutil.FILE_EXTN_RAW)] 

2836 for rawName in rawList: 

2837 uuid = FileVDI.extractUuid(rawName) 

2838 uuidsPresent.append(uuid) 

2839 vdi = self.getVDI(uuid) 

2840 if not vdi: 

2841 self.logFilter.logNewVDI(uuid) 

2842 vdi = FileVDI(self, uuid, True) 

2843 self.vdis[uuid] = vdi 

2844 self._removeStaleVDIs(uuidsPresent) 

2845 self._buildTree(force) 

2846 self.logFilter.logState() 

2847 self._handleInterruptedCoalesceLeaf() 

2848 

2849 @override 

2850 def getFreeSpace(self) -> int: 

2851 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path) 

2852 

2853 @override 

2854 def deleteVDIs(self, vdiList) -> None: 

2855 rootDeleted = False 

2856 for vdi in vdiList: 

2857 if not vdi.parent: 

2858 rootDeleted = True 

2859 break 

2860 SR.deleteVDIs(self, vdiList) 

2861 if self.xapi.srRecord["type"] == "nfs" and rootDeleted: 

2862 self.xapi.markCacheSRsDirty() 

2863 

2864 @override 

2865 def cleanupCache(self, maxAge=-1) -> int: 

2866 """Clean up IntelliCache cache files. Caches for leaf nodes are 

2867 removed when the leaf node no longer exists or its allow-caching 

2868 attribute is not set. Caches for parent nodes are removed when the 

2869 parent node no longer exists or it hasn't been used in more than 

2870 <maxAge> hours. 

2871 Return number of caches removed. 

2872 """ 

2873 numRemoved = 0 

2874 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)] 

2875 Util.log("Found %d cache files" % len(cacheFiles)) 

2876 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge) 

2877 for cacheFile in cacheFiles: 

2878 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)] 

2879 action = self.CACHE_ACTION_KEEP 

2880 rec = self.xapi.getRecordVDI(uuid) 

2881 if not rec: 

2882 Util.log("Cache %s: VDI doesn't exist" % uuid) 

2883 action = self.CACHE_ACTION_REMOVE 

2884 elif rec["managed"] and not rec["allow_caching"]: 

2885 Util.log("Cache %s: caching disabled" % uuid) 

2886 action = self.CACHE_ACTION_REMOVE 

2887 elif not rec["managed"] and maxAge >= 0: 

2888 lastAccess = datetime.datetime.fromtimestamp( \ 

2889 os.path.getatime(os.path.join(self.path, cacheFile))) 

2890 if lastAccess < cutoff: 

2891 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge)) 

2892 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE 

2893 

2894 if action == self.CACHE_ACTION_KEEP: 

2895 Util.log("Keeping cache %s" % uuid) 

2896 continue 

2897 

2898 lockId = uuid 

2899 parentUuid = None 

2900 if rec and rec["managed"]: 

2901 parentUuid = rec["sm_config"].get("vhd-parent") 

2902 if parentUuid: 

2903 lockId = parentUuid 

2904 

2905 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId) 

2906 cacheLock.acquire() 

2907 try: 

2908 if self._cleanupCache(uuid, action): 

2909 numRemoved += 1 

2910 finally: 

2911 cacheLock.release() 

2912 return numRemoved 

2913 

2914 def _cleanupCache(self, uuid, action): 

2915 assert(action != self.CACHE_ACTION_KEEP) 

2916 rec = self.xapi.getRecordVDI(uuid) 

2917 if rec and rec["allow_caching"]: 

2918 Util.log("Cache %s appears to have become valid" % uuid) 

2919 return False 

2920 

2921 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT) 

2922 tapdisk = blktap2.Tapdisk.find_by_path(fullPath) 

2923 if tapdisk: 

2924 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE: 

2925 Util.log("Cache %s still in use" % uuid) 

2926 return False 

2927 Util.log("Shutting down tapdisk for %s" % fullPath) 

2928 tapdisk.shutdown() 

2929 

2930 Util.log("Deleting file %s" % fullPath) 

2931 os.unlink(fullPath) 

2932 return True 

2933 

2934 def _isCacheFileName(self, name): 

2935 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \ 

2936 name.endswith(self.CACHE_FILE_EXT) 

2937 

2938 def _scan(self, force): 

2939 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

2940 error = False 

2941 pattern = os.path.join(self.path, "*%s" % vhdutil.FILE_EXTN_VHD) 

2942 vhds = vhdutil.getAllVHDs(pattern, FileVDI.extractUuid) 

2943 for uuid, vhdInfo in vhds.items(): 

2944 if vhdInfo.error: 

2945 error = True 

2946 break 

2947 if not error: 

2948 return vhds 

2949 Util.log("Scan error on attempt %d" % i) 

2950 if force: 

2951 return vhds 

2952 raise util.SMException("Scan error") 

2953 

2954 @override 

2955 def deleteVDI(self, vdi) -> None: 

2956 self._checkSlaves(vdi) 

2957 SR.deleteVDI(self, vdi) 

2958 

2959 def _checkSlaves(self, vdi): 

2960 onlineHosts = self.xapi.getOnlineHosts() 

2961 abortFlag = IPCFlag(self.uuid) 

2962 for pbdRecord in self.xapi.getAttachedPBDs(): 

2963 hostRef = pbdRecord["host"] 

2964 if hostRef == self.xapi._hostRef: 

2965 continue 

2966 if abortFlag.test(FLAG_TYPE_ABORT): 

2967 raise AbortException("Aborting due to signal") 

2968 try: 

2969 self._checkSlave(hostRef, vdi) 

2970 except util.CommandException: 

2971 if hostRef in onlineHosts: 

2972 raise 

2973 

2974 def _checkSlave(self, hostRef, vdi): 

2975 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path}) 

2976 Util.log("Checking with slave: %s" % repr(call)) 

2977 _host = self.xapi.session.xenapi.host 

2978 text = _host.call_plugin(*call) 

2979 

2980 @override 

2981 def _handleInterruptedCoalesceLeaf(self) -> None: 

2982 entries = self.journaler.getAll(VDI.JRN_LEAF) 

2983 for uuid, parentUuid in entries.items(): 

2984 fileList = os.listdir(self.path) 

2985 childName = uuid + vhdutil.FILE_EXTN_VHD 

2986 tmpChildName = self.TMP_RENAME_PREFIX + uuid + vhdutil.FILE_EXTN_VHD 

2987 parentName1 = parentUuid + vhdutil.FILE_EXTN_VHD 

2988 parentName2 = parentUuid + vhdutil.FILE_EXTN_RAW 

2989 parentPresent = (parentName1 in fileList or parentName2 in fileList) 

2990 if parentPresent or tmpChildName in fileList: 

2991 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

2992 else: 

2993 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

2994 self.journaler.remove(VDI.JRN_LEAF, uuid) 

2995 vdi = self.getVDI(uuid) 

2996 if vdi: 

2997 vdi.ensureUnpaused() 

2998 

2999 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3000 Util.log("*** UNDO LEAF-COALESCE") 

3001 parent = self.getVDI(parentUuid) 

3002 if not parent: 

3003 parent = self.getVDI(childUuid) 

3004 if not parent: 

3005 raise util.SMException("Neither %s nor %s found" % \ 

3006 (parentUuid, childUuid)) 

3007 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

3008 parent.rename(parentUuid) 

3009 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

3010 

3011 child = self.getVDI(childUuid) 

3012 if not child: 

3013 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3014 if not child: 

3015 raise util.SMException("Neither %s nor %s found" % \ 

3016 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

3017 Util.log("Renaming child back to %s" % childUuid) 

3018 child.rename(childUuid) 

3019 Util.log("Updating the VDI record") 

3020 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

3021 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

3022 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

3023 

3024 if child.isHidden(): 

3025 child._setHidden(False) 

3026 if not parent.isHidden(): 

3027 parent._setHidden(True) 

3028 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3029 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

3030 Util.log("*** leaf-coalesce undo successful") 

3031 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

3032 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

3033 

3034 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3035 Util.log("*** FINISH LEAF-COALESCE") 

3036 vdi = self.getVDI(childUuid) 

3037 if not vdi: 

3038 raise util.SMException("VDI %s not found" % childUuid) 

3039 try: 

3040 self.forgetVDI(parentUuid) 

3041 except XenAPI.Failure: 

3042 pass 

3043 self._updateSlavesOnResize(vdi) 

3044 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

3045 Util.log("*** finished leaf-coalesce successfully") 

3046 

3047 

3048class LVHDSR(SR): 

3049 TYPE = SR.TYPE_LVHD 

3050 SUBTYPES = ["lvhdoiscsi", "lvhdohba"] 

3051 

3052 def __init__(self, uuid, xapi, createLock, force): 

3053 SR.__init__(self, uuid, xapi, createLock, force) 

3054 self.vgName = "%s%s" % (lvhdutil.VG_PREFIX, self.uuid) 

3055 self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgName) 

3056 

3057 sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid) 

3058 other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref) 

3059 lvm_conf = other_conf.get('lvm-conf') if other_conf else None 

3060 self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf) 

3061 

3062 self.lvActivator = LVActivator(self.uuid, self.lvmCache) 

3063 self.journaler = journaler.Journaler(self.lvmCache) 

3064 

3065 @override 

3066 def deleteVDI(self, vdi) -> None: 

3067 if self.lvActivator.get(vdi.uuid, False): 

3068 self.lvActivator.deactivate(vdi.uuid, False) 

3069 self._checkSlaves(vdi) 

3070 SR.deleteVDI(self, vdi) 

3071 

3072 @override 

3073 def forgetVDI(self, vdiUuid) -> None: 

3074 SR.forgetVDI(self, vdiUuid) 

3075 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME) 

3076 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid) 

3077 

3078 @override 

3079 def getFreeSpace(self) -> int: 

3080 stats = lvutil._getVGstats(self.vgName) 

3081 return stats['physical_size'] - stats['physical_utilisation'] 

3082 

3083 @override 

3084 def cleanup(self): 

3085 if not self.lvActivator.deactivateAll(): 

3086 Util.log("ERROR deactivating LVs while cleaning up") 

3087 

3088 @override 

3089 def needUpdateBlockInfo(self) -> bool: 

3090 for vdi in self.vdis.values(): 

3091 if vdi.scanError or vdi.raw or len(vdi.children) == 0: 

3092 continue 

3093 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

3094 return True 

3095 return False 

3096 

3097 @override 

3098 def updateBlockInfo(self) -> None: 

3099 numUpdated = 0 

3100 for vdi in self.vdis.values(): 

3101 if vdi.scanError or vdi.raw or len(vdi.children) == 0: 

3102 continue 

3103 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

3104 vdi.updateBlockInfo() 

3105 numUpdated += 1 

3106 if numUpdated: 

3107 # deactivate the LVs back sooner rather than later. If we don't 

3108 # now, by the time this thread gets to deactivations, another one 

3109 # might have leaf-coalesced a node and deleted it, making the child 

3110 # inherit the refcount value and preventing the correct decrement 

3111 self.cleanup() 

3112 

3113 @override 

3114 def scan(self, force=False) -> None: 

3115 vdis = self._scan(force) 

3116 for uuid, vdiInfo in vdis.items(): 

3117 vdi = self.getVDI(uuid) 

3118 if not vdi: 

3119 self.logFilter.logNewVDI(uuid) 

3120 vdi = LVHDVDI(self, uuid, 

3121 vdiInfo.vdiType == vhdutil.VDI_TYPE_RAW) 

3122 self.vdis[uuid] = vdi 

3123 vdi.load(vdiInfo) 

3124 self._removeStaleVDIs(vdis.keys()) 

3125 self._buildTree(force) 

3126 self.logFilter.logState() 

3127 self._handleInterruptedCoalesceLeaf() 

3128 

3129 def _scan(self, force): 

3130 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3131 error = False 

3132 self.lvmCache.refresh() 

3133 vdis = lvhdutil.getVDIInfo(self.lvmCache) 

3134 for uuid, vdiInfo in vdis.items(): 

3135 if vdiInfo.scanError: 

3136 error = True 

3137 break 

3138 if not error: 

3139 return vdis 

3140 Util.log("Scan error, retrying (%d)" % i) 

3141 if force: 

3142 return vdis 

3143 raise util.SMException("Scan error") 

3144 

3145 @override 

3146 def _removeStaleVDIs(self, uuidsPresent) -> None: 

3147 for uuid in list(self.vdis.keys()): 

3148 if not uuid in uuidsPresent: 

3149 Util.log("VDI %s disappeared since last scan" % \ 

3150 self.vdis[uuid]) 

3151 del self.vdis[uuid] 

3152 if self.lvActivator.get(uuid, False): 

3153 self.lvActivator.remove(uuid, False) 

3154 

3155 @override 

3156 def _liveLeafCoalesce(self, vdi) -> bool: 

3157 """If the parent is raw and the child was resized (virt. size), then 

3158 we'll need to resize the parent, which can take a while due to zeroing 

3159 out of the extended portion of the LV. Do it before pausing the child 

3160 to avoid a protracted downtime""" 

3161 if vdi.parent.raw and vdi.sizeVirt > vdi.parent.sizeVirt: 

3162 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

3163 vdi.parent._increaseSizeVirt(vdi.sizeVirt) 

3164 

3165 return SR._liveLeafCoalesce(self, vdi) 

3166 

3167 @override 

3168 def _prepareCoalesceLeaf(self, vdi) -> None: 

3169 vdi._activateChain() 

3170 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

3171 vdi.deflate() 

3172 vdi.inflateParentForCoalesce() 

3173 

3174 @override 

3175 def _updateNode(self, vdi) -> None: 

3176 # fix the refcounts: the remaining node should inherit the binary 

3177 # refcount from the leaf (because if it was online, it should remain 

3178 # refcounted as such), but the normal refcount from the parent (because 

3179 # this node is really the parent node) - minus 1 if it is online (since 

3180 # non-leaf nodes increment their normal counts when they are online and 

3181 # we are now a leaf, storing that 1 in the binary refcount). 
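# (For example, with child refcounts (cCnt=0, cBcnt=1) and parent refcounts
# (pCnt=2, pBcnt=0), the surviving node is set to (2 - 1, 1) = (1, 1).)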

3182 ns = lvhdutil.NS_PREFIX_LVM + self.uuid 

3183 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns) 

3184 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns) 

3185 pCnt = pCnt - cBcnt 

3186 assert(pCnt >= 0) 

3187 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns) 

3188 

3189 @override 

3190 def _finishCoalesceLeaf(self, parent) -> None: 

3191 if not parent.isSnapshot() or parent.isAttachedRW(): 

3192 parent.inflateFully() 

3193 else: 

3194 parent.deflate() 

3195 

3196 @override 

3197 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3198 return lvhdutil.calcSizeVHDLV(parent.sizeVirt) - parent.sizeLV 

3199 

3200 @override 

3201 def _handleInterruptedCoalesceLeaf(self) -> None: 

3202 entries = self.journaler.getAll(VDI.JRN_LEAF) 

3203 for uuid, parentUuid in entries.items(): 

3204 childLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + uuid 

3205 tmpChildLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ 

3206 self.TMP_RENAME_PREFIX + uuid 

3207 parentLV1 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + parentUuid 

3208 parentLV2 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + parentUuid 

3209 parentPresent = (self.lvmCache.checkLV(parentLV1) or \ 

3210 self.lvmCache.checkLV(parentLV2)) 

3211 if parentPresent or self.lvmCache.checkLV(tmpChildLV): 

3212 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3213 else: 

3214 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3215 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3216 vdi = self.getVDI(uuid) 

3217 if vdi: 

3218 vdi.ensureUnpaused() 

3219 

3220 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3221 Util.log("*** UNDO LEAF-COALESCE") 

3222 parent = self.getVDI(parentUuid) 

3223 if not parent: 

3224 parent = self.getVDI(childUuid) 

3225 if not parent: 

3226 raise util.SMException("Neither %s nor %s found" % \ 

3227 (parentUuid, childUuid)) 

3228 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

3229 parent.rename(parentUuid) 

3230 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

3231 

3232 child = self.getVDI(childUuid) 

3233 if not child: 

3234 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3235 if not child: 

3236 raise util.SMException("Neither %s nor %s found" % \ 

3237 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

3238 Util.log("Renaming child back to %s" % childUuid) 

3239 child.rename(childUuid) 

3240 Util.log("Updating the VDI record") 

3241 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

3242 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

3243 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

3244 

3245 # refcount (best effort - assume that it had succeeded if the 

3246 # second rename succeeded; if not, this adjustment will be wrong, 

3247 # leading to a non-deactivation of the LV) 

3248 ns = lvhdutil.NS_PREFIX_LVM + self.uuid 

3249 cCnt, cBcnt = RefCounter.check(child.uuid, ns) 

3250 pCnt, pBcnt = RefCounter.check(parent.uuid, ns) 

3251 pCnt = pCnt + cBcnt 

3252 RefCounter.set(parent.uuid, pCnt, 0, ns) 

3253 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid) 

3254 

3255 parent.deflate() 

3256 child.inflateFully() 

3257 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid) 

3258 if child.isHidden(): 

3259 child._setHidden(False) 

3260 if not parent.isHidden(): 

3261 parent._setHidden(True) 

3262 if not parent.lvReadonly: 

3263 self.lvmCache.setReadonly(parent.fileName, True) 

3264 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3265 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

3266 Util.log("*** leaf-coalesce undo successful") 

3267 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

3268 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

3269 

3270 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3271 Util.log("*** FINISH LEAF-COALESCE") 

3272 vdi = self.getVDI(childUuid) 

3273 if not vdi: 

3274 raise util.SMException("VDI %s not found" % childUuid) 

3275 vdi.inflateFully() 

3276 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid) 

3277 try: 

3278 self.forgetVDI(parentUuid) 

3279 except XenAPI.Failure: 

3280 pass 

3281 self._updateSlavesOnResize(vdi) 

3282 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

3283 Util.log("*** finished leaf-coalesce successfully") 

3284 

3285 def _checkSlaves(self, vdi): 

3286 """Confirm with all slaves in the pool that 'vdi' is not in use. We 

3287 try to check all slaves, including those that the Agent believes are 

3288 offline, but ignore failures for offline hosts. This is to avoid cases 

3289 where the Agent thinks a host is offline but the host is up.""" 

3290 args = {"vgName": self.vgName, 

3291 "action1": "deactivateNoRefcount", 

3292 "lvName1": vdi.fileName, 

3293 "action2": "cleanupLockAndRefcount", 

3294 "uuid2": vdi.uuid, 

3295 "ns2": lvhdutil.NS_PREFIX_LVM + self.uuid} 

3296 onlineHosts = self.xapi.getOnlineHosts() 

3297 abortFlag = IPCFlag(self.uuid) 

3298 for pbdRecord in self.xapi.getAttachedPBDs(): 

3299 hostRef = pbdRecord["host"] 

3300 if hostRef == self.xapi._hostRef: 

3301 continue 

3302 if abortFlag.test(FLAG_TYPE_ABORT): 

3303 raise AbortException("Aborting due to signal") 

3304 Util.log("Checking with slave %s (path %s)" % ( 

3305 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path)) 

3306 try: 

3307 self.xapi.ensureInactive(hostRef, args) 

3308 except XenAPI.Failure: 

3309 if hostRef in onlineHosts: 

3310 raise 

3311 

3312 @override 

3313 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: 

3314 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid]) 

3315 if not slaves: 

3316 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \ 

3317 child) 

3318 return 

3319 

3320 tmpName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ 

3321 self.TMP_RENAME_PREFIX + child.uuid 

3322 args = {"vgName": self.vgName, 

3323 "action1": "deactivateNoRefcount", 

3324 "lvName1": tmpName, 

3325 "action2": "deactivateNoRefcount", 

3326 "lvName2": child.fileName, 

3327 "action3": "refresh", 

3328 "lvName3": child.fileName, 

3329 "action4": "refresh", 

3330 "lvName4": parent.fileName} 

3331 for slave in slaves: 

3332 Util.log("Updating %s, %s, %s on slave %s" % \ 

3333 (tmpName, child.fileName, parent.fileName, 

3334 self.xapi.getRecordHost(slave)['hostname'])) 

3335 text = self.xapi.session.xenapi.host.call_plugin( \ 

3336 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3337 Util.log("call-plugin returned: '%s'" % text) 

3338 

3339 @override 

3340 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None: 

3341 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid]) 

3342 if not slaves: 

3343 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi) 

3344 return 

3345 

3346 args = {"vgName": self.vgName, 

3347 "action1": "deactivateNoRefcount", 

3348 "lvName1": oldNameLV, 

3349 "action2": "refresh", 

3350 "lvName2": vdi.fileName, 

3351 "action3": "cleanupLockAndRefcount", 

3352 "uuid3": origParentUuid, 

3353 "ns3": lvhdutil.NS_PREFIX_LVM + self.uuid} 

3354 for slave in slaves: 

3355 Util.log("Updating %s to %s on slave %s" % \ 

3356 (oldNameLV, vdi.fileName, 

3357 self.xapi.getRecordHost(slave)['hostname'])) 

3358 text = self.xapi.session.xenapi.host.call_plugin( \ 

3359 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3360 Util.log("call-plugin returned: '%s'" % text) 

3361 

3362 @override 

3363 def _updateSlavesOnResize(self, vdi) -> None: 

3364 uuids = [x.uuid for x in vdi.getAllLeaves()] 

3365 slaves = util.get_slaves_attached_on(self.xapi.session, uuids) 

3366 if not slaves: 

3367 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi) 

3368 return 

3369 lvhdutil.lvRefreshOnSlaves(self.xapi.session, self.uuid, self.vgName, 

3370 vdi.fileName, vdi.uuid, slaves) 

3371 

3372 

3373class LinstorSR(SR): 

3374 TYPE = SR.TYPE_LINSTOR 

3375 

3376 def __init__(self, uuid, xapi, createLock, force): 

3377 if not LINSTOR_AVAILABLE: 

3378 raise util.SMException( 

3379 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing' 

3380 ) 

3381 

3382 SR.__init__(self, uuid, xapi, createLock, force) 

3383 self.path = LinstorVolumeManager.DEV_ROOT_PATH 

3384 self._reloadLinstor(journaler_only=True) 

3385 

3386 @override 

3387 def deleteVDI(self, vdi) -> None: 

3388 self._checkSlaves(vdi) 

3389 SR.deleteVDI(self, vdi) 

3390 

3391 @override 

3392 def getFreeSpace(self) -> int: 

3393 return self._linstor.max_volume_size_allowed 

3394 

3395 @override 

3396 def scan(self, force=False) -> None: 

3397 all_vdi_info = self._scan(force) 

3398 for uuid, vdiInfo in all_vdi_info.items(): 

3399 # When vdiInfo is None, the VDI is RAW. 

3400 vdi = self.getVDI(uuid) 

3401 if not vdi: 

3402 self.logFilter.logNewVDI(uuid) 

3403 vdi = LinstorVDI(self, uuid, not vdiInfo) 

3404 self.vdis[uuid] = vdi 

3405 if vdiInfo: 

3406 vdi.load(vdiInfo) 

3407 self._removeStaleVDIs(all_vdi_info.keys()) 

3408 self._buildTree(force) 

3409 self.logFilter.logState() 

3410 self._handleInterruptedCoalesceLeaf() 

3411 

3412 @override 

3413 def pauseVDIs(self, vdiList) -> None: 

3414 self._linstor.ensure_volume_list_is_not_locked( 

3415 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3416 ) 

3417 return super(LinstorSR, self).pauseVDIs(vdiList) 

3418 

3419 def _reloadLinstor(self, journaler_only=False): 

3420 session = self.xapi.session 

3421 host_ref = util.get_this_host_ref(session) 

3422 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) 

3423 

3424 pbd = util.find_my_pbd(session, host_ref, sr_ref) 

3425 if pbd is None: 

3426 raise util.SMException('Failed to find PBD') 

3427 

3428 dconf = session.xenapi.PBD.get_device_config(pbd) 

3429 group_name = dconf['group-name'] 

3430 

3431 controller_uri = get_controller_uri() 

3432 self.journaler = LinstorJournaler( 

3433 controller_uri, group_name, logger=util.SMlog 

3434 ) 

3435 

3436 if journaler_only: 

3437 return 

3438 

3439 self._linstor = LinstorVolumeManager( 

3440 controller_uri, 

3441 group_name, 

3442 repair=True, 

3443 logger=util.SMlog 

3444 ) 

3445 self._vhdutil = LinstorVhdUtil(session, self._linstor) 

3446 

3447 def _scan(self, force): 

3448 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3449 self._reloadLinstor() 

3450 error = False 

3451 try: 

3452 all_vdi_info = self._load_vdi_info() 

3453 for uuid, vdiInfo in all_vdi_info.items(): 

3454 if vdiInfo and vdiInfo.error: 

3455 error = True 

3456 break 

3457 if not error: 

3458 return all_vdi_info 

3459 Util.log('Scan error, retrying ({})'.format(i)) 

3460 except Exception as e: 

3461 Util.log('Scan exception, retrying ({}): {}'.format(i, e)) 

3462 Util.log(traceback.format_exc()) 

3463 

3464 if force: 

3465 return all_vdi_info 

3466 raise util.SMException('Scan error') 

3467 

3468 def _load_vdi_info(self): 

3469 all_vdi_info = {} 

3470 

3471 # TODO: Ensure metadata contains the right info. 

3472 

3473 all_volume_info = self._linstor.get_volumes_with_info() 

3474 volumes_metadata = self._linstor.get_volumes_with_metadata() 

3475 for vdi_uuid, volume_info in all_volume_info.items(): 

3476 try: 

3477 volume_metadata = volumes_metadata[vdi_uuid] 

3478 if not volume_info.name and not list(volume_metadata.items()): 

3479 continue # Ignore it, probably deleted. 

3480 

3481 if vdi_uuid.startswith('DELETED_'): 

3482 # Assume it's really a RAW volume of a failed snap without VHD header/footer. 

3483 # We must remove this VDI now without adding it in the VDI list. 

3484 # Otherwise `Relinking` calls and other actions can be launched on it. 

3485 # We don't want that... 

3486 Util.log('Deleting bad VDI {}'.format(vdi_uuid)) 

3487 

3488 self.lock() 

3489 try: 

3490 self._linstor.destroy_volume(vdi_uuid) 

3491 try: 

3492 self.forgetVDI(vdi_uuid) 

3493 except: 

3494 pass 

3495 except Exception as e: 

3496 Util.log('Cannot delete bad VDI: {}'.format(e)) 

3497 finally: 

3498 self.unlock() 

3499 continue 

3500 

3501 vdi_type = volume_metadata.get(VDI_TYPE_TAG) 

3502 volume_name = self._linstor.get_volume_name(vdi_uuid) 

3503 if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX): 

3504 # Always RAW! 

3505 info = None 

3506 elif vdi_type == vhdutil.VDI_TYPE_VHD: 

3507 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3508 else: 

3509 # Ensure it's not a VHD... 

3510 try: 

3511 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3512 except: 

3513 try: 

3514 self._vhdutil.force_repair( 

3515 self._linstor.get_device_path(vdi_uuid) 

3516 ) 

3517 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3518 except: 

3519 info = None 

3520 

3521 except Exception as e: 

3522 Util.log( 

3523 ' [VDI {}: failed to load VDI info]: {}' 

3524 .format(vdi_uuid, e) 

3525 ) 

3526 info = vhdutil.VHDInfo(vdi_uuid) 

3527 info.error = 1 

3528 

3529 all_vdi_info[vdi_uuid] = info 

3530 

3531 return all_vdi_info 

3532 

3533 @override 

3534 def _prepareCoalesceLeaf(self, vdi) -> None: 

3535 vdi._activateChain() 

3536 vdi.deflate() 

3537 vdi._inflateParentForCoalesce() 

3538 

3539 @override 

3540 def _finishCoalesceLeaf(self, parent) -> None: 

3541 if not parent.isSnapshot() or parent.isAttachedRW(): 

3542 parent.inflateFully() 

3543 else: 

3544 parent.deflate() 

3545 

3546 @override 

3547 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3548 return LinstorVhdUtil.compute_volume_size(parent.sizeVirt, parent.vdi_type) - parent.getDrbdSize() 

3549 

3550 def _hasValidDevicePath(self, uuid): 

3551 try: 

3552 self._linstor.get_device_path(uuid) 

3553 except Exception: 

3554 # TODO: Maybe log exception. 

3555 return False 

3556 return True 

3557 

3558 @override 

3559 def _liveLeafCoalesce(self, vdi) -> bool: 

3560 self.lock() 

3561 try: 

3562 self._linstor.ensure_volume_is_not_locked( 

3563 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3564 ) 

3565 return super(LinstorSR, self)._liveLeafCoalesce(vdi) 

3566 finally: 

3567 self.unlock() 

3568 

3569 @override 

3570 def _handleInterruptedCoalesceLeaf(self) -> None: 

3571 entries = self.journaler.get_all(VDI.JRN_LEAF) 

3572 for uuid, parentUuid in entries.items(): 

3573 if self._hasValidDevicePath(parentUuid) or \ 

3574 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): 

3575 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3576 else: 

3577 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3578 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3579 vdi = self.getVDI(uuid) 

3580 if vdi: 

3581 vdi.ensureUnpaused() 

3582 

3583 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3584 Util.log('*** UNDO LEAF-COALESCE') 

3585 parent = self.getVDI(parentUuid) 

3586 if not parent: 

3587 parent = self.getVDI(childUuid) 

3588 if not parent: 

3589 raise util.SMException( 

3590 'Neither {} nor {} found'.format(parentUuid, childUuid) 

3591 ) 

3592 Util.log( 

3593 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) 

3594 ) 

3595 parent.rename(parentUuid) 

3596 

3597 child = self.getVDI(childUuid) 

3598 if not child: 

3599 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3600 if not child: 

3601 raise util.SMException( 

3602 'Neither {} nor {} found'.format( 

3603 childUuid, self.TMP_RENAME_PREFIX + childUuid 

3604 ) 

3605 ) 

3606 Util.log('Renaming child back to {}'.format(childUuid)) 

3607 child.rename(childUuid) 

3608 Util.log('Updating the VDI record') 

3609 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

3610 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

3611 

3612 # TODO: Maybe deflate here. 

3613 

3614 if child.isHidden(): 

3615 child._setHidden(False) 

3616 if not parent.isHidden(): 

3617 parent._setHidden(True) 

3618 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3619 Util.log('*** leaf-coalesce undo successful') 

3620 

3621 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3622 Util.log('*** FINISH LEAF-COALESCE') 

3623 vdi = self.getVDI(childUuid) 

3624 if not vdi: 

3625 raise util.SMException('VDI {} not found'.format(childUuid)) 

3626 # TODO: Maybe inflate. 

3627 try: 

3628 self.forgetVDI(parentUuid) 

3629 except XenAPI.Failure: 

3630 pass 

3631 self._updateSlavesOnResize(vdi) 

3632 Util.log('*** finished leaf-coalesce successfully') 

3633 

3634 def _checkSlaves(self, vdi): 

3635 try: 

3636 all_openers = self._linstor.get_volume_openers(vdi.uuid) 

3637 for openers in all_openers.values(): 

3638 for opener in openers.values(): 

3639 if opener['process-name'] != 'tapdisk': 

3640 raise util.SMException( 

3641 'VDI {} is in use: {}'.format(vdi.uuid, all_openers) 

3642 ) 

3643 except LinstorVolumeManagerError as e: 

3644 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: 

3645 raise 

3646 

3647 

3648################################################################################ 

3649# 

3650# Helpers 

3651# 

3652def daemonize(): 
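    # Classic double fork: the first fork (whose parent waits on the 
    # intermediate child, so nothing is left as a zombie) plus setsid() 
    # detaches us from the controlling terminal, and the second fork ensures 
    # the daemon can never reacquire one before the std fds are redirected to 
    # /dev/null. 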

3653 pid = os.fork() 

3654 if pid: 

3655 os.waitpid(pid, 0) 

3656 Util.log("New PID [%d]" % pid) 

3657 return False 

3658 os.chdir("/") 

3659 os.setsid() 

3660 pid = os.fork() 

3661 if pid: 

3662 Util.log("Will finish as PID [%d]" % pid) 

3663 os._exit(0) 

3664 for fd in [0, 1, 2]: 

3665 try: 

3666 os.close(fd) 

3667 except OSError: 

3668 pass 

3669 # we need to fill those special fd numbers or pread won't work 

3670 sys.stdin = open("/dev/null", 'r') 

3671 sys.stderr = open("/dev/null", 'w') 

3672 sys.stdout = open("/dev/null", 'w') 

3673 # As we're a new process we need to clear the lock objects 

3674 lock.Lock.clearAll() 

3675 return True 

3676 

3677 

3678def normalizeType(type): 

3679 if type in LVHDSR.SUBTYPES: 

3680 type = SR.TYPE_LVHD 

3681 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]: 

3682 # temporary while LVHD is symlinked as LVM 

3683 type = SR.TYPE_LVHD 

3684 if type in [ 

3685 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", 

3686 "moosefs", "xfs", "zfs", "largeblock" 

3687 ]: 

3688 type = SR.TYPE_FILE 

3689 if type in ["linstor"]: 

3690 type = SR.TYPE_LINSTOR 

3691 if type not in SR.TYPES: 

3692 raise util.SMException("Unsupported SR type: %s" % type) 

3693 return type 
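# For example, per the mappings above: "lvm" and "lvmoiscsi" normalize to 
# SR.TYPE_LVHD, "nfs", "ext" and "zfs" to SR.TYPE_FILE, and "linstor" to 
# SR.TYPE_LINSTOR; anything else raises util.SMException. 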

3694 

3695GCPAUSE_DEFAULT_SLEEP = 5 * 60 

3696 

3697 

3698def _gc_init_file(sr_uuid): 

3699 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init') 

3700 

3701 

3702def _create_init_file(sr_uuid): 

3703 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))) 

3704 with open(os.path.join( 

3705 NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init'), 'w+') as f: 

3706 f.write('1') 

3707 

3708 

3709def _gcLoopPause(sr, dryRun=False, immediate=False): 

3710 if immediate: 

3711 return 

3712 

3713 # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist 

3714 # point will just return. Otherwise, fall back on an abortable sleep. 

3715 

3716 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT): 

3717 

3718 util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT, 

3719 lambda *args: None) 

3720 elif os.path.exists(_gc_init_file(sr.uuid)): 

3721 def abortTest(): 

3722 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT) 

3723 

3724 # If time.sleep hangs we are in deep trouble, however for 

3725 # completeness we set the timeout of the abort thread to 

3726 # 110% of GCPAUSE_DEFAULT_SLEEP. 

3727 Util.log("GC active, about to go quiet") 

3728 Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP), 

3729 None, sr.uuid, abortTest, VDI.POLL_INTERVAL, 

3730 GCPAUSE_DEFAULT_SLEEP * 1.1) 

3731 Util.log("GC active, quiet period ended") 

3732 

3733 

3734def _gcLoop(sr, dryRun=False, immediate=False): 

3735 if not lockGCActive.acquireNoblock(): 

3736 Util.log("Another GC instance already active, exiting") 

3737 return 

3738 

3739 # Check we're still attached after acquiring locks 

3740 if not sr.xapi.isPluggedHere(): 

3741 Util.log("SR no longer attached, exiting") 

3742 return 

3743 

3744 # Clean up Intellicache files 

3745 sr.cleanupCache() 

3746 

3747 # Track how many we do 

3748 coalesced = 0 

3749 task_status = "success" 

3750 try: 

3751 # Check if any work needs to be done 

3752 if not sr.xapi.isPluggedHere(): 

3753 Util.log("SR no longer attached, exiting") 

3754 return 

3755 sr.scanLocked() 

3756 if not sr.hasWork(): 

3757 Util.log("No work, exiting") 

3758 return 

3759 sr.xapi.create_task( 

3760 "Garbage Collection", 

3761 "Garbage collection for SR %s" % sr.uuid) 

3762 _gcLoopPause(sr, dryRun, immediate=immediate) 

3763 while True: 

3764 if SIGTERM: 

3765 Util.log("Term requested") 

3766 return 

3767 

3768 if not sr.xapi.isPluggedHere(): 

3769 Util.log("SR no longer attached, exiting") 

3770 break 

3771 sr.scanLocked() 

3772 if not sr.hasWork(): 

3773 Util.log("No work, exiting") 

3774 break 

3775 

3776 if not lockGCRunning.acquireNoblock(): 

3777 Util.log("Unable to acquire GC running lock.") 

3778 return 

3779 try: 

3780 if not sr.gcEnabled(): 

3781 break 

3782 

3783 sr.xapi.update_task_progress("done", coalesced) 

3784 

3785 sr.cleanupCoalesceJournals() 

3786 # Create the init file here in case startup is waiting on it 

3787 _create_init_file(sr.uuid) 

3788 sr.scanLocked() 

3789 sr.updateBlockInfo() 

3790 

3791 howmany = len(sr.findGarbage()) 

3792 if howmany > 0: 

3793 Util.log("Found %d orphaned vdis" % howmany) 

3794 sr.lock() 

3795 try: 

3796 sr.garbageCollect(dryRun) 

3797 finally: 

3798 sr.unlock() 

3799 sr.xapi.srUpdate() 

3800 

3801 candidate = sr.findCoalesceable() 

3802 if candidate: 

3803 util.fistpoint.activate( 

3804 "LVHDRT_finding_a_suitable_pair", sr.uuid) 

3805 sr.coalesce(candidate, dryRun) 

3806 sr.xapi.srUpdate() 

3807 coalesced += 1 

3808 continue 

3809 

3810 candidate = sr.findLeafCoalesceable() 

3811 if candidate: 

3812 sr.coalesceLeaf(candidate, dryRun) 

3813 sr.xapi.srUpdate() 

3814 coalesced += 1 

3815 continue 

3816 

3817 finally: 

3818 lockGCRunning.release() 

3819 except: 

3820 task_status = "failure" 

3821 raise 

3822 finally: 

3823 sr.xapi.set_task_status(task_status) 

3824 Util.log("GC process exiting, no work left") 

3825 _create_init_file(sr.uuid) 

3826 lockGCActive.release() 

3827 

3828 

3829def _xapi_enabled(session, hostref): 

3830 host = session.xenapi.host.get_record(hostref) 

3831 return host['enabled'] 

3832 

3833 

3834def _ensure_xapi_initialised(session): 

3835 """ 

3836 Don't want to start GC until Xapi is fully initialised 

3837 """ 

3838 local_session = None 

3839 if session is None: 

3840 local_session = util.get_localAPI_session() 

3841 session = local_session 

3842 

3843 try: 

3844 hostref = session.xenapi.host.get_by_uuid(util.get_this_host()) 

3845 while not _xapi_enabled(session, hostref): 

3846 util.SMlog("Xapi not ready, GC waiting") 

3847 time.sleep(15) 

3848 finally: 

3849 if local_session is not None: 

3850 local_session.xenapi.session.logout() 

3851 

3852def _gc(session, srUuid, dryRun=False, immediate=False): 

3853 init(srUuid) 

3854 _ensure_xapi_initialised(session) 

3855 sr = SR.getInstance(srUuid, session) 

3856 if not sr.gcEnabled(False): 

3857 return 

3858 

3859 try: 

3860 _gcLoop(sr, dryRun, immediate=immediate) 

3861 finally: 

3862 sr.check_no_space_candidates() 

3863 sr.cleanup() 

3864 sr.logFilter.logState() 

3865 del sr.xapi 

3866 

3867 

3868def _abort(srUuid, soft=False): 

3869 """Aborts an GC/coalesce. 

3870 

3871 srUuid: the UUID of the SR whose GC/coalesce must be aborted 

3872 soft: If set to True and there is a pending abort signal, the function 

3873 doesn't do anything. If set to False, a new abort signal is issued. 

3874 

3875 returns: If soft is set to False, we return True holding lockGCActive. If 

3876 soft is set to True and an abort signal is pending, we return False 

3877 without holding lockGCActive. An exception is raised in case of error.""" 

3878 Util.log("=== SR %s: abort ===" % (srUuid)) 

3879 init(srUuid) 

3880 if not lockGCActive.acquireNoblock(): 

3881 gotLock = False 

3882 Util.log("Aborting currently-running instance (SR %s)" % srUuid) 

3883 abortFlag = IPCFlag(srUuid) 

3884 if not abortFlag.set(FLAG_TYPE_ABORT, soft): 

3885 return False 

3886 for i in range(SR.LOCK_RETRY_ATTEMPTS): 

3887 gotLock = lockGCActive.acquireNoblock() 

3888 if gotLock: 

3889 break 

3890 time.sleep(SR.LOCK_RETRY_INTERVAL) 

3891 abortFlag.clear(FLAG_TYPE_ABORT) 

3892 if not gotLock: 

3893 raise util.CommandException(code=errno.ETIMEDOUT, 

3894 reason="SR %s: error aborting existing process" % srUuid) 

3895 return True 

3896 

3897 

3898def init(srUuid): 

3899 global lockGCRunning 

3900 if not lockGCRunning: 

3901 lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid) 

3902 global lockGCActive 

3903 if not lockGCActive: 

3904 lockGCActive = LockActive(srUuid) 

3905 

3906 

3907class LockActive: 

3908 """ 

3909 Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired 

3910 if another process holds the SR lock. 

3911 """ 

3912 def __init__(self, srUuid): 

3913 self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid) 

3914 self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, srUuid) 

3915 

3916 def acquireNoblock(self): 

3917 self._srLock.acquire() 

3918 

3919 try: 

3920 return self._lock.acquireNoblock() 

3921 finally: 

3922 self._srLock.release() 

3923 

3924 def release(self): 

3925 self._lock.release() 

3926 

3927 

3928def usage(): 

3929 output = """Garbage collect and/or coalesce VHDs in a VHD-based SR 

3930 

3931Parameters: 

3932 -u --uuid UUID SR UUID 

3933 and one of: 

3934 -g --gc garbage collect, coalesce, and repeat while there is work 

3935 -G --gc_force garbage collect once, aborting any current operations 

3936 -c --clean_cache <max_age> clean up IntelliCache cache files older than 

3937 max_age hours 

3938 -a --abort abort any currently running operation (GC or coalesce) 

3939 -q --query query the current state (GC'ing, coalescing or not running) 

3940 -x --disable disable GC/coalesce (will be in effect until you exit) 

3941 -t --debug see Debug below 

3942 

3943Options: 

3944 -b --background run in background (return immediately) (valid for -g only) 

3945 -f --force continue in the presence of VHDs with errors (when doing 

3946 GC, this might cause removal of any such VHDs) (only valid 

3947 for -G) (DANGEROUS) 

3948 

3949Debug: 

3950 The --debug parameter enables manipulation of LVHD VDIs for debugging 

3951 purposes. ** NEVER USE IT ON A LIVE VM ** 

3952 The following parameters are required: 

3953 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate", 

3954 "deflate". 

3955 -v --vdi_uuid VDI UUID 

3956 """ 

3957 #-d --dry-run don't actually perform any SR-modifying operations 

3958 print(output) 

3959 Util.log("(Invalid usage)") 

3960 sys.exit(1) 

3961 

3962 

3963############################################################################## 

3964# 

3965# API 

3966# 

3967def abort(srUuid, soft=False): 

3968 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair. 

3969 """ 

3970 if _abort(srUuid, soft): 

3971 Util.log("abort: releasing the process lock") 

3972 lockGCActive.release() 

3973 return True 

3974 else: 

3975 return False 

3976 

3977 

3978def gc(session, srUuid, inBackground, dryRun=False): 

3979 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return 

3980 immediately if inBackground=True. 

3981 

3982 The following algorithm is used: 

3983 1. If we are already GC'ing in this SR, return 

3984 2. If we are already coalescing a VDI pair: 

3985 a. Scan the SR and determine if the VDI pair is GC'able 

3986 b. If the pair is not GC'able, return 

3987 c. If the pair is GC'able, abort coalesce 

3988 3. Scan the SR 

3989 4. If there is nothing to collect, nor to coalesce, return 

3990 5. If there is something to collect, GC all, then goto 3 

3991 6. If there is something to coalesce, coalesce one pair, then goto 3 

3992 """ 

3993 Util.log("=== SR %s: gc ===" % srUuid) 

3994 

3995 signal.signal(signal.SIGTERM, receiveSignal) 

3996 

3997 if inBackground: 

3998 if daemonize(): 

3999 # we are now running in the background. Catch & log any errors 

4000 # because there is no other way to propagate them back at this 

4001 # point 

4002 

4003 try: 

4004 _gc(None, srUuid, dryRun) 

4005 except AbortException: 

4006 Util.log("Aborted") 

4007 except Exception: 

4008 Util.logException("gc") 

4009 Util.log("* * * * * SR %s: ERROR\n" % srUuid) 

4010 os._exit(0) 

4011 else: 

4012 _gc(session, srUuid, dryRun, immediate=True) 

4013 

4014 

4015def start_gc(session, sr_uuid): 

4016 """ 

4017 This function is used to try to start a backgrounded GC session by forking 

4018 the current process. If using the systemd version, call start_gc_service() instead. 

4019 """ 

4020 # don't bother if an instance already running (this is just an 

4021 # optimization to reduce the overhead of forking a new process if we 

4022 # don't have to, but the process will check the lock anyways) 

4023 lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid) 

4024 if not lockRunning.acquireNoblock(): 

4025 if should_preempt(session, sr_uuid): 

4026 util.SMlog("Aborting currently-running coalesce of garbage VDI") 

4027 try: 

4028 if not abort(sr_uuid, soft=True): 

4029 util.SMlog("The GC has already been scheduled to re-start") 

4030 except util.CommandException as e: 

4031 if e.code != errno.ETIMEDOUT: 

4032 raise 

4033 util.SMlog('failed to abort the GC') 

4034 else: 

4035 util.SMlog("A GC instance already running, not kicking") 

4036 return 

4037 else: 

4038 lockRunning.release() 

4039 

4040 util.SMlog(f"Starting GC file is {__file__}") 

4041 subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'], 

4042 stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

4043 

4044def start_gc_service(sr_uuid, wait=False): 

4045 """ 

4046 This starts the templated systemd service which runs GC on the given SR UUID. 

4047 If the service was already started, this is a no-op. 

4048 

4049 Because the service is a one-shot with RemainAfterExit=no, when called with 

4050 wait=True this will run the service synchronously and will not return until the 

4051 run has finished. This is used to force a run of the GC instead of just kicking it 

4052 in the background. 

4053 """ 

4054 sr_uuid_esc = sr_uuid.replace("-", "\\x2d") 

4055 util.SMlog(f"Kicking SMGC@{sr_uuid}...") 

4056 cmd=[ "/usr/bin/systemctl", "--quiet" ] 

4057 if not wait: 

4058 cmd.append("--no-block") 

4059 cmd += ["start", f"SMGC@{sr_uuid_esc}"] 

4060 subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

4061 

4062 

4063def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False): 

4064 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure 

4065 the SR lock is held. 

4066 The following algorithm is used: 

4067 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce 

4068 2. Scan the SR 

4069 3. GC 

4070 4. return 

4071 """ 

4072 Util.log("=== SR %s: gc_force ===" % srUuid) 

4073 init(srUuid) 

4074 sr = SR.getInstance(srUuid, session, lockSR, True) 

4075 if not lockGCActive.acquireNoblock(): 

4076 abort(srUuid) 

4077 else: 

4078 Util.log("Nothing was running, clear to proceed") 

4079 

4080 if force: 

4081 Util.log("FORCED: will continue even if there are VHD errors") 

4082 sr.scanLocked(force) 

4083 sr.cleanupCoalesceJournals() 

4084 

4085 try: 

4086 sr.cleanupCache() 

4087 sr.garbageCollect(dryRun) 

4088 finally: 

4089 sr.cleanup() 

4090 sr.logFilter.logState() 

4091 lockGCActive.release() 

4092 

4093 

4094def get_state(srUuid): 

4095 """Return whether GC/coalesce is currently running or not. This asks systemd for 

4096 the state of the templated SMGC service and will return True if it is "activating" 

4097 or "running" (for completeness, as in practice it will never achieve the latter state) 

4098 """ 

4099 sr_uuid_esc = srUuid.replace("-", "\\x2d") 

4100 cmd=[ "/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"] 

4101 result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

4102 state = result.stdout.decode('utf-8').rstrip() 

4103 if state == "activating" or state == "running": 

4104 return True 

4105 return False 

4106 

4107 

4108def should_preempt(session, srUuid): 
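    # Returns True only when the single VDI recorded in the JRN_COALESCE 
    # journal has itself become garbage, in which case the in-flight coalesce 
    # is pointless and is worth pre-empting. 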

4109 sr = SR.getInstance(srUuid, session) 

4110 entries = sr.journaler.getAll(VDI.JRN_COALESCE) 

4111 if len(entries) == 0: 

4112 return False 

4113 elif len(entries) > 1: 

4114 raise util.SMException("More than one coalesce entry: " + str(entries)) 

4115 sr.scanLocked() 

4116 coalescedUuid = entries.popitem()[0] 

4117 garbage = sr.findGarbage() 

4118 for vdi in garbage: 

4119 if vdi.uuid == coalescedUuid: 

4120 return True 

4121 return False 

4122 

4123 

4124def get_coalesceable_leaves(session, srUuid, vdiUuids): 

4125 coalesceable = [] 

4126 sr = SR.getInstance(srUuid, session) 

4127 sr.scanLocked() 

4128 for uuid in vdiUuids: 

4129 vdi = sr.getVDI(uuid) 

4130 if not vdi: 

4131 raise util.SMException("VDI %s not found" % uuid) 

4132 if vdi.isLeafCoalesceable(): 

4133 coalesceable.append(uuid) 

4134 return coalesceable 

4135 

4136 

4137def cache_cleanup(session, srUuid, maxAge): 

4138 sr = SR.getInstance(srUuid, session) 

4139 return sr.cleanupCache(maxAge) 

4140 

4141 

4142def debug(sr_uuid, cmd, vdi_uuid): 

4143 Util.log("Debug command: %s" % cmd) 

4144 sr = SR.getInstance(sr_uuid, None) 

4145 if not isinstance(sr, LVHDSR): 

4146 print("Error: not an LVHD SR") 

4147 return 

4148 sr.scanLocked() 

4149 vdi = sr.getVDI(vdi_uuid) 

4150 if not vdi: 

4151 print("Error: VDI %s not found") 

4152 return 

4153 print("Running %s on SR %s" % (cmd, sr)) 

4154 print("VDI before: %s" % vdi) 

4155 if cmd == "activate": 

4156 vdi._activate() 

4157 print("VDI file: %s" % vdi.path) 

4158 if cmd == "deactivate": 

4159 ns = lvhdutil.NS_PREFIX_LVM + sr.uuid 

4160 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False) 

4161 if cmd == "inflate": 

4162 vdi.inflateFully() 

4163 sr.cleanup() 

4164 if cmd == "deflate": 

4165 vdi.deflate() 

4166 sr.cleanup() 

4167 sr.scanLocked() 

4168 print("VDI after: %s" % vdi) 

4169 

4170 

4171def abort_optional_reenable(uuid): 

4172 print("Disabling GC/coalesce for %s" % uuid) 

4173 ret = _abort(uuid) 

4174 input("Press enter to re-enable...") 

4175 print("GC/coalesce re-enabled") 

4176 lockGCRunning.release() 

4177 if ret: 

4178 lockGCActive.release() 

4179 

4180 

4181############################################################################## 

4182# 

4183# CLI 

4184# 

4185def main(): 

4186 action = "" 

4187 maxAge = 0 

4188 uuid = "" 

4189 background = False 

4190 force = False 

4191 dryRun = False 

4192 debug_cmd = "" 

4193 vdi_uuid = "" 

4194 shortArgs = "gGc:aqxu:bfdt:v:" 

4195 longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable", 

4196 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="] 

4197 

4198 try: 

4199 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs) 

4200 except getopt.GetoptError: 

4201 usage() 

4202 for o, a in opts: 

4203 if o in ("-g", "--gc"): 

4204 action = "gc" 

4205 if o in ("-G", "--gc_force"): 

4206 action = "gc_force" 

4207 if o in ("-c", "--clean_cache"): 

4208 action = "clean_cache" 

4209 maxAge = int(a) 

4210 if o in ("-a", "--abort"): 

4211 action = "abort" 

4212 if o in ("-q", "--query"): 

4213 action = "query" 

4214 if o in ("-x", "--disable"): 

4215 action = "disable" 

4216 if o in ("-u", "--uuid"): 

4217 uuid = a 

4218 if o in ("-b", "--background"): 

4219 background = True 

4220 if o in ("-f", "--force"): 

4221 force = True 

4222 if o in ("-d", "--dry-run"): 

4223 Util.log("Dry run mode") 

4224 dryRun = True 

4225 if o in ("-t", "--debug"): 

4226 action = "debug" 

4227 debug_cmd = a 

4228 if o in ("-v", "--vdi_uuid"): 

4229 vdi_uuid = a 

4230 

4231 if not action or not uuid: 

4232 usage() 

4233 if action == "debug" and not (debug_cmd and vdi_uuid) or \ 

4234 action != "debug" and (debug_cmd or vdi_uuid): 

4235 usage() 

4236 

4237 if action != "query" and action != "debug": 

4238 print("All output goes to log") 

4239 

4240 if action == "gc": 

4241 gc(None, uuid, background, dryRun) 

4242 elif action == "gc_force": 

4243 gc_force(None, uuid, force, dryRun, True) 

4244 elif action == "clean_cache": 

4245 cache_cleanup(None, uuid, maxAge) 

4246 elif action == "abort": 

4247 abort(uuid) 

4248 elif action == "query": 

4249 print("Currently running: %s" % get_state(uuid)) 

4250 elif action == "disable": 

4251 abort_optional_reenable(uuid) 

4252 elif action == "debug": 

4253 debug(uuid, debug_cmd, vdi_uuid) 

4254 

4255 

4256if __name__ == '__main__': 

4257 main()