Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/python3 

2# 

3# Copyright (C) Citrix Systems Inc. 

4# 

5# This program is free software; you can redistribute it and/or modify 

6# it under the terms of the GNU Lesser General Public License as published 

7# by the Free Software Foundation; version 2.1 only. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program; if not, write to the Free Software Foundation, Inc., 

16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 

17# 

18# Script to coalesce and garbage collect VHD-based SR's in the background 

19# 

20 

21from sm_typing import Optional, override 

22 

23import os 

24import os.path 

25import sys 

26import time 

27import signal 

28import subprocess 

29import getopt 

30import datetime 

31import traceback 

32import base64 

33import zlib 

34import errno 

35import stat 

36 

37import XenAPI # pylint: disable=import-error 

38import util 

39import lvutil 

40import vhdutil 

41import lvhdutil 

42import lvmcache 

43import journaler 

44import fjournaler 

45import lock 

46import blktap2 

47import xs_errors 

48from refcounter import RefCounter 

49from ipc import IPCFlag 

50from lvmanager import LVActivator 

51from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG 

52from functools import reduce 

53from time import monotonic as _time 

54 

55try: 

56 from linstorjournaler import LinstorJournaler 

57 from linstorvhdutil import LinstorVhdUtil 

58 from linstorvolumemanager import get_controller_uri 

59 from linstorvolumemanager import LinstorVolumeManager 

60 from linstorvolumemanager import LinstorVolumeManagerError 

61 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX 

62 

63 LINSTOR_AVAILABLE = True 

64except ImportError: 

65 LINSTOR_AVAILABLE = False 

66 

67# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not 

68# possible due to lvhd_stop_using_() not working correctly. However, we leave 

69# this option available through the explicit LEAFCLSC_FORCE flag in the VDI 

70# record for use by the offline tool (which makes the operation safe by pausing 

71# the VM first) 

72AUTO_ONLINE_LEAF_COALESCE_ENABLED = True 

73 

74FLAG_TYPE_ABORT = "abort" # flag to request aborting of GC/coalesce 

75 

76# process "lock", used simply as an indicator that a process already exists 

77# that is doing GC/coalesce on this SR (such a process holds the lock, and we 

78# check for the fact by trying the lock). 

79lockGCRunning = None 

80 

81# process "lock" to indicate that the GC process has been activated but may not 

82# yet be running, stops a second process from being started. 

83LOCK_TYPE_GC_ACTIVE = "gc_active" 

84lockGCActive = None 

85 

86# Default coalesce error rate limit, in messages per minute. A zero value 

87# disables throttling, and a negative value disables error reporting. 

88DEFAULT_COALESCE_ERR_RATE = 1.0 / 60 

89 

90COALESCE_LAST_ERR_TAG = 'last-coalesce-error' 

91COALESCE_ERR_RATE_TAG = 'coalesce-error-rate' 

92VAR_RUN = "/var/run/" 

93SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log" 

94 

95N_RUNNING_AVERAGE = 10 

96 

97NON_PERSISTENT_DIR = '/run/nonpersistent/sm' 

98 

99# Signal Handler 

100SIGTERM = False 

101 

102 

103class AbortException(util.SMException): 

104 pass 

105 

106 

107def receiveSignal(signalNumber, frame): 

108 global SIGTERM 

109 

110 util.SMlog("GC: recieved SIGTERM") 

111 SIGTERM = True 

112 return 

113 

114 

115################################################################################ 

116# 

117# Util 

118# 

119class Util: 

120 RET_RC = 1 

121 RET_STDOUT = 2 

122 RET_STDERR = 4 

123 

124 UUID_LEN = 36 

125 

126 PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024} 

127 

128 @staticmethod 

129 def log(text) -> None: 

130 util.SMlog(text, ident="SMGC") 

131 

132 @staticmethod 

133 def logException(tag): 

134 info = sys.exc_info() 

135 if info[0] == SystemExit: 135 ↛ 137line 135 didn't jump to line 137, because the condition on line 135 was never true

136 # this should not be happening when catching "Exception", but it is 

137 sys.exit(0) 

138 tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2])) 

139 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

140 Util.log(" ***********************") 

141 Util.log(" * E X C E P T I O N *") 

142 Util.log(" ***********************") 

143 Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1])) 

144 Util.log(tb) 

145 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

146 

147 @staticmethod 

148 def doexec(args, expectedRC, inputtext=None, ret=None, log=True): 

149 "Execute a subprocess, then return its return code, stdout, stderr" 

150 proc = subprocess.Popen(args, 

151 stdin=subprocess.PIPE, \ 

152 stdout=subprocess.PIPE, \ 

153 stderr=subprocess.PIPE, \ 

154 shell=True, \ 

155 close_fds=True) 

156 (stdout, stderr) = proc.communicate(inputtext) 

157 stdout = str(stdout) 

158 stderr = str(stderr) 

159 rc = proc.returncode 

160 if log: 

161 Util.log("`%s`: %s" % (args, rc)) 

162 if type(expectedRC) != type([]): 

163 expectedRC = [expectedRC] 

164 if not rc in expectedRC: 

165 reason = stderr.strip() 

166 if stdout.strip(): 

167 reason = "%s (stdout: %s)" % (reason, stdout.strip()) 

168 Util.log("Failed: %s" % reason) 

169 raise util.CommandException(rc, args, reason) 

170 

171 if ret == Util.RET_RC: 

172 return rc 

173 if ret == Util.RET_STDERR: 

174 return stderr 

175 return stdout 

176 

177 @staticmethod 

178 def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut): 

179 """execute func in a separate thread and kill it if abortTest signals 

180 so""" 

181 abortSignaled = abortTest() # check now before we clear resultFlag 

182 resultFlag = IPCFlag(ns) 

183 resultFlag.clearAll() 

184 pid = os.fork() 

185 if pid: 

186 startTime = _time() 

187 try: 

188 while True: 

189 if resultFlag.test("success"): 

190 Util.log(" Child process completed successfully") 

191 resultFlag.clear("success") 

192 return 

193 if resultFlag.test("failure"): 

194 resultFlag.clear("failure") 

195 raise util.SMException("Child process exited with error") 

196 if abortTest() or abortSignaled or SIGTERM: 

197 os.killpg(pid, signal.SIGKILL) 

198 raise AbortException("Aborting due to signal") 

199 if timeOut and _time() - startTime > timeOut: 

200 os.killpg(pid, signal.SIGKILL) 

201 resultFlag.clearAll() 

202 raise util.SMException("Timed out") 

203 time.sleep(pollInterval) 

204 finally: 

205 wait_pid = 0 

206 rc = -1 

207 count = 0 

208 while wait_pid == 0 and count < 10: 

209 wait_pid, rc = os.waitpid(pid, os.WNOHANG) 

210 if wait_pid == 0: 

211 time.sleep(2) 

212 count += 1 

213 

214 if wait_pid == 0: 

215 Util.log("runAbortable: wait for process completion timed out") 

216 else: 

217 os.setpgrp() 

218 try: 

219 if func() == ret: 

220 resultFlag.set("success") 

221 else: 

222 resultFlag.set("failure") 

223 except Exception as e: 

224 Util.log("Child process failed with : (%s)" % e) 

225 resultFlag.set("failure") 

226 Util.logException("This exception has occured") 

227 os._exit(0) 

228 

229 @staticmethod 

230 def num2str(number): 

231 for prefix in ("G", "M", "K"): 231 ↛ 234line 231 didn't jump to line 234, because the loop on line 231 didn't complete

232 if number >= Util.PREFIX[prefix]: 

233 return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix) 

234 return "%s" % number 

235 

236 @staticmethod 

237 def numBits(val): 

238 count = 0 

239 while val: 

240 count += val & 1 

241 val = val >> 1 

242 return count 

243 

244 @staticmethod 

245 def countBits(bitmap1, bitmap2): 

246 """return bit count in the bitmap produced by ORing the two bitmaps""" 

247 len1 = len(bitmap1) 

248 len2 = len(bitmap2) 

249 lenLong = len1 

250 lenShort = len2 

251 bitmapLong = bitmap1 

252 if len2 > len1: 

253 lenLong = len2 

254 lenShort = len1 

255 bitmapLong = bitmap2 

256 

257 count = 0 

258 for i in range(lenShort): 

259 val = bitmap1[i] | bitmap2[i] 

260 count += Util.numBits(val) 

261 

262 for i in range(i + 1, lenLong): 

263 val = bitmapLong[i] 

264 count += Util.numBits(val) 

265 return count 

266 

267 @staticmethod 

268 def getThisScript(): 

269 thisScript = util.get_real_path(__file__) 

270 if thisScript.endswith(".pyc"): 

271 thisScript = thisScript[:-1] 

272 return thisScript 

273 

274 

275################################################################################ 

276# 

277# XAPI 

278# 

279class XAPI: 

280 USER = "root" 

281 PLUGIN_ON_SLAVE = "on-slave" 

282 

283 CONFIG_SM = 0 

284 CONFIG_OTHER = 1 

285 CONFIG_ON_BOOT = 2 

286 CONFIG_ALLOW_CACHING = 3 

287 

288 CONFIG_NAME = { 

289 CONFIG_SM: "sm-config", 

290 CONFIG_OTHER: "other-config", 

291 CONFIG_ON_BOOT: "on-boot", 

292 CONFIG_ALLOW_CACHING: "allow_caching" 

293 } 

294 

295 class LookupError(util.SMException): 

296 pass 

297 

298 @staticmethod 

299 def getSession(): 

300 session = XenAPI.xapi_local() 

301 session.xenapi.login_with_password(XAPI.USER, '', '', 'SM') 

302 return session 

303 

304 def __init__(self, session, srUuid): 

305 self.sessionPrivate = False 

306 self.session = session 

307 if self.session is None: 

308 self.session = self.getSession() 

309 self.sessionPrivate = True 

310 self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid) 

311 self.srRecord = self.session.xenapi.SR.get_record(self._srRef) 

312 self.hostUuid = util.get_this_host() 

313 self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid) 

314 self.task = None 

315 self.task_progress = {"coalescable": 0, "done": 0} 

316 

317 def __del__(self): 

318 if self.sessionPrivate: 

319 self.session.xenapi.session.logout() 

320 

321 @property 

322 def srRef(self): 

323 return self._srRef 

324 

325 def isPluggedHere(self): 

326 pbds = self.getAttachedPBDs() 

327 for pbdRec in pbds: 

328 if pbdRec["host"] == self._hostRef: 

329 return True 

330 return False 

331 

332 def poolOK(self): 

333 host_recs = self.session.xenapi.host.get_all_records() 

334 for host_ref, host_rec in host_recs.items(): 

335 if not host_rec["enabled"]: 

336 Util.log("Host %s not enabled" % host_rec["uuid"]) 

337 return False 

338 return True 

339 

340 def isMaster(self): 

341 if self.srRecord["shared"]: 

342 pool = list(self.session.xenapi.pool.get_all_records().values())[0] 

343 return pool["master"] == self._hostRef 

344 else: 

345 pbds = self.getAttachedPBDs() 

346 if len(pbds) < 1: 

347 raise util.SMException("Local SR not attached") 

348 elif len(pbds) > 1: 

349 raise util.SMException("Local SR multiply attached") 

350 return pbds[0]["host"] == self._hostRef 

351 

352 def getAttachedPBDs(self): 

353 """Return PBD records for all PBDs of this SR that are currently 

354 attached""" 

355 attachedPBDs = [] 

356 pbds = self.session.xenapi.PBD.get_all_records() 

357 for pbdRec in pbds.values(): 

358 if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]: 

359 attachedPBDs.append(pbdRec) 

360 return attachedPBDs 

361 

362 def getOnlineHosts(self): 

363 return util.get_online_hosts(self.session) 

364 

365 def ensureInactive(self, hostRef, args): 

366 text = self.session.xenapi.host.call_plugin( \ 

367 hostRef, self.PLUGIN_ON_SLAVE, "multi", args) 

368 Util.log("call-plugin returned: '%s'" % text) 

369 

370 def getRecordHost(self, hostRef): 

371 return self.session.xenapi.host.get_record(hostRef) 

372 

373 def _getRefVDI(self, uuid): 

374 return self.session.xenapi.VDI.get_by_uuid(uuid) 

375 

376 def getRefVDI(self, vdi): 

377 return self._getRefVDI(vdi.uuid) 

378 

379 def getRecordVDI(self, uuid): 

380 try: 

381 ref = self._getRefVDI(uuid) 

382 return self.session.xenapi.VDI.get_record(ref) 

383 except XenAPI.Failure: 

384 return None 

385 

386 def singleSnapshotVDI(self, vdi): 

387 return self.session.xenapi.VDI.snapshot(vdi.getRef(), 

388 {"type": "internal"}) 

389 

390 def forgetVDI(self, srUuid, vdiUuid): 

391 """Forget the VDI, but handle the case where the VDI has already been 

392 forgotten (i.e. ignore errors)""" 

393 try: 

394 vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid) 

395 self.session.xenapi.VDI.forget(vdiRef) 

396 except XenAPI.Failure: 

397 pass 

398 

399 def getConfigVDI(self, vdi, key): 

400 kind = vdi.CONFIG_TYPE[key] 

401 if kind == self.CONFIG_SM: 

402 cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef()) 

403 elif kind == self.CONFIG_OTHER: 

404 cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef()) 

405 elif kind == self.CONFIG_ON_BOOT: 

406 cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef()) 

407 elif kind == self.CONFIG_ALLOW_CACHING: 

408 cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef()) 

409 else: 

410 assert(False) 

411 Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg))) 

412 return cfg 

413 

414 def removeFromConfigVDI(self, vdi, key): 

415 kind = vdi.CONFIG_TYPE[key] 

416 if kind == self.CONFIG_SM: 

417 self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key) 

418 elif kind == self.CONFIG_OTHER: 

419 self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key) 

420 else: 

421 assert(False) 

422 

423 def addToConfigVDI(self, vdi, key, val): 

424 kind = vdi.CONFIG_TYPE[key] 

425 if kind == self.CONFIG_SM: 

426 self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val) 

427 elif kind == self.CONFIG_OTHER: 

428 self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val) 

429 else: 

430 assert(False) 

431 

432 def isSnapshot(self, vdi): 

433 return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef()) 

434 

435 def markCacheSRsDirty(self): 

436 sr_refs = self.session.xenapi.SR.get_all_records_where( \ 

437 'field "local_cache_enabled" = "true"') 

438 for sr_ref in sr_refs: 

439 Util.log("Marking SR %s dirty" % sr_ref) 

440 util.set_dirty(self.session, sr_ref) 

441 

442 def srUpdate(self): 

443 Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"]) 

444 abortFlag = IPCFlag(self.srRecord["uuid"]) 

445 task = self.session.xenapi.Async.SR.update(self._srRef) 

446 cancelTask = True 

447 try: 

448 for i in range(60): 

449 status = self.session.xenapi.task.get_status(task) 

450 if not status == "pending": 

451 Util.log("SR.update_asynch status changed to [%s]" % status) 

452 cancelTask = False 

453 return 

454 if abortFlag.test(FLAG_TYPE_ABORT): 

455 Util.log("Abort signalled during srUpdate, cancelling task...") 

456 try: 

457 self.session.xenapi.task.cancel(task) 

458 cancelTask = False 

459 Util.log("Task cancelled") 

460 except: 

461 pass 

462 return 

463 time.sleep(1) 

464 finally: 

465 if cancelTask: 

466 self.session.xenapi.task.cancel(task) 

467 self.session.xenapi.task.destroy(task) 

468 Util.log("Asynch srUpdate still running, but timeout exceeded.") 

469 

470 def update_task(self): 

471 self.session.xenapi.task.set_other_config( 

472 self.task, 

473 { 

474 "applies_to": self._srRef 

475 }) 

476 total = self.task_progress['coalescable'] + self.task_progress['done'] 

477 if (total > 0): 

478 self.session.xenapi.task.set_progress( 

479 self.task, float(self.task_progress['done']) / total) 

480 

481 def create_task(self, label, description): 

482 self.task = self.session.xenapi.task.create(label, description) 

483 self.update_task() 

484 

485 def update_task_progress(self, key, value): 

486 self.task_progress[key] = value 

487 if self.task: 

488 self.update_task() 

489 

490 def set_task_status(self, status): 

491 if self.task: 

492 self.session.xenapi.task.set_status(self.task, status) 

493 

494 

495################################################################################ 

496# 

497# VDI 

498# 

499class VDI(object): 

500 """Object representing a VDI of a VHD-based SR""" 

501 

502 POLL_INTERVAL = 1 

503 POLL_TIMEOUT = 30 

504 DEVICE_MAJOR = 202 

505 DRIVER_NAME_VHD = "vhd" 

506 

507 # config keys & values 

508 DB_VHD_PARENT = "vhd-parent" 

509 DB_VDI_TYPE = "vdi_type" 

510 DB_VHD_BLOCKS = "vhd-blocks" 

511 DB_VDI_PAUSED = "paused" 

512 DB_VDI_RELINKING = "relinking" 

513 DB_VDI_ACTIVATING = "activating" 

514 DB_GC = "gc" 

515 DB_COALESCE = "coalesce" 

516 DB_LEAFCLSC = "leaf-coalesce" # config key 

517 DB_GC_NO_SPACE = "gc_no_space" 

518 LEAFCLSC_DISABLED = "false" # set by user; means do not leaf-coalesce 

519 LEAFCLSC_FORCE = "force" # set by user; means skip snap-coalesce 

520 LEAFCLSC_OFFLINE = "offline" # set here for informational purposes: means 

521 # no space to snap-coalesce or unable to keep 

522 # up with VDI. This is not used by the SM, it 

523 # might be used by external components. 

524 DB_ONBOOT = "on-boot" 

525 ONBOOT_RESET = "reset" 

526 DB_ALLOW_CACHING = "allow_caching" 

527 

528 CONFIG_TYPE = { 

529 DB_VHD_PARENT: XAPI.CONFIG_SM, 

530 DB_VDI_TYPE: XAPI.CONFIG_SM, 

531 DB_VHD_BLOCKS: XAPI.CONFIG_SM, 

532 DB_VDI_PAUSED: XAPI.CONFIG_SM, 

533 DB_VDI_RELINKING: XAPI.CONFIG_SM, 

534 DB_VDI_ACTIVATING: XAPI.CONFIG_SM, 

535 DB_GC: XAPI.CONFIG_OTHER, 

536 DB_COALESCE: XAPI.CONFIG_OTHER, 

537 DB_LEAFCLSC: XAPI.CONFIG_OTHER, 

538 DB_ONBOOT: XAPI.CONFIG_ON_BOOT, 

539 DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING, 

540 DB_GC_NO_SPACE: XAPI.CONFIG_SM 

541 } 

542 

543 LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024 # bytes 

544 LIVE_LEAF_COALESCE_TIMEOUT = 10 # seconds 

545 TIMEOUT_SAFETY_MARGIN = 0.5 # extra margin when calculating 

546 # feasibility of leaf coalesce 

547 

548 JRN_RELINK = "relink" # journal entry type for relinking children 

549 JRN_COALESCE = "coalesce" # to communicate which VDI is being coalesced 

550 JRN_LEAF = "leaf" # used in coalesce-leaf 

551 

552 STR_TREE_INDENT = 4 

553 

554 def __init__(self, sr, uuid, raw): 

555 self.sr = sr 

556 self.scanError = True 

557 self.uuid = uuid 

558 self.raw = raw 

559 self.fileName = "" 

560 self.parentUuid = "" 

561 self.sizeVirt = -1 

562 self._sizeVHD = -1 

563 self._sizeAllocated = -1 

564 self.hidden = False 

565 self.parent = None 

566 self.children = [] 

567 self._vdiRef = None 

568 self._clearRef() 

569 

570 @staticmethod 

571 def extractUuid(path): 

572 raise NotImplementedError("Implement in sub class") 

573 

574 def load(self, info=None) -> None: 

575 """Load VDI info""" 

576 pass 

577 

578 def getDriverName(self) -> str: 

579 return self.DRIVER_NAME_VHD 

580 

581 def getRef(self): 

582 if self._vdiRef is None: 

583 self._vdiRef = self.sr.xapi.getRefVDI(self) 

584 return self._vdiRef 

585 

586 def getConfig(self, key, default=None): 

587 config = self.sr.xapi.getConfigVDI(self, key) 

588 if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING: 588 ↛ 589line 588 didn't jump to line 589, because the condition on line 588 was never true

589 val = config 

590 else: 

591 val = config.get(key) 

592 if val: 

593 return val 

594 return default 

595 

596 def setConfig(self, key, val): 

597 self.sr.xapi.removeFromConfigVDI(self, key) 

598 self.sr.xapi.addToConfigVDI(self, key, val) 

599 Util.log("Set %s = %s for %s" % (key, val, self)) 

600 

601 def delConfig(self, key): 

602 self.sr.xapi.removeFromConfigVDI(self, key) 

603 Util.log("Removed %s from %s" % (key, self)) 

604 

605 def ensureUnpaused(self): 

606 if self.getConfig(self.DB_VDI_PAUSED) == "true": 

607 Util.log("Unpausing VDI %s" % self) 

608 self.unpause() 

609 

610 def pause(self, failfast=False) -> None: 

611 if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid, 

612 self.uuid, failfast): 

613 raise util.SMException("Failed to pause VDI %s" % self) 

614 

615 def _report_tapdisk_unpause_error(self): 

616 try: 

617 xapi = self.sr.xapi.session.xenapi 

618 sr_ref = xapi.SR.get_by_uuid(self.sr.uuid) 

619 msg_name = "failed to unpause tapdisk" 

620 msg_body = "Failed to unpause tapdisk for VDI %s, " \ 

621 "VMs using this tapdisk have lost access " \ 

622 "to the corresponding disk(s)" % self.uuid 

623 xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body) 

624 except Exception as e: 

625 util.SMlog("failed to generate message: %s" % e) 

626 

627 def unpause(self): 

628 if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid, 

629 self.uuid): 

630 self._report_tapdisk_unpause_error() 

631 raise util.SMException("Failed to unpause VDI %s" % self) 

632 

633 def refresh(self, ignoreNonexistent=True): 

634 """Pause-unpause in one step""" 

635 self.sr.lock() 

636 try: 

637 try: 

638 if not blktap2.VDI.tap_refresh(self.sr.xapi.session, 638 ↛ 640line 638 didn't jump to line 640, because the condition on line 638 was never true

639 self.sr.uuid, self.uuid): 

640 self._report_tapdisk_unpause_error() 

641 raise util.SMException("Failed to refresh %s" % self) 

642 except XenAPI.Failure as e: 

643 if util.isInvalidVDI(e) and ignoreNonexistent: 

644 Util.log("VDI %s not found, ignoring" % self) 

645 return 

646 raise 

647 finally: 

648 self.sr.unlock() 

649 

650 def isSnapshot(self): 

651 return self.sr.xapi.isSnapshot(self) 

652 

653 def isAttachedRW(self): 

654 return util.is_attached_rw( 

655 self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef())) 

656 

657 def getVHDBlocks(self): 

658 val = self.updateBlockInfo() 

659 bitmap = zlib.decompress(base64.b64decode(val)) 

660 return bitmap 

661 

662 def isCoalesceable(self): 

663 """A VDI is coalesceable if it has no siblings and is not a leaf""" 

664 return not self.scanError and \ 

665 self.parent and \ 

666 len(self.parent.children) == 1 and \ 

667 self.hidden and \ 

668 len(self.children) > 0 

669 

670 def isLeafCoalesceable(self): 

671 """A VDI is leaf-coalesceable if it has no siblings and is a leaf""" 

672 return not self.scanError and \ 

673 self.parent and \ 

674 len(self.parent.children) == 1 and \ 

675 not self.hidden and \ 

676 len(self.children) == 0 

677 

678 def canLiveCoalesce(self, speed): 

679 """Can we stop-and-leaf-coalesce this VDI? The VDI must be 

680 isLeafCoalesceable() already""" 

681 feasibleSize = False 

682 allowedDownTime = \ 

683 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT 

684 vhd_size = self.getAllocatedSize() 

685 if speed: 

686 feasibleSize = \ 

687 vhd_size // speed < allowedDownTime 

688 else: 

689 feasibleSize = \ 

690 vhd_size < self.LIVE_LEAF_COALESCE_MAX_SIZE 

691 

692 return (feasibleSize or 

693 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE) 

694 

695 def getAllPrunable(self): 

696 if len(self.children) == 0: # base case 

697 # it is possible to have a hidden leaf that was recently coalesced 

698 # onto its parent, its children already relinked but not yet 

699 # reloaded - in which case it may not be garbage collected yet: 

700 # some tapdisks could still be using the file. 

701 if self.sr.journaler.get(self.JRN_RELINK, self.uuid): 

702 return [] 

703 if not self.scanError and self.hidden: 

704 return [self] 

705 return [] 

706 

707 thisPrunable = True 

708 vdiList = [] 

709 for child in self.children: 

710 childList = child.getAllPrunable() 

711 vdiList.extend(childList) 

712 if child not in childList: 

713 thisPrunable = False 

714 

715 # We can destroy the current VDI if all childs are hidden BUT the 

716 # current VDI must be hidden too to do that! 

717 # Example in this case (after a failed live leaf coalesce): 

718 # 

719 # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees): 

720 # SMGC: [32436] b5458d61(1.000G/4.127M) 

721 # SMGC: [32436] *OLD_b545(1.000G/4.129M) 

722 # 

723 # OLD_b545 is hidden and must be removed, but b5458d61 not. 

724 # Normally we are not in this function when the delete action is 

725 # executed but in `_liveLeafCoalesce`. 

726 

727 if not self.scanError and not self.hidden and thisPrunable: 

728 vdiList.append(self) 

729 return vdiList 

730 

731 def getSizeVHD(self) -> int: 

732 return self._sizeVHD 

733 

734 def getAllocatedSize(self) -> int: 

735 return self._sizeAllocated 

736 

737 def getTreeRoot(self): 

738 "Get the root of the tree that self belongs to" 

739 root = self 

740 while root.parent: 

741 root = root.parent 

742 return root 

743 

744 def getTreeHeight(self): 

745 "Get the height of the subtree rooted at self" 

746 if len(self.children) == 0: 

747 return 1 

748 

749 maxChildHeight = 0 

750 for child in self.children: 

751 childHeight = child.getTreeHeight() 

752 if childHeight > maxChildHeight: 

753 maxChildHeight = childHeight 

754 

755 return maxChildHeight + 1 

756 

757 def getAllLeaves(self): 

758 "Get all leaf nodes in the subtree rooted at self" 

759 if len(self.children) == 0: 

760 return [self] 

761 

762 leaves = [] 

763 for child in self.children: 

764 leaves.extend(child.getAllLeaves()) 

765 return leaves 

766 

767 def updateBlockInfo(self) -> Optional[str]: 

768 val = base64.b64encode(self._queryVHDBlocks()).decode() 

769 self.setConfig(VDI.DB_VHD_BLOCKS, val) 

770 return val 

771 

772 def rename(self, uuid) -> None: 

773 "Rename the VDI file" 

774 assert(not self.sr.vdis.get(uuid)) 

775 self._clearRef() 

776 oldUuid = self.uuid 

777 self.uuid = uuid 

778 self.children = [] 

779 # updating the children themselves is the responsibility of the caller 

780 del self.sr.vdis[oldUuid] 

781 self.sr.vdis[self.uuid] = self 

782 

783 def delete(self) -> None: 

784 "Physically delete the VDI" 

785 lock.Lock.cleanup(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) 

786 lock.Lock.cleanupAll(self.uuid) 

787 self._clear() 

788 

789 def getParent(self) -> str: 

790 return vhdutil.getParent(self.path, lambda x: x.strip()) 790 ↛ exitline 790 didn't run the lambda on line 790

791 

792 def repair(self, parent) -> None: 

793 vhdutil.repair(parent) 

794 

795 @override 

796 def __str__(self) -> str: 

797 strHidden = "" 

798 if self.hidden: 798 ↛ 799line 798 didn't jump to line 799, because the condition on line 798 was never true

799 strHidden = "*" 

800 strSizeVirt = "?" 

801 if self.sizeVirt > 0: 801 ↛ 802line 801 didn't jump to line 802, because the condition on line 801 was never true

802 strSizeVirt = Util.num2str(self.sizeVirt) 

803 strSizeVHD = "?" 

804 if self._sizeVHD > 0: 804 ↛ 805line 804 didn't jump to line 805, because the condition on line 804 was never true

805 strSizeVHD = "/%s" % Util.num2str(self._sizeVHD) 

806 strSizeAllocated = "?" 

807 if self._sizeAllocated >= 0: 

808 strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated) 

809 strType = "" 

810 if self.raw: 

811 strType = "[RAW]" 

812 strSizeVHD = "" 

813 

814 return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt, 

815 strSizeVHD, strSizeAllocated, strType) 

816 

817 def validate(self, fast=False) -> None: 

818 if not vhdutil.check(self.path, fast=fast): 818 ↛ 819line 818 didn't jump to line 819, because the condition on line 818 was never true

819 raise util.SMException("VHD %s corrupted" % self) 

820 

821 def _clear(self): 

822 self.uuid = "" 

823 self.path = "" 

824 self.parentUuid = "" 

825 self.parent = None 

826 self._clearRef() 

827 

828 def _clearRef(self): 

829 self._vdiRef = None 

830 

831 def _doCoalesce(self) -> None: 

832 """Coalesce self onto parent. Only perform the actual coalescing of 

833 VHD, but not the subsequent relinking. We'll do that as the next step, 

834 after reloading the entire SR in case things have changed while we 

835 were coalescing""" 

836 self.validate() 

837 self.parent.validate(True) 

838 self.parent._increaseSizeVirt(self.sizeVirt) 

839 self.sr._updateSlavesOnResize(self.parent) 

840 self._coalesceVHD(0) 

841 self.parent.validate(True) 

842 #self._verifyContents(0) 

843 self.parent.updateBlockInfo() 

844 

845 def _verifyContents(self, timeOut): 

846 Util.log(" Coalesce verification on %s" % self) 

847 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

848 Util.runAbortable(lambda: self._runTapdiskDiff(), True, 

849 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

850 Util.log(" Coalesce verification succeeded") 

851 

852 def _runTapdiskDiff(self): 

853 cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \ 

854 (self.getDriverName(), self.path, \ 

855 self.parent.getDriverName(), self.parent.path) 

856 Util.doexec(cmd, 0) 

857 return True 

858 

859 @staticmethod 

860 def _reportCoalesceError(vdi, ce): 

861 """Reports a coalesce error to XenCenter. 

862 

863 vdi: the VDI object on which the coalesce error occured 

864 ce: the CommandException that was raised""" 

865 

866 msg_name = os.strerror(ce.code) 

867 if ce.code == errno.ENOSPC: 

868 # TODO We could add more information here, e.g. exactly how much 

869 # space is required for the particular coalesce, as well as actions 

870 # to be taken by the user and consequences of not taking these 

871 # actions. 

872 msg_body = 'Run out of space while coalescing.' 

873 elif ce.code == errno.EIO: 

874 msg_body = 'I/O error while coalescing.' 

875 else: 

876 msg_body = '' 

877 util.SMlog('Coalesce failed on SR %s: %s (%s)' 

878 % (vdi.sr.uuid, msg_name, msg_body)) 

879 

880 # Create a XenCenter message, but don't spam. 

881 xapi = vdi.sr.xapi.session.xenapi 

882 sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid) 

883 oth_cfg = xapi.SR.get_other_config(sr_ref) 

884 if COALESCE_ERR_RATE_TAG in oth_cfg: 

885 coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG]) 

886 else: 

887 coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE 

888 

889 xcmsg = False 

890 if coalesce_err_rate == 0: 

891 xcmsg = True 

892 elif coalesce_err_rate > 0: 

893 now = datetime.datetime.now() 

894 sm_cfg = xapi.SR.get_sm_config(sr_ref) 

895 if COALESCE_LAST_ERR_TAG in sm_cfg: 

896 # seconds per message (minimum distance in time between two 

897 # messages in seconds) 

898 spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60) 

899 last = datetime.datetime.fromtimestamp( 

900 float(sm_cfg[COALESCE_LAST_ERR_TAG])) 

901 if now - last >= spm: 

902 xapi.SR.remove_from_sm_config(sr_ref, 

903 COALESCE_LAST_ERR_TAG) 

904 xcmsg = True 

905 else: 

906 xcmsg = True 

907 if xcmsg: 

908 xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG, 

909 str(now.strftime('%s'))) 

910 if xcmsg: 

911 xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body) 

912 

913 def coalesce(self) -> int: 

914 # size is returned in sectors 

915 return vhdutil.coalesce(self.path) * 512 

916 

917 @staticmethod 

918 def _doCoalesceVHD(vdi): 

919 try: 

920 startTime = time.time() 

921 vhdSize = vdi.getAllocatedSize() 

922 coalesced_size = vdi.coalesce() 

923 endTime = time.time() 

924 vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size) 

925 except util.CommandException as ce: 

926 # We use try/except for the following piece of code because it runs 

927 # in a separate process context and errors will not be caught and 

928 # reported by anyone. 

929 try: 

930 # Report coalesce errors back to user via XC 

931 VDI._reportCoalesceError(vdi, ce) 

932 except Exception as e: 

933 util.SMlog('failed to create XenCenter message: %s' % e) 

934 raise ce 

935 except: 

936 raise 

937 

938 def _vdi_is_raw(self, vdi_path): 

939 """ 

940 Given path to vdi determine if it is raw 

941 """ 

942 uuid = self.extractUuid(vdi_path) 

943 return self.sr.vdis[uuid].raw 

944 

945 def _coalesceVHD(self, timeOut): 

946 Util.log(" Running VHD coalesce on %s" % self) 

947 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 947 ↛ exitline 947 didn't run the lambda on line 947

948 try: 

949 util.fistpoint.activate_custom_fn( 

950 "cleanup_coalesceVHD_inject_failure", 

951 util.inject_failure) 

952 Util.runAbortable(lambda: VDI._doCoalesceVHD(self), None, 

953 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

954 except: 

955 #exception at this phase could indicate a failure in vhd coalesce 

956 # or a kill of vhd coalesce by runAbortable due to timeOut 

957 # Try a repair and reraise the exception 

958 parent = "" 

959 try: 

960 parent = self.getParent() 

961 if not self._vdi_is_raw(parent): 

962 # Repair error is logged and ignored. Error reraised later 

963 util.SMlog('Coalesce failed on %s, attempting repair on ' \ 

964 'parent %s' % (self.uuid, parent)) 

965 self.repair(parent) 

966 except Exception as e: 

967 util.SMlog('(error ignored) Failed to repair parent %s ' \ 

968 'after failed coalesce on %s, err: %s' % 

969 (parent, self.path, e)) 

970 raise 

971 

972 util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid) 

973 

974 def _relinkSkip(self) -> None: 

975 """Relink children of this VDI to point to the parent of this VDI""" 

976 abortFlag = IPCFlag(self.sr.uuid) 

977 for child in self.children: 

978 if abortFlag.test(FLAG_TYPE_ABORT): 978 ↛ 979line 978 didn't jump to line 979, because the condition on line 978 was never true

979 raise AbortException("Aborting due to signal") 

980 Util.log(" Relinking %s from %s to %s" % \ 

981 (child, self, self.parent)) 

982 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid) 

983 child._setParent(self.parent) 

984 self.children = [] 

985 

986 def _reloadChildren(self, vdiSkip): 

987 """Pause & unpause all VDIs in the subtree to cause blktap to reload 

988 the VHD metadata for this file in any online VDI""" 

989 abortFlag = IPCFlag(self.sr.uuid) 

990 for child in self.children: 

991 if child == vdiSkip: 

992 continue 

993 if abortFlag.test(FLAG_TYPE_ABORT): 993 ↛ 994line 993 didn't jump to line 994, because the condition on line 993 was never true

994 raise AbortException("Aborting due to signal") 

995 Util.log(" Reloading VDI %s" % child) 

996 child._reload() 

997 

998 def _reload(self): 

999 """Pause & unpause to cause blktap to reload the VHD metadata""" 

1000 for child in self.children: 1000 ↛ 1001line 1000 didn't jump to line 1001, because the loop on line 1000 never started

1001 child._reload() 

1002 

1003 # only leaves can be attached 

1004 if len(self.children) == 0: 1004 ↛ exitline 1004 didn't return from function '_reload', because the condition on line 1004 was never false

1005 try: 

1006 self.delConfig(VDI.DB_VDI_RELINKING) 

1007 except XenAPI.Failure as e: 

1008 if not util.isInvalidVDI(e): 

1009 raise 

1010 self.refresh() 

1011 

1012 def _tagChildrenForRelink(self): 

1013 if len(self.children) == 0: 

1014 retries = 0 

1015 try: 

1016 while retries < 15: 

1017 retries += 1 

1018 if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None: 

1019 Util.log("VDI %s is activating, wait to relink" % 

1020 self.uuid) 

1021 else: 

1022 self.setConfig(VDI.DB_VDI_RELINKING, "True") 

1023 

1024 if self.getConfig(VDI.DB_VDI_ACTIVATING): 

1025 self.delConfig(VDI.DB_VDI_RELINKING) 

1026 Util.log("VDI %s started activating while tagging" % 

1027 self.uuid) 

1028 else: 

1029 return 

1030 time.sleep(2) 

1031 

1032 raise util.SMException("Failed to tag vdi %s for relink" % self) 

1033 except XenAPI.Failure as e: 

1034 if not util.isInvalidVDI(e): 

1035 raise 

1036 

1037 for child in self.children: 

1038 child._tagChildrenForRelink() 

1039 

1040 def _loadInfoParent(self): 

1041 ret = vhdutil.getParent(self.path, lvhdutil.extractUuid) 

1042 if ret: 

1043 self.parentUuid = ret 

1044 

1045 def _setParent(self, parent) -> None: 

1046 vhdutil.setParent(self.path, parent.path, False) 

1047 self.parent = parent 

1048 self.parentUuid = parent.uuid 

1049 parent.children.append(self) 

1050 try: 

1051 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1052 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1053 (self.uuid, self.parentUuid)) 

1054 except: 

1055 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1056 (self.uuid, self.parentUuid)) 

1057 

1058 def _loadInfoHidden(self) -> None: 

1059 hidden = vhdutil.getHidden(self.path) 

1060 self.hidden = (hidden != 0) 

1061 

1062 def _setHidden(self, hidden=True) -> None: 

1063 vhdutil.setHidden(self.path, hidden) 

1064 self.hidden = hidden 

1065 

1066 def _increaseSizeVirt(self, size, atomic=True) -> None: 

1067 """ensure the virtual size of 'self' is at least 'size'. Note that 

1068 resizing a VHD must always be offline and atomically: the file must 

1069 not be open by anyone and no concurrent operations may take place. 

1070 Thus we use the Agent API call for performing paused atomic 

1071 operations. If the caller is already in the atomic context, it must 

1072 call with atomic = False""" 

1073 if self.sizeVirt >= size: 1073 ↛ 1075line 1073 didn't jump to line 1075, because the condition on line 1073 was never false

1074 return 

1075 Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \ 

1076 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1077 

1078 msize = vhdutil.getMaxResizeSize(self.path) * 1024 * 1024 

1079 if (size <= msize): 

1080 vhdutil.setSizeVirtFast(self.path, size) 

1081 else: 

1082 if atomic: 

1083 vdiList = self._getAllSubtree() 

1084 self.sr.lock() 

1085 try: 

1086 self.sr.pauseVDIs(vdiList) 

1087 try: 

1088 self._setSizeVirt(size) 

1089 finally: 

1090 self.sr.unpauseVDIs(vdiList) 

1091 finally: 

1092 self.sr.unlock() 

1093 else: 

1094 self._setSizeVirt(size) 

1095 

1096 self.sizeVirt = vhdutil.getSizeVirt(self.path) 

1097 

1098 def _setSizeVirt(self, size) -> None: 

1099 """WARNING: do not call this method directly unless all VDIs in the 

1100 subtree are guaranteed to be unplugged (and remain so for the duration 

1101 of the operation): this operation is only safe for offline VHDs""" 

1102 jFile = os.path.join(self.sr.path, self.uuid) 

1103 vhdutil.setSizeVirt(self.path, size, jFile) 

1104 

1105 def _queryVHDBlocks(self) -> bytes: 

1106 return vhdutil.getBlockBitmap(self.path) 

1107 

1108 def _getCoalescedSizeData(self): 

1109 """Get the data size of the resulting VHD if we coalesce self onto 

1110 parent. We calculate the actual size by using the VHD block allocation 

1111 information (as opposed to just adding up the two VHD sizes to get an 

1112 upper bound)""" 

1113 # make sure we don't use stale BAT info from vdi_rec since the child 

1114 # was writable all this time 

1115 self.delConfig(VDI.DB_VHD_BLOCKS) 

1116 blocksChild = self.getVHDBlocks() 

1117 blocksParent = self.parent.getVHDBlocks() 

1118 numBlocks = Util.countBits(blocksChild, blocksParent) 

1119 Util.log("Num combined blocks = %d" % numBlocks) 

1120 sizeData = numBlocks * vhdutil.VHD_BLOCK_SIZE 

1121 assert(sizeData <= self.sizeVirt) 

1122 return sizeData 

1123 

1124 def _calcExtraSpaceForCoalescing(self) -> int: 

1125 sizeData = self._getCoalescedSizeData() 

1126 sizeCoalesced = sizeData + vhdutil.calcOverheadBitmap(sizeData) + \ 

1127 vhdutil.calcOverheadEmpty(self.sizeVirt) 

1128 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1129 return sizeCoalesced - self.parent.getSizeVHD() 

1130 

1131 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1132 """How much extra space in the SR will be required to 

1133 [live-]leaf-coalesce this VDI""" 

1134 # the space requirements are the same as for inline coalesce 

1135 return self._calcExtraSpaceForCoalescing() 

1136 

1137 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1138 """How much extra space in the SR will be required to 

1139 snapshot-coalesce this VDI""" 

1140 return self._calcExtraSpaceForCoalescing() + \ 

1141 vhdutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf 

1142 

1143 def _getAllSubtree(self): 

1144 """Get self and all VDIs in the subtree of self as a flat list""" 

1145 vdiList = [self] 

1146 for child in self.children: 

1147 vdiList.extend(child._getAllSubtree()) 

1148 return vdiList 

1149 

1150 

1151class FileVDI(VDI): 

1152 """Object representing a VDI in a file-based SR (EXT or NFS)""" 

1153 

1154 @staticmethod 

1155 def extractUuid(path): 

1156 path = os.path.basename(path.strip()) 

1157 if not (path.endswith(vhdutil.FILE_EXTN_VHD) or \ 1157 ↛ 1159line 1157 didn't jump to line 1159, because the condition on line 1157 was never true

1158 path.endswith(vhdutil.FILE_EXTN_RAW)): 

1159 return None 

1160 uuid = path.replace(vhdutil.FILE_EXTN_VHD, "").replace( \ 

1161 vhdutil.FILE_EXTN_RAW, "") 

1162 # TODO: validate UUID format 

1163 return uuid 

1164 

1165 def __init__(self, sr, uuid, raw): 

1166 VDI.__init__(self, sr, uuid, raw) 

1167 if self.raw: 1167 ↛ 1168line 1167 didn't jump to line 1168, because the condition on line 1167 was never true

1168 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_RAW) 

1169 else: 

1170 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) 

1171 

1172 @override 

1173 def load(self, info=None) -> None: 

1174 if not info: 

1175 if not util.pathexists(self.path): 

1176 raise util.SMException("%s not found" % self.path) 

1177 try: 

1178 info = vhdutil.getVHDInfo(self.path, self.extractUuid) 

1179 except util.SMException: 

1180 Util.log(" [VDI %s: failed to read VHD metadata]" % self.uuid) 

1181 return 

1182 self.parent = None 

1183 self.children = [] 

1184 self.parentUuid = info.parentUuid 

1185 self.sizeVirt = info.sizeVirt 

1186 self._sizeVHD = info.sizePhys 

1187 self._sizeAllocated = info.sizeAllocated 

1188 self.hidden = info.hidden 

1189 self.scanError = False 

1190 self.path = os.path.join(self.sr.path, "%s%s" % \ 

1191 (self.uuid, vhdutil.FILE_EXTN_VHD)) 

1192 

1193 @override 

1194 def rename(self, uuid) -> None: 

1195 oldPath = self.path 

1196 VDI.rename(self, uuid) 

1197 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) 

1198 self.path = os.path.join(self.sr.path, self.fileName) 

1199 assert(not util.pathexists(self.path)) 

1200 Util.log("Renaming %s -> %s" % (oldPath, self.path)) 

1201 os.rename(oldPath, self.path) 

1202 

1203 @override 

1204 def delete(self) -> None: 

1205 if len(self.children) > 0: 1205 ↛ 1206line 1205 didn't jump to line 1206, because the condition on line 1205 was never true

1206 raise util.SMException("VDI %s has children, can't delete" % \ 

1207 self.uuid) 

1208 try: 

1209 self.sr.lock() 

1210 try: 

1211 os.unlink(self.path) 

1212 self.sr.forgetVDI(self.uuid) 

1213 finally: 

1214 self.sr.unlock() 

1215 except OSError: 

1216 raise util.SMException("os.unlink(%s) failed" % self.path) 

1217 VDI.delete(self) 

1218 

1219 @override 

1220 def getAllocatedSize(self) -> int: 

1221 if self._sizeAllocated == -1: 1221 ↛ 1222line 1221 didn't jump to line 1222, because the condition on line 1221 was never true

1222 self._sizeAllocated = vhdutil.getAllocatedSize(self.path) 

1223 return self._sizeAllocated 

1224 

1225 

1226class LVHDVDI(VDI): 

1227 """Object representing a VDI in an LVHD SR""" 

1228 

1229 JRN_ZERO = "zero" # journal entry type for zeroing out end of parent 

1230 DRIVER_NAME_RAW = "aio" 

1231 

1232 @override 

1233 def load(self, info=None) -> None: 

1234 # `info` is always set. `None` default value is only here to match parent method. 

1235 assert info, "No info given to LVHDVDI.load" 

1236 self.parent = None 

1237 self.children = [] 

1238 self._sizeVHD = -1 

1239 self._sizeAllocated = -1 

1240 self.scanError = info.scanError 

1241 self.sizeLV = info.sizeLV 

1242 self.sizeVirt = info.sizeVirt 

1243 self.fileName = info.lvName 

1244 self.lvActive = info.lvActive 

1245 self.lvOpen = info.lvOpen 

1246 self.lvReadonly = info.lvReadonly 

1247 self.hidden = info.hidden 

1248 self.parentUuid = info.parentUuid 

1249 self.path = os.path.join(self.sr.path, self.fileName) 

1250 

1251 @staticmethod 

1252 def extractUuid(path): 

1253 return lvhdutil.extractUuid(path) 

1254 

1255 @override 

1256 def getDriverName(self) -> str: 

1257 if self.raw: 

1258 return self.DRIVER_NAME_RAW 

1259 return self.DRIVER_NAME_VHD 

1260 

1261 def inflate(self, size): 

1262 """inflate the LV containing the VHD to 'size'""" 

1263 if self.raw: 

1264 return 

1265 self._activate() 

1266 self.sr.lock() 

1267 try: 

1268 lvhdutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, size) 

1269 util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid) 

1270 finally: 

1271 self.sr.unlock() 

1272 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1273 self._sizeVHD = -1 

1274 self._sizeAllocated = -1 

1275 

1276 def deflate(self): 

1277 """deflate the LV containing the VHD to minimum""" 

1278 if self.raw: 

1279 return 

1280 self._activate() 

1281 self.sr.lock() 

1282 try: 

1283 lvhdutil.deflate(self.sr.lvmCache, self.fileName, self.getSizeVHD()) 

1284 finally: 

1285 self.sr.unlock() 

1286 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1287 self._sizeVHD = -1 

1288 self._sizeAllocated = -1 

1289 

1290 def inflateFully(self): 

1291 self.inflate(lvhdutil.calcSizeVHDLV(self.sizeVirt)) 

1292 

1293 def inflateParentForCoalesce(self): 

1294 """Inflate the parent only as much as needed for the purposes of 

1295 coalescing""" 

1296 if self.parent.raw: 

1297 return 

1298 inc = self._calcExtraSpaceForCoalescing() 

1299 if inc > 0: 

1300 util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid) 

1301 self.parent.inflate(self.parent.sizeLV + inc) 

1302 

1303 @override 

1304 def updateBlockInfo(self) -> Optional[str]: 

1305 if not self.raw: 

1306 return VDI.updateBlockInfo(self) 

1307 return None 

1308 

1309 @override 

1310 def rename(self, uuid) -> None: 

1311 oldUuid = self.uuid 

1312 oldLVName = self.fileName 

1313 VDI.rename(self, uuid) 

1314 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + self.uuid 

1315 if self.raw: 

1316 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + self.uuid 

1317 self.path = os.path.join(self.sr.path, self.fileName) 

1318 assert(not self.sr.lvmCache.checkLV(self.fileName)) 

1319 

1320 self.sr.lvmCache.rename(oldLVName, self.fileName) 

1321 if self.sr.lvActivator.get(oldUuid, False): 

1322 self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False) 

1323 

1324 ns = lvhdutil.NS_PREFIX_LVM + self.sr.uuid 

1325 (cnt, bcnt) = RefCounter.check(oldUuid, ns) 

1326 RefCounter.set(self.uuid, cnt, bcnt, ns) 

1327 RefCounter.reset(oldUuid, ns) 

1328 

1329 @override 

1330 def delete(self) -> None: 

1331 if len(self.children) > 0: 

1332 raise util.SMException("VDI %s has children, can't delete" % \ 

1333 self.uuid) 

1334 self.sr.lock() 

1335 try: 

1336 self.sr.lvmCache.remove(self.fileName) 

1337 self.sr.forgetVDI(self.uuid) 

1338 finally: 

1339 self.sr.unlock() 

1340 RefCounter.reset(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) 

1341 VDI.delete(self) 

1342 

1343 @override 

1344 def getSizeVHD(self) -> int: 

1345 if self._sizeVHD == -1: 

1346 self._loadInfoSizeVHD() 

1347 return self._sizeVHD 

1348 

1349 def _loadInfoSizeVHD(self): 

1350 """Get the physical utilization of the VHD file. We do it individually 

1351 (and not using the VHD batch scanner) as an optimization: this info is 

1352 relatively expensive and we need it only for VDI's involved in 

1353 coalescing.""" 

1354 if self.raw: 

1355 return 

1356 self._activate() 

1357 self._sizeVHD = vhdutil.getSizePhys(self.path) 

1358 if self._sizeVHD <= 0: 

1359 raise util.SMException("phys size of %s = %d" % \ 

1360 (self, self._sizeVHD)) 

1361 

1362 @override 

1363 def getAllocatedSize(self) -> int: 

1364 if self._sizeAllocated == -1: 

1365 self._loadInfoSizeAllocated() 

1366 return self._sizeAllocated 

1367 

1368 def _loadInfoSizeAllocated(self): 

1369 """ 

1370 Get the allocated size of the VHD volume. 

1371 """ 

1372 if self.raw: 

1373 return 

1374 self._activate() 

1375 self._sizeAllocated = vhdutil.getAllocatedSize(self.path) 

1376 

1377 @override 

1378 def _loadInfoHidden(self) -> None: 

1379 if self.raw: 

1380 self.hidden = self.sr.lvmCache.getHidden(self.fileName) 

1381 else: 

1382 VDI._loadInfoHidden(self) 

1383 

1384 @override 

1385 def _setHidden(self, hidden=True) -> None: 

1386 if self.raw: 

1387 self.sr.lvmCache.setHidden(self.fileName, hidden) 

1388 self.hidden = hidden 

1389 else: 

1390 VDI._setHidden(self, hidden) 

1391 

1392 @override 

1393 def __str__(self) -> str: 

1394 strType = "VHD" 

1395 if self.raw: 

1396 strType = "RAW" 

1397 strHidden = "" 

1398 if self.hidden: 

1399 strHidden = "*" 

1400 strSizeVHD = "" 

1401 if self._sizeVHD > 0: 

1402 strSizeVHD = Util.num2str(self._sizeVHD) 

1403 strSizeAllocated = "" 

1404 if self._sizeAllocated >= 0: 

1405 strSizeAllocated = Util.num2str(self._sizeAllocated) 

1406 strActive = "n" 

1407 if self.lvActive: 

1408 strActive = "a" 

1409 if self.lvOpen: 

1410 strActive += "o" 

1411 return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType, 

1412 Util.num2str(self.sizeVirt), strSizeVHD, strSizeAllocated, 

1413 Util.num2str(self.sizeLV), strActive) 

1414 

1415 @override 

1416 def validate(self, fast=False) -> None: 

1417 if not self.raw: 

1418 VDI.validate(self, fast) 

1419 

1420 @override 

1421 def _doCoalesce(self) -> None: 

1422 """LVHD parents must first be activated, inflated, and made writable""" 

1423 try: 

1424 self._activateChain() 

1425 self.sr.lvmCache.setReadonly(self.parent.fileName, False) 

1426 self.parent.validate() 

1427 self.inflateParentForCoalesce() 

1428 VDI._doCoalesce(self) 

1429 finally: 

1430 self.parent._loadInfoSizeVHD() 

1431 self.parent.deflate() 

1432 self.sr.lvmCache.setReadonly(self.parent.fileName, True) 

1433 

1434 @override 

1435 def _setParent(self, parent) -> None: 

1436 self._activate() 

1437 if self.lvReadonly: 

1438 self.sr.lvmCache.setReadonly(self.fileName, False) 

1439 

1440 try: 

1441 vhdutil.setParent(self.path, parent.path, parent.raw) 

1442 finally: 

1443 if self.lvReadonly: 

1444 self.sr.lvmCache.setReadonly(self.fileName, True) 

1445 self._deactivate() 

1446 self.parent = parent 

1447 self.parentUuid = parent.uuid 

1448 parent.children.append(self) 

1449 try: 

1450 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1451 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1452 (self.uuid, self.parentUuid)) 

1453 except: 

1454 Util.log("Failed to update the vhd-parent with %s for child %s" % \ 

1455 (self.parentUuid, self.uuid)) 

1456 

1457 def _activate(self): 

1458 self.sr.lvActivator.activate(self.uuid, self.fileName, False) 

1459 

1460 def _activateChain(self): 

1461 vdi = self 

1462 while vdi: 

1463 vdi._activate() 

1464 vdi = vdi.parent 

1465 

1466 def _deactivate(self): 

1467 self.sr.lvActivator.deactivate(self.uuid, False) 

1468 

1469 @override 

1470 def _increaseSizeVirt(self, size, atomic=True) -> None: 

1471 "ensure the virtual size of 'self' is at least 'size'" 

1472 self._activate() 

1473 if not self.raw: 

1474 VDI._increaseSizeVirt(self, size, atomic) 

1475 return 

1476 

1477 # raw VDI case 

1478 offset = self.sizeLV 

1479 if self.sizeVirt < size: 

1480 oldSize = self.sizeLV 

1481 self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size) 

1482 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV)) 

1483 self.sr.lvmCache.setSize(self.fileName, self.sizeLV) 

1484 offset = oldSize 

1485 unfinishedZero = False 

1486 jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid) 

1487 if jval: 

1488 unfinishedZero = True 

1489 offset = int(jval) 

1490 length = self.sizeLV - offset 

1491 if not length: 

1492 return 

1493 

1494 if unfinishedZero: 

1495 Util.log(" ==> Redoing unfinished zeroing out") 

1496 else: 

1497 self.sr.journaler.create(self.JRN_ZERO, self.uuid, \ 

1498 str(offset)) 

1499 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1500 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1501 func = lambda: util.zeroOut(self.path, offset, length) 

1502 Util.runAbortable(func, True, self.sr.uuid, abortTest, 

1503 VDI.POLL_INTERVAL, 0) 

1504 self.sr.journaler.remove(self.JRN_ZERO, self.uuid) 

1505 

1506 @override 

1507 def _setSizeVirt(self, size) -> None: 

1508 """WARNING: do not call this method directly unless all VDIs in the 

1509 subtree are guaranteed to be unplugged (and remain so for the duration 

1510 of the operation): this operation is only safe for offline VHDs""" 

1511 self._activate() 

1512 jFile = lvhdutil.createVHDJournalLV(self.sr.lvmCache, self.uuid, 

1513 vhdutil.MAX_VHD_JOURNAL_SIZE) 

1514 try: 

1515 lvhdutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, 

1516 size, jFile) 

1517 finally: 

1518 lvhdutil.deleteVHDJournalLV(self.sr.lvmCache, self.uuid) 

1519 

1520 @override 

1521 def _queryVHDBlocks(self) -> bytes: 

1522 self._activate() 

1523 return VDI._queryVHDBlocks(self) 

1524 

1525 @override 

1526 def _calcExtraSpaceForCoalescing(self) -> int: 

1527 if self.parent.raw: 

1528 return 0 # raw parents are never deflated in the first place 

1529 sizeCoalesced = lvhdutil.calcSizeVHDLV(self._getCoalescedSizeData()) 

1530 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1531 return sizeCoalesced - self.parent.sizeLV 

1532 

1533 @override 

1534 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1535 """How much extra space in the SR will be required to 

1536 [live-]leaf-coalesce this VDI""" 

1537 # we can deflate the leaf to minimize the space requirements 

1538 deflateDiff = self.sizeLV - lvhdutil.calcSizeLV(self.getSizeVHD()) 

1539 return self._calcExtraSpaceForCoalescing() - deflateDiff 

1540 

1541 @override 

1542 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1543 return self._calcExtraSpaceForCoalescing() + \ 

1544 lvhdutil.calcSizeLV(self.getSizeVHD()) 

1545 

1546 

1547class LinstorVDI(VDI): 

1548 """Object representing a VDI in a LINSTOR SR""" 

1549 

1550 VOLUME_LOCK_TIMEOUT = 30 

1551 

1552 @override 

1553 def load(self, info=None) -> None: 

1554 self.parentUuid = info.parentUuid 

1555 self.scanError = True 

1556 self.parent = None 

1557 self.children = [] 

1558 

1559 self.fileName = self.sr._linstor.get_volume_name(self.uuid) 

1560 self.path = self.sr._linstor.build_device_path(self.fileName) 

1561 

1562 if not info: 

1563 try: 

1564 info = self.sr._vhdutil.get_vhd_info(self.uuid) 

1565 except util.SMException: 

1566 Util.log( 

1567 ' [VDI {}: failed to read VHD metadata]'.format(self.uuid) 

1568 ) 

1569 return 

1570 

1571 self.parentUuid = info.parentUuid 

1572 self.sizeVirt = info.sizeVirt 

1573 self._sizeVHD = -1 

1574 self._sizeAllocated = -1 

1575 self.drbd_size = -1 

1576 self.hidden = info.hidden 

1577 self.scanError = False 

1578 self.vdi_type = vhdutil.VDI_TYPE_VHD 

1579 

1580 @override 

1581 def getSizeVHD(self, fetch=False) -> int: 

1582 if self._sizeVHD < 0 or fetch: 

1583 self._sizeVHD = self.sr._vhdutil.get_size_phys(self.uuid) 

1584 return self._sizeVHD 

1585 

1586 def getDrbdSize(self, fetch=False): 

1587 if self.drbd_size < 0 or fetch: 

1588 self.drbd_size = self.sr._vhdutil.get_drbd_size(self.uuid) 

1589 return self.drbd_size 

1590 

1591 @override 

1592 def getAllocatedSize(self) -> int: 

1593 if self._sizeAllocated == -1: 

1594 if not self.raw: 

1595 self._sizeAllocated = self.sr._vhdutil.get_allocated_size(self.uuid) 

1596 return self._sizeAllocated 

1597 

1598 def inflate(self, size): 

1599 if self.raw: 

1600 return 

1601 self.sr.lock() 

1602 try: 

1603 # Ensure we use the real DRBD size and not the cached one. 

1604 # Why? Because this attribute can be changed if volume is resized by user. 

1605 self.drbd_size = self.getDrbdSize(fetch=True) 

1606 self.sr._vhdutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size) 

1607 finally: 

1608 self.sr.unlock() 

1609 self.drbd_size = -1 

1610 self._sizeVHD = -1 

1611 self._sizeAllocated = -1 

1612 

1613 def deflate(self): 

1614 if self.raw: 

1615 return 

1616 self.sr.lock() 

1617 try: 

1618 # Ensure we use the real sizes and not the cached info. 

1619 self.drbd_size = self.getDrbdSize(fetch=True) 

1620 self._sizeVHD = self.getSizeVHD(fetch=True) 

1621 self.sr._vhdutil.force_deflate(self.path, self._sizeVHD, self.drbd_size, zeroize=False) 

1622 finally: 

1623 self.sr.unlock() 

1624 self.drbd_size = -1 

1625 self._sizeVHD = -1 

1626 self._sizeAllocated = -1 

1627 

1628 def inflateFully(self): 

1629 if not self.raw: 

1630 self.inflate(LinstorVhdUtil.compute_volume_size(self.sizeVirt, self.vdi_type)) 

1631 

1632 @override 

1633 def rename(self, uuid) -> None: 

1634 Util.log('Renaming {} -> {} (path={})'.format( 

1635 self.uuid, uuid, self.path 

1636 )) 

1637 self.sr._linstor.update_volume_uuid(self.uuid, uuid) 

1638 VDI.rename(self, uuid) 

1639 

1640 @override 

1641 def delete(self) -> None: 

1642 if len(self.children) > 0: 

1643 raise util.SMException( 

1644 'VDI {} has children, can\'t delete'.format(self.uuid) 

1645 ) 

1646 self.sr.lock() 

1647 try: 

1648 self.sr._linstor.destroy_volume(self.uuid) 

1649 self.sr.forgetVDI(self.uuid) 

1650 finally: 

1651 self.sr.unlock() 

1652 VDI.delete(self) 

1653 

1654 @override 

1655 def validate(self, fast=False) -> None: 

1656 if not self.raw and not self.sr._vhdutil.check(self.uuid, fast=fast): 

1657 raise util.SMException('VHD {} corrupted'.format(self)) 

1658 

1659 @override 

1660 def pause(self, failfast=False) -> None: 

1661 self.sr._linstor.ensure_volume_is_not_locked( 

1662 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1663 ) 

1664 return super(LinstorVDI, self).pause(failfast) 

1665 

1666 @override 

1667 def coalesce(self) -> int: 

1668 # Note: We raise `SMException` here to skip the current coalesce in case of failure. 

1669 # Using another exception we can't execute the next coalesce calls. 

1670 return self.sr._vhdutil.force_coalesce(self.path) * 512 

1671 

1672 @override 

1673 def getParent(self) -> str: 

1674 return self.sr._vhdutil.get_parent( 

1675 self.sr._linstor.get_volume_uuid_from_device_path(self.path) 

1676 ) 

1677 

1678 @override 

1679 def repair(self, parent_uuid) -> None: 

1680 self.sr._vhdutil.force_repair( 

1681 self.sr._linstor.get_device_path(parent_uuid) 

1682 ) 

1683 

1684 @override 

1685 def _relinkSkip(self) -> None: 

1686 abortFlag = IPCFlag(self.sr.uuid) 

1687 for child in self.children: 

1688 if abortFlag.test(FLAG_TYPE_ABORT): 

1689 raise AbortException('Aborting due to signal') 

1690 Util.log( 

1691 ' Relinking {} from {} to {}'.format( 

1692 child, self, self.parent 

1693 ) 

1694 ) 

1695 

1696 session = child.sr.xapi.session 

1697 sr_uuid = child.sr.uuid 

1698 vdi_uuid = child.uuid 

1699 try: 

1700 self.sr._linstor.ensure_volume_is_not_locked( 

1701 vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1702 ) 

1703 blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid) 

1704 child._setParent(self.parent) 

1705 finally: 

1706 blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) 

1707 self.children = [] 

1708 

1709 @override 

1710 def _setParent(self, parent) -> None: 

1711 self.sr._linstor.get_device_path(self.uuid) 

1712 self.sr._vhdutil.force_parent(self.path, parent.path) 

1713 self.parent = parent 

1714 self.parentUuid = parent.uuid 

1715 parent.children.append(self) 

1716 try: 

1717 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1718 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1719 (self.uuid, self.parentUuid)) 

1720 except: 

1721 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1722 (self.uuid, self.parentUuid)) 

1723 

1724 @override 

1725 def _doCoalesce(self) -> None: 

1726 try: 

1727 self._activateChain() 

1728 self.parent.validate() 

1729 self._inflateParentForCoalesce() 

1730 VDI._doCoalesce(self) 

1731 finally: 

1732 self.parent.deflate() 

1733 

1734 def _activateChain(self): 

1735 vdi = self 

1736 while vdi: 

1737 try: 

1738 p = self.sr._linstor.get_device_path(vdi.uuid) 

1739 except Exception as e: 

1740 # Use SMException to skip coalesce. 

1741 # Otherwise the GC is stopped... 

1742 raise util.SMException(str(e)) 

1743 vdi = vdi.parent 

1744 

1745 @override 

1746 def _setHidden(self, hidden=True) -> None: 

1747 HIDDEN_TAG = 'hidden' 

1748 

1749 if self.raw: 

1750 self.sr._linstor.update_volume_metadata(self.uuid, { 

1751 HIDDEN_TAG: hidden 

1752 }) 

1753 self.hidden = hidden 

1754 else: 

1755 VDI._setHidden(self, hidden) 

1756 

1757 @override 

1758 def _increaseSizeVirt(self, size, atomic=True): 

1759 if self.raw: 

1760 offset = self.drbd_size 

1761 if self.sizeVirt < size: 

1762 oldSize = self.drbd_size 

1763 self.drbd_size = LinstorVolumeManager.round_up_volume_size(size) 

1764 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size)) 

1765 self.sr._linstor.resize_volume(self.uuid, self.drbd_size) 

1766 offset = oldSize 

1767 unfinishedZero = False 

1768 jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid) 

1769 if jval: 

1770 unfinishedZero = True 

1771 offset = int(jval) 

1772 length = self.drbd_size - offset 

1773 if not length: 

1774 return 

1775 

1776 if unfinishedZero: 

1777 Util.log(" ==> Redoing unfinished zeroing out") 

1778 else: 

1779 self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset)) 

1780 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1781 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1782 func = lambda: util.zeroOut(self.path, offset, length) 

1783 Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0) 

1784 self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid) 

1785 return 

1786 

1787 if self.sizeVirt >= size: 

1788 return 

1789 Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \ 

1790 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1791 

1792 msize = self.sr._vhdutil.get_max_resize_size(self.uuid) * 1024 * 1024 

1793 if (size <= msize): 

1794 self.sr._vhdutil.set_size_virt_fast(self.path, size) 

1795 else: 

1796 if atomic: 

1797 vdiList = self._getAllSubtree() 

1798 self.sr.lock() 

1799 try: 

1800 self.sr.pauseVDIs(vdiList) 

1801 try: 

1802 self._setSizeVirt(size) 

1803 finally: 

1804 self.sr.unpauseVDIs(vdiList) 

1805 finally: 

1806 self.sr.unlock() 

1807 else: 

1808 self._setSizeVirt(size) 

1809 

1810 self.sizeVirt = self.sr._vhdutil.get_size_virt(self.uuid) 

1811 

1812 @override 

1813 def _setSizeVirt(self, size) -> None: 

1814 jfile = self.uuid + '-jvhd' 

1815 self.sr._linstor.create_volume( 

1816 jfile, vhdutil.MAX_VHD_JOURNAL_SIZE, persistent=False, volume_name=jfile 

1817 ) 

1818 try: 

1819 self.inflate(LinstorVhdUtil.compute_volume_size(size, self.vdi_type)) 

1820 self.sr._vhdutil.set_size_virt(size, jfile) 

1821 finally: 

1822 try: 

1823 self.sr._linstor.destroy_volume(jfile) 

1824 except Exception: 

1825 # We can ignore it, in any case this volume is not persistent. 

1826 pass 

1827 

1828 @override 

1829 def _queryVHDBlocks(self) -> bytes: 

1830 return self.sr._vhdutil.get_block_bitmap(self.uuid) 

1831 

1832 def _inflateParentForCoalesce(self): 

1833 if self.parent.raw: 

1834 return 

1835 inc = self._calcExtraSpaceForCoalescing() 

1836 if inc > 0: 

1837 self.parent.inflate(self.parent.getDrbdSize() + inc) 

1838 

1839 @override 

1840 def _calcExtraSpaceForCoalescing(self) -> int: 

1841 if self.parent.raw: 

1842 return 0 

1843 size_coalesced = LinstorVhdUtil.compute_volume_size( 

1844 self._getCoalescedSizeData(), self.vdi_type 

1845 ) 

1846 Util.log("Coalesced size = %s" % Util.num2str(size_coalesced)) 

1847 return size_coalesced - self.parent.getDrbdSize() 

1848 

1849 @override 

1850 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1851 assert self.getDrbdSize() > 0 

1852 assert self.getSizeVHD() > 0 

1853 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizeVHD()) 

1854 assert deflate_diff >= 0 

1855 return self._calcExtraSpaceForCoalescing() - deflate_diff 

1856 

1857 @override 

1858 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1859 assert self.getSizeVHD() > 0 

1860 return self._calcExtraSpaceForCoalescing() + \ 

1861 LinstorVolumeManager.round_up_volume_size(self.getSizeVHD()) 

1862 

1863################################################################################ 

1864# 

1865# SR 

1866# 

1867class SR(object): 

1868 class LogFilter: 

1869 def __init__(self, sr): 

1870 self.sr = sr 

1871 self.stateLogged = False 

1872 self.prevState = {} 

1873 self.currState = {} 

1874 

1875 def logState(self): 

1876 changes = "" 

1877 self.currState.clear() 

1878 for vdi in self.sr.vdiTrees: 

1879 self.currState[vdi.uuid] = self._getTreeStr(vdi) 

1880 if not self.prevState.get(vdi.uuid) or \ 

1881 self.prevState[vdi.uuid] != self.currState[vdi.uuid]: 

1882 changes += self.currState[vdi.uuid] 

1883 

1884 for uuid in self.prevState: 

1885 if not self.currState.get(uuid): 

1886 changes += "Tree %s gone\n" % uuid 

1887 

1888 result = "SR %s (%d VDIs in %d VHD trees): " % \ 

1889 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees)) 

1890 

1891 if len(changes) > 0: 

1892 if self.stateLogged: 

1893 result += "showing only VHD trees that changed:" 

1894 result += "\n%s" % changes 

1895 else: 

1896 result += "no changes" 

1897 

1898 for line in result.split("\n"): 

1899 Util.log("%s" % line) 

1900 self.prevState.clear() 

1901 for key, val in self.currState.items(): 

1902 self.prevState[key] = val 

1903 self.stateLogged = True 

1904 

1905 def logNewVDI(self, uuid): 

1906 if self.stateLogged: 

1907 Util.log("Found new VDI when scanning: %s" % uuid) 

1908 

1909 def _getTreeStr(self, vdi, indent=8): 

1910 treeStr = "%s%s\n" % (" " * indent, vdi) 

1911 for child in vdi.children: 

1912 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT) 

1913 return treeStr 

1914 

1915 TYPE_FILE = "file" 

1916 TYPE_LVHD = "lvhd" 

1917 TYPE_LINSTOR = "linstor" 

1918 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR] 

1919 

1920 LOCK_RETRY_INTERVAL = 3 

1921 LOCK_RETRY_ATTEMPTS = 20 

1922 LOCK_RETRY_ATTEMPTS_LOCK = 100 

1923 

1924 SCAN_RETRY_ATTEMPTS = 3 

1925 

1926 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM) 

1927 TMP_RENAME_PREFIX = "OLD_" 

1928 

1929 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline" 

1930 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override" 

1931 

1932 @staticmethod 

1933 def getInstance(uuid, xapiSession, createLock=True, force=False): 

1934 xapi = XAPI(xapiSession, uuid) 

1935 type = normalizeType(xapi.srRecord["type"]) 

1936 if type == SR.TYPE_FILE: 

1937 return FileSR(uuid, xapi, createLock, force) 

1938 elif type == SR.TYPE_LVHD: 

1939 return LVHDSR(uuid, xapi, createLock, force) 

1940 elif type == SR.TYPE_LINSTOR: 

1941 return LinstorSR(uuid, xapi, createLock, force) 

1942 raise util.SMException("SR type %s not recognized" % type) 

1943 

1944 def __init__(self, uuid, xapi, createLock, force): 

1945 self.logFilter = self.LogFilter(self) 

1946 self.uuid = uuid 

1947 self.path = "" 

1948 self.name = "" 

1949 self.vdis = {} 

1950 self.vdiTrees = [] 

1951 self.journaler = None 

1952 self.xapi = xapi 

1953 self._locked = 0 

1954 self._srLock = None 

1955 if createLock: 1955 ↛ 1956line 1955 didn't jump to line 1956, because the condition on line 1955 was never true

1956 self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, self.uuid) 

1957 else: 

1958 Util.log("Requested no SR locking") 

1959 self.name = self.xapi.srRecord["name_label"] 

1960 self._failedCoalesceTargets = [] 

1961 

1962 if not self.xapi.isPluggedHere(): 

1963 if force: 1963 ↛ 1964line 1963 didn't jump to line 1964, because the condition on line 1963 was never true

1964 Util.log("SR %s not attached on this host, ignoring" % uuid) 

1965 else: 

1966 if not self.wait_for_plug(): 

1967 raise util.SMException("SR %s not attached on this host" % uuid) 

1968 

1969 if force: 1969 ↛ 1970line 1969 didn't jump to line 1970, because the condition on line 1969 was never true

1970 Util.log("Not checking if we are Master (SR %s)" % uuid) 

1971 elif not self.xapi.isMaster(): 1971 ↛ 1972line 1971 didn't jump to line 1972, because the condition on line 1971 was never true

1972 raise util.SMException("This host is NOT master, will not run") 

1973 

1974 self.no_space_candidates = {} 

1975 

1976 def msg_cleared(self, xapi_session, msg_ref): 

1977 try: 

1978 msg = xapi_session.xenapi.message.get_record(msg_ref) 

1979 except XenAPI.Failure: 

1980 return True 

1981 

1982 return msg is None 

1983 

1984 def check_no_space_candidates(self): 

1985 xapi_session = self.xapi.getSession() 

1986 

1987 msg_id = self.xapi.srRecord["sm_config"].get(VDI.DB_GC_NO_SPACE) 

1988 if self.no_space_candidates: 

1989 if msg_id is None or self.msg_cleared(xapi_session, msg_id): 

1990 util.SMlog("Could not coalesce due to a lack of space " 

1991 f"in SR {self.uuid}") 

1992 msg_body = ("Unable to perform data coalesce due to a lack " 

1993 f"of space in SR {self.uuid}") 

1994 msg_id = xapi_session.xenapi.message.create( 

1995 'SM_GC_NO_SPACE', 

1996 3, 

1997 "SR", 

1998 self.uuid, 

1999 msg_body) 

2000 xapi_session.xenapi.SR.remove_from_sm_config( 

2001 self.xapi.srRef, VDI.DB_GC_NO_SPACE) 

2002 xapi_session.xenapi.SR.add_to_sm_config( 

2003 self.xapi.srRef, VDI.DB_GC_NO_SPACE, msg_id) 

2004 

2005 for candidate in self.no_space_candidates.values(): 

2006 candidate.setConfig(VDI.DB_GC_NO_SPACE, msg_id) 

2007 elif msg_id is not None: 

2008 # Everything was coalescable, remove the message 

2009 xapi_session.xenapi.message.destroy(msg_id) 

2010 

2011 def clear_no_space_msg(self, vdi): 

2012 msg_id = None 

2013 try: 

2014 msg_id = vdi.getConfig(VDI.DB_GC_NO_SPACE) 

2015 except XenAPI.Failure: 

2016 pass 

2017 

2018 self.no_space_candidates.pop(vdi.uuid, None) 

2019 if msg_id is not None: 2019 ↛ exitline 2019 didn't return from function 'clear_no_space_msg', because the condition on line 2019 was never false

2020 vdi.delConfig(VDI.DB_GC_NO_SPACE) 

2021 

2022 

2023 def wait_for_plug(self): 

2024 for _ in range(1, 10): 

2025 time.sleep(2) 

2026 if self.xapi.isPluggedHere(): 

2027 return True 

2028 return False 

2029 

2030 def gcEnabled(self, refresh=True): 

2031 if refresh: 

2032 self.xapi.srRecord = \ 

2033 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) 

2034 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": 

2035 Util.log("GC is disabled for this SR, abort") 

2036 return False 

2037 return True 

2038 

2039 def scan(self, force=False) -> None: 

2040 """Scan the SR and load VDI info for each VDI. If called repeatedly, 

2041 update VDI objects if they already exist""" 

2042 pass 

2043 

2044 def scanLocked(self, force=False): 

2045 self.lock() 

2046 try: 

2047 self.scan(force) 

2048 finally: 

2049 self.unlock() 

2050 

2051 def getVDI(self, uuid): 

2052 return self.vdis.get(uuid) 

2053 

2054 def hasWork(self): 

2055 if len(self.findGarbage()) > 0: 

2056 return True 

2057 if self.findCoalesceable(): 

2058 return True 

2059 if self.findLeafCoalesceable(): 

2060 return True 

2061 if self.needUpdateBlockInfo(): 

2062 return True 

2063 return False 

2064 

2065 def findCoalesceable(self): 

2066 """Find a coalesceable VDI. Return a vdi that should be coalesced 

2067 (choosing one among all coalesceable candidates according to some 

2068 criteria) or None if there is no VDI that could be coalesced""" 

2069 

2070 candidates = [] 

2071 

2072 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE) 

2073 if srSwitch == "false": 

2074 Util.log("Coalesce disabled for this SR") 

2075 return candidates 

2076 

2077 # finish any VDI for which a relink journal entry exists first 

2078 journals = self.journaler.getAll(VDI.JRN_RELINK) 

2079 for uuid in journals: 

2080 vdi = self.getVDI(uuid) 

2081 if vdi and vdi not in self._failedCoalesceTargets: 

2082 return vdi 

2083 

2084 for vdi in self.vdis.values(): 

2085 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets: 

2086 candidates.append(vdi) 

2087 Util.log("%s is coalescable" % vdi.uuid) 

2088 

2089 self.xapi.update_task_progress("coalescable", len(candidates)) 

2090 

2091 # pick one in the tallest tree 

2092 treeHeight = dict() 

2093 for c in candidates: 

2094 height = c.getTreeRoot().getTreeHeight() 

2095 if treeHeight.get(height): 

2096 treeHeight[height].append(c) 

2097 else: 

2098 treeHeight[height] = [c] 

2099 

2100 freeSpace = self.getFreeSpace() 

2101 heights = list(treeHeight.keys()) 

2102 heights.sort(reverse=True) 

2103 for h in heights: 

2104 for c in treeHeight[h]: 

2105 spaceNeeded = c._calcExtraSpaceForCoalescing() 

2106 if spaceNeeded <= freeSpace: 

2107 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h)) 

2108 self.clear_no_space_msg(c) 

2109 return c 

2110 else: 

2111 self.no_space_candidates[c.uuid] = c 

2112 Util.log("No space to coalesce %s (free space: %d)" % \ 

2113 (c, freeSpace)) 

2114 return None 

2115 

2116 def getSwitch(self, key): 

2117 return self.xapi.srRecord["other_config"].get(key) 

2118 

2119 def forbiddenBySwitch(self, switch, condition, fail_msg): 

2120 srSwitch = self.getSwitch(switch) 

2121 ret = False 

2122 if srSwitch: 

2123 ret = srSwitch == condition 

2124 

2125 if ret: 

2126 Util.log(fail_msg) 

2127 

2128 return ret 

2129 

2130 def leafCoalesceForbidden(self): 

2131 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

2132 "false", 

2133 "Coalesce disabled for this SR") or 

2134 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

2135 VDI.LEAFCLSC_DISABLED, 

2136 "Leaf-coalesce disabled for this SR")) 

2137 

2138 def findLeafCoalesceable(self): 

2139 """Find leaf-coalesceable VDIs in each VHD tree""" 

2140 

2141 candidates = [] 

2142 if self.leafCoalesceForbidden(): 

2143 return candidates 

2144 

2145 self.gatherLeafCoalesceable(candidates) 

2146 

2147 self.xapi.update_task_progress("coalescable", len(candidates)) 

2148 

2149 freeSpace = self.getFreeSpace() 

2150 for candidate in candidates: 

2151 # check the space constraints to see if leaf-coalesce is actually 

2152 # feasible for this candidate 

2153 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing() 

2154 spaceNeededLive = spaceNeeded 

2155 if spaceNeeded > freeSpace: 

2156 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing() 

2157 if candidate.canLiveCoalesce(self.getStorageSpeed()): 

2158 spaceNeeded = spaceNeededLive 

2159 

2160 if spaceNeeded <= freeSpace: 

2161 Util.log("Leaf-coalesce candidate: %s" % candidate) 

2162 self.clear_no_space_msg(candidate) 

2163 return candidate 

2164 else: 

2165 Util.log("No space to leaf-coalesce %s (free space: %d)" % \ 

2166 (candidate, freeSpace)) 

2167 if spaceNeededLive <= freeSpace: 

2168 Util.log("...but enough space if skip snap-coalesce") 

2169 candidate.setConfig(VDI.DB_LEAFCLSC, 

2170 VDI.LEAFCLSC_OFFLINE) 

2171 self.no_space_candidates[candidate.uuid] = candidate 

2172 

2173 return None 

2174 

2175 def gatherLeafCoalesceable(self, candidates): 

2176 for vdi in self.vdis.values(): 

2177 if not vdi.isLeafCoalesceable(): 

2178 continue 

2179 if vdi in self._failedCoalesceTargets: 

2180 continue 

2181 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET: 

2182 Util.log("Skipping reset-on-boot %s" % vdi) 

2183 continue 

2184 if vdi.getConfig(vdi.DB_ALLOW_CACHING): 

2185 Util.log("Skipping allow_caching=true %s" % vdi) 

2186 continue 

2187 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED: 

2188 Util.log("Leaf-coalesce disabled for %s" % vdi) 

2189 continue 

2190 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or 

2191 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE): 

2192 continue 

2193 candidates.append(vdi) 

2194 

2195 def coalesce(self, vdi, dryRun=False): 

2196 """Coalesce vdi onto parent""" 

2197 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent)) 

2198 if dryRun: 2198 ↛ 2199line 2198 didn't jump to line 2199, because the condition on line 2198 was never true

2199 return 

2200 

2201 try: 

2202 self._coalesce(vdi) 

2203 except util.SMException as e: 

2204 if isinstance(e, AbortException): 2204 ↛ 2205line 2204 didn't jump to line 2205, because the condition on line 2204 was never true

2205 self.cleanup() 

2206 raise 

2207 else: 

2208 self._failedCoalesceTargets.append(vdi) 

2209 Util.logException("coalesce") 

2210 Util.log("Coalesce failed, skipping") 

2211 self.cleanup() 

2212 

2213 def coalesceLeaf(self, vdi, dryRun=False): 

2214 """Leaf-coalesce vdi onto parent""" 

2215 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent)) 

2216 if dryRun: 

2217 return 

2218 

2219 try: 

2220 uuid = vdi.uuid 

2221 try: 

2222 # "vdi" object will no longer be valid after this call 

2223 self._coalesceLeaf(vdi) 

2224 finally: 

2225 vdi = self.getVDI(uuid) 

2226 if vdi: 

2227 vdi.delConfig(vdi.DB_LEAFCLSC) 

2228 except AbortException: 

2229 self.cleanup() 

2230 raise 

2231 except (util.SMException, XenAPI.Failure) as e: 

2232 self._failedCoalesceTargets.append(vdi) 

2233 Util.logException("leaf-coalesce") 

2234 Util.log("Leaf-coalesce failed on %s, skipping" % vdi) 

2235 self.cleanup() 

2236 

2237 def garbageCollect(self, dryRun=False): 

2238 vdiList = self.findGarbage() 

2239 Util.log("Found %d VDIs for deletion:" % len(vdiList)) 

2240 for vdi in vdiList: 

2241 Util.log(" %s" % vdi) 

2242 if not dryRun: 

2243 self.deleteVDIs(vdiList) 

2244 self.cleanupJournals(dryRun) 

2245 

2246 def findGarbage(self): 

2247 vdiList = [] 

2248 for vdi in self.vdiTrees: 

2249 vdiList.extend(vdi.getAllPrunable()) 

2250 return vdiList 

2251 

2252 def deleteVDIs(self, vdiList) -> None: 

2253 for vdi in vdiList: 

2254 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT): 

2255 raise AbortException("Aborting due to signal") 

2256 Util.log("Deleting unlinked VDI %s" % vdi) 

2257 self.deleteVDI(vdi) 

2258 

2259 def deleteVDI(self, vdi) -> None: 

2260 assert(len(vdi.children) == 0) 

2261 del self.vdis[vdi.uuid] 

2262 if vdi.parent: 2262 ↛ 2264line 2262 didn't jump to line 2264, because the condition on line 2262 was never false

2263 vdi.parent.children.remove(vdi) 

2264 if vdi in self.vdiTrees: 2264 ↛ 2265line 2264 didn't jump to line 2265, because the condition on line 2264 was never true

2265 self.vdiTrees.remove(vdi) 

2266 vdi.delete() 

2267 

2268 def forgetVDI(self, vdiUuid) -> None: 

2269 self.xapi.forgetVDI(self.uuid, vdiUuid) 

2270 

2271 def pauseVDIs(self, vdiList) -> None: 

2272 paused = [] 

2273 failed = False 

2274 for vdi in vdiList: 

2275 try: 

2276 vdi.pause() 

2277 paused.append(vdi) 

2278 except: 

2279 Util.logException("pauseVDIs") 

2280 failed = True 

2281 break 

2282 

2283 if failed: 

2284 self.unpauseVDIs(paused) 

2285 raise util.SMException("Failed to pause VDIs") 

2286 

2287 def unpauseVDIs(self, vdiList): 

2288 failed = False 

2289 for vdi in vdiList: 

2290 try: 

2291 vdi.unpause() 

2292 except: 

2293 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

2294 failed = True 

2295 if failed: 

2296 raise util.SMException("Failed to unpause VDIs") 

2297 

2298 def getFreeSpace(self) -> int: 

2299 return 0 

2300 

2301 def cleanup(self): 

2302 Util.log("In cleanup") 

2303 return 

2304 

2305 @override 

2306 def __str__(self) -> str: 

2307 if self.name: 

2308 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

2309 else: 

2310 ret = "%s" % self.uuid 

2311 return ret 

2312 

2313 def lock(self): 

2314 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort 

2315 signal to avoid deadlocking (trying to acquire the SR lock while the 

2316 lock is held by a process that is trying to abort us)""" 

2317 if not self._srLock: 

2318 return 

2319 

2320 if self._locked == 0: 

2321 abortFlag = IPCFlag(self.uuid) 

2322 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK): 

2323 if self._srLock.acquireNoblock(): 

2324 self._locked += 1 

2325 return 

2326 if abortFlag.test(FLAG_TYPE_ABORT): 

2327 raise AbortException("Abort requested") 

2328 time.sleep(SR.LOCK_RETRY_INTERVAL) 

2329 raise util.SMException("Unable to acquire the SR lock") 

2330 

2331 self._locked += 1 

2332 

2333 def unlock(self): 

2334 if not self._srLock: 2334 ↛ 2336line 2334 didn't jump to line 2336, because the condition on line 2334 was never false

2335 return 

2336 assert(self._locked > 0) 

2337 self._locked -= 1 

2338 if self._locked == 0: 

2339 self._srLock.release() 

2340 

2341 def needUpdateBlockInfo(self) -> bool: 

2342 for vdi in self.vdis.values(): 

2343 if vdi.scanError or len(vdi.children) == 0: 

2344 continue 

2345 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2346 return True 

2347 return False 

2348 

2349 def updateBlockInfo(self) -> None: 

2350 for vdi in self.vdis.values(): 

2351 if vdi.scanError or len(vdi.children) == 0: 

2352 continue 

2353 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2354 vdi.updateBlockInfo() 

2355 

2356 def cleanupCoalesceJournals(self): 

2357 """Remove stale coalesce VDI indicators""" 

2358 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

2359 for uuid, jval in entries.items(): 

2360 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

2361 

2362 def cleanupJournals(self, dryRun=False): 

2363 """delete journal entries for non-existing VDIs""" 

2364 for t in [LVHDVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]: 

2365 entries = self.journaler.getAll(t) 

2366 for uuid, jval in entries.items(): 

2367 if self.getVDI(uuid): 

2368 continue 

2369 if t == SR.JRN_CLONE: 

2370 baseUuid, clonUuid = jval.split("_") 

2371 if self.getVDI(baseUuid): 

2372 continue 

2373 Util.log(" Deleting stale '%s' journal entry for %s " 

2374 "(%s)" % (t, uuid, jval)) 

2375 if not dryRun: 

2376 self.journaler.remove(t, uuid) 

2377 

2378 def cleanupCache(self, maxAge=-1) -> int: 

2379 return 0 

2380 

2381 def _coalesce(self, vdi): 

2382 if self.journaler.get(vdi.JRN_RELINK, vdi.uuid): 2382 ↛ 2385line 2382 didn't jump to line 2385, because the condition on line 2382 was never true

2383 # this means we had done the actual coalescing already and just 

2384 # need to finish relinking and/or refreshing the children 

2385 Util.log("==> Coalesce apparently already done: skipping") 

2386 else: 

2387 # JRN_COALESCE is used to check which VDI is being coalesced in 

2388 # order to decide whether to abort the coalesce. We remove the 

2389 # journal as soon as the VHD coalesce step is done, because we 

2390 # don't expect the rest of the process to take long 

2391 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1") 

2392 vdi._doCoalesce() 

2393 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid) 

2394 

2395 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid) 

2396 

2397 # we now need to relink the children: lock the SR to prevent ops 

2398 # like SM.clone from manipulating the VDIs we'll be relinking and 

2399 # rescan the SR first in case the children changed since the last 

2400 # scan 

2401 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1") 

2402 

2403 self.lock() 

2404 try: 

2405 vdi.parent._tagChildrenForRelink() 

2406 self.scan() 

2407 vdi._relinkSkip() 

2408 finally: 

2409 self.unlock() 

2410 # Reload the children to leave things consistent 

2411 vdi.parent._reloadChildren(vdi) 

2412 

2413 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid) 

2414 self.deleteVDI(vdi) 

2415 

2416 class CoalesceTracker: 

2417 GRACE_ITERATIONS = 2 

2418 MAX_ITERATIONS_NO_PROGRESS = 3 

2419 MAX_ITERATIONS = 10 

2420 MAX_INCREASE_FROM_MINIMUM = 1.2 

2421 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \ 

2422 " --> Final size {finSize}" 

2423 

2424 def __init__(self, sr): 

2425 self.itsNoProgress = 0 

2426 self.its = 0 

2427 self.minSize = float("inf") 

2428 self.history = [] 

2429 self.reason = "" 

2430 self.startSize = None 

2431 self.finishSize = None 

2432 self.sr = sr 

2433 self.grace_remaining = self.GRACE_ITERATIONS 

2434 

2435 def abortCoalesce(self, prevSize, curSize): 

2436 self.its += 1 

2437 self.history.append(self.HISTORY_STRING.format(its=self.its, 

2438 initSize=prevSize, 

2439 finSize=curSize)) 

2440 

2441 self.finishSize = curSize 

2442 

2443 if self.startSize is None: 

2444 self.startSize = prevSize 

2445 

2446 if curSize < self.minSize: 

2447 self.minSize = curSize 

2448 

2449 if prevSize < self.minSize: 

2450 self.minSize = prevSize 

2451 

2452 if self.its == 1: 

2453 # Skip evaluating conditions on first iteration 

2454 return False 

2455 

2456 if prevSize < curSize: 

2457 self.itsNoProgress += 1 

2458 Util.log("No progress, attempt:" 

2459 " {attempt}".format(attempt=self.itsNoProgress)) 

2460 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid) 

2461 else: 

2462 # We made progress 

2463 return False 

2464 

2465 if self.its > self.MAX_ITERATIONS: 

2466 max = self.MAX_ITERATIONS 

2467 self.reason = \ 

2468 "Max iterations ({max}) exceeded".format(max=max) 

2469 return True 

2470 

2471 if self.itsNoProgress > self.MAX_ITERATIONS_NO_PROGRESS: 

2472 max = self.MAX_ITERATIONS_NO_PROGRESS 

2473 self.reason = \ 

2474 "No progress made for {max} iterations".format(max=max) 

2475 return True 

2476 

2477 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize 

2478 if curSize > maxSizeFromMin: 

2479 self.grace_remaining -= 1 

2480 if self.grace_remaining == 0: 

2481 self.reason = "Unexpected bump in size," \ 

2482 " compared to minimum achieved" 

2483 

2484 return True 

2485 

2486 return False 

2487 

2488 def printSizes(self): 

2489 Util.log("Starting size was {size}" 

2490 .format(size=self.startSize)) 

2491 Util.log("Final size was {size}" 

2492 .format(size=self.finishSize)) 

2493 Util.log("Minimum size achieved was {size}" 

2494 .format(size=self.minSize)) 

2495 

2496 def printReasoning(self): 

2497 Util.log("Aborted coalesce") 

2498 for hist in self.history: 

2499 Util.log(hist) 

2500 Util.log(self.reason) 

2501 self.printSizes() 

2502 

2503 def printSummary(self): 

2504 if self.its == 0: 

2505 return 

2506 

2507 if self.reason: 2507 ↛ 2508line 2507 didn't jump to line 2508, because the condition on line 2507 was never true

2508 Util.log("Aborted coalesce") 

2509 Util.log(self.reason) 

2510 else: 

2511 Util.log("Coalesce summary") 

2512 

2513 Util.log(f"Performed {self.its} iterations") 

2514 self.printSizes() 

2515 

2516 

2517 def _coalesceLeaf(self, vdi): 

2518 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot 

2519 complete due to external changes, namely vdi_delete and vdi_snapshot 

2520 that alter leaf-coalescibility of vdi""" 

2521 tracker = self.CoalesceTracker(self) 

2522 while not vdi.canLiveCoalesce(self.getStorageSpeed()): 

2523 prevSizeVHD = vdi.getSizeVHD() 

2524 if not self._snapshotCoalesce(vdi): 2524 ↛ 2525line 2524 didn't jump to line 2525, because the condition on line 2524 was never true

2525 return False 

2526 if tracker.abortCoalesce(prevSizeVHD, vdi.getSizeVHD()): 

2527 tracker.printReasoning() 

2528 raise util.SMException("VDI {uuid} could not be coalesced" 

2529 .format(uuid=vdi.uuid)) 

2530 tracker.printSummary() 

2531 return self._liveLeafCoalesce(vdi) 

2532 

2533 def calcStorageSpeed(self, startTime, endTime, vhdSize): 

2534 speed = None 

2535 total_time = endTime - startTime 

2536 if total_time > 0: 

2537 speed = float(vhdSize) / float(total_time) 

2538 return speed 

2539 

2540 def writeSpeedToFile(self, speed): 

2541 content = [] 

2542 speedFile = None 

2543 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2544 self.lock() 

2545 try: 

2546 Util.log("Writing to file: {myfile}".format(myfile=path)) 

2547 lines = "" 

2548 if not os.path.isfile(path): 

2549 lines = str(speed) + "\n" 

2550 else: 

2551 speedFile = open(path, "r+") 

2552 content = speedFile.readlines() 

2553 content.append(str(speed) + "\n") 

2554 if len(content) > N_RUNNING_AVERAGE: 

2555 del content[0] 

2556 lines = "".join(content) 

2557 

2558 util.atomicFileWrite(path, VAR_RUN, lines) 

2559 finally: 

2560 if speedFile is not None: 

2561 speedFile.close() 

2562 Util.log("Closing file: {myfile}".format(myfile=path)) 

2563 self.unlock() 

2564 

2565 def recordStorageSpeed(self, startTime, endTime, vhdSize): 

2566 speed = self.calcStorageSpeed(startTime, endTime, vhdSize) 

2567 if speed is None: 

2568 return 

2569 

2570 self.writeSpeedToFile(speed) 

2571 

2572 def getStorageSpeed(self): 

2573 speedFile = None 

2574 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2575 self.lock() 

2576 try: 

2577 speed = None 

2578 if os.path.isfile(path): 

2579 speedFile = open(path) 

2580 content = speedFile.readlines() 

2581 try: 

2582 content = [float(i) for i in content] 

2583 except ValueError: 

2584 Util.log("Something bad in the speed log:{log}". 

2585 format(log=speedFile.readlines())) 

2586 return speed 

2587 

2588 if len(content): 

2589 speed = sum(content) / float(len(content)) 

2590 if speed <= 0: 2590 ↛ 2592line 2590 didn't jump to line 2592, because the condition on line 2590 was never true

2591 # Defensive, should be impossible. 

2592 Util.log("Bad speed: {speed} calculated for SR: {uuid}". 

2593 format(speed=speed, uuid=self.uuid)) 

2594 speed = None 

2595 else: 

2596 Util.log("Speed file empty for SR: {uuid}". 

2597 format(uuid=self.uuid)) 

2598 else: 

2599 Util.log("Speed log missing for SR: {uuid}". 

2600 format(uuid=self.uuid)) 

2601 return speed 

2602 finally: 

2603 if not (speedFile is None): 

2604 speedFile.close() 

2605 self.unlock() 

2606 

2607 def _snapshotCoalesce(self, vdi): 

2608 # Note that because we are not holding any locks here, concurrent SM 

2609 # operations may change this tree under our feet. In particular, vdi 

2610 # can be deleted, or it can be snapshotted. 

2611 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED) 

2612 Util.log("Single-snapshotting %s" % vdi) 

2613 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid) 

2614 try: 

2615 ret = self.xapi.singleSnapshotVDI(vdi) 

2616 Util.log("Single-snapshot returned: %s" % ret) 

2617 except XenAPI.Failure as e: 

2618 if util.isInvalidVDI(e): 

2619 Util.log("The VDI appears to have been concurrently deleted") 

2620 return False 

2621 raise 

2622 self.scanLocked() 

2623 tempSnap = vdi.parent 

2624 if not tempSnap.isCoalesceable(): 

2625 Util.log("The VDI appears to have been concurrently snapshotted") 

2626 return False 

2627 Util.log("Coalescing parent %s" % tempSnap) 

2628 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid) 

2629 vhdSize = vdi.getSizeVHD() 

2630 self._coalesce(tempSnap) 

2631 if not vdi.isLeafCoalesceable(): 

2632 Util.log("The VDI tree appears to have been altered since") 

2633 return False 

2634 return True 

2635 

2636 def _liveLeafCoalesce(self, vdi) -> bool: 

2637 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid) 

2638 self.lock() 

2639 try: 

2640 self.scan() 

2641 if not self.getVDI(vdi.uuid): 

2642 Util.log("The VDI appears to have been deleted meanwhile") 

2643 return False 

2644 if not vdi.isLeafCoalesceable(): 

2645 Util.log("The VDI is no longer leaf-coalesceable") 

2646 return False 

2647 

2648 uuid = vdi.uuid 

2649 vdi.pause(failfast=True) 

2650 try: 

2651 try: 

2652 # "vdi" object will no longer be valid after this call 

2653 self._doCoalesceLeaf(vdi) 

2654 except: 

2655 Util.logException("_doCoalesceLeaf") 

2656 self._handleInterruptedCoalesceLeaf() 

2657 raise 

2658 finally: 

2659 vdi = self.getVDI(uuid) 

2660 if vdi: 

2661 vdi.ensureUnpaused() 

2662 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid) 

2663 if vdiOld: 

2664 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2665 self.deleteVDI(vdiOld) 

2666 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2667 finally: 

2668 self.cleanup() 

2669 self.unlock() 

2670 self.logFilter.logState() 

2671 return True 

2672 

2673 def _doCoalesceLeaf(self, vdi): 

2674 """Actual coalescing of a leaf VDI onto parent. Must be called in an 

2675 offline/atomic context""" 

2676 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid) 

2677 self._prepareCoalesceLeaf(vdi) 

2678 vdi.parent._setHidden(False) 

2679 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False) 

2680 vdi.validate(True) 

2681 vdi.parent.validate(True) 

2682 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid) 

2683 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT 

2684 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE: 

2685 Util.log("Leaf-coalesce forced, will not use timeout") 

2686 timeout = 0 

2687 vdi._coalesceVHD(timeout) 

2688 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid) 

2689 vdi.parent.validate(True) 

2690 #vdi._verifyContents(timeout / 2) 

2691 

2692 # rename 

2693 vdiUuid = vdi.uuid 

2694 oldName = vdi.fileName 

2695 origParentUuid = vdi.parent.uuid 

2696 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid) 

2697 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid) 

2698 vdi.parent.rename(vdiUuid) 

2699 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid) 

2700 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid) 

2701 

2702 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is 

2703 # garbage 

2704 

2705 # update the VDI record 

2706 vdi.parent.delConfig(VDI.DB_VHD_PARENT) 

2707 if vdi.parent.raw: 

2708 vdi.parent.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_RAW) 

2709 vdi.parent.delConfig(VDI.DB_VHD_BLOCKS) 

2710 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid) 

2711 

2712 self._updateNode(vdi) 

2713 

2714 # delete the obsolete leaf & inflate the parent (in that order, to 

2715 # minimize free space requirements) 

2716 parent = vdi.parent 

2717 vdi._setHidden(True) 

2718 vdi.parent.children = [] 

2719 vdi.parent = None 

2720 

2721 extraSpace = self._calcExtraSpaceNeeded(vdi, parent) 

2722 freeSpace = self.getFreeSpace() 

2723 if freeSpace < extraSpace: 

2724 # don't delete unless we need the space: deletion is time-consuming 

2725 # because it requires contacting the slaves, and we're paused here 

2726 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2727 self.deleteVDI(vdi) 

2728 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2729 

2730 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid) 

2731 self.journaler.remove(VDI.JRN_LEAF, vdiUuid) 

2732 

2733 self.forgetVDI(origParentUuid) 

2734 self._finishCoalesceLeaf(parent) 

2735 self._updateSlavesOnResize(parent) 

2736 

2737 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

2738 assert(not parent.raw) # raw parents not supported 

2739 extra = child.getSizeVHD() - parent.getSizeVHD() 

2740 if extra < 0: 

2741 extra = 0 

2742 return extra 

2743 

2744 def _prepareCoalesceLeaf(self, vdi) -> None: 

2745 pass 

2746 

2747 def _updateNode(self, vdi) -> None: 

2748 pass 

2749 

2750 def _finishCoalesceLeaf(self, parent) -> None: 

2751 pass 

2752 

2753 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: 

2754 pass 

2755 

2756 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None: 

2757 pass 

2758 

2759 def _updateSlavesOnResize(self, vdi) -> None: 

2760 pass 

2761 

2762 def _removeStaleVDIs(self, uuidsPresent) -> None: 

2763 for uuid in list(self.vdis.keys()): 

2764 if not uuid in uuidsPresent: 

2765 Util.log("VDI %s disappeared since last scan" % \ 

2766 self.vdis[uuid]) 

2767 del self.vdis[uuid] 

2768 

2769 def _handleInterruptedCoalesceLeaf(self) -> None: 

2770 """An interrupted leaf-coalesce operation may leave the VHD tree in an 

2771 inconsistent state. If the old-leaf VDI is still present, we revert the 

2772 operation (in case the original error is persistent); otherwise we must 

2773 finish the operation""" 

2774 pass 

2775 

2776 def _buildTree(self, force): 

2777 self.vdiTrees = [] 

2778 for vdi in self.vdis.values(): 

2779 if vdi.parentUuid: 

2780 parent = self.getVDI(vdi.parentUuid) 

2781 if not parent: 

2782 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX): 

2783 self.vdiTrees.append(vdi) 

2784 continue 

2785 if force: 

2786 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \ 

2787 (vdi.parentUuid, vdi.uuid)) 

2788 self.vdiTrees.append(vdi) 

2789 continue 

2790 else: 

2791 raise util.SMException("Parent VDI %s of %s not " \ 

2792 "found" % (vdi.parentUuid, vdi.uuid)) 

2793 vdi.parent = parent 

2794 parent.children.append(vdi) 

2795 else: 

2796 self.vdiTrees.append(vdi) 

2797 

2798 

2799class FileSR(SR): 

2800 TYPE = SR.TYPE_FILE 

2801 CACHE_FILE_EXT = ".vhdcache" 

2802 # cache cleanup actions 

2803 CACHE_ACTION_KEEP = 0 

2804 CACHE_ACTION_REMOVE = 1 

2805 CACHE_ACTION_REMOVE_IF_INACTIVE = 2 

2806 

2807 def __init__(self, uuid, xapi, createLock, force): 

2808 SR.__init__(self, uuid, xapi, createLock, force) 

2809 self.path = "/var/run/sr-mount/%s" % self.uuid 

2810 self.journaler = fjournaler.Journaler(self.path) 

2811 

2812 @override 

2813 def scan(self, force=False) -> None: 

2814 if not util.pathexists(self.path): 

2815 raise util.SMException("directory %s not found!" % self.uuid) 

2816 vhds = self._scan(force) 

2817 for uuid, vhdInfo in vhds.items(): 

2818 vdi = self.getVDI(uuid) 

2819 if not vdi: 

2820 self.logFilter.logNewVDI(uuid) 

2821 vdi = FileVDI(self, uuid, False) 

2822 self.vdis[uuid] = vdi 

2823 vdi.load(vhdInfo) 

2824 uuidsPresent = list(vhds.keys()) 

2825 rawList = [x for x in os.listdir(self.path) if x.endswith(vhdutil.FILE_EXTN_RAW)] 

2826 for rawName in rawList: 

2827 uuid = FileVDI.extractUuid(rawName) 

2828 uuidsPresent.append(uuid) 

2829 vdi = self.getVDI(uuid) 

2830 if not vdi: 

2831 self.logFilter.logNewVDI(uuid) 

2832 vdi = FileVDI(self, uuid, True) 

2833 self.vdis[uuid] = vdi 

2834 self._removeStaleVDIs(uuidsPresent) 

2835 self._buildTree(force) 

2836 self.logFilter.logState() 

2837 self._handleInterruptedCoalesceLeaf() 

2838 

2839 @override 

2840 def getFreeSpace(self) -> int: 

2841 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path) 

2842 

2843 @override 

2844 def deleteVDIs(self, vdiList) -> None: 

2845 rootDeleted = False 

2846 for vdi in vdiList: 

2847 if not vdi.parent: 

2848 rootDeleted = True 

2849 break 

2850 SR.deleteVDIs(self, vdiList) 

2851 if self.xapi.srRecord["type"] == "nfs" and rootDeleted: 

2852 self.xapi.markCacheSRsDirty() 

2853 

2854 @override 

2855 def cleanupCache(self, maxAge=-1) -> int: 

2856 """Clean up IntelliCache cache files. Caches for leaf nodes are 

2857 removed when the leaf node no longer exists or its allow-caching 

2858 attribute is not set. Caches for parent nodes are removed when the 

2859 parent node no longer exists or it hasn't been used in more than 

2860 <maxAge> hours. 

2861 Return number of caches removed. 

2862 """ 

2863 numRemoved = 0 

2864 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)] 

2865 Util.log("Found %d cache files" % len(cacheFiles)) 

2866 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge) 

2867 for cacheFile in cacheFiles: 

2868 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)] 

2869 action = self.CACHE_ACTION_KEEP 

2870 rec = self.xapi.getRecordVDI(uuid) 

2871 if not rec: 

2872 Util.log("Cache %s: VDI doesn't exist" % uuid) 

2873 action = self.CACHE_ACTION_REMOVE 

2874 elif rec["managed"] and not rec["allow_caching"]: 

2875 Util.log("Cache %s: caching disabled" % uuid) 

2876 action = self.CACHE_ACTION_REMOVE 

2877 elif not rec["managed"] and maxAge >= 0: 

2878 lastAccess = datetime.datetime.fromtimestamp( \ 

2879 os.path.getatime(os.path.join(self.path, cacheFile))) 

2880 if lastAccess < cutoff: 

2881 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge)) 

2882 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE 

2883 

2884 if action == self.CACHE_ACTION_KEEP: 

2885 Util.log("Keeping cache %s" % uuid) 

2886 continue 

2887 

2888 lockId = uuid 

2889 parentUuid = None 

2890 if rec and rec["managed"]: 

2891 parentUuid = rec["sm_config"].get("vhd-parent") 

2892 if parentUuid: 

2893 lockId = parentUuid 

2894 

2895 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId) 

2896 cacheLock.acquire() 

2897 try: 

2898 if self._cleanupCache(uuid, action): 

2899 numRemoved += 1 

2900 finally: 

2901 cacheLock.release() 

2902 return numRemoved 

2903 

2904 def _cleanupCache(self, uuid, action): 

2905 assert(action != self.CACHE_ACTION_KEEP) 

2906 rec = self.xapi.getRecordVDI(uuid) 

2907 if rec and rec["allow_caching"]: 

2908 Util.log("Cache %s appears to have become valid" % uuid) 

2909 return False 

2910 

2911 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT) 

2912 tapdisk = blktap2.Tapdisk.find_by_path(fullPath) 

2913 if tapdisk: 

2914 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE: 

2915 Util.log("Cache %s still in use" % uuid) 

2916 return False 

2917 Util.log("Shutting down tapdisk for %s" % fullPath) 

2918 tapdisk.shutdown() 

2919 

2920 Util.log("Deleting file %s" % fullPath) 

2921 os.unlink(fullPath) 

2922 return True 

2923 

2924 def _isCacheFileName(self, name): 

2925 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \ 

2926 name.endswith(self.CACHE_FILE_EXT) 

2927 

2928 def _scan(self, force): 

2929 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

2930 error = False 

2931 pattern = os.path.join(self.path, "*%s" % vhdutil.FILE_EXTN_VHD) 

2932 vhds = vhdutil.getAllVHDs(pattern, FileVDI.extractUuid) 

2933 for uuid, vhdInfo in vhds.items(): 

2934 if vhdInfo.error: 

2935 error = True 

2936 break 

2937 if not error: 

2938 return vhds 

2939 Util.log("Scan error on attempt %d" % i) 

2940 if force: 

2941 return vhds 

2942 raise util.SMException("Scan error") 

2943 

2944 @override 

2945 def deleteVDI(self, vdi) -> None: 

2946 self._checkSlaves(vdi) 

2947 SR.deleteVDI(self, vdi) 

2948 

2949 def _checkSlaves(self, vdi): 

2950 onlineHosts = self.xapi.getOnlineHosts() 

2951 abortFlag = IPCFlag(self.uuid) 

2952 for pbdRecord in self.xapi.getAttachedPBDs(): 

2953 hostRef = pbdRecord["host"] 

2954 if hostRef == self.xapi._hostRef: 

2955 continue 

2956 if abortFlag.test(FLAG_TYPE_ABORT): 

2957 raise AbortException("Aborting due to signal") 

2958 try: 

2959 self._checkSlave(hostRef, vdi) 

2960 except util.CommandException: 

2961 if hostRef in onlineHosts: 

2962 raise 

2963 

2964 def _checkSlave(self, hostRef, vdi): 

2965 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path}) 

2966 Util.log("Checking with slave: %s" % repr(call)) 

2967 _host = self.xapi.session.xenapi.host 

2968 text = _host.call_plugin( * call) 

2969 

2970 @override 

2971 def _handleInterruptedCoalesceLeaf(self) -> None: 

2972 entries = self.journaler.getAll(VDI.JRN_LEAF) 

2973 for uuid, parentUuid in entries.items(): 

2974 fileList = os.listdir(self.path) 

2975 childName = uuid + vhdutil.FILE_EXTN_VHD 

2976 tmpChildName = self.TMP_RENAME_PREFIX + uuid + vhdutil.FILE_EXTN_VHD 

2977 parentName1 = parentUuid + vhdutil.FILE_EXTN_VHD 

2978 parentName2 = parentUuid + vhdutil.FILE_EXTN_RAW 

2979 parentPresent = (parentName1 in fileList or parentName2 in fileList) 

2980 if parentPresent or tmpChildName in fileList: 

2981 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

2982 else: 

2983 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

2984 self.journaler.remove(VDI.JRN_LEAF, uuid) 

2985 vdi = self.getVDI(uuid) 

2986 if vdi: 

2987 vdi.ensureUnpaused() 

2988 

2989 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2990 Util.log("*** UNDO LEAF-COALESCE") 

2991 parent = self.getVDI(parentUuid) 

2992 if not parent: 

2993 parent = self.getVDI(childUuid) 

2994 if not parent: 

2995 raise util.SMException("Neither %s nor %s found" % \ 

2996 (parentUuid, childUuid)) 

2997 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

2998 parent.rename(parentUuid) 

2999 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

3000 

3001 child = self.getVDI(childUuid) 

3002 if not child: 

3003 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3004 if not child: 

3005 raise util.SMException("Neither %s nor %s found" % \ 

3006 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

3007 Util.log("Renaming child back to %s" % childUuid) 

3008 child.rename(childUuid) 

3009 Util.log("Updating the VDI record") 

3010 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

3011 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

3012 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

3013 

3014 if child.hidden: 

3015 child._setHidden(False) 

3016 if not parent.hidden: 

3017 parent._setHidden(True) 

3018 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3019 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

3020 Util.log("*** leaf-coalesce undo successful") 

3021 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

3022 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

3023 

3024 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3025 Util.log("*** FINISH LEAF-COALESCE") 

3026 vdi = self.getVDI(childUuid) 

3027 if not vdi: 

3028 raise util.SMException("VDI %s not found" % childUuid) 

3029 try: 

3030 self.forgetVDI(parentUuid) 

3031 except XenAPI.Failure: 

3032 pass 

3033 self._updateSlavesOnResize(vdi) 

3034 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

3035 Util.log("*** finished leaf-coalesce successfully") 

3036 

3037 

3038class LVHDSR(SR): 

3039 TYPE = SR.TYPE_LVHD 

3040 SUBTYPES = ["lvhdoiscsi", "lvhdohba"] 

3041 

3042 def __init__(self, uuid, xapi, createLock, force): 

3043 SR.__init__(self, uuid, xapi, createLock, force) 

3044 self.vgName = "%s%s" % (lvhdutil.VG_PREFIX, self.uuid) 

3045 self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgName) 

3046 

3047 sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid) 

3048 other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref) 

3049 lvm_conf = other_conf.get('lvm-conf') if other_conf else None 

3050 self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf) 

3051 

3052 self.lvActivator = LVActivator(self.uuid, self.lvmCache) 

3053 self.journaler = journaler.Journaler(self.lvmCache) 

3054 

3055 @override 

3056 def deleteVDI(self, vdi) -> None: 

3057 if self.lvActivator.get(vdi.uuid, False): 

3058 self.lvActivator.deactivate(vdi.uuid, False) 

3059 self._checkSlaves(vdi) 

3060 SR.deleteVDI(self, vdi) 

3061 

3062 @override 

3063 def forgetVDI(self, vdiUuid) -> None: 

3064 SR.forgetVDI(self, vdiUuid) 

3065 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME) 

3066 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid) 

3067 

3068 @override 

3069 def getFreeSpace(self) -> int: 

3070 stats = lvutil._getVGstats(self.vgName) 

3071 return stats['physical_size'] - stats['physical_utilisation'] 

3072 

3073 @override 

3074 def cleanup(self): 

3075 if not self.lvActivator.deactivateAll(): 

3076 Util.log("ERROR deactivating LVs while cleaning up") 

3077 

3078 @override 

3079 def needUpdateBlockInfo(self) -> bool: 

3080 for vdi in self.vdis.values(): 

3081 if vdi.scanError or vdi.raw or len(vdi.children) == 0: 

3082 continue 

3083 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

3084 return True 

3085 return False 

3086 

3087 @override 

3088 def updateBlockInfo(self) -> None: 

3089 numUpdated = 0 

3090 for vdi in self.vdis.values(): 

3091 if vdi.scanError or vdi.raw or len(vdi.children) == 0: 

3092 continue 

3093 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

3094 vdi.updateBlockInfo() 

3095 numUpdated += 1 

3096 if numUpdated: 

3097 # deactivate the LVs back sooner rather than later. If we don't 

3098 # now, by the time this thread gets to deactivations, another one 

3099 # might have leaf-coalesced a node and deleted it, making the child 

3100 # inherit the refcount value and preventing the correct decrement 

3101 self.cleanup() 

3102 

3103 @override 

3104 def scan(self, force=False) -> None: 

3105 vdis = self._scan(force) 

3106 for uuid, vdiInfo in vdis.items(): 

3107 vdi = self.getVDI(uuid) 

3108 if not vdi: 

3109 self.logFilter.logNewVDI(uuid) 

3110 vdi = LVHDVDI(self, uuid, 

3111 vdiInfo.vdiType == vhdutil.VDI_TYPE_RAW) 

3112 self.vdis[uuid] = vdi 

3113 vdi.load(vdiInfo) 

3114 self._removeStaleVDIs(vdis.keys()) 

3115 self._buildTree(force) 

3116 self.logFilter.logState() 

3117 self._handleInterruptedCoalesceLeaf() 

3118 

3119 def _scan(self, force): 

3120 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3121 error = False 

3122 self.lvmCache.refresh() 

3123 vdis = lvhdutil.getVDIInfo(self.lvmCache) 

3124 for uuid, vdiInfo in vdis.items(): 

3125 if vdiInfo.scanError: 

3126 error = True 

3127 break 

3128 if not error: 

3129 return vdis 

3130 Util.log("Scan error, retrying (%d)" % i) 

3131 if force: 

3132 return vdis 

3133 raise util.SMException("Scan error") 

3134 

3135 @override 

3136 def _removeStaleVDIs(self, uuidsPresent) -> None: 

3137 for uuid in list(self.vdis.keys()): 

3138 if not uuid in uuidsPresent: 

3139 Util.log("VDI %s disappeared since last scan" % \ 

3140 self.vdis[uuid]) 

3141 del self.vdis[uuid] 

3142 if self.lvActivator.get(uuid, False): 

3143 self.lvActivator.remove(uuid, False) 

3144 

3145 @override 

3146 def _liveLeafCoalesce(self, vdi) -> bool: 

3147 """If the parent is raw and the child was resized (virt. size), then 

3148 we'll need to resize the parent, which can take a while due to zeroing 

3149 out of the extended portion of the LV. Do it before pausing the child 

3150 to avoid a protracted downtime""" 

3151 if vdi.parent.raw and vdi.sizeVirt > vdi.parent.sizeVirt: 

3152 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

3153 vdi.parent._increaseSizeVirt(vdi.sizeVirt) 

3154 

3155 return SR._liveLeafCoalesce(self, vdi) 

3156 

3157 @override 

3158 def _prepareCoalesceLeaf(self, vdi) -> None: 

3159 vdi._activateChain() 

3160 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

3161 vdi.deflate() 

3162 vdi.inflateParentForCoalesce() 

3163 

3164 @override 

3165 def _updateNode(self, vdi) -> None: 

3166 # fix the refcounts: the remaining node should inherit the binary 

3167 # refcount from the leaf (because if it was online, it should remain 

3168 # refcounted as such), but the normal refcount from the parent (because 

3169 # this node is really the parent node) - minus 1 if it is online (since 

3170 # non-leaf nodes increment their normal counts when they are online and 

3171 # we are now a leaf, storing that 1 in the binary refcount). 

3172 ns = lvhdutil.NS_PREFIX_LVM + self.uuid 

3173 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns) 

3174 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns) 

3175 pCnt = pCnt - cBcnt 

3176 assert(pCnt >= 0) 

3177 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns) 

3178 

3179 @override 

3180 def _finishCoalesceLeaf(self, parent) -> None: 

3181 if not parent.isSnapshot() or parent.isAttachedRW(): 

3182 parent.inflateFully() 

3183 else: 

3184 parent.deflate() 

3185 

3186 @override 

3187 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3188 return lvhdutil.calcSizeVHDLV(parent.sizeVirt) - parent.sizeLV 

3189 

3190 @override 

3191 def _handleInterruptedCoalesceLeaf(self) -> None: 

3192 entries = self.journaler.getAll(VDI.JRN_LEAF) 

3193 for uuid, parentUuid in entries.items(): 

3194 childLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + uuid 

3195 tmpChildLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ 

3196 self.TMP_RENAME_PREFIX + uuid 

3197 parentLV1 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + parentUuid 

3198 parentLV2 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + parentUuid 

3199 parentPresent = (self.lvmCache.checkLV(parentLV1) or \ 

3200 self.lvmCache.checkLV(parentLV2)) 

3201 if parentPresent or self.lvmCache.checkLV(tmpChildLV): 

3202 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3203 else: 

3204 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3205 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3206 vdi = self.getVDI(uuid) 

3207 if vdi: 

3208 vdi.ensureUnpaused() 

3209 

3210 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3211 Util.log("*** UNDO LEAF-COALESCE") 

3212 parent = self.getVDI(parentUuid) 

3213 if not parent: 

3214 parent = self.getVDI(childUuid) 

3215 if not parent: 

3216 raise util.SMException("Neither %s nor %s found" % \ 

3217 (parentUuid, childUuid)) 

3218 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

3219 parent.rename(parentUuid) 

3220 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

3221 

3222 child = self.getVDI(childUuid) 

3223 if not child: 

3224 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3225 if not child: 

3226 raise util.SMException("Neither %s nor %s found" % \ 

3227 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

3228 Util.log("Renaming child back to %s" % childUuid) 

3229 child.rename(childUuid) 

3230 Util.log("Updating the VDI record") 

3231 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

3232 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

3233 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

3234 

3235 # refcount (best effort - assume that it had succeeded if the 

3236 # second rename succeeded; if not, this adjustment will be wrong, 

3237 # leading to a non-deactivation of the LV) 

3238 ns = lvhdutil.NS_PREFIX_LVM + self.uuid 

3239 cCnt, cBcnt = RefCounter.check(child.uuid, ns) 

3240 pCnt, pBcnt = RefCounter.check(parent.uuid, ns) 

3241 pCnt = pCnt + cBcnt 

3242 RefCounter.set(parent.uuid, pCnt, 0, ns) 

3243 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid) 

3244 

3245 parent.deflate() 

3246 child.inflateFully() 

3247 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid) 

3248 if child.hidden: 

3249 child._setHidden(False) 

3250 if not parent.hidden: 

3251 parent._setHidden(True) 

3252 if not parent.lvReadonly: 

3253 self.lvmCache.setReadonly(parent.fileName, True) 

3254 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3255 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

3256 Util.log("*** leaf-coalesce undo successful") 

3257 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

3258 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

3259 

3260 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3261 Util.log("*** FINISH LEAF-COALESCE") 

3262 vdi = self.getVDI(childUuid) 

3263 if not vdi: 

3264 raise util.SMException("VDI %s not found" % childUuid) 

3265 vdi.inflateFully() 

3266 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid) 

3267 try: 

3268 self.forgetVDI(parentUuid) 

3269 except XenAPI.Failure: 

3270 pass 

3271 self._updateSlavesOnResize(vdi) 

3272 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

3273 Util.log("*** finished leaf-coalesce successfully") 

3274 

3275 def _checkSlaves(self, vdi): 

3276 """Confirm with all slaves in the pool that 'vdi' is not in use. We 

3277 try to check all slaves, including those that the Agent believes are 

3278 offline, but ignore failures for offline hosts. This is to avoid cases 

3279 where the Agent thinks a host is offline but the host is up.""" 

3280 args = {"vgName": self.vgName, 

3281 "action1": "deactivateNoRefcount", 

3282 "lvName1": vdi.fileName, 

3283 "action2": "cleanupLockAndRefcount", 

3284 "uuid2": vdi.uuid, 

3285 "ns2": lvhdutil.NS_PREFIX_LVM + self.uuid} 

3286 onlineHosts = self.xapi.getOnlineHosts() 

3287 abortFlag = IPCFlag(self.uuid) 

3288 for pbdRecord in self.xapi.getAttachedPBDs(): 

3289 hostRef = pbdRecord["host"] 

3290 if hostRef == self.xapi._hostRef: 

3291 continue 

3292 if abortFlag.test(FLAG_TYPE_ABORT): 

3293 raise AbortException("Aborting due to signal") 

3294 Util.log("Checking with slave %s (path %s)" % ( 

3295 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path)) 

3296 try: 

3297 self.xapi.ensureInactive(hostRef, args) 

3298 except XenAPI.Failure: 

3299 if hostRef in onlineHosts: 

3300 raise 

3301 

3302 @override 

3303 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: 

3304 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid]) 

3305 if not slaves: 

3306 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \ 

3307 child) 

3308 return 

3309 

3310 tmpName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ 

3311 self.TMP_RENAME_PREFIX + child.uuid 

3312 args = {"vgName": self.vgName, 

3313 "action1": "deactivateNoRefcount", 

3314 "lvName1": tmpName, 

3315 "action2": "deactivateNoRefcount", 

3316 "lvName2": child.fileName, 

3317 "action3": "refresh", 

3318 "lvName3": child.fileName, 

3319 "action4": "refresh", 

3320 "lvName4": parent.fileName} 

3321 for slave in slaves: 

3322 Util.log("Updating %s, %s, %s on slave %s" % \ 

3323 (tmpName, child.fileName, parent.fileName, 

3324 self.xapi.getRecordHost(slave)['hostname'])) 

3325 text = self.xapi.session.xenapi.host.call_plugin( \ 

3326 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3327 Util.log("call-plugin returned: '%s'" % text) 

3328 

3329 @override 

3330 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None: 

3331 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid]) 

3332 if not slaves: 

3333 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi) 

3334 return 

3335 

3336 args = {"vgName": self.vgName, 

3337 "action1": "deactivateNoRefcount", 

3338 "lvName1": oldNameLV, 

3339 "action2": "refresh", 

3340 "lvName2": vdi.fileName, 

3341 "action3": "cleanupLockAndRefcount", 

3342 "uuid3": origParentUuid, 

3343 "ns3": lvhdutil.NS_PREFIX_LVM + self.uuid} 

3344 for slave in slaves: 

3345 Util.log("Updating %s to %s on slave %s" % \ 

3346 (oldNameLV, vdi.fileName, 

3347 self.xapi.getRecordHost(slave)['hostname'])) 

3348 text = self.xapi.session.xenapi.host.call_plugin( \ 

3349 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3350 Util.log("call-plugin returned: '%s'" % text) 

3351 

3352 @override 

3353 def _updateSlavesOnResize(self, vdi) -> None: 

3354 uuids = [x.uuid for x in vdi.getAllLeaves()] 

3355 slaves = util.get_slaves_attached_on(self.xapi.session, uuids) 

3356 if not slaves: 

3357 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi) 

3358 return 

3359 lvhdutil.lvRefreshOnSlaves(self.xapi.session, self.uuid, self.vgName, 

3360 vdi.fileName, vdi.uuid, slaves) 

3361 

3362 

3363class LinstorSR(SR): 

3364 TYPE = SR.TYPE_LINSTOR 

3365 

3366 def __init__(self, uuid, xapi, createLock, force): 

3367 if not LINSTOR_AVAILABLE: 

3368 raise util.SMException( 

3369 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing' 

3370 ) 

3371 

3372 SR.__init__(self, uuid, xapi, createLock, force) 

3373 self.path = LinstorVolumeManager.DEV_ROOT_PATH 

3374 self._reloadLinstor(journaler_only=True) 

3375 

3376 @override 

3377 def deleteVDI(self, vdi) -> None: 

3378 self._checkSlaves(vdi) 

3379 SR.deleteVDI(self, vdi) 

3380 

3381 @override 

3382 def getFreeSpace(self) -> int: 

3383 return self._linstor.max_volume_size_allowed 

3384 

3385 @override 

3386 def scan(self, force=False) -> None: 

3387 all_vdi_info = self._scan(force) 

3388 for uuid, vdiInfo in all_vdi_info.items(): 

3389 # When vdiInfo is None, the VDI is RAW. 

3390 vdi = self.getVDI(uuid) 

3391 if not vdi: 

3392 self.logFilter.logNewVDI(uuid) 

3393 vdi = LinstorVDI(self, uuid, not vdiInfo) 

3394 self.vdis[uuid] = vdi 

3395 if vdiInfo: 

3396 vdi.load(vdiInfo) 

3397 self._removeStaleVDIs(all_vdi_info.keys()) 

3398 self._buildTree(force) 

3399 self.logFilter.logState() 

3400 self._handleInterruptedCoalesceLeaf() 

3401 

3402 @override 

3403 def pauseVDIs(self, vdiList) -> None: 

3404 self._linstor.ensure_volume_list_is_not_locked( 

3405 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3406 ) 

3407 return super(LinstorSR, self).pauseVDIs(vdiList) 

3408 

3409 def _reloadLinstor(self, journaler_only=False): 

3410 session = self.xapi.session 

3411 host_ref = util.get_this_host_ref(session) 

3412 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) 

3413 

3414 pbd = util.find_my_pbd(session, host_ref, sr_ref) 

3415 if pbd is None: 

3416 raise util.SMException('Failed to find PBD') 

3417 

3418 dconf = session.xenapi.PBD.get_device_config(pbd) 

3419 group_name = dconf['group-name'] 

3420 

3421 controller_uri = get_controller_uri() 

3422 self.journaler = LinstorJournaler( 

3423 controller_uri, group_name, logger=util.SMlog 

3424 ) 

3425 

3426 if journaler_only: 

3427 return 

3428 

3429 self._linstor = LinstorVolumeManager( 

3430 controller_uri, 

3431 group_name, 

3432 repair=True, 

3433 logger=util.SMlog 

3434 ) 

3435 self._vhdutil = LinstorVhdUtil(session, self._linstor) 

3436 

3437 def _scan(self, force): 

3438 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3439 self._reloadLinstor() 

3440 error = False 

3441 try: 

3442 all_vdi_info = self._load_vdi_info() 

3443 for uuid, vdiInfo in all_vdi_info.items(): 

3444 if vdiInfo and vdiInfo.error: 

3445 error = True 

3446 break 

3447 if not error: 

3448 return all_vdi_info 

3449 Util.log('Scan error, retrying ({})'.format(i)) 

3450 except Exception as e: 

3451 Util.log('Scan exception, retrying ({}): {}'.format(i, e)) 

3452 Util.log(traceback.format_exc()) 

3453 

3454 if force: 

3455 return all_vdi_info 

3456 raise util.SMException('Scan error') 

3457 

3458 def _load_vdi_info(self): 

3459 all_vdi_info = {} 

3460 

3461 # TODO: Ensure metadata contains the right info. 

3462 

3463 all_volume_info = self._linstor.get_volumes_with_info() 

3464 volumes_metadata = self._linstor.get_volumes_with_metadata() 

3465 for vdi_uuid, volume_info in all_volume_info.items(): 

3466 try: 

3467 volume_metadata = volumes_metadata[vdi_uuid] 

3468 if not volume_info.name and not list(volume_metadata.items()): 

3469 continue # Ignore it, probably deleted. 

3470 

3471 if vdi_uuid.startswith('DELETED_'): 

3472 # Assume it's really a RAW volume of a failed snap without VHD header/footer. 

3473 # We must remove this VDI now without adding it in the VDI list. 

3474 # Otherwise `Relinking` calls and other actions can be launched on it. 

3475 # We don't want that... 

3476 Util.log('Deleting bad VDI {}'.format(vdi_uuid)) 

3477 

3478 self.lock() 

3479 try: 

3480 self._linstor.destroy_volume(vdi_uuid) 

3481 try: 

3482 self.forgetVDI(vdi_uuid) 

3483 except: 

3484 pass 

3485 except Exception as e: 

3486 Util.log('Cannot delete bad VDI: {}'.format(e)) 

3487 finally: 

3488 self.unlock() 

3489 continue 

3490 

3491 vdi_type = volume_metadata.get(VDI_TYPE_TAG) 

3492 volume_name = self._linstor.get_volume_name(vdi_uuid) 

3493 if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX): 

3494 # Always RAW! 

3495 info = None 

3496 elif vdi_type == vhdutil.VDI_TYPE_VHD: 

3497 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3498 else: 

3499 # Ensure it's not a VHD... 

3500 try: 

3501 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3502 except: 

3503 try: 

3504 self._vhdutil.force_repair( 

3505 self._linstor.get_device_path(vdi_uuid) 

3506 ) 

3507 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3508 except: 

3509 info = None 

3510 

3511 except Exception as e: 

3512 Util.log( 

3513 ' [VDI {}: failed to load VDI info]: {}' 

3514 .format(vdi_uuid, e) 

3515 ) 

3516 info = vhdutil.VHDInfo(vdi_uuid) 

3517 info.error = 1 

3518 

3519 all_vdi_info[vdi_uuid] = info 

3520 

3521 return all_vdi_info 

3522 

3523 @override 

3524 def _prepareCoalesceLeaf(self, vdi) -> None: 

3525 vdi._activateChain() 

3526 vdi.deflate() 

3527 vdi._inflateParentForCoalesce() 

3528 

3529 @override 

3530 def _finishCoalesceLeaf(self, parent) -> None: 

3531 if not parent.isSnapshot() or parent.isAttachedRW(): 

3532 parent.inflateFully() 

3533 else: 

3534 parent.deflate() 

3535 

3536 @override 

3537 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3538 return LinstorVhdUtil.compute_volume_size(parent.sizeVirt, parent.vdi_type) - parent.getDrbdSize() 

3539 

3540 def _hasValidDevicePath(self, uuid): 

3541 try: 

3542 self._linstor.get_device_path(uuid) 

3543 except Exception: 

3544 # TODO: Maybe log exception. 

3545 return False 

3546 return True 

3547 

3548 @override 

3549 def _liveLeafCoalesce(self, vdi) -> bool: 

3550 self.lock() 

3551 try: 

3552 self._linstor.ensure_volume_is_not_locked( 

3553 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3554 ) 

3555 return super(LinstorSR, self)._liveLeafCoalesce(vdi) 

3556 finally: 

3557 self.unlock() 

3558 

3559 @override 

3560 def _handleInterruptedCoalesceLeaf(self) -> None: 

3561 entries = self.journaler.get_all(VDI.JRN_LEAF) 

3562 for uuid, parentUuid in entries.items(): 

3563 if self._hasValidDevicePath(parentUuid) or \ 

3564 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): 

3565 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3566 else: 

3567 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3568 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3569 vdi = self.getVDI(uuid) 

3570 if vdi: 

3571 vdi.ensureUnpaused() 

3572 

3573 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3574 Util.log('*** UNDO LEAF-COALESCE') 

3575 parent = self.getVDI(parentUuid) 

3576 if not parent: 

3577 parent = self.getVDI(childUuid) 

3578 if not parent: 

3579 raise util.SMException( 

3580 'Neither {} nor {} found'.format(parentUuid, childUuid) 

3581 ) 

3582 Util.log( 

3583 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) 

3584 ) 

3585 parent.rename(parentUuid) 

3586 

3587 child = self.getVDI(childUuid) 

3588 if not child: 

3589 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3590 if not child: 

3591 raise util.SMException( 

3592 'Neither {} nor {} found'.format( 

3593 childUuid, self.TMP_RENAME_PREFIX + childUuid 

3594 ) 

3595 ) 

3596 Util.log('Renaming child back to {}'.format(childUuid)) 

3597 child.rename(childUuid) 

3598 Util.log('Updating the VDI record') 

3599 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

3600 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

3601 

3602 # TODO: Maybe deflate here. 

3603 

3604 if child.hidden: 

3605 child._setHidden(False) 

3606 if not parent.hidden: 

3607 parent._setHidden(True) 

3608 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3609 Util.log('*** leaf-coalesce undo successful') 

3610 

3611 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3612 Util.log('*** FINISH LEAF-COALESCE') 

3613 vdi = self.getVDI(childUuid) 

3614 if not vdi: 

3615 raise util.SMException('VDI {} not found'.format(childUuid)) 

3616 # TODO: Maybe inflate. 

3617 try: 

3618 self.forgetVDI(parentUuid) 

3619 except XenAPI.Failure: 

3620 pass 

3621 self._updateSlavesOnResize(vdi) 

3622 Util.log('*** finished leaf-coalesce successfully') 

3623 

3624 def _checkSlaves(self, vdi): 

3625 try: 

3626 all_openers = self._linstor.get_volume_openers(vdi.uuid) 

3627 for openers in all_openers.values(): 

3628 for opener in openers.values(): 

3629 if opener['process-name'] != 'tapdisk': 

3630 raise util.SMException( 

3631 'VDI {} is in use: {}'.format(vdi.uuid, all_openers) 

3632 ) 

3633 except LinstorVolumeManagerError as e: 

3634 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: 

3635 raise 

3636 

3637 

3638################################################################################ 

3639# 

3640# Helpers 

3641# 

3642def daemonize(): 

3643 pid = os.fork() 

3644 if pid: 

3645 os.waitpid(pid, 0) 

3646 Util.log("New PID [%d]" % pid) 

3647 return False 

3648 os.chdir("/") 

3649 os.setsid() 

3650 pid = os.fork() 

3651 if pid: 

3652 Util.log("Will finish as PID [%d]" % pid) 

3653 os._exit(0) 

3654 for fd in [0, 1, 2]: 

3655 try: 

3656 os.close(fd) 

3657 except OSError: 

3658 pass 

3659 # we need to fill those special fd numbers or pread won't work 

3660 sys.stdin = open("/dev/null", 'r') 

3661 sys.stderr = open("/dev/null", 'w') 

3662 sys.stdout = open("/dev/null", 'w') 

3663 # As we're a new process we need to clear the lock objects 

3664 lock.Lock.clearAll() 

3665 return True 

3666 

3667 

3668def normalizeType(type): 

3669 if type in LVHDSR.SUBTYPES: 

3670 type = SR.TYPE_LVHD 

3671 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]: 

3672 # temporary while LVHD is symlinked as LVM 

3673 type = SR.TYPE_LVHD 

3674 if type in [ 

3675 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", 

3676 "moosefs", "xfs", "zfs", "largeblock" 

3677 ]: 

3678 type = SR.TYPE_FILE 

3679 if type in ["linstor"]: 

3680 type = SR.TYPE_LINSTOR 

3681 if type not in SR.TYPES: 

3682 raise util.SMException("Unsupported SR type: %s" % type) 

3683 return type 

3684 

3685GCPAUSE_DEFAULT_SLEEP = 5 * 60 

3686 

3687 

3688def _gc_init_file(sr_uuid): 

3689 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init') 

3690 

3691 

3692def _create_init_file(sr_uuid): 

3693 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))) 

3694 with open(os.path.join( 

3695 NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init'), 'w+') as f: 

3696 f.write('1') 

3697 

3698 

3699def _gcLoopPause(sr, dryRun=False, immediate=False): 

3700 if immediate: 

3701 return 

3702 

3703 # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist 

3704 # point will just return. Otherwise, fall back on an abortable sleep. 

3705 

3706 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT): 

3707 

3708 util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT, 3708 ↛ exitline 3708 didn't jump to the function exit

3709 lambda *args: None) 

3710 elif os.path.exists(_gc_init_file(sr.uuid)): 

3711 def abortTest(): 

3712 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT) 

3713 

3714 # If time.sleep hangs we are in deep trouble, however for 

3715 # completeness we set the timeout of the abort thread to 

3716 # 110% of GCPAUSE_DEFAULT_SLEEP. 

3717 Util.log("GC active, about to go quiet") 

3718 Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP), 3718 ↛ exitline 3718 didn't run the lambda on line 3718

3719 None, sr.uuid, abortTest, VDI.POLL_INTERVAL, 

3720 GCPAUSE_DEFAULT_SLEEP * 1.1) 

3721 Util.log("GC active, quiet period ended") 

3722 

3723 

3724def _gcLoop(sr, dryRun=False, immediate=False): 

3725 if not lockGCActive.acquireNoblock(): 3725 ↛ 3726line 3725 didn't jump to line 3726, because the condition on line 3725 was never true

3726 Util.log("Another GC instance already active, exiting") 

3727 return 

3728 

3729 # Check we're still attached after acquiring locks 

3730 if not sr.xapi.isPluggedHere(): 

3731 Util.log("SR no longer attached, exiting") 

3732 return 

3733 

3734 # Clean up Intellicache files 

3735 sr.cleanupCache() 

3736 

3737 # Track how many we do 

3738 coalesced = 0 

3739 task_status = "success" 

3740 try: 

3741 # Check if any work needs to be done 

3742 if not sr.xapi.isPluggedHere(): 3742 ↛ 3743line 3742 didn't jump to line 3743, because the condition on line 3742 was never true

3743 Util.log("SR no longer attached, exiting") 

3744 return 

3745 sr.scanLocked() 

3746 if not sr.hasWork(): 

3747 Util.log("No work, exiting") 

3748 return 

3749 sr.xapi.create_task( 

3750 "Garbage Collection", 

3751 "Garbage collection for SR %s" % sr.uuid) 

3752 _gcLoopPause(sr, dryRun, immediate=immediate) 

3753 while True: 

3754 if SIGTERM: 

3755 Util.log("Term requested") 

3756 return 

3757 

3758 if not sr.xapi.isPluggedHere(): 3758 ↛ 3759line 3758 didn't jump to line 3759, because the condition on line 3758 was never true

3759 Util.log("SR no longer attached, exiting") 

3760 break 

3761 sr.scanLocked() 

3762 if not sr.hasWork(): 

3763 Util.log("No work, exiting") 

3764 break 

3765 

3766 if not lockGCRunning.acquireNoblock(): 3766 ↛ 3767line 3766 didn't jump to line 3767, because the condition on line 3766 was never true

3767 Util.log("Unable to acquire GC running lock.") 

3768 return 

3769 try: 

3770 if not sr.gcEnabled(): 3770 ↛ 3771line 3770 didn't jump to line 3771, because the condition on line 3770 was never true

3771 break 

3772 

3773 sr.xapi.update_task_progress("done", coalesced) 

3774 

3775 sr.cleanupCoalesceJournals() 

3776 # Create the init file here in case startup is waiting on it 

3777 _create_init_file(sr.uuid) 

3778 sr.scanLocked() 

3779 sr.updateBlockInfo() 

3780 

3781 howmany = len(sr.findGarbage()) 

3782 if howmany > 0: 

3783 Util.log("Found %d orphaned vdis" % howmany) 

3784 sr.lock() 

3785 try: 

3786 sr.garbageCollect(dryRun) 

3787 finally: 

3788 sr.unlock() 

3789 sr.xapi.srUpdate() 

3790 

3791 candidate = sr.findCoalesceable() 

3792 if candidate: 

3793 util.fistpoint.activate( 

3794 "LVHDRT_finding_a_suitable_pair", sr.uuid) 

3795 sr.coalesce(candidate, dryRun) 

3796 sr.xapi.srUpdate() 

3797 coalesced += 1 

3798 continue 

3799 

3800 candidate = sr.findLeafCoalesceable() 

3801 if candidate: 3801 ↛ 3808line 3801 didn't jump to line 3808, because the condition on line 3801 was never false

3802 sr.coalesceLeaf(candidate, dryRun) 

3803 sr.xapi.srUpdate() 

3804 coalesced += 1 

3805 continue 

3806 

3807 finally: 

3808 lockGCRunning.release() 3808 ↛ 3813line 3808 didn't jump to line 3813, because the break on line 3771 wasn't executed

3809 except: 

3810 task_status = "failure" 

3811 raise 

3812 finally: 

3813 sr.xapi.set_task_status(task_status) 

3814 Util.log("GC process exiting, no work left") 

3815 _create_init_file(sr.uuid) 

3816 lockGCActive.release() 

3817 

3818 

3819def _xapi_enabled(session, hostref): 

3820 host = session.xenapi.host.get_record(hostref) 

3821 return host['enabled'] 

3822 

3823 

3824def _ensure_xapi_initialised(session): 

3825 """ 

3826 Don't want to start GC until Xapi is fully initialised 

3827 """ 

3828 local_session = None 

3829 if session is None: 

3830 local_session = util.get_localAPI_session() 

3831 session = local_session 

3832 

3833 try: 

3834 hostref = session.xenapi.host.get_by_uuid(util.get_this_host()) 

3835 while not _xapi_enabled(session, hostref): 

3836 util.SMlog("Xapi not ready, GC waiting") 

3837 time.sleep(15) 

3838 finally: 

3839 if local_session is not None: 

3840 local_session.xenapi.session.logout() 

3841 

3842def _gc(session, srUuid, dryRun=False, immediate=False): 

3843 init(srUuid) 

3844 _ensure_xapi_initialised(session) 

3845 sr = SR.getInstance(srUuid, session) 

3846 if not sr.gcEnabled(False): 3846 ↛ 3847line 3846 didn't jump to line 3847, because the condition on line 3846 was never true

3847 return 

3848 

3849 try: 

3850 _gcLoop(sr, dryRun, immediate=immediate) 

3851 finally: 

3852 sr.check_no_space_candidates() 

3853 sr.cleanup() 

3854 sr.logFilter.logState() 

3855 del sr.xapi 

3856 

3857 

3858def _abort(srUuid, soft=False): 

3859 """Aborts an GC/coalesce. 

3860 

3861 srUuid: the UUID of the SR whose GC/coalesce must be aborted 

3862 soft: If set to True and there is a pending abort signal, the function 

3863 doesn't do anything. If set to False, a new abort signal is issued. 

3864 

3865 returns: If soft is set to False, we return True holding lockGCActive. If 

3866 soft is set to False and an abort signal is pending, we return False 

3867 without holding lockGCActive. An exception is raised in case of error.""" 

3868 Util.log("=== SR %s: abort ===" % (srUuid)) 

3869 init(srUuid) 

3870 if not lockGCActive.acquireNoblock(): 

3871 gotLock = False 

3872 Util.log("Aborting currently-running instance (SR %s)" % srUuid) 

3873 abortFlag = IPCFlag(srUuid) 

3874 if not abortFlag.set(FLAG_TYPE_ABORT, soft): 

3875 return False 

3876 for i in range(SR.LOCK_RETRY_ATTEMPTS): 

3877 gotLock = lockGCActive.acquireNoblock() 

3878 if gotLock: 

3879 break 

3880 time.sleep(SR.LOCK_RETRY_INTERVAL) 

3881 abortFlag.clear(FLAG_TYPE_ABORT) 

3882 if not gotLock: 

3883 raise util.CommandException(code=errno.ETIMEDOUT, 

3884 reason="SR %s: error aborting existing process" % srUuid) 

3885 return True 

3886 

3887 

3888def init(srUuid): 

3889 global lockGCRunning 

3890 if not lockGCRunning: 3890 ↛ 3891line 3890 didn't jump to line 3891, because the condition on line 3890 was never true

3891 lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid) 

3892 global lockGCActive 

3893 if not lockGCActive: 3893 ↛ 3894line 3893 didn't jump to line 3894, because the condition on line 3893 was never true

3894 lockGCActive = LockActive(srUuid) 

3895 

3896 

3897class LockActive: 

3898 """ 

3899 Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired 

3900 if another process holds the SR lock. 

3901 """ 

3902 def __init__(self, srUuid): 

3903 self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid) 

3904 self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, srUuid) 

3905 

3906 def acquireNoblock(self): 

3907 self._srLock.acquire() 

3908 

3909 try: 

3910 return self._lock.acquireNoblock() 

3911 finally: 

3912 self._srLock.release() 

3913 

3914 def release(self): 

3915 self._lock.release() 

3916 

3917 

3918def usage(): 

3919 output = """Garbage collect and/or coalesce VHDs in a VHD-based SR 

3920 

3921Parameters: 

3922 -u --uuid UUID SR UUID 

3923 and one of: 

3924 -g --gc garbage collect, coalesce, and repeat while there is work 

3925 -G --gc_force garbage collect once, aborting any current operations 

3926 -c --cache-clean <max_age> clean up IntelliCache cache files older than 

3927 max_age hours 

3928 -a --abort abort any currently running operation (GC or coalesce) 

3929 -q --query query the current state (GC'ing, coalescing or not running) 

3930 -x --disable disable GC/coalesce (will be in effect until you exit) 

3931 -t --debug see Debug below 

3932 

3933Options: 

3934 -b --background run in background (return immediately) (valid for -g only) 

3935 -f --force continue in the presence of VHDs with errors (when doing 

3936 GC, this might cause removal of any such VHDs) (only valid 

3937 for -G) (DANGEROUS) 

3938 

3939Debug: 

3940 The --debug parameter enables manipulation of LVHD VDIs for debugging 

3941 purposes. ** NEVER USE IT ON A LIVE VM ** 

3942 The following parameters are required: 

3943 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate", 

3944 "deflate". 

3945 -v --vdi_uuid VDI UUID 

3946 """ 

3947 #-d --dry-run don't actually perform any SR-modifying operations 

3948 print(output) 

3949 Util.log("(Invalid usage)") 

3950 sys.exit(1) 

3951 

3952 

3953############################################################################## 

3954# 

3955# API 

3956# 

3957def abort(srUuid, soft=False): 

3958 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair. 

3959 """ 

3960 if _abort(srUuid, soft): 

3961 Util.log("abort: releasing the process lock") 

3962 lockGCActive.release() 

3963 return True 

3964 else: 

3965 return False 

3966 

3967 

3968def gc(session, srUuid, inBackground, dryRun=False): 

3969 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return 

3970 immediately if inBackground=True. 

3971 

3972 The following algorithm is used: 

3973 1. If we are already GC'ing in this SR, return 

3974 2. If we are already coalescing a VDI pair: 

3975 a. Scan the SR and determine if the VDI pair is GC'able 

3976 b. If the pair is not GC'able, return 

3977 c. If the pair is GC'able, abort coalesce 

3978 3. Scan the SR 

3979 4. If there is nothing to collect, nor to coalesce, return 

3980 5. If there is something to collect, GC all, then goto 3 

3981 6. If there is something to coalesce, coalesce one pair, then goto 3 

3982 """ 

3983 Util.log("=== SR %s: gc ===" % srUuid) 

3984 

3985 signal.signal(signal.SIGTERM, receiveSignal) 

3986 

3987 if inBackground: 

3988 if daemonize(): 3988 ↛ exitline 3988 didn't return from function 'gc', because the condition on line 3988 was never false

3989 # we are now running in the background. Catch & log any errors 

3990 # because there is no other way to propagate them back at this 

3991 # point 

3992 

3993 try: 

3994 _gc(None, srUuid, dryRun) 

3995 except AbortException: 

3996 Util.log("Aborted") 

3997 except Exception: 

3998 Util.logException("gc") 

3999 Util.log("* * * * * SR %s: ERROR\n" % srUuid) 

4000 os._exit(0) 

4001 else: 

4002 _gc(session, srUuid, dryRun, immediate=True) 

4003 

4004 

4005def start_gc(session, sr_uuid): 

4006 """ 

4007 This function is used to try to start a backgrounded GC session by forking 

4008 the current process. If using the systemd version, call start_gc_service() instead. 

4009 """ 

4010 # don't bother if an instance already running (this is just an 

4011 # optimization to reduce the overhead of forking a new process if we 

4012 # don't have to, but the process will check the lock anyways) 

4013 lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid) 

4014 if not lockRunning.acquireNoblock(): 

4015 if should_preempt(session, sr_uuid): 

4016 util.SMlog("Aborting currently-running coalesce of garbage VDI") 

4017 try: 

4018 if not abort(sr_uuid, soft=True): 

4019 util.SMlog("The GC has already been scheduled to re-start") 

4020 except util.CommandException as e: 

4021 if e.code != errno.ETIMEDOUT: 

4022 raise 

4023 util.SMlog('failed to abort the GC') 

4024 else: 

4025 util.SMlog("A GC instance already running, not kicking") 

4026 return 

4027 else: 

4028 lockRunning.release() 

4029 

4030 util.SMlog(f"Starting GC file is {__file__}") 

4031 subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'], 

4032 stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

4033 

4034def start_gc_service(sr_uuid, wait=False): 

4035 """ 

4036 This starts the templated systemd service which runs GC on the given SR UUID. 

4037 If the service was already started, this is a no-op. 

4038 

4039 Because the service is a one-shot with RemainAfterExit=no, when called with 

4040 wait=True this will run the service synchronously and will not return until the 

4041 run has finished. This is used to force a run of the GC instead of just kicking it 

4042 in the background. 

4043 """ 

4044 sr_uuid_esc = sr_uuid.replace("-", "\\x2d") 

4045 util.SMlog(f"Kicking SMGC@{sr_uuid}...") 

4046 cmd=[ "/usr/bin/systemctl", "--quiet" ] 

4047 if not wait: 4047 ↛ 4049line 4047 didn't jump to line 4049, because the condition on line 4047 was never false

4048 cmd.append("--no-block") 

4049 cmd += ["start", f"SMGC@{sr_uuid_esc}"] 

4050 subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

4051 

4052 

4053def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False): 

4054 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure 

4055 the SR lock is held. 

4056 The following algorithm is used: 

4057 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce 

4058 2. Scan the SR 

4059 3. GC 

4060 4. return 

4061 """ 

4062 Util.log("=== SR %s: gc_force ===" % srUuid) 

4063 init(srUuid) 

4064 sr = SR.getInstance(srUuid, session, lockSR, True) 

4065 if not lockGCActive.acquireNoblock(): 

4066 abort(srUuid) 

4067 else: 

4068 Util.log("Nothing was running, clear to proceed") 

4069 

4070 if force: 

4071 Util.log("FORCED: will continue even if there are VHD errors") 

4072 sr.scanLocked(force) 

4073 sr.cleanupCoalesceJournals() 

4074 

4075 try: 

4076 sr.cleanupCache() 

4077 sr.garbageCollect(dryRun) 

4078 finally: 

4079 sr.cleanup() 

4080 sr.logFilter.logState() 

4081 lockGCActive.release() 

4082 

4083 

4084def get_state(srUuid): 

4085 """Return whether GC/coalesce is currently running or not. This asks systemd for 

4086 the state of the templated SMGC service and will return True if it is "activating" 

4087 or "running" (for completeness, as in practice it will never achieve the latter state) 

4088 """ 

4089 sr_uuid_esc = srUuid.replace("-", "\\x2d") 

4090 cmd=[ "/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"] 

4091 result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

4092 state = result.stdout.decode('utf-8').rstrip() 

4093 if state == "activating" or state == "running": 

4094 return True 

4095 return False 

4096 

4097 

4098def should_preempt(session, srUuid): 

4099 sr = SR.getInstance(srUuid, session) 

4100 entries = sr.journaler.getAll(VDI.JRN_COALESCE) 

4101 if len(entries) == 0: 

4102 return False 

4103 elif len(entries) > 1: 

4104 raise util.SMException("More than one coalesce entry: " + str(entries)) 

4105 sr.scanLocked() 

4106 coalescedUuid = entries.popitem()[0] 

4107 garbage = sr.findGarbage() 

4108 for vdi in garbage: 

4109 if vdi.uuid == coalescedUuid: 

4110 return True 

4111 return False 

4112 

4113 

4114def get_coalesceable_leaves(session, srUuid, vdiUuids): 

4115 coalesceable = [] 

4116 sr = SR.getInstance(srUuid, session) 

4117 sr.scanLocked() 

4118 for uuid in vdiUuids: 

4119 vdi = sr.getVDI(uuid) 

4120 if not vdi: 

4121 raise util.SMException("VDI %s not found" % uuid) 

4122 if vdi.isLeafCoalesceable(): 

4123 coalesceable.append(uuid) 

4124 return coalesceable 

4125 

4126 

4127def cache_cleanup(session, srUuid, maxAge): 

4128 sr = SR.getInstance(srUuid, session) 

4129 return sr.cleanupCache(maxAge) 

4130 

4131 

4132def debug(sr_uuid, cmd, vdi_uuid): 

4133 Util.log("Debug command: %s" % cmd) 

4134 sr = SR.getInstance(sr_uuid, None) 

4135 if not isinstance(sr, LVHDSR): 

4136 print("Error: not an LVHD SR") 

4137 return 

4138 sr.scanLocked() 

4139 vdi = sr.getVDI(vdi_uuid) 

4140 if not vdi: 

4141 print("Error: VDI %s not found") 

4142 return 

4143 print("Running %s on SR %s" % (cmd, sr)) 

4144 print("VDI before: %s" % vdi) 

4145 if cmd == "activate": 

4146 vdi._activate() 

4147 print("VDI file: %s" % vdi.path) 

4148 if cmd == "deactivate": 

4149 ns = lvhdutil.NS_PREFIX_LVM + sr.uuid 

4150 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False) 

4151 if cmd == "inflate": 

4152 vdi.inflateFully() 

4153 sr.cleanup() 

4154 if cmd == "deflate": 

4155 vdi.deflate() 

4156 sr.cleanup() 

4157 sr.scanLocked() 

4158 print("VDI after: %s" % vdi) 

4159 

4160 

4161def abort_optional_reenable(uuid): 

4162 print("Disabling GC/coalesce for %s" % uuid) 

4163 ret = _abort(uuid) 

4164 input("Press enter to re-enable...") 

4165 print("GC/coalesce re-enabled") 

4166 lockGCRunning.release() 

4167 if ret: 

4168 lockGCActive.release() 

4169 

4170 

4171############################################################################## 

4172# 

4173# CLI 

4174# 

4175def main(): 

4176 action = "" 

4177 uuid = "" 

4178 background = False 

4179 force = False 

4180 dryRun = False 

4181 debug_cmd = "" 

4182 vdi_uuid = "" 

4183 shortArgs = "gGc:aqxu:bfdt:v:" 

4184 longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable", 

4185 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="] 

4186 

4187 try: 

4188 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs) 

4189 except getopt.GetoptError: 

4190 usage() 

4191 for o, a in opts: 

4192 if o in ("-g", "--gc"): 

4193 action = "gc" 

4194 if o in ("-G", "--gc_force"): 

4195 action = "gc_force" 

4196 if o in ("-c", "--clean_cache"): 

4197 action = "clean_cache" 

4198 maxAge = int(a) 

4199 if o in ("-a", "--abort"): 

4200 action = "abort" 

4201 if o in ("-q", "--query"): 

4202 action = "query" 

4203 if o in ("-x", "--disable"): 

4204 action = "disable" 

4205 if o in ("-u", "--uuid"): 

4206 uuid = a 

4207 if o in ("-b", "--background"): 

4208 background = True 

4209 if o in ("-f", "--force"): 

4210 force = True 

4211 if o in ("-d", "--dry-run"): 

4212 Util.log("Dry run mode") 

4213 dryRun = True 

4214 if o in ("-t", "--debug"): 

4215 action = "debug" 

4216 debug_cmd = a 

4217 if o in ("-v", "--vdi_uuid"): 

4218 vdi_uuid = a 

4219 

4220 if not action or not uuid: 

4221 usage() 

4222 if action == "debug" and not (debug_cmd and vdi_uuid) or \ 

4223 action != "debug" and (debug_cmd or vdi_uuid): 

4224 usage() 

4225 

4226 if action != "query" and action != "debug": 

4227 print("All output goes to log") 

4228 

4229 if action == "gc": 

4230 gc(None, uuid, background, dryRun) 

4231 elif action == "gc_force": 

4232 gc_force(None, uuid, force, dryRun, True) 

4233 elif action == "clean_cache": 

4234 cache_cleanup(None, uuid, maxAge) 

4235 elif action == "abort": 

4236 abort(uuid) 

4237 elif action == "query": 

4238 print("Currently running: %s" % get_state(uuid)) 

4239 elif action == "disable": 

4240 abort_optional_reenable(uuid) 

4241 elif action == "debug": 

4242 debug(uuid, debug_cmd, vdi_uuid) 

4243 

4244 

4245if __name__ == '__main__': 4245 ↛ 4246line 4245 didn't jump to line 4246, because the condition on line 4245 was never true

4246 main()