
1#!/usr/bin/python3 

2# 

3# Copyright (C) Citrix Systems Inc. 

4# 

5# This program is free software; you can redistribute it and/or modify 

6# it under the terms of the GNU Lesser General Public License as published 

7# by the Free Software Foundation; version 2.1 only. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program; if not, write to the Free Software Foundation, Inc., 

16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 

17# 

18# Script to coalesce and garbage collect VHD-based SRs in the background

19# 

20 

21import os 

22import os.path 

23import sys 

24import time 

25import signal 

26import subprocess 

27import getopt 

28import datetime 

29import traceback 

30import base64 

31import zlib 

32import errno 

33import stat 

34 

35import XenAPI 

36import util 

37import lvutil 

38import vhdutil 

39import lvhdutil 

40import lvmcache 

41import journaler 

42import fjournaler 

43import lock 

44import blktap2 

45import xs_errors 

46from refcounter import RefCounter 

47from ipc import IPCFlag 

48from lvmanager import LVActivator 

49from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG 

50from functools import reduce 

51from time import monotonic as _time 

52 

53try: 

54 from linstorjournaler import LinstorJournaler 

55 from linstorvhdutil import LinstorVhdUtil 

56 from linstorvolumemanager \ 

57 import LinstorVolumeManager, LinstorVolumeManagerError 

58 LINSTOR_AVAILABLE = True 

59except ImportError: 

60 LINSTOR_AVAILABLE = False 

61 

62# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not 

63# possible due to lvhd_stop_using_() not working correctly. However, we leave 

64# this option available through the explicit LEAFCLSC_FORCE flag in the VDI 

65# record for use by the offline tool (which makes the operation safe by pausing 

66# the VM first) 

67AUTO_ONLINE_LEAF_COALESCE_ENABLED = True 

68 

69FLAG_TYPE_ABORT = "abort" # flag to request aborting of GC/coalesce 

70 

71# process "lock", used simply as an indicator that a process already exists

72# that is doing GC/coalesce on this SR (such a process holds the lock, and we

73# detect this by attempting to acquire the lock).

74LOCK_TYPE_RUNNING = "running" 

75lockRunning = None 

76 

77# process "lock" to indicate that the GC process has been activated but may not

78# yet be running; it stops a second process from being started.

79LOCK_TYPE_GC_ACTIVE = "gc_active" 

80lockActive = None 
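
# Illustrative sketch (not part of this module): the "running" lock is used
# purely as an existence indicator, so another process can detect a live
# GC/coalesce by a non-blocking acquire attempt. This assumes lock.Lock
# provides acquireNoblock() and release(); 'srUuid' is a placeholder.
#
#     probe = lock.Lock(LOCK_TYPE_RUNNING, srUuid)
#     if not probe.acquireNoblock():
#         Util.log("GC/coalesce already running on SR %s" % srUuid)
#     else:
#         probe.release()    # nothing was running; don't keep the lock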

81 

82# Default coalesce error rate limit, in messages per minute. A zero value 

83# disables throttling, and a negative value disables error reporting. 

84DEFAULT_COALESCE_ERR_RATE = 1.0 / 60 
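
# Worked example: with the default rate of 1.0 / 60 messages per minute, the
# throttling in VDI._reportCoalesceError() spaces XenCenter messages at least
# (1 / rate) * 60 = 3600 seconds apart, i.e. at most one message per hour.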

85 

86COALESCE_LAST_ERR_TAG = 'last-coalesce-error' 

87COALESCE_ERR_RATE_TAG = 'coalesce-error-rate' 

88VAR_RUN = "/var/run/" 

89SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log" 

90 

91N_RUNNING_AVERAGE = 10 

92 

93NON_PERSISTENT_DIR = '/run/nonpersistent/sm' 

94 

95 

96class AbortException(util.SMException): 

97 pass 

98 

99 

100################################################################################ 

101# 

102# Util 

103# 

104class Util: 

105 RET_RC = 1 

106 RET_STDOUT = 2 

107 RET_STDERR = 4 

108 

109 UUID_LEN = 36 

110 

111 PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024} 

112 

113 def log(text): 

114 util.SMlog(text, ident="SMGC") 

115 log = staticmethod(log) 

116 

117 def logException(tag): 

118 info = sys.exc_info() 

119        if info[0] == SystemExit:

120 # this should not be happening when catching "Exception", but it is 

121 sys.exit(0) 

122 tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2])) 

123 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

124 Util.log(" ***********************") 

125 Util.log(" * E X C E P T I O N *") 

126 Util.log(" ***********************") 

127 Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1])) 

128 Util.log(tb) 

129 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

130 logException = staticmethod(logException) 

131 

132 def doexec(args, expectedRC, inputtext=None, ret=None, log=True): 

133 "Execute a subprocess, then return its return code, stdout, stderr" 

134 proc = subprocess.Popen(args, 

135 stdin=subprocess.PIPE, \ 

136 stdout=subprocess.PIPE, \ 

137 stderr=subprocess.PIPE, \ 

138 shell=True, \ 

139 close_fds=True) 

140 (stdout, stderr) = proc.communicate(inputtext) 

141 stdout = str(stdout) 

142 stderr = str(stderr) 

143 rc = proc.returncode 

144 if log: 

145 Util.log("`%s`: %s" % (args, rc)) 

146 if type(expectedRC) != type([]): 

147 expectedRC = [expectedRC] 

148 if not rc in expectedRC: 

149 reason = stderr.strip() 

150 if stdout.strip(): 

151 reason = "%s (stdout: %s)" % (reason, stdout.strip()) 

152 Util.log("Failed: %s" % reason) 

153 raise util.CommandException(rc, args, reason) 

154 

155 if ret == Util.RET_RC: 

156 return rc 

157 if ret == Util.RET_STDERR: 

158 return stderr 

159 return stdout 

160 doexec = staticmethod(doexec) 
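
    # Usage sketch (illustrative only): doexec() passes the command through a
    # shell (shell=True), so a plain string works; expectedRC may be a single
    # return code or a list of acceptable codes.
    #
    #     rc = Util.doexec("true", 0, ret=Util.RET_RC)       # -> 0
    #     out = Util.doexec("echo hello", [0])               # -> captured stdout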

161 

162 def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut): 

163 """execute func in a separate thread and kill it if abortTest signals 

164 so""" 

165 abortSignaled = abortTest() # check now before we clear resultFlag 

166 resultFlag = IPCFlag(ns) 

167 resultFlag.clearAll() 

168 pid = os.fork() 

169 if pid: 

170 startTime = _time() 

171 try: 

172 while True: 

173 if resultFlag.test("success"): 

174 Util.log(" Child process completed successfully") 

175 resultFlag.clear("success") 

176 return 

177 if resultFlag.test("failure"): 

178 resultFlag.clear("failure") 

179 raise util.SMException("Child process exited with error") 

180 if abortTest() or abortSignaled: 

181 os.killpg(pid, signal.SIGKILL) 

182 raise AbortException("Aborting due to signal") 

183 if timeOut and _time() - startTime > timeOut: 

184 os.killpg(pid, signal.SIGKILL) 

185 resultFlag.clearAll() 

186 raise util.SMException("Timed out") 

187 time.sleep(pollInterval) 

188 finally: 

189 wait_pid = 0 

190 rc = -1 

191 count = 0 

192 while wait_pid == 0 and count < 10: 

193 wait_pid, rc = os.waitpid(pid, os.WNOHANG) 

194 if wait_pid == 0: 

195 time.sleep(2) 

196 count += 1 

197 

198 if wait_pid == 0: 

199 Util.log("runAbortable: wait for process completion timed out") 

200 else: 

201 os.setpgrp() 

202 try: 

203 if func() == ret: 

204 resultFlag.set("success") 

205 else: 

206 resultFlag.set("failure") 

207 except Exception as e: 

208                Util.log("Child process failed with: (%s)" % e)

209 resultFlag.set("failure") 

210                Util.logException("This exception has occurred")

211 os._exit(0) 

212 runAbortable = staticmethod(runAbortable) 
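
    # Usage sketch (illustrative; 'srUuid', 'path', 'offset' and 'length' are
    # placeholders): run a long operation in a forked child, polling the SR's
    # IPCFlag so it can be aborted; a timeOut of 0 disables the timeout check.
    # This mirrors the zeroing call made later in LVHDVDI._increaseSizeVirt.
    #
    #     abortTest = lambda: IPCFlag(srUuid).test(FLAG_TYPE_ABORT)
    #     func = lambda: util.zeroOut(path, offset, length)   # True on success
    #     Util.runAbortable(func, True, srUuid, abortTest, VDI.POLL_INTERVAL, 0)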

213 

214 def num2str(number): 

215 for prefix in ("G", "M", "K"): 

216 if number >= Util.PREFIX[prefix]: 

217 return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix) 

218 return "%s" % number 

219 num2str = staticmethod(num2str) 
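
    # Doctest-style examples (PREFIX uses binary multiples, so 1024 ** 3 is "G"):
    #
    #     >>> Util.num2str(3 * 1024 ** 3)
    #     '3.000G'
    #     >>> Util.num2str(512)
    #     '512'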

220 

221 def numBits(val): 

222 count = 0 

223 while val: 

224 count += val & 1 

225 val = val >> 1 

226 return count 

227 numBits = staticmethod(numBits) 

228 

229 def countBits(bitmap1, bitmap2): 

230 """return bit count in the bitmap produced by ORing the two bitmaps""" 

231 len1 = len(bitmap1) 

232 len2 = len(bitmap2) 

233 lenLong = len1 

234 lenShort = len2 

235 bitmapLong = bitmap1 

236 if len2 > len1: 

237 lenLong = len2 

238 lenShort = len1 

239 bitmapLong = bitmap2 

240 

241 count = 0 

242 for i in range(lenShort): 

243 val = bitmap1[i] | bitmap2[i] 

244 count += Util.numBits(val) 

245 

246 for i in range(i + 1, lenLong): 

247 val = bitmapLong[i] 

248 count += Util.numBits(val) 

249 return count 

250 countBits = staticmethod(countBits) 
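
    # Doctest-style example: bitmaps are byte strings; overlapping bytes are
    # ORed and the tail of the longer bitmap is counted as-is.
    #
    #     >>> Util.countBits(b'\x0f', b'\xf0\x01')   # popcount(0xff) + popcount(0x01)
    #     9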

251 

252 def getThisScript(): 

253 thisScript = util.get_real_path(__file__) 

254 if thisScript.endswith(".pyc"): 

255 thisScript = thisScript[:-1] 

256 return thisScript 

257 getThisScript = staticmethod(getThisScript) 

258 

259 

260################################################################################ 

261# 

262# XAPI 

263# 

264class XAPI: 

265 USER = "root" 

266 PLUGIN_ON_SLAVE = "on-slave" 

267 

268 CONFIG_SM = 0 

269 CONFIG_OTHER = 1 

270 CONFIG_ON_BOOT = 2 

271 CONFIG_ALLOW_CACHING = 3 

272 

273 CONFIG_NAME = { 

274 CONFIG_SM: "sm-config", 

275 CONFIG_OTHER: "other-config", 

276 CONFIG_ON_BOOT: "on-boot", 

277 CONFIG_ALLOW_CACHING: "allow_caching" 

278 } 

279 

280 class LookupError(util.SMException): 

281 pass 

282 

283 def getSession(): 

284 session = XenAPI.xapi_local() 

285 session.xenapi.login_with_password(XAPI.USER, '', '', 'SM') 

286 return session 

287 getSession = staticmethod(getSession) 

288 

289 def __init__(self, session, srUuid): 

290 self.sessionPrivate = False 

291 self.session = session 

292 if self.session is None: 

293 self.session = self.getSession() 

294 self.sessionPrivate = True 

295 self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid) 

296 self.srRecord = self.session.xenapi.SR.get_record(self._srRef) 

297 self.hostUuid = util.get_this_host() 

298 self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid) 

299 self.task = None 

300 self.task_progress = {"coalescable": 0, "done": 0} 

301 

302 def __del__(self): 

303 if self.sessionPrivate: 

304 self.session.xenapi.session.logout() 

305 

306 def isPluggedHere(self): 

307 pbds = self.getAttachedPBDs() 

308 for pbdRec in pbds: 

309 if pbdRec["host"] == self._hostRef: 

310 return True 

311 return False 

312 

313 def poolOK(self): 

314 host_recs = self.session.xenapi.host.get_all_records() 

315 for host_ref, host_rec in host_recs.items(): 

316 if not host_rec["enabled"]: 

317 Util.log("Host %s not enabled" % host_rec["uuid"]) 

318 return False 

319 return True 

320 

321 def isMaster(self): 

322 if self.srRecord["shared"]: 

323 pool = list(self.session.xenapi.pool.get_all_records().values())[0] 

324 return pool["master"] == self._hostRef 

325 else: 

326 pbds = self.getAttachedPBDs() 

327 if len(pbds) < 1: 

328 raise util.SMException("Local SR not attached") 

329 elif len(pbds) > 1: 

330 raise util.SMException("Local SR multiply attached") 

331 return pbds[0]["host"] == self._hostRef 

332 

333 def getAttachedPBDs(self): 

334 """Return PBD records for all PBDs of this SR that are currently 

335 attached""" 

336 attachedPBDs = [] 

337 pbds = self.session.xenapi.PBD.get_all_records() 

338 for pbdRec in pbds.values(): 

339 if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]: 

340 attachedPBDs.append(pbdRec) 

341 return attachedPBDs 

342 

343 def getOnlineHosts(self): 

344 return util.get_online_hosts(self.session) 

345 

346 def ensureInactive(self, hostRef, args): 

347 text = self.session.xenapi.host.call_plugin( \ 

348 hostRef, self.PLUGIN_ON_SLAVE, "multi", args) 

349 Util.log("call-plugin returned: '%s'" % text) 

350 

351 def getRecordHost(self, hostRef): 

352 return self.session.xenapi.host.get_record(hostRef) 

353 

354 def _getRefVDI(self, uuid): 

355 return self.session.xenapi.VDI.get_by_uuid(uuid) 

356 

357 def getRefVDI(self, vdi): 

358 return self._getRefVDI(vdi.uuid) 

359 

360 def getRecordVDI(self, uuid): 

361 try: 

362 ref = self._getRefVDI(uuid) 

363 return self.session.xenapi.VDI.get_record(ref) 

364 except XenAPI.Failure: 

365 return None 

366 

367 def singleSnapshotVDI(self, vdi): 

368 return self.session.xenapi.VDI.snapshot(vdi.getRef(), 

369 {"type": "internal"}) 

370 

371 def forgetVDI(self, srUuid, vdiUuid): 

372 """Forget the VDI, but handle the case where the VDI has already been 

373 forgotten (i.e. ignore errors)""" 

374 try: 

375 vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid) 

376 self.session.xenapi.VDI.forget(vdiRef) 

377 except XenAPI.Failure: 

378 pass 

379 

380 def getConfigVDI(self, vdi, key): 

381 kind = vdi.CONFIG_TYPE[key] 

382 if kind == self.CONFIG_SM: 

383 cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef()) 

384 elif kind == self.CONFIG_OTHER: 

385 cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef()) 

386 elif kind == self.CONFIG_ON_BOOT: 

387 cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef()) 

388 elif kind == self.CONFIG_ALLOW_CACHING: 

389 cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef()) 

390 else: 

391 assert(False) 

392 Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg))) 

393 return cfg 

394 

395 def removeFromConfigVDI(self, vdi, key): 

396 kind = vdi.CONFIG_TYPE[key] 

397 if kind == self.CONFIG_SM: 

398 self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key) 

399 elif kind == self.CONFIG_OTHER: 

400 self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key) 

401 else: 

402 assert(False) 

403 

404 def addToConfigVDI(self, vdi, key, val): 

405 kind = vdi.CONFIG_TYPE[key] 

406 if kind == self.CONFIG_SM: 

407 self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val) 

408 elif kind == self.CONFIG_OTHER: 

409 self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val) 

410 else: 

411 assert(False) 

412 

413 def isSnapshot(self, vdi): 

414 return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef()) 

415 

416 def markCacheSRsDirty(self): 

417 sr_refs = self.session.xenapi.SR.get_all_records_where( \ 

418 'field "local_cache_enabled" = "true"') 

419 for sr_ref in sr_refs: 

420 Util.log("Marking SR %s dirty" % sr_ref) 

421 util.set_dirty(self.session, sr_ref) 

422 

423 def srUpdate(self): 

424 Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"]) 

425 abortFlag = IPCFlag(self.srRecord["uuid"]) 

426 task = self.session.xenapi.Async.SR.update(self._srRef) 

427 cancelTask = True 

428 try: 

429 for i in range(60): 

430 status = self.session.xenapi.task.get_status(task) 

431 if not status == "pending": 

432 Util.log("SR.update_asynch status changed to [%s]" % status) 

433 cancelTask = False 

434 return 

435 if abortFlag.test(FLAG_TYPE_ABORT): 

436 Util.log("Abort signalled during srUpdate, cancelling task...") 

437 try: 

438 self.session.xenapi.task.cancel(task) 

439 cancelTask = False 

440 Util.log("Task cancelled") 

441 except: 

442 pass 

443 return 

444 time.sleep(1) 

445 finally: 

446 if cancelTask: 

447 self.session.xenapi.task.cancel(task) 

448 self.session.xenapi.task.destroy(task) 

449 Util.log("Asynch srUpdate still running, but timeout exceeded.") 

450 

451 def update_task(self): 

452 self.session.xenapi.task.set_other_config( 

453 self.task, 

454 { 

455 "applies_to": self._srRef 

456 }) 

457 total = self.task_progress['coalescable'] + self.task_progress['done'] 

458 if (total > 0): 

459 self.session.xenapi.task.set_progress( 

460 self.task, float(self.task_progress['done']) / total) 
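
    # Worked example: with task_progress = {"coalescable": 3, "done": 1} the
    # progress written to the XAPI task is 1 / (3 + 1) = 0.25; if both counters
    # are zero, no progress is set.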

461 

462 def create_task(self, label, description): 

463 self.task = self.session.xenapi.task.create(label, description) 

464 self.update_task() 

465 

466 def update_task_progress(self, key, value): 

467 self.task_progress[key] = value 

468 if self.task: 

469 self.update_task() 

470 

471 def set_task_status(self, status): 

472 if self.task: 

473 self.session.xenapi.task.set_status(self.task, status) 

474 

475 

476################################################################################ 

477# 

478# VDI 

479# 

480class VDI: 

481 """Object representing a VDI of a VHD-based SR""" 

482 

483 POLL_INTERVAL = 1 

484 POLL_TIMEOUT = 30 

485 DEVICE_MAJOR = 202 

486 DRIVER_NAME_VHD = "vhd" 

487 

488 # config keys & values 

489 DB_VHD_PARENT = "vhd-parent" 

490 DB_VDI_TYPE = "vdi_type" 

491 DB_VHD_BLOCKS = "vhd-blocks" 

492 DB_VDI_PAUSED = "paused" 

493 DB_VDI_RELINKING = "relinking" 

494 DB_VDI_ACTIVATING = "activating" 

495 DB_GC = "gc" 

496 DB_COALESCE = "coalesce" 

497 DB_LEAFCLSC = "leaf-coalesce" # config key 

498 LEAFCLSC_DISABLED = "false" # set by user; means do not leaf-coalesce 

499 LEAFCLSC_FORCE = "force" # set by user; means skip snap-coalesce 

500 LEAFCLSC_OFFLINE = "offline" # set here for informational purposes: means 

501 # no space to snap-coalesce or unable to keep 

502 # up with VDI. This is not used by the SM, it 

503 # might be used by external components. 

504 DB_ONBOOT = "on-boot" 

505 ONBOOT_RESET = "reset" 

506 DB_ALLOW_CACHING = "allow_caching" 

507 

508 CONFIG_TYPE = { 

509 DB_VHD_PARENT: XAPI.CONFIG_SM, 

510 DB_VDI_TYPE: XAPI.CONFIG_SM, 

511 DB_VHD_BLOCKS: XAPI.CONFIG_SM, 

512 DB_VDI_PAUSED: XAPI.CONFIG_SM, 

513 DB_VDI_RELINKING: XAPI.CONFIG_SM, 

514 DB_VDI_ACTIVATING: XAPI.CONFIG_SM, 

515 DB_GC: XAPI.CONFIG_OTHER, 

516 DB_COALESCE: XAPI.CONFIG_OTHER, 

517 DB_LEAFCLSC: XAPI.CONFIG_OTHER, 

518 DB_ONBOOT: XAPI.CONFIG_ON_BOOT, 

519 DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING, 

520 } 

521 

522 LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024 # bytes 

523 LIVE_LEAF_COALESCE_TIMEOUT = 10 # seconds 

524 TIMEOUT_SAFETY_MARGIN = 0.5 # extra margin when calculating 

525 # feasibility of leaf coalesce 

526 

527 JRN_RELINK = "relink" # journal entry type for relinking children 

528 JRN_COALESCE = "coalesce" # to communicate which VDI is being coalesced 

529 JRN_LEAF = "leaf" # used in coalesce-leaf 

530 

531 STR_TREE_INDENT = 4 

532 

533 def __init__(self, sr, uuid, raw): 

534 self.sr = sr 

535 self.scanError = True 

536 self.uuid = uuid 

537 self.raw = raw 

538 self.fileName = "" 

539 self.parentUuid = "" 

540 self.sizeVirt = -1 

541 self._sizeVHD = -1 

542 self.hidden = False 

543 self.parent = None 

544 self.children = [] 

545 self._vdiRef = None 

546 self._clearRef() 

547 

548 @staticmethod 

549 def extractUuid(path): 

550 raise NotImplementedError("Implement in sub class") 

551 

552 def load(self): 

553 """Load VDI info""" 

554 pass # abstract 

555 

556 def getDriverName(self): 

557 return self.DRIVER_NAME_VHD 

558 

559 def getRef(self): 

560 if self._vdiRef is None: 

561 self._vdiRef = self.sr.xapi.getRefVDI(self) 

562 return self._vdiRef 

563 

564 def getConfig(self, key, default=None): 

565 config = self.sr.xapi.getConfigVDI(self, key) 

566        if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING:

567 val = config 

568 else: 

569 val = config.get(key) 

570 if val: 

571 return val 

572 return default 

573 

574 def setConfig(self, key, val): 

575 self.sr.xapi.removeFromConfigVDI(self, key) 

576 self.sr.xapi.addToConfigVDI(self, key, val) 

577 Util.log("Set %s = %s for %s" % (key, val, self)) 

578 

579 def delConfig(self, key): 

580 self.sr.xapi.removeFromConfigVDI(self, key) 

581 Util.log("Removed %s from %s" % (key, self)) 

582 

583 def ensureUnpaused(self): 

584 if self.getConfig(self.DB_VDI_PAUSED) == "true": 

585 Util.log("Unpausing VDI %s" % self) 

586 self.unpause() 

587 

588 def pause(self, failfast=False): 

589 if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid, 

590 self.uuid, failfast): 

591 raise util.SMException("Failed to pause VDI %s" % self) 

592 

593 def _report_tapdisk_unpause_error(self): 

594 try: 

595 xapi = self.sr.xapi.session.xenapi 

596 sr_ref = xapi.SR.get_by_uuid(self.sr.uuid) 

597 msg_name = "failed to unpause tapdisk" 

598 msg_body = "Failed to unpause tapdisk for VDI %s, " \ 

599 "VMs using this tapdisk have lost access " \ 

600 "to the corresponding disk(s)" % self.uuid 

601 xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body) 

602 except Exception as e: 

603 util.SMlog("failed to generate message: %s" % e) 

604 

605 def unpause(self): 

606 if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid, 

607 self.uuid): 

608 self._report_tapdisk_unpause_error() 

609 raise util.SMException("Failed to unpause VDI %s" % self) 

610 

611 def refresh(self, ignoreNonexistent=True): 

612 """Pause-unpause in one step""" 

613 self.sr.lock() 

614 try: 

615 try: 

616 if not blktap2.VDI.tap_refresh(self.sr.xapi.session, 616 ↛ 618line 616 didn't jump to line 618, because the condition on line 616 was never true

617 self.sr.uuid, self.uuid): 

618 self._report_tapdisk_unpause_error() 

619 raise util.SMException("Failed to refresh %s" % self) 

620 except XenAPI.Failure as e: 

621 if util.isInvalidVDI(e) and ignoreNonexistent: 

622 Util.log("VDI %s not found, ignoring" % self) 

623 return 

624 raise 

625 finally: 

626 self.sr.unlock() 

627 

628 def isSnapshot(self): 

629 return self.sr.xapi.isSnapshot(self) 

630 

631 def isAttachedRW(self): 

632 return util.is_attached_rw( 

633 self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef())) 

634 

635 def getVHDBlocks(self): 

636 val = self.updateBlockInfo() 

637 bitmap = zlib.decompress(base64.b64decode(val)) 

638 return bitmap 

639 

640 def isCoalesceable(self): 

641 """A VDI is coalesceable if it has no siblings and is not a leaf""" 

642 return not self.scanError and \ 

643 self.parent and \ 

644 len(self.parent.children) == 1 and \ 

645 self.hidden and \ 

646 len(self.children) > 0 

647 

648 def isLeafCoalesceable(self): 

649 """A VDI is leaf-coalesceable if it has no siblings and is a leaf""" 

650 return not self.scanError and \ 

651 self.parent and \ 

652 len(self.parent.children) == 1 and \ 

653 not self.hidden and \ 

654 len(self.children) == 0 

655 

656 def canLiveCoalesce(self, speed): 

657 """Can we stop-and-leaf-coalesce this VDI? The VDI must be 

658 isLeafCoalesceable() already""" 

659 feasibleSize = False 

660 allowedDownTime = \ 

661 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT 

662 if speed: 

663 feasibleSize = \ 

664 self.getSizeVHD() // speed < allowedDownTime 

665 else: 

666 feasibleSize = \ 

667 self.getSizeVHD() < self.LIVE_LEAF_COALESCE_MAX_SIZE 

668 

669 return (feasibleSize or 

670 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE) 
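
    # Worked example of the feasibility test above: allowedDownTime is
    # 0.5 * 10 = 5 seconds. With a measured speed of 100 MiB/s, a 400 MiB leaf
    # needs about 4 s and passes; with no speed sample, only leaves smaller than
    # LIVE_LEAF_COALESCE_MAX_SIZE (20 MiB) pass. LEAFCLSC_FORCE overrides both.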

671 

672 def getAllPrunable(self): 

673 if len(self.children) == 0: # base case 

674 # it is possible to have a hidden leaf that was recently coalesced 

675 # onto its parent, its children already relinked but not yet 

676 # reloaded - in which case it may not be garbage collected yet: 

677 # some tapdisks could still be using the file. 

678 if self.sr.journaler.get(self.JRN_RELINK, self.uuid): 

679 return [] 

680 if not self.scanError and self.hidden: 

681 return [self] 

682 return [] 

683 

684 thisPrunable = True 

685 vdiList = [] 

686 for child in self.children: 

687 childList = child.getAllPrunable() 

688 vdiList.extend(childList) 

689 if child not in childList: 

690 thisPrunable = False 

691 

692        # We can destroy the current VDI if all children are hidden BUT the

693 # current VDI must be hidden too to do that! 

694 # Example in this case (after a failed live leaf coalesce): 

695 # 

696 # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees): 

697 # SMGC: [32436] b5458d61(1.000G/4.127M) 

698 # SMGC: [32436] *OLD_b545(1.000G/4.129M) 

699 # 

700        # OLD_b545 is hidden and must be removed, but b5458d61 must not be.

701        # Normally the delete action is not executed from this function but

702        # from `_liveLeafCoalesce`.

703 

704 if not self.scanError and not self.hidden and thisPrunable: 

705 vdiList.append(self) 

706 return vdiList 

707 

708 def getSizeVHD(self): 

709 return self._sizeVHD 

710 

711 def getTreeRoot(self): 

712 "Get the root of the tree that self belongs to" 

713 root = self 

714 while root.parent: 

715 root = root.parent 

716 return root 

717 

718 def getTreeHeight(self): 

719 "Get the height of the subtree rooted at self" 

720 if len(self.children) == 0: 

721 return 1 

722 

723 maxChildHeight = 0 

724 for child in self.children: 

725 childHeight = child.getTreeHeight() 

726 if childHeight > maxChildHeight: 

727 maxChildHeight = childHeight 

728 

729 return maxChildHeight + 1 

730 

731 def getAllLeaves(self): 

732 "Get all leaf nodes in the subtree rooted at self" 

733 if len(self.children) == 0: 

734 return [self] 

735 

736 leaves = [] 

737 for child in self.children: 

738 leaves.extend(child.getAllLeaves()) 

739 return leaves 

740 

741 def updateBlockInfo(self): 

742 val = base64.b64encode(self._queryVHDBlocks()).decode() 

743 self.setConfig(VDI.DB_VHD_BLOCKS, val) 

744 return val 

745 

746 def rename(self, uuid): 

747 "Rename the VDI file" 

748 assert(not self.sr.vdis.get(uuid)) 

749 self._clearRef() 

750 oldUuid = self.uuid 

751 self.uuid = uuid 

752 self.children = [] 

753 # updating the children themselves is the responsibility of the caller 

754 del self.sr.vdis[oldUuid] 

755 self.sr.vdis[self.uuid] = self 

756 

757 def delete(self): 

758 "Physically delete the VDI" 

759 lock.Lock.cleanup(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) 

760 lock.Lock.cleanupAll(self.uuid) 

761 self._clear() 

762 

763 def __str__(self): 

764 strHidden = "" 

765        if self.hidden:

766 strHidden = "*" 

767 strSizeVirt = "?" 

768        if self.sizeVirt > 0:

769 strSizeVirt = Util.num2str(self.sizeVirt) 

770 strSizeVHD = "?" 

771        if self._sizeVHD > 0:

772 strSizeVHD = "/%s" % Util.num2str(self._sizeVHD) 

773 strType = "" 

774 if self.raw: 

775 strType = "[RAW]" 

776 strSizeVHD = "" 

777 

778 return "%s%s(%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt, 

779 strSizeVHD, strType) 

780 

781 def validate(self, fast=False): 

782        if not vhdutil.check(self.path, fast=fast):

783 raise util.SMException("VHD %s corrupted" % self) 

784 

785 def _clear(self): 

786 self.uuid = "" 

787 self.path = "" 

788 self.parentUuid = "" 

789 self.parent = None 

790 self._clearRef() 

791 

792 def _clearRef(self): 

793 self._vdiRef = None 

794 

795 def _doCoalesce(self): 

796 """Coalesce self onto parent. Only perform the actual coalescing of 

797 VHD, but not the subsequent relinking. We'll do that as the next step, 

798 after reloading the entire SR in case things have changed while we 

799 were coalescing""" 

800 self.validate() 

801 self.parent.validate(True) 

802 self.parent._increaseSizeVirt(self.sizeVirt) 

803 self.sr._updateSlavesOnResize(self.parent) 

804 self._coalesceVHD(0) 

805 self.parent.validate(True) 

806 #self._verifyContents(0) 

807 self.parent.updateBlockInfo() 

808 

809 def _verifyContents(self, timeOut): 

810 Util.log(" Coalesce verification on %s" % self) 

811 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

812 Util.runAbortable(lambda: self._runTapdiskDiff(), True, 

813 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

814 Util.log(" Coalesce verification succeeded") 

815 

816 def _runTapdiskDiff(self): 

817 cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \ 

818 (self.getDriverName(), self.path, \ 

819 self.parent.getDriverName(), self.parent.path) 

820 Util.doexec(cmd, 0) 

821 return True 

822 

823 def _reportCoalesceError(vdi, ce): 

824 """Reports a coalesce error to XenCenter. 

825 

826        vdi: the VDI object on which the coalesce error occurred

827 ce: the CommandException that was raised""" 

828 

829 msg_name = os.strerror(ce.code) 

830 if ce.code == errno.ENOSPC: 

831 # TODO We could add more information here, e.g. exactly how much 

832 # space is required for the particular coalesce, as well as actions 

833 # to be taken by the user and consequences of not taking these 

834 # actions. 

835            msg_body = 'Ran out of space while coalescing.'

836 elif ce.code == errno.EIO: 

837 msg_body = 'I/O error while coalescing.' 

838 else: 

839 msg_body = '' 

840 util.SMlog('Coalesce failed on SR %s: %s (%s)' 

841 % (vdi.sr.uuid, msg_name, msg_body)) 

842 

843 # Create a XenCenter message, but don't spam. 

844 xapi = vdi.sr.xapi.session.xenapi 

845 sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid) 

846 oth_cfg = xapi.SR.get_other_config(sr_ref) 

847 if COALESCE_ERR_RATE_TAG in oth_cfg: 

848 coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG]) 

849 else: 

850 coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE 

851 

852 xcmsg = False 

853 if coalesce_err_rate == 0: 

854 xcmsg = True 

855 elif coalesce_err_rate > 0: 

856 now = datetime.datetime.now() 

857 sm_cfg = xapi.SR.get_sm_config(sr_ref) 

858 if COALESCE_LAST_ERR_TAG in sm_cfg: 

859 # seconds per message (minimum distance in time between two 

860 # messages in seconds) 

861 spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60) 

862 last = datetime.datetime.fromtimestamp( 

863 float(sm_cfg[COALESCE_LAST_ERR_TAG])) 

864 if now - last >= spm: 

865 xapi.SR.remove_from_sm_config(sr_ref, 

866 COALESCE_LAST_ERR_TAG) 

867 xcmsg = True 

868 else: 

869 xcmsg = True 

870 if xcmsg: 

871 xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG, 

872 str(now.strftime('%s'))) 

873 if xcmsg: 

874 xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body) 

875 _reportCoalesceError = staticmethod(_reportCoalesceError) 

876 

877 def _doCoalesceVHD(vdi): 

878 try: 

879 

880 startTime = time.time() 

881 vhdSize = vdi.getSizeVHD() 

882 vhdutil.coalesce(vdi.path) 

883 endTime = time.time() 

884 vdi.sr.recordStorageSpeed(startTime, endTime, vhdSize) 

885 except util.CommandException as ce: 

886 # We use try/except for the following piece of code because it runs 

887 # in a separate process context and errors will not be caught and 

888 # reported by anyone. 

889 try: 

890 # Report coalesce errors back to user via XC 

891 VDI._reportCoalesceError(vdi, ce) 

892 except Exception as e: 

893 util.SMlog('failed to create XenCenter message: %s' % e) 

894 raise ce 

895 except: 

896 raise 

897 _doCoalesceVHD = staticmethod(_doCoalesceVHD) 

898 

899 def _vdi_is_raw(self, vdi_path): 

900 """ 

901 Given path to vdi determine if it is raw 

902 """ 

903 uuid = self.extractUuid(vdi_path) 

904 return self.sr.vdis[uuid].raw 

905 

906 def _coalesceVHD(self, timeOut): 

907 Util.log(" Running VHD coalesce on %s" % self) 

908        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)

909 try: 

910 util.fistpoint.activate_custom_fn( 

911 "cleanup_coalesceVHD_inject_failure", 

912 util.inject_failure) 

913 Util.runAbortable(lambda: VDI._doCoalesceVHD(self), None, 

914 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

915 except: 

916            # An exception at this phase could indicate a failure of the VHD coalesce

917            # or that runAbortable killed the coalesce because timeOut expired.

918            # Try a repair and re-raise the exception.

919 parent = "" 

920 try: 

921                parent = vhdutil.getParent(self.path, lambda x: x.strip())

922 if not self._vdi_is_raw(parent): 

923 # Repair error is logged and ignored. Error reraised later 

924 util.SMlog('Coalesce failed on %s, attempting repair on ' \ 

925 'parent %s' % (self.uuid, parent)) 

926 vhdutil.repair(parent) 

927 except Exception as e: 

928 util.SMlog('(error ignored) Failed to repair parent %s ' \ 

929 'after failed coalesce on %s, err: %s' % 

930 (parent, self.path, e)) 

931 raise 

932 

933 util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid) 

934 

935 def _relinkSkip(self): 

936 """Relink children of this VDI to point to the parent of this VDI""" 

937 abortFlag = IPCFlag(self.sr.uuid) 

938 for child in self.children: 

939            if abortFlag.test(FLAG_TYPE_ABORT):

940 raise AbortException("Aborting due to signal") 

941 Util.log(" Relinking %s from %s to %s" % \ 

942 (child, self, self.parent)) 

943 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid) 

944 child._setParent(self.parent) 

945 self.children = [] 

946 

947 def _reloadChildren(self, vdiSkip): 

948 """Pause & unpause all VDIs in the subtree to cause blktap to reload 

949 the VHD metadata for this file in any online VDI""" 

950 abortFlag = IPCFlag(self.sr.uuid) 

951 for child in self.children: 

952 if child == vdiSkip: 

953 continue 

954            if abortFlag.test(FLAG_TYPE_ABORT):

955 raise AbortException("Aborting due to signal") 

956 Util.log(" Reloading VDI %s" % child) 

957 child._reload() 

958 

959 def _reload(self): 

960 """Pause & unpause to cause blktap to reload the VHD metadata""" 

961        for child in self.children:

962 child._reload() 

963 

964 # only leaves can be attached 

965        if len(self.children) == 0:

966 try: 

967 self.delConfig(VDI.DB_VDI_RELINKING) 

968 except XenAPI.Failure as e: 

969 if not util.isInvalidVDI(e): 

970 raise 

971 self.refresh() 

972 

973 def _tagChildrenForRelink(self): 

974 if len(self.children) == 0: 

975 retries = 0 

976 try: 

977 while retries < 15: 

978 retries += 1 

979 if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None: 

980 Util.log("VDI %s is activating, wait to relink" % 

981 self.uuid) 

982 else: 

983 self.setConfig(VDI.DB_VDI_RELINKING, "True") 

984 

985 if self.getConfig(VDI.DB_VDI_ACTIVATING): 

986 self.delConfig(VDI.DB_VDI_RELINKING) 

987 Util.log("VDI %s started activating while tagging" % 

988 self.uuid) 

989 else: 

990 return 

991 time.sleep(2) 

992 

993 raise util.SMException("Failed to tag vdi %s for relink" % self) 

994 except XenAPI.Failure as e: 

995 if not util.isInvalidVDI(e): 

996 raise 

997 

998 for child in self.children: 

999 child._tagChildrenForRelink() 

1000 

1001 def _loadInfoParent(self): 

1002 ret = vhdutil.getParent(self.path, lvhdutil.extractUuid) 

1003 if ret: 

1004 self.parentUuid = ret 

1005 

1006 def _setParent(self, parent): 

1007 vhdutil.setParent(self.path, parent.path, False) 

1008 self.parent = parent 

1009 self.parentUuid = parent.uuid 

1010 parent.children.append(self) 

1011 try: 

1012 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1013 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1014 (self.uuid, self.parentUuid)) 

1015 except: 

1016 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1017 (self.uuid, self.parentUuid)) 

1018 

1019 def _loadInfoHidden(self): 

1020 hidden = vhdutil.getHidden(self.path) 

1021 self.hidden = (hidden != 0) 

1022 

1023 def _setHidden(self, hidden=True): 

1024 vhdutil.setHidden(self.path, hidden) 

1025 self.hidden = hidden 

1026 

1027 def _increaseSizeVirt(self, size, atomic=True): 

1028 """ensure the virtual size of 'self' is at least 'size'. Note that  

1029        resizing a VHD must always be done offline and atomically: the file must

1030 not be open by anyone and no concurrent operations may take place. 

1031 Thus we use the Agent API call for performing paused atomic  

1032 operations. If the caller is already in the atomic context, it must 

1033 call with atomic = False""" 

1034        if self.sizeVirt >= size:

1035 return 

1036 Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \ 

1037 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1038 

1039 msize = vhdutil.getMaxResizeSize(self.path) * 1024 * 1024 

1040 if (size <= msize): 

1041 vhdutil.setSizeVirtFast(self.path, size) 

1042 else: 

1043 if atomic: 

1044 vdiList = self._getAllSubtree() 

1045 self.sr.lock() 

1046 try: 

1047 self.sr.pauseVDIs(vdiList) 

1048 try: 

1049 self._setSizeVirt(size) 

1050 finally: 

1051 self.sr.unpauseVDIs(vdiList) 

1052 finally: 

1053 self.sr.unlock() 

1054 else: 

1055 self._setSizeVirt(size) 

1056 

1057 self.sizeVirt = vhdutil.getSizeVirt(self.path) 

1058 

1059 def _setSizeVirt(self, size): 

1060 """WARNING: do not call this method directly unless all VDIs in the 

1061 subtree are guaranteed to be unplugged (and remain so for the duration 

1062 of the operation): this operation is only safe for offline VHDs""" 

1063 jFile = os.path.join(self.sr.path, self.uuid) 

1064 vhdutil.setSizeVirt(self.path, size, jFile) 

1065 

1066 def _queryVHDBlocks(self): 

1067 return vhdutil.getBlockBitmap(self.path) 

1068 

1069 def _getCoalescedSizeData(self): 

1070 """Get the data size of the resulting VHD if we coalesce self onto 

1071 parent. We calculate the actual size by using the VHD block allocation 

1072 information (as opposed to just adding up the two VHD sizes to get an 

1073 upper bound)""" 

1074 # make sure we don't use stale BAT info from vdi_rec since the child 

1075 # was writable all this time 

1076 self.delConfig(VDI.DB_VHD_BLOCKS) 

1077 blocksChild = self.getVHDBlocks() 

1078 blocksParent = self.parent.getVHDBlocks() 

1079 numBlocks = Util.countBits(blocksChild, blocksParent) 

1080 Util.log("Num combined blocks = %d" % numBlocks) 

1081 sizeData = numBlocks * vhdutil.VHD_BLOCK_SIZE 

1082 assert(sizeData <= self.sizeVirt) 

1083 return sizeData 
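
    # Worked example (assuming the usual 2 MiB VHD block size exposed as
    # vhdutil.VHD_BLOCK_SIZE): 100 distinct blocks in the ORed bitmaps give
    # sizeData = 100 * 2 MiB = 200 MiB, typically well below the naive upper
    # bound obtained by simply adding the two VHD sizes.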

1084 

1085 def _calcExtraSpaceForCoalescing(self): 

1086 sizeData = self._getCoalescedSizeData() 

1087 sizeCoalesced = sizeData + vhdutil.calcOverheadBitmap(sizeData) + \ 

1088 vhdutil.calcOverheadEmpty(self.sizeVirt) 

1089 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1090 return sizeCoalesced - self.parent.getSizeVHD() 

1091 

1092 def _calcExtraSpaceForLeafCoalescing(self): 

1093 """How much extra space in the SR will be required to 

1094 [live-]leaf-coalesce this VDI""" 

1095 # the space requirements are the same as for inline coalesce 

1096 return self._calcExtraSpaceForCoalescing() 

1097 

1098 def _calcExtraSpaceForSnapshotCoalescing(self): 

1099 """How much extra space in the SR will be required to 

1100 snapshot-coalesce this VDI""" 

1101 return self._calcExtraSpaceForCoalescing() + \ 

1102 vhdutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf 

1103 

1104 def _getAllSubtree(self): 

1105 """Get self and all VDIs in the subtree of self as a flat list""" 

1106 vdiList = [self] 

1107 for child in self.children: 

1108 vdiList.extend(child._getAllSubtree()) 

1109 return vdiList 

1110 

1111 

1112class FileVDI(VDI): 

1113 """Object representing a VDI in a file-based SR (EXT or NFS)""" 

1114 

1115 @staticmethod 

1116 def extractUuid(path): 

1117 path = os.path.basename(path.strip()) 

1118        if not (path.endswith(vhdutil.FILE_EXTN_VHD) or \

1119 path.endswith(vhdutil.FILE_EXTN_RAW)): 

1120 return None 

1121 uuid = path.replace(vhdutil.FILE_EXTN_VHD, "").replace( \ 

1122 vhdutil.FILE_EXTN_RAW, "") 

1123 # TODO: validate UUID format 

1124 return uuid 
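
    # Doctest-style example (hypothetical path; assumes FILE_EXTN_VHD == ".vhd"):
    #
    #     >>> FileVDI.extractUuid("/sr-mount/1234abcd-0000-0000-0000-00000000cafe.vhd")
    #     '1234abcd-0000-0000-0000-00000000cafe'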

1125 

1126 def __init__(self, sr, uuid, raw): 

1127 VDI.__init__(self, sr, uuid, raw) 

1128        if self.raw:

1129 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_RAW) 

1130 else: 

1131 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) 

1132 

1133 def load(self, info=None): 

1134 if not info: 

1135 if not util.pathexists(self.path): 

1136 raise util.SMException("%s not found" % self.path) 

1137 try: 

1138 info = vhdutil.getVHDInfo(self.path, self.extractUuid) 

1139 except util.SMException: 

1140 Util.log(" [VDI %s: failed to read VHD metadata]" % self.uuid) 

1141 return 

1142 self.parent = None 

1143 self.children = [] 

1144 self.parentUuid = info.parentUuid 

1145 self.sizeVirt = info.sizeVirt 

1146 self._sizeVHD = info.sizePhys 

1147 self.hidden = info.hidden 

1148 self.scanError = False 

1149 self.path = os.path.join(self.sr.path, "%s%s" % \ 

1150 (self.uuid, vhdutil.FILE_EXTN_VHD)) 

1151 

1152 def rename(self, uuid): 

1153 oldPath = self.path 

1154 VDI.rename(self, uuid) 

1155 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) 

1156 self.path = os.path.join(self.sr.path, self.fileName) 

1157 assert(not util.pathexists(self.path)) 

1158 Util.log("Renaming %s -> %s" % (oldPath, self.path)) 

1159 os.rename(oldPath, self.path) 

1160 

1161 def delete(self): 

1162        if len(self.children) > 0:

1163 raise util.SMException("VDI %s has children, can't delete" % \ 

1164 self.uuid) 

1165 try: 

1166 self.sr.lock() 

1167 try: 

1168 os.unlink(self.path) 

1169 self.sr.forgetVDI(self.uuid) 

1170 finally: 

1171 self.sr.unlock() 

1172 except OSError: 

1173 raise util.SMException("os.unlink(%s) failed" % self.path) 

1174 VDI.delete(self) 

1175 

1176 

1177class LVHDVDI(VDI): 

1178 """Object representing a VDI in an LVHD SR""" 

1179 

1180 JRN_ZERO = "zero" # journal entry type for zeroing out end of parent 

1181 DRIVER_NAME_RAW = "aio" 

1182 

1183 def load(self, vdiInfo): 

1184 self.parent = None 

1185 self.children = [] 

1186 self._sizeVHD = -1 

1187 self.scanError = vdiInfo.scanError 

1188 self.sizeLV = vdiInfo.sizeLV 

1189 self.sizeVirt = vdiInfo.sizeVirt 

1190 self.fileName = vdiInfo.lvName 

1191 self.lvActive = vdiInfo.lvActive 

1192 self.lvOpen = vdiInfo.lvOpen 

1193 self.lvReadonly = vdiInfo.lvReadonly 

1194 self.hidden = vdiInfo.hidden 

1195 self.parentUuid = vdiInfo.parentUuid 

1196 self.path = os.path.join(self.sr.path, self.fileName) 

1197 

1198 @staticmethod 

1199 def extractUuid(path): 

1200 return lvhdutil.extractUuid(path) 

1201 

1202 def getDriverName(self): 

1203 if self.raw: 

1204 return self.DRIVER_NAME_RAW 

1205 return self.DRIVER_NAME_VHD 

1206 

1207 def inflate(self, size): 

1208 """inflate the LV containing the VHD to 'size'""" 

1209 if self.raw: 

1210 return 

1211 self._activate() 

1212 self.sr.lock() 

1213 try: 

1214 lvhdutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, size) 

1215 util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid) 

1216 finally: 

1217 self.sr.unlock() 

1218 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1219 self._sizeVHD = -1 

1220 

1221 def deflate(self): 

1222 """deflate the LV containing the VHD to minimum""" 

1223 if self.raw: 

1224 return 

1225 self._activate() 

1226 self.sr.lock() 

1227 try: 

1228 lvhdutil.deflate(self.sr.lvmCache, self.fileName, self.getSizeVHD()) 

1229 finally: 

1230 self.sr.unlock() 

1231 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1232 self._sizeVHD = -1 

1233 

1234 def inflateFully(self): 

1235 self.inflate(lvhdutil.calcSizeVHDLV(self.sizeVirt)) 

1236 

1237 def inflateParentForCoalesce(self): 

1238 """Inflate the parent only as much as needed for the purposes of 

1239 coalescing""" 

1240 if self.parent.raw: 

1241 return 

1242 inc = self._calcExtraSpaceForCoalescing() 

1243 if inc > 0: 

1244 util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid) 

1245 self.parent.inflate(self.parent.sizeLV + inc) 

1246 

1247 def updateBlockInfo(self): 

1248 if not self.raw: 

1249 return VDI.updateBlockInfo(self) 

1250 

1251 def rename(self, uuid): 

1252 oldUuid = self.uuid 

1253 oldLVName = self.fileName 

1254 VDI.rename(self, uuid) 

1255 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + self.uuid 

1256 if self.raw: 

1257 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + self.uuid 

1258 self.path = os.path.join(self.sr.path, self.fileName) 

1259 assert(not self.sr.lvmCache.checkLV(self.fileName)) 

1260 

1261 self.sr.lvmCache.rename(oldLVName, self.fileName) 

1262 if self.sr.lvActivator.get(oldUuid, False): 

1263 self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False) 

1264 

1265 ns = lvhdutil.NS_PREFIX_LVM + self.sr.uuid 

1266 (cnt, bcnt) = RefCounter.check(oldUuid, ns) 

1267 RefCounter.set(self.uuid, cnt, bcnt, ns) 

1268 RefCounter.reset(oldUuid, ns) 

1269 

1270 def delete(self): 

1271 if len(self.children) > 0: 

1272 raise util.SMException("VDI %s has children, can't delete" % \ 

1273 self.uuid) 

1274 self.sr.lock() 

1275 try: 

1276 self.sr.lvmCache.remove(self.fileName) 

1277 self.sr.forgetVDI(self.uuid) 

1278 finally: 

1279 self.sr.unlock() 

1280 RefCounter.reset(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) 

1281 VDI.delete(self) 

1282 

1283 def getSizeVHD(self): 

1284 if self._sizeVHD == -1: 

1285 self._loadInfoSizeVHD() 

1286 return self._sizeVHD 

1287 

1288 def _loadInfoSizeVHD(self): 

1289 """Get the physical utilization of the VHD file. We do it individually 

1290 (and not using the VHD batch scanner) as an optimization: this info is 

1291        relatively expensive and we need it only for VDIs involved in

1292 coalescing.""" 

1293 if self.raw: 

1294 return 

1295 self._activate() 

1296 self._sizeVHD = vhdutil.getSizePhys(self.path) 

1297 if self._sizeVHD <= 0: 

1298 raise util.SMException("phys size of %s = %d" % \ 

1299 (self, self._sizeVHD)) 

1300 

1301 def _loadInfoHidden(self): 

1302 if self.raw: 

1303 self.hidden = self.sr.lvmCache.getHidden(self.fileName) 

1304 else: 

1305 VDI._loadInfoHidden(self) 

1306 

1307 def _setHidden(self, hidden=True): 

1308 if self.raw: 

1309 self.sr.lvmCache.setHidden(self.fileName, hidden) 

1310 self.hidden = hidden 

1311 else: 

1312 VDI._setHidden(self, hidden) 

1313 

1314 def __str__(self): 

1315 strType = "VHD" 

1316 if self.raw: 

1317 strType = "RAW" 

1318 strHidden = "" 

1319 if self.hidden: 

1320 strHidden = "*" 

1321 strSizeVHD = "" 

1322 if self._sizeVHD > 0: 

1323 strSizeVHD = Util.num2str(self._sizeVHD) 

1324 strActive = "n" 

1325 if self.lvActive: 

1326 strActive = "a" 

1327 if self.lvOpen: 

1328 strActive += "o" 

1329 return "%s%s[%s](%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType, 

1330 Util.num2str(self.sizeVirt), strSizeVHD, 

1331 Util.num2str(self.sizeLV), strActive) 

1332 

1333 def validate(self, fast=False): 

1334 if not self.raw: 

1335 VDI.validate(self, fast) 

1336 

1337 def _doCoalesce(self): 

1338 """LVHD parents must first be activated, inflated, and made writable""" 

1339 try: 

1340 self._activateChain() 

1341 self.sr.lvmCache.setReadonly(self.parent.fileName, False) 

1342 self.parent.validate() 

1343 self.inflateParentForCoalesce() 

1344 VDI._doCoalesce(self) 

1345 finally: 

1346 self.parent._loadInfoSizeVHD() 

1347 self.parent.deflate() 

1348 self.sr.lvmCache.setReadonly(self.parent.fileName, True) 

1349 

1350 def _setParent(self, parent): 

1351 self._activate() 

1352 if self.lvReadonly: 

1353 self.sr.lvmCache.setReadonly(self.fileName, False) 

1354 

1355 try: 

1356 vhdutil.setParent(self.path, parent.path, parent.raw) 

1357 finally: 

1358 if self.lvReadonly: 

1359 self.sr.lvmCache.setReadonly(self.fileName, True) 

1360 self._deactivate() 

1361 self.parent = parent 

1362 self.parentUuid = parent.uuid 

1363 parent.children.append(self) 

1364 try: 

1365 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1366 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1367 (self.uuid, self.parentUuid)) 

1368 except: 

1369 Util.log("Failed to update the vhd-parent with %s for child %s" % \ 

1370 (self.parentUuid, self.uuid)) 

1371 

1372 def _activate(self): 

1373 self.sr.lvActivator.activate(self.uuid, self.fileName, False) 

1374 

1375 def _activateChain(self): 

1376 vdi = self 

1377 while vdi: 

1378 vdi._activate() 

1379 vdi = vdi.parent 

1380 

1381 def _deactivate(self): 

1382 self.sr.lvActivator.deactivate(self.uuid, False) 

1383 

1384 def _increaseSizeVirt(self, size, atomic=True): 

1385 "ensure the virtual size of 'self' is at least 'size'" 

1386 self._activate() 

1387 if not self.raw: 

1388 VDI._increaseSizeVirt(self, size, atomic) 

1389 return 

1390 

1391 # raw VDI case 

1392 offset = self.sizeLV 

1393 if self.sizeVirt < size: 

1394 oldSize = self.sizeLV 

1395 self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size) 

1396 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV)) 

1397 self.sr.lvmCache.setSize(self.fileName, self.sizeLV) 

1398 offset = oldSize 

1399 unfinishedZero = False 

1400 jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid) 

1401 if jval: 

1402 unfinishedZero = True 

1403 offset = int(jval) 

1404 length = self.sizeLV - offset 

1405 if not length: 

1406 return 

1407 

1408 if unfinishedZero: 

1409 Util.log(" ==> Redoing unfinished zeroing out") 

1410 else: 

1411 self.sr.journaler.create(self.JRN_ZERO, self.uuid, \ 

1412 str(offset)) 

1413 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1414 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1415 func = lambda: util.zeroOut(self.path, offset, length) 

1416 Util.runAbortable(func, True, self.sr.uuid, abortTest, 

1417 VDI.POLL_INTERVAL, 0) 

1418 self.sr.journaler.remove(self.JRN_ZERO, self.uuid) 

1419 

1420 def _setSizeVirt(self, size): 

1421 """WARNING: do not call this method directly unless all VDIs in the 

1422 subtree are guaranteed to be unplugged (and remain so for the duration 

1423 of the operation): this operation is only safe for offline VHDs""" 

1424 self._activate() 

1425 jFile = lvhdutil.createVHDJournalLV(self.sr.lvmCache, self.uuid, 

1426 vhdutil.MAX_VHD_JOURNAL_SIZE) 

1427 try: 

1428 lvhdutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, 

1429 size, jFile) 

1430 finally: 

1431 lvhdutil.deleteVHDJournalLV(self.sr.lvmCache, self.uuid) 

1432 

1433 def _queryVHDBlocks(self): 

1434 self._activate() 

1435 return VDI._queryVHDBlocks(self) 

1436 

1437 def _calcExtraSpaceForCoalescing(self): 

1438 if self.parent.raw: 

1439 return 0 # raw parents are never deflated in the first place 

1440 sizeCoalesced = lvhdutil.calcSizeVHDLV(self._getCoalescedSizeData()) 

1441 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1442 return sizeCoalesced - self.parent.sizeLV 

1443 

1444 def _calcExtraSpaceForLeafCoalescing(self): 

1445 """How much extra space in the SR will be required to 

1446 [live-]leaf-coalesce this VDI""" 

1447 # we can deflate the leaf to minimize the space requirements 

1448 deflateDiff = self.sizeLV - lvhdutil.calcSizeLV(self.getSizeVHD()) 

1449 return self._calcExtraSpaceForCoalescing() - deflateDiff 

1450 

1451 def _calcExtraSpaceForSnapshotCoalescing(self): 

1452 return self._calcExtraSpaceForCoalescing() + \ 

1453 lvhdutil.calcSizeLV(self.getSizeVHD()) 

1454 

1455 

1456class LinstorVDI(VDI): 

1457 """Object representing a VDI in a LINSTOR SR""" 

1458 

1459 MAX_SIZE = 2 * 1024 * 1024 * 1024 * 1024 # Max VHD size. 

1460 

1461 VOLUME_LOCK_TIMEOUT = 30 

1462 

1463 def load(self, info=None): 

1464 self.parentUuid = info.parentUuid 

1465 self.scanError = True 

1466 self.parent = None 

1467 self.children = [] 

1468 

1469 self.fileName = self.sr._linstor.get_volume_name(self.uuid) 

1470 self.path = self.sr._linstor.build_device_path(self.fileName) 

1471 if not util.pathexists(self.path): 

1472 raise util.SMException( 

1473 '{} of {} not found' 

1474 .format(self.fileName, self.uuid) 

1475 ) 

1476 

1477 if not info: 

1478 try: 

1479 info = self.sr._vhdutil.get_vhd_info(self.uuid) 

1480 except util.SMException: 

1481 Util.log( 

1482 ' [VDI {}: failed to read VHD metadata]'.format(self.uuid) 

1483 ) 

1484 return 

1485 

1486 self.parentUuid = info.parentUuid 

1487 self.sizeVirt = info.sizeVirt 

1488 self._sizeVHD = info.sizePhys 

1489 self.hidden = info.hidden 

1490 self.scanError = False 

1491 

1492 def rename(self, uuid): 

1493 Util.log('Renaming {} -> {} (path={})'.format( 

1494 self.uuid, uuid, self.path 

1495 )) 

1496 self.sr._linstor.update_volume_uuid(self.uuid, uuid) 

1497 VDI.rename(self, uuid) 

1498 

1499 def delete(self): 

1500 if len(self.children) > 0: 

1501 raise util.SMException( 

1502 'VDI {} has children, can\'t delete'.format(self.uuid) 

1503 ) 

1504 self.sr.lock() 

1505 try: 

1506 self.sr._linstor.destroy_volume(self.uuid) 

1507 self.sr.forgetVDI(self.uuid) 

1508 finally: 

1509 self.sr.unlock() 

1510 VDI.delete(self) 

1511 

1512 def pauseVDIs(self, vdiList): 

1513 self.sr._linstor.ensure_volume_list_is_not_locked( 

1514 vdiList, timeout=self.VOLUME_LOCK_TIMEOUT 

1515 ) 

1516 return super(VDI).pauseVDIs(vdiList) 

1517 

1518 def _liveLeafCoalesce(self, vdi): 

1519 self.sr._linstor.ensure_volume_is_not_locked( 

1520 vdi.uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1521 ) 

1522 return super(VDI)._liveLeafCoalesce(vdi) 

1523 

1524 def _relinkSkip(self): 

1525 abortFlag = IPCFlag(self.sr.uuid) 

1526 for child in self.children: 

1527 if abortFlag.test(FLAG_TYPE_ABORT): 

1528 raise AbortException('Aborting due to signal') 

1529 Util.log( 

1530 ' Relinking {} from {} to {}'.format( 

1531 child, self, self.parent 

1532 ) 

1533 ) 

1534 

1535 session = child.sr.xapi.session 

1536 sr_uuid = child.sr.uuid 

1537 vdi_uuid = child.uuid 

1538 try: 

1539 self.sr._linstor.ensure_volume_is_not_locked( 

1540 vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1541 ) 

1542 blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid) 

1543 child._setParent(self.parent) 

1544 finally: 

1545 blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) 

1546 self.children = [] 

1547 

1548 def _setHidden(self, hidden=True): 

1549 HIDDEN_TAG = 'hidden' 

1550 

1551 if self.raw: 

1552 self.sr._linstor.update_volume_metadata(self.uuid, { 

1553 HIDDEN_TAG: hidden 

1554 }) 

1555 self.hidden = hidden 

1556 else: 

1557 VDI._setHidden(self, hidden) 

1558 

1559 def _queryVHDBlocks(self): 

1560 return self.sr._vhdutil.get_block_bitmap(self.uuid) 

1561 

1562################################################################################ 

1563# 

1564# SR 

1565# 

1566class SR: 

1567 class LogFilter: 

1568 def __init__(self, sr): 

1569 self.sr = sr 

1570 self.stateLogged = False 

1571 self.prevState = {} 

1572 self.currState = {} 

1573 

1574 def logState(self): 

1575 changes = "" 

1576 self.currState.clear() 

1577 for vdi in self.sr.vdiTrees: 

1578 self.currState[vdi.uuid] = self._getTreeStr(vdi) 

1579 if not self.prevState.get(vdi.uuid) or \ 

1580 self.prevState[vdi.uuid] != self.currState[vdi.uuid]: 

1581 changes += self.currState[vdi.uuid] 

1582 

1583 for uuid in self.prevState: 

1584 if not self.currState.get(uuid): 

1585 changes += "Tree %s gone\n" % uuid 

1586 

1587 result = "SR %s (%d VDIs in %d VHD trees): " % \ 

1588 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees)) 

1589 

1590 if len(changes) > 0: 

1591 if self.stateLogged: 

1592 result += "showing only VHD trees that changed:" 

1593 result += "\n%s" % changes 

1594 else: 

1595 result += "no changes" 

1596 

1597 for line in result.split("\n"): 

1598 Util.log("%s" % line) 

1599 self.prevState.clear() 

1600 for key, val in self.currState.items(): 

1601 self.prevState[key] = val 

1602 self.stateLogged = True 

1603 

1604 def logNewVDI(self, uuid): 

1605 if self.stateLogged: 

1606 Util.log("Found new VDI when scanning: %s" % uuid) 

1607 

1608 def _getTreeStr(self, vdi, indent=8): 

1609 treeStr = "%s%s\n" % (" " * indent, vdi) 

1610 for child in vdi.children: 

1611 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT) 

1612 return treeStr 

1613 

1614 TYPE_FILE = "file" 

1615 TYPE_LVHD = "lvhd" 

1616 TYPE_LINSTOR = "linstor" 

1617 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR] 

1618 

1619 LOCK_RETRY_INTERVAL = 3 

1620 LOCK_RETRY_ATTEMPTS = 20 

1621 LOCK_RETRY_ATTEMPTS_LOCK = 100 

1622 

1623 SCAN_RETRY_ATTEMPTS = 3 

1624 

1625 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM) 

1626 TMP_RENAME_PREFIX = "OLD_" 

1627 

1628 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline" 

1629 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override" 

1630 

1631 def getInstance(uuid, xapiSession, createLock=True, force=False): 

1632 xapi = XAPI(xapiSession, uuid) 

1633 type = normalizeType(xapi.srRecord["type"]) 

1634 if type == SR.TYPE_FILE: 

1635 return FileSR(uuid, xapi, createLock, force) 

1636 elif type == SR.TYPE_LVHD: 

1637 return LVHDSR(uuid, xapi, createLock, force) 

1638 elif type == SR.TYPE_LINSTOR: 

1639 return LinstorSR(uuid, xapi, createLock, force) 

1640 raise util.SMException("SR type %s not recognized" % type) 

1641 getInstance = staticmethod(getInstance) 

1642 

1643 def __init__(self, uuid, xapi, createLock, force): 

1644 self.logFilter = self.LogFilter(self) 

1645 self.uuid = uuid 

1646 self.path = "" 

1647 self.name = "" 

1648 self.vdis = {} 

1649 self.vdiTrees = [] 

1650 self.journaler = None 

1651 self.xapi = xapi 

1652 self._locked = 0 

1653 self._srLock = None 

1654 if createLock: 

1655 self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, self.uuid) 

1656 else: 

1657 Util.log("Requested no SR locking") 

1658 self.name = self.xapi.srRecord["name_label"] 

1659 self._failedCoalesceTargets = [] 

1660 

1661 if not self.xapi.isPluggedHere(): 

1662 if force: 

1663 Util.log("SR %s not attached on this host, ignoring" % uuid) 

1664 else: 

1665 raise util.SMException("SR %s not attached on this host" % uuid) 

1666 

1667 if force: 

1668 Util.log("Not checking if we are Master (SR %s)" % uuid) 

1669 elif not self.xapi.isMaster(): 

1670 raise util.SMException("This host is NOT master, will not run") 

1671 

1672 def gcEnabled(self, refresh=True): 

1673 if refresh: 

1674 self.xapi.srRecord = \ 

1675 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) 

1676 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": 

1677 Util.log("GC is disabled for this SR, abort") 

1678 return False 

1679 return True 

1680 

1681 def scan(self, force=False): 

1682 """Scan the SR and load VDI info for each VDI. If called repeatedly, 

1683 update VDI objects if they already exist""" 

1684 pass # abstract 

1685 

1686 def scanLocked(self, force=False): 

1687 self.lock() 

1688 try: 

1689 self.scan(force) 

1690 finally: 

1691 self.unlock() 

1692 

1693 def getVDI(self, uuid): 

1694 return self.vdis.get(uuid) 

1695 

1696 def hasWork(self): 

1697 if len(self.findGarbage()) > 0: 

1698 return True 

1699 if self.findCoalesceable(): 

1700 return True 

1701 if self.findLeafCoalesceable(): 

1702 return True 

1703 if self.needUpdateBlockInfo(): 

1704 return True 

1705 return False 

1706 

1707 def findCoalesceable(self): 

1708 """Find a coalesceable VDI. Return a vdi that should be coalesced 

1709 (choosing one among all coalesceable candidates according to some 

1710 criteria) or None if there is no VDI that could be coalesced""" 
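# Selection order: a VDI with a pending relink journal entry is returned
# first so the interrupted coalesce can be finished; otherwise candidates
# are grouped by tree height and one from the tallest tree that fits in the
# SR's free space is chosen.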

1711 

1712 candidates = [] 

1713 

1714 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE) 

1715 if srSwitch == "false": 

1716 Util.log("Coalesce disabled for this SR") 

1717 return candidates 

1718 

1719 # finish any VDI for which a relink journal entry exists first 

1720 journals = self.journaler.getAll(VDI.JRN_RELINK) 

1721 for uuid in journals: 

1722 vdi = self.getVDI(uuid) 

1723 if vdi and vdi not in self._failedCoalesceTargets: 

1724 return vdi 

1725 

1726 for vdi in self.vdis.values(): 

1727 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets: 

1728 candidates.append(vdi) 

1729 Util.log("%s is coalescable" % vdi.uuid) 

1730 

1731 self.xapi.update_task_progress("coalescable", len(candidates)) 

1732 

1733 # pick one in the tallest tree 

1734 treeHeight = dict() 

1735 for c in candidates: 

1736 height = c.getTreeRoot().getTreeHeight() 

1737 if treeHeight.get(height): 

1738 treeHeight[height].append(c) 

1739 else: 

1740 treeHeight[height] = [c] 

1741 

1742 freeSpace = self.getFreeSpace() 

1743 heights = list(treeHeight.keys()) 

1744 heights.sort(reverse=True) 

1745 for h in heights: 

1746 for c in treeHeight[h]: 

1747 spaceNeeded = c._calcExtraSpaceForCoalescing() 

1748 if spaceNeeded <= freeSpace: 

1749 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h)) 

1750 return c 

1751 else: 

1752 Util.log("No space to coalesce %s (free space: %d)" % \ 

1753 (c, freeSpace)) 

1754 return None 

1755 

1756 def getSwitch(self, key): 

1757 return self.xapi.srRecord["other_config"].get(key) 

1758 

1759 def forbiddenBySwitch(self, switch, condition, fail_msg): 

1760 srSwitch = self.getSwitch(switch) 

1761 ret = False 

1762 if srSwitch: 

1763 ret = srSwitch == condition 

1764 

1765 if ret: 

1766 Util.log(fail_msg) 

1767 

1768 return ret 

1769 

1770 def leafCoalesceForbidden(self): 

1771 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

1772 "false", 

1773 "Coalesce disabled for this SR") or 

1774 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

1775 VDI.LEAFCLSC_DISABLED, 

1776 "Leaf-coalesce disabled for this SR")) 

1777 

1778 def findLeafCoalesceable(self): 

1779 """Find leaf-coalesceable VDIs in each VHD tree""" 

1780 

1781 candidates = [] 

1782 if self.leafCoalesceForbidden(): 

1783 return candidates 

1784 

1785 self.gatherLeafCoalesceable(candidates) 

1786 

1787 self.xapi.update_task_progress("coalescable", len(candidates)) 

1788 

1789 freeSpace = self.getFreeSpace() 
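# Snapshot-coalesce needs the most extra space; if that does not fit, fall
# back to the smaller live leaf-coalesce requirement, but only when
# canLiveCoalesce() deems the candidate suitable at the measured storage
# speed.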

1790 for candidate in candidates: 

1791 # check the space constraints to see if leaf-coalesce is actually 

1792 # feasible for this candidate 

1793 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing() 

1794 spaceNeededLive = spaceNeeded 

1795 if spaceNeeded > freeSpace: 

1796 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing() 

1797 if candidate.canLiveCoalesce(self.getStorageSpeed()): 

1798 spaceNeeded = spaceNeededLive 

1799 

1800 if spaceNeeded <= freeSpace: 

1801 Util.log("Leaf-coalesce candidate: %s" % candidate) 

1802 return candidate 

1803 else: 

1804 Util.log("No space to leaf-coalesce %s (free space: %d)" % \ 

1805 (candidate, freeSpace)) 

1806 if spaceNeededLive <= freeSpace: 

1807 Util.log("...but enough space if skip snap-coalesce") 

1808 candidate.setConfig(VDI.DB_LEAFCLSC, 

1809 VDI.LEAFCLSC_OFFLINE) 

1810 

1811 return None 

1812 

1813 def gatherLeafCoalesceable(self, candidates): 

1814 for vdi in self.vdis.values(): 

1815 if not vdi.isLeafCoalesceable(): 

1816 continue 

1817 if vdi in self._failedCoalesceTargets: 

1818 continue 

1819 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET: 

1820 Util.log("Skipping reset-on-boot %s" % vdi) 

1821 continue 

1822 if vdi.getConfig(vdi.DB_ALLOW_CACHING): 

1823 Util.log("Skipping allow_caching=true %s" % vdi) 

1824 continue 

1825 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED: 

1826 Util.log("Leaf-coalesce disabled for %s" % vdi) 

1827 continue 

1828 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or 

1829 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE): 

1830 continue 

1831 candidates.append(vdi) 

1832 

1833 def coalesce(self, vdi, dryRun=False): 

1834 """Coalesce vdi onto parent""" 

1835 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent)) 

1836 if dryRun: 

1837 return 

1838 

1839 try: 

1840 self._coalesce(vdi) 

1841 except util.SMException as e: 

1842 if isinstance(e, AbortException): 

1843 self.cleanup() 

1844 raise 

1845 else: 

1846 self._failedCoalesceTargets.append(vdi) 

1847 Util.logException("coalesce") 

1848 Util.log("Coalesce failed, skipping") 

1849 self.cleanup() 

1850 

1851 def coalesceLeaf(self, vdi, dryRun=False): 

1852 """Leaf-coalesce vdi onto parent""" 

1853 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent)) 

1854 if dryRun: 

1855 return 

1856 

1857 try: 

1858 uuid = vdi.uuid 

1859 try: 

1860 # "vdi" object will no longer be valid after this call 

1861 self._coalesceLeaf(vdi) 

1862 finally: 

1863 vdi = self.getVDI(uuid) 

1864 if vdi: 

1865 vdi.delConfig(vdi.DB_LEAFCLSC) 

1866 except AbortException: 

1867 self.cleanup() 

1868 raise 

1869 except (util.SMException, XenAPI.Failure) as e: 

1870 self._failedCoalesceTargets.append(vdi) 

1871 Util.logException("leaf-coalesce") 

1872 Util.log("Leaf-coalesce failed on %s, skipping" % vdi) 

1873 self.cleanup() 

1874 

1875 def garbageCollect(self, dryRun=False): 

1876 vdiList = self.findGarbage() 

1877 Util.log("Found %d VDIs for deletion:" % len(vdiList)) 

1878 for vdi in vdiList: 

1879 Util.log(" %s" % vdi) 

1880 if not dryRun: 

1881 self.deleteVDIs(vdiList) 

1882 self.cleanupJournals(dryRun) 

1883 

1884 def findGarbage(self): 

1885 vdiList = [] 

1886 for vdi in self.vdiTrees: 

1887 vdiList.extend(vdi.getAllPrunable()) 

1888 return vdiList 

1889 

1890 def deleteVDIs(self, vdiList): 

1891 for vdi in vdiList: 

1892 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT): 

1893 raise AbortException("Aborting due to signal") 

1894 Util.log("Deleting unlinked VDI %s" % vdi) 

1895 self.deleteVDI(vdi) 

1896 

1897 def deleteVDI(self, vdi): 

1898 assert(len(vdi.children) == 0) 

1899 del self.vdis[vdi.uuid] 

1900 if vdi.parent: 

1901 vdi.parent.children.remove(vdi) 

1902 if vdi in self.vdiTrees: 

1903 self.vdiTrees.remove(vdi) 

1904 vdi.delete() 

1905 

1906 def forgetVDI(self, vdiUuid): 

1907 self.xapi.forgetVDI(self.uuid, vdiUuid) 

1908 

1909 def pauseVDIs(self, vdiList): 

1910 paused = [] 

1911 failed = False 

1912 for vdi in vdiList: 

1913 try: 

1914 vdi.pause() 

1915 paused.append(vdi) 

1916 except: 

1917 Util.logException("pauseVDIs") 

1918 failed = True 

1919 break 

1920 

1921 if failed: 

1922 self.unpauseVDIs(paused) 

1923 raise util.SMException("Failed to pause VDIs") 

1924 

1925 def unpauseVDIs(self, vdiList): 

1926 failed = False 

1927 for vdi in vdiList: 

1928 try: 

1929 vdi.unpause() 

1930 except: 

1931 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

1932 failed = True 

1933 if failed: 

1934 raise util.SMException("Failed to unpause VDIs") 

1935 

1936 def getFreeSpace(self): 

1937 return 0 

1938 

1939 def cleanup(self): 

1940 Util.log("In cleanup") 

1941 return 

1942 

1943 def __str__(self): 

1944 if self.name: 

1945 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

1946 else: 

1947 ret = "%s" % self.uuid 

1948 return ret 

1949 

1950 def lock(self): 

1951 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort 

1952 signal to avoid deadlocking (trying to acquire the SR lock while the 

1953 lock is held by a process that is trying to abort us)""" 

1954 if not self._srLock: 

1955 return 

1956 

1957 if self._locked == 0: 

1958 abortFlag = IPCFlag(self.uuid) 

1959 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK): 

1960 if self._srLock.acquireNoblock(): 

1961 self._locked += 1 

1962 return 

1963 if abortFlag.test(FLAG_TYPE_ABORT): 

1964 raise AbortException("Abort requested") 

1965 time.sleep(SR.LOCK_RETRY_INTERVAL) 

1966 raise util.SMException("Unable to acquire the SR lock") 

1967 

1968 self._locked += 1 

1969 

1970 def unlock(self): 

1971 if not self._srLock: 

1972 return 

1973 assert(self._locked > 0) 

1974 self._locked -= 1 

1975 if self._locked == 0: 

1976 self._srLock.release() 

1977 

1978 def needUpdateBlockInfo(self): 

1979 for vdi in self.vdis.values(): 

1980 if vdi.scanError or len(vdi.children) == 0: 

1981 continue 

1982 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

1983 return True 

1984 return False 

1985 

1986 def updateBlockInfo(self): 

1987 for vdi in self.vdis.values(): 

1988 if vdi.scanError or len(vdi.children) == 0: 

1989 continue 

1990 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

1991 vdi.updateBlockInfo() 

1992 

1993 def cleanupCoalesceJournals(self): 

1994 """Remove stale coalesce VDI indicators""" 

1995 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

1996 for uuid, jval in entries.items(): 

1997 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

1998 

1999 def cleanupJournals(self, dryRun=False): 

2000 """delete journal entries for non-existing VDIs""" 

2001 for t in [LVHDVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]: 

2002 entries = self.journaler.getAll(t) 

2003 for uuid, jval in entries.items(): 

2004 if self.getVDI(uuid): 

2005 continue 

2006 if t == SR.JRN_CLONE: 

2007 baseUuid, clonUuid = jval.split("_") 

2008 if self.getVDI(baseUuid): 

2009 continue 

2010 Util.log(" Deleting stale '%s' journal entry for %s " 

2011 "(%s)" % (t, uuid, jval)) 

2012 if not dryRun: 

2013 self.journaler.remove(t, uuid) 

2014 

2015 def cleanupCache(self, maxAge=-1): 

2016 return 0 

2017 

2018 def _coalesce(self, vdi): 

2019 if self.journaler.get(vdi.JRN_RELINK, vdi.uuid): 

2020 # this means we had done the actual coalescing already and just 

2021 # need to finish relinking and/or refreshing the children 

2022 Util.log("==> Coalesce apparently already done: skipping") 

2023 else: 

2024 # JRN_COALESCE is used to check which VDI is being coalesced in 

2025 # order to decide whether to abort the coalesce. We remove the 

2026 # journal as soon as the VHD coalesce step is done, because we 

2027 # don't expect the rest of the process to take long 

2028 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1") 

2029 vdi._doCoalesce() 

2030 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid) 

2031 

2032 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid) 

2033 

2034 # we now need to relink the children: lock the SR to prevent ops 

2035 # like SM.clone from manipulating the VDIs we'll be relinking and 

2036 # rescan the SR first in case the children changed since the last 

2037 # scan 

2038 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1") 

2039 

2040 self.lock() 

2041 try: 

2042 vdi.parent._tagChildrenForRelink() 

2043 self.scan() 

2044 vdi._relinkSkip() 

2045 finally: 

2046 self.unlock() 

2047 # Reload the children to leave things consistent 

2048 vdi.parent._reloadChildren(vdi) 

2049 

2050 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid) 

2051 self.deleteVDI(vdi) 

2052 

2053 class CoalesceTracker: 

2054 GRACE_ITERATIONS = 1 

2055 MAX_ITERATIONS_NO_PROGRESS = 3 

2056 MAX_ITERATIONS = 10 

2057 MAX_INCREASE_FROM_MINIMUM = 1.2 

2058 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \ 

2059 " --> Final size {finSize}" 

2060 

2061 def __init__(self, sr): 

2062 self.itsNoProgress = 0 

2063 self.its = 0 

2064 self.minSize = float("inf") 

2065 self.history = [] 

2066 self.reason = "" 

2067 self.startSize = None 

2068 self.finishSize = None 

2069 self.sr = sr 

2070 

2071 def abortCoalesce(self, prevSize, curSize): 

2072 res = False 

2073 

2074 self.its += 1 

2075 self.history.append(self.HISTORY_STRING.format(its=self.its, 

2076 initSize=prevSize, 

2077 finSize=curSize)) 

2078 

2079 self.finishSize = curSize 

2080 

2081 if self.startSize is None: 

2082 self.startSize = prevSize 

2083 

2084 if curSize < self.minSize: 

2085 self.minSize = curSize 

2086 

2087 if prevSize < self.minSize: 

2088 self.minSize = prevSize 

2089 

2090 if prevSize < curSize: 

2091 self.itsNoProgress += 1 

2092 Util.log("No progress, attempt:" 

2093 " {attempt}".format(attempt=self.itsNoProgress)) 

2094 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid) 

2095 

2096 if (not res) and (self.its > self.MAX_ITERATIONS): 

2097 max = self.MAX_ITERATIONS 

2098 self.reason = \ 

2099 "Max iterations ({max}) exceeded".format(max=max) 

2100 res = True 

2101 

2102 if (not res) and (self.itsNoProgress > 

2103 self.MAX_ITERATIONS_NO_PROGRESS): 

2104 max = self.MAX_ITERATIONS_NO_PROGRESS 

2105 self.reason = \ 

2106 "No progress made for {max} iterations".format(max=max) 

2107 res = True 

2108 

2109 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize 

2110 if (self.its > self.GRACE_ITERATIONS and 

2111 (not res) and (curSize > maxSizeFromMin)): 

2112 self.reason = "Unexpected bump in size," \ 

2113 " compared to minimum acheived" 

2114 res = True 

2115 

2116 return res 

2117 

2118 def printReasoning(self): 

2119 Util.log("Aborted coalesce") 

2120 for hist in self.history: 

2121 Util.log(hist) 

2122 Util.log(self.reason) 

2123 Util.log("Starting size was {size}" 

2124 .format(size=self.startSize)) 

2125 Util.log("Final size was {size}" 

2126 .format(size=self.finishSize)) 

2127 Util.log("Minimum size acheived was {size}" 

2128 .format(size=self.minSize)) 

2129 

2130 def _coalesceLeaf(self, vdi): 

2131 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot 

2132 complete due to external changes, namely vdi_delete and vdi_snapshot  

2133 that alter leaf-coalesceability of vdi""" 

2134 tracker = self.CoalesceTracker(self) 

2135 while not vdi.canLiveCoalesce(self.getStorageSpeed()): 

2136 prevSizeVHD = vdi.getSizeVHD() 

2137 if not self._snapshotCoalesce(vdi): 

2138 return False 

2139 if tracker.abortCoalesce(prevSizeVHD, vdi.getSizeVHD()): 

2140 tracker.printReasoning() 

2141 raise util.SMException("VDI {uuid} could not be coalesced" 

2142 .format(uuid=vdi.uuid)) 

2143 return self._liveLeafCoalesce(vdi) 

2144 

2145 def calcStorageSpeed(self, startTime, endTime, vhdSize): 

2146 speed = None 

2147 total_time = endTime - startTime 

2148 if total_time > 0: 

2149 speed = float(vhdSize) / float(total_time) 

2150 return speed 

2151 

2152 def writeSpeedToFile(self, speed): 
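# Append the sample to /var/run/<sr-uuid>.speed_log, keeping only the most
# recent N_RUNNING_AVERAGE entries; getStorageSpeed() averages this window
# to estimate how fast the storage can coalesce data.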

2153 content = [] 

2154 speedFile = None 

2155 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2156 self.lock() 

2157 try: 

2158 Util.log("Writing to file: {myfile}".format(myfile=path)) 

2159 lines = "" 

2160 if not os.path.isfile(path): 

2161 lines = str(speed) + "\n" 

2162 else: 

2163 speedFile = open(path, "r+") 

2164 content = speedFile.readlines() 

2165 content.append(str(speed) + "\n") 

2166 if len(content) > N_RUNNING_AVERAGE: 

2167 del content[0] 

2168 lines = "".join(content) 

2169 

2170 util.atomicFileWrite(path, VAR_RUN, lines) 

2171 finally: 

2172 if speedFile is not None: 

2173 speedFile.close() 

2174 Util.log("Closing file: {myfile}".format(myfile=path)) 

2175 self.unlock() 

2176 

2177 def recordStorageSpeed(self, startTime, endTime, vhdSize): 

2178 speed = self.calcStorageSpeed(startTime, endTime, vhdSize) 

2179 if speed is None: 

2180 return 

2181 

2182 self.writeSpeedToFile(speed) 

2183 

2184 def getStorageSpeed(self): 

2185 speedFile = None 

2186 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2187 self.lock() 

2188 try: 

2189 speed = None 

2190 if os.path.isfile(path): 

2191 speedFile = open(path) 

2192 content = speedFile.readlines() 

2193 try: 

2194 content = [float(i) for i in content] 

2195 except ValueError: 

2196 Util.log("Something bad in the speed log:{log}". 

2197 format(log=content)) 

2198 return speed 

2199 

2200 if len(content): 

2201 speed = sum(content) / float(len(content)) 

2202 if speed <= 0: 

2203 # Defensive, should be impossible. 

2204 Util.log("Bad speed: {speed} calculated for SR: {uuid}". 

2205 format(speed=speed, uuid=self.uuid)) 

2206 speed = None 

2207 else: 

2208 Util.log("Speed file empty for SR: {uuid}". 

2209 format(uuid=self.uuid)) 

2210 else: 

2211 Util.log("Speed log missing for SR: {uuid}". 

2212 format(uuid=self.uuid)) 

2213 return speed 

2214 finally: 

2215 if speedFile is not None: 

2216 speedFile.close() 

2217 self.unlock() 

2218 

2219 def _snapshotCoalesce(self, vdi): 

2220 # Note that because we are not holding any locks here, concurrent SM 

2221 # operations may change this tree under our feet. In particular, vdi 

2222 # can be deleted, or it can be snapshotted. 
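# The approach: take a single snapshot so the leaf's current data becomes a
# new read-only parent node, then coalesce that node into its own parent,
# leaving a nearly empty leaf for the final live leaf-coalesce.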

2223 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED) 

2224 Util.log("Single-snapshotting %s" % vdi) 

2225 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid) 

2226 try: 

2227 ret = self.xapi.singleSnapshotVDI(vdi) 

2228 Util.log("Single-snapshot returned: %s" % ret) 

2229 except XenAPI.Failure as e: 

2230 if util.isInvalidVDI(e): 

2231 Util.log("The VDI appears to have been concurrently deleted") 

2232 return False 

2233 raise 

2234 self.scanLocked() 

2235 tempSnap = vdi.parent 

2236 if not tempSnap.isCoalesceable(): 

2237 Util.log("The VDI appears to have been concurrently snapshotted") 

2238 return False 

2239 Util.log("Coalescing parent %s" % tempSnap) 

2240 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid) 

2241 vhdSize = vdi.getSizeVHD() 

2242 self._coalesce(tempSnap) 

2243 if not vdi.isLeafCoalesceable(): 

2244 Util.log("The VDI tree appears to have been altered since") 

2245 return False 

2246 return True 

2247 

2248 def _liveLeafCoalesce(self, vdi): 

2249 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid) 

2250 self.lock() 

2251 try: 

2252 self.scan() 

2253 if not self.getVDI(vdi.uuid): 

2254 Util.log("The VDI appears to have been deleted meanwhile") 

2255 return False 

2256 if not vdi.isLeafCoalesceable(): 

2257 Util.log("The VDI is no longer leaf-coalesceable") 

2258 return False 

2259 

2260 uuid = vdi.uuid 

2261 vdi.pause(failfast=True) 

2262 try: 

2263 try: 

2264 # "vdi" object will no longer be valid after this call 

2265 self._doCoalesceLeaf(vdi) 

2266 except: 

2267 Util.logException("_doCoalesceLeaf") 

2268 self._handleInterruptedCoalesceLeaf() 

2269 raise 

2270 finally: 

2271 vdi = self.getVDI(uuid) 

2272 if vdi: 

2273 vdi.ensureUnpaused() 

2274 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid) 

2275 if vdiOld: 

2276 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2277 self.deleteVDI(vdiOld) 

2278 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2279 finally: 

2280 self.cleanup() 

2281 self.unlock() 

2282 self.logFilter.logState() 

2283 return True 

2284 

2285 def _doCoalesceLeaf(self, vdi): 

2286 """Actual coalescing of a leaf VDI onto parent. Must be called in an 

2287 offline/atomic context""" 

2288 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid) 

2289 self._prepareCoalesceLeaf(vdi) 

2290 vdi.parent._setHidden(False) 

2291 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False) 

2292 vdi.validate(True) 

2293 vdi.parent.validate(True) 

2294 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid) 

2295 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT 

2296 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE: 

2297 Util.log("Leaf-coalesce forced, will not use timeout") 

2298 timeout = 0 

2299 vdi._coalesceVHD(timeout) 

2300 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid) 

2301 vdi.parent.validate(True) 

2302 #vdi._verifyContents(timeout / 2) 

2303 

2304 # rename 
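# The old leaf is renamed out of the way with the OLD_ prefix and the
# coalesced parent takes over the leaf's UUID, so the surviving node keeps
# the identity that the VM and XAPI already reference.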

2305 vdiUuid = vdi.uuid 

2306 oldName = vdi.fileName 

2307 origParentUuid = vdi.parent.uuid 

2308 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid) 

2309 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid) 

2310 vdi.parent.rename(vdiUuid) 

2311 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid) 

2312 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid) 

2313 

2314 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is 

2315 # garbage 

2316 

2317 # update the VDI record 

2318 vdi.parent.delConfig(VDI.DB_VHD_PARENT) 

2319 if vdi.parent.raw: 

2320 vdi.parent.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_RAW) 

2321 vdi.parent.delConfig(VDI.DB_VHD_BLOCKS) 

2322 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid) 

2323 

2324 self._updateNode(vdi) 

2325 

2326 # delete the obsolete leaf & inflate the parent (in that order, to 

2327 # minimize free space requirements) 

2328 parent = vdi.parent 

2329 vdi._setHidden(True) 

2330 vdi.parent.children = [] 

2331 vdi.parent = None 

2332 

2333 extraSpace = self._calcExtraSpaceNeeded(vdi, parent) 

2334 freeSpace = self.getFreeSpace() 

2335 if freeSpace < extraSpace: 

2336 # don't delete unless we need the space: deletion is time-consuming 

2337 # because it requires contacting the slaves, and we're paused here 

2338 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2339 self.deleteVDI(vdi) 

2340 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2341 

2342 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid) 

2343 self.journaler.remove(VDI.JRN_LEAF, vdiUuid) 

2344 

2345 self.forgetVDI(origParentUuid) 

2346 self._finishCoalesceLeaf(parent) 

2347 self._updateSlavesOnResize(parent) 

2348 

2349 def _calcExtraSpaceNeeded(self, child, parent): 

2350 assert(not parent.raw) # raw parents not supported 

2351 extra = child.getSizeVHD() - parent.getSizeVHD() 

2352 if extra < 0: 

2353 extra = 0 

2354 return extra 

2355 

2356 def _prepareCoalesceLeaf(self, vdi): 

2357 pass 

2358 

2359 def _updateNode(self, vdi): 

2360 pass 

2361 

2362 def _finishCoalesceLeaf(self, parent): 

2363 pass 

2364 

2365 def _updateSlavesOnUndoLeafCoalesce(self, parent, child): 

2366 pass 

2367 

2368 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid): 

2369 pass 

2370 

2371 def _updateSlavesOnResize(self, vdi): 

2372 pass 

2373 

2374 def _removeStaleVDIs(self, uuidsPresent): 

2375 for uuid in list(self.vdis.keys()): 

2376 if not uuid in uuidsPresent: 

2377 Util.log("VDI %s disappeared since last scan" % \ 

2378 self.vdis[uuid]) 

2379 del self.vdis[uuid] 

2380 

2381 def _handleInterruptedCoalesceLeaf(self): 

2382 """An interrupted leaf-coalesce operation may leave the VHD tree in an  

2383 inconsistent state. If the old-leaf VDI is still present, we revert the  

2384 operation (in case the original error is persistent); otherwise we must  

2385 finish the operation""" 

2386 # abstract 

2387 pass 

2388 

2389 def _buildTree(self, force): 

2390 self.vdiTrees = [] 

2391 for vdi in self.vdis.values(): 

2392 if vdi.parentUuid: 

2393 parent = self.getVDI(vdi.parentUuid) 

2394 if not parent: 

2395 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX): 

2396 self.vdiTrees.append(vdi) 

2397 continue 

2398 if force: 

2399 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \ 

2400 (vdi.parentUuid, vdi.uuid)) 

2401 self.vdiTrees.append(vdi) 

2402 continue 

2403 else: 

2404 raise util.SMException("Parent VDI %s of %s not " \ 

2405 "found" % (vdi.parentUuid, vdi.uuid)) 

2406 vdi.parent = parent 

2407 parent.children.append(vdi) 

2408 else: 

2409 self.vdiTrees.append(vdi) 

2410 

2411 

2412class FileSR(SR): 

2413 TYPE = SR.TYPE_FILE 

2414 CACHE_FILE_EXT = ".vhdcache" 

2415 # cache cleanup actions 

2416 CACHE_ACTION_KEEP = 0 

2417 CACHE_ACTION_REMOVE = 1 

2418 CACHE_ACTION_REMOVE_IF_INACTIVE = 2 

2419 

2420 def __init__(self, uuid, xapi, createLock, force): 

2421 SR.__init__(self, uuid, xapi, createLock, force) 

2422 self.path = "/var/run/sr-mount/%s" % self.uuid 

2423 self.journaler = fjournaler.Journaler(self.path) 

2424 

2425 def scan(self, force=False): 

2426 if not util.pathexists(self.path): 

2427 raise util.SMException("directory %s not found!" % self.uuid) 

2428 vhds = self._scan(force) 

2429 for uuid, vhdInfo in vhds.items(): 

2430 vdi = self.getVDI(uuid) 

2431 if not vdi: 

2432 self.logFilter.logNewVDI(uuid) 

2433 vdi = FileVDI(self, uuid, False) 

2434 self.vdis[uuid] = vdi 

2435 vdi.load(vhdInfo) 

2436 uuidsPresent = list(vhds.keys()) 

2437 rawList = [x for x in os.listdir(self.path) if x.endswith(vhdutil.FILE_EXTN_RAW)] 

2438 for rawName in rawList: 

2439 uuid = FileVDI.extractUuid(rawName) 

2440 uuidsPresent.append(uuid) 

2441 vdi = self.getVDI(uuid) 

2442 if not vdi: 

2443 self.logFilter.logNewVDI(uuid) 

2444 vdi = FileVDI(self, uuid, True) 

2445 self.vdis[uuid] = vdi 

2446 self._removeStaleVDIs(uuidsPresent) 

2447 self._buildTree(force) 

2448 self.logFilter.logState() 

2449 self._handleInterruptedCoalesceLeaf() 

2450 

2451 def getFreeSpace(self): 

2452 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path) 

2453 

2454 def deleteVDIs(self, vdiList): 

2455 rootDeleted = False 

2456 for vdi in vdiList: 

2457 if not vdi.parent: 

2458 rootDeleted = True 

2459 break 

2460 SR.deleteVDIs(self, vdiList) 

2461 if self.xapi.srRecord["type"] == "nfs" and rootDeleted: 

2462 self.xapi.markCacheSRsDirty() 

2463 

2464 def cleanupCache(self, maxAge=-1): 

2465 """Clean up IntelliCache cache files. Caches for leaf nodes are  

2466 removed when the leaf node no longer exists or its allow-caching  

2467 attribute is not set. Caches for parent nodes are removed when the  

2468 parent node no longer exists or it hasn't been used in more than  

2469 <maxAge> hours. 

2470 Return number of caches removed. 

2471 """ 

2472 numRemoved = 0 

2473 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)] 

2474 Util.log("Found %d cache files" % len(cacheFiles)) 

2475 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge) 

2476 for cacheFile in cacheFiles: 

2477 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)] 

2478 action = self.CACHE_ACTION_KEEP 

2479 rec = self.xapi.getRecordVDI(uuid) 

2480 if not rec: 

2481 Util.log("Cache %s: VDI doesn't exist" % uuid) 

2482 action = self.CACHE_ACTION_REMOVE 

2483 elif rec["managed"] and not rec["allow_caching"]: 

2484 Util.log("Cache %s: caching disabled" % uuid) 

2485 action = self.CACHE_ACTION_REMOVE 

2486 elif not rec["managed"] and maxAge >= 0: 

2487 lastAccess = datetime.datetime.fromtimestamp( \ 

2488 os.path.getatime(os.path.join(self.path, cacheFile))) 

2489 if lastAccess < cutoff: 

2490 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge)) 

2491 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE 

2492 

2493 if action == self.CACHE_ACTION_KEEP: 

2494 Util.log("Keeping cache %s" % uuid) 

2495 continue 

2496 

2497 lockId = uuid 

2498 parentUuid = None 

2499 if rec and rec["managed"]: 

2500 parentUuid = rec["sm_config"].get("vhd-parent") 

2501 if parentUuid: 

2502 lockId = parentUuid 

2503 

2504 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId) 

2505 cacheLock.acquire() 

2506 try: 

2507 if self._cleanupCache(uuid, action): 

2508 numRemoved += 1 

2509 finally: 

2510 cacheLock.release() 

2511 return numRemoved 

2512 

2513 def _cleanupCache(self, uuid, action): 

2514 assert(action != self.CACHE_ACTION_KEEP) 

2515 rec = self.xapi.getRecordVDI(uuid) 

2516 if rec and rec["allow_caching"]: 

2517 Util.log("Cache %s appears to have become valid" % uuid) 

2518 return False 

2519 

2520 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT) 

2521 tapdisk = blktap2.Tapdisk.find_by_path(fullPath) 

2522 if tapdisk: 

2523 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE: 

2524 Util.log("Cache %s still in use" % uuid) 

2525 return False 

2526 Util.log("Shutting down tapdisk for %s" % fullPath) 

2527 tapdisk.shutdown() 

2528 

2529 Util.log("Deleting file %s" % fullPath) 

2530 os.unlink(fullPath) 

2531 return True 

2532 

2533 def _isCacheFileName(self, name): 

2534 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \ 

2535 name.endswith(self.CACHE_FILE_EXT) 

2536 

2537 def _scan(self, force): 

2538 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

2539 error = False 

2540 pattern = os.path.join(self.path, "*%s" % vhdutil.FILE_EXTN_VHD) 

2541 vhds = vhdutil.getAllVHDs(pattern, FileVDI.extractUuid) 

2542 for uuid, vhdInfo in vhds.items(): 

2543 if vhdInfo.error: 

2544 error = True 

2545 break 

2546 if not error: 

2547 return vhds 

2548 Util.log("Scan error on attempt %d" % i) 

2549 if force: 

2550 return vhds 

2551 raise util.SMException("Scan error") 

2552 

2553 def deleteVDI(self, vdi): 

2554 self._checkSlaves(vdi) 

2555 SR.deleteVDI(self, vdi) 

2556 

2557 def _checkSlaves(self, vdi): 
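# Before deleting a file-based VDI, ask every other host that has this SR
# plugged (via the nfs-on-slave plugin) to confirm the file is not in use;
# a failure is fatal only if the host is believed to be online.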

2558 onlineHosts = self.xapi.getOnlineHosts() 

2559 abortFlag = IPCFlag(self.uuid) 

2560 for pbdRecord in self.xapi.getAttachedPBDs(): 

2561 hostRef = pbdRecord["host"] 

2562 if hostRef == self.xapi._hostRef: 

2563 continue 

2564 if abortFlag.test(FLAG_TYPE_ABORT): 

2565 raise AbortException("Aborting due to signal") 

2566 try: 

2567 self._checkSlave(hostRef, vdi) 

2568 except util.CommandException: 

2569 if hostRef in onlineHosts: 

2570 raise 

2571 

2572 def _checkSlave(self, hostRef, vdi): 

2573 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path}) 

2574 Util.log("Checking with slave: %s" % repr(call)) 

2575 _host = self.xapi.session.xenapi.host 

2576 text = _host.call_plugin( * call) 

2577 

2578 def _handleInterruptedCoalesceLeaf(self): 
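# A JRN_LEAF journal entry maps child UUID to parent UUID. If the parent
# file or the OLD_-renamed child is still present, the interrupted
# leaf-coalesce is undone; otherwise it is rolled forward to completion.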

2579 entries = self.journaler.getAll(VDI.JRN_LEAF) 

2580 for uuid, parentUuid in entries.items(): 

2581 fileList = os.listdir(self.path) 

2582 childName = uuid + vhdutil.FILE_EXTN_VHD 

2583 tmpChildName = self.TMP_RENAME_PREFIX + uuid + vhdutil.FILE_EXTN_VHD 

2584 parentName1 = parentUuid + vhdutil.FILE_EXTN_VHD 

2585 parentName2 = parentUuid + vhdutil.FILE_EXTN_RAW 

2586 parentPresent = (parentName1 in fileList or parentName2 in fileList) 

2587 if parentPresent or tmpChildName in fileList: 

2588 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

2589 else: 

2590 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

2591 self.journaler.remove(VDI.JRN_LEAF, uuid) 

2592 vdi = self.getVDI(uuid) 

2593 if vdi: 

2594 vdi.ensureUnpaused() 

2595 

2596 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2597 Util.log("*** UNDO LEAF-COALESCE") 

2598 parent = self.getVDI(parentUuid) 

2599 if not parent: 

2600 parent = self.getVDI(childUuid) 

2601 if not parent: 

2602 raise util.SMException("Neither %s nor %s found" % \ 

2603 (parentUuid, childUuid)) 

2604 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

2605 parent.rename(parentUuid) 

2606 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

2607 

2608 child = self.getVDI(childUuid) 

2609 if not child: 

2610 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

2611 if not child: 

2612 raise util.SMException("Neither %s nor %s found" % \ 

2613 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

2614 Util.log("Renaming child back to %s" % childUuid) 

2615 child.rename(childUuid) 

2616 Util.log("Updating the VDI record") 

2617 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

2618 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

2619 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

2620 

2621 if child.hidden: 

2622 child._setHidden(False) 

2623 if not parent.hidden: 

2624 parent._setHidden(True) 

2625 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

2626 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

2627 Util.log("*** leaf-coalesce undo successful") 

2628 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

2629 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

2630 

2631 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2632 Util.log("*** FINISH LEAF-COALESCE") 

2633 vdi = self.getVDI(childUuid) 

2634 if not vdi: 

2635 raise util.SMException("VDI %s not found" % childUuid) 

2636 try: 

2637 self.forgetVDI(parentUuid) 

2638 except XenAPI.Failure: 

2639 pass 

2640 self._updateSlavesOnResize(vdi) 

2641 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

2642 Util.log("*** finished leaf-coalesce successfully") 

2643 

2644 

2645class LVHDSR(SR): 

2646 TYPE = SR.TYPE_LVHD 

2647 SUBTYPES = ["lvhdoiscsi", "lvhdohba"] 

2648 

2649 def __init__(self, uuid, xapi, createLock, force): 

2650 SR.__init__(self, uuid, xapi, createLock, force) 

2651 self.vgName = "%s%s" % (lvhdutil.VG_PREFIX, self.uuid) 

2652 self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgName) 

2653 self.lvmCache = lvmcache.LVMCache(self.vgName) 

2654 self.lvActivator = LVActivator(self.uuid, self.lvmCache) 

2655 self.journaler = journaler.Journaler(self.lvmCache) 

2656 

2657 def deleteVDI(self, vdi): 

2658 if self.lvActivator.get(vdi.uuid, False): 

2659 self.lvActivator.deactivate(vdi.uuid, False) 

2660 self._checkSlaves(vdi) 

2661 SR.deleteVDI(self, vdi) 

2662 

2663 def forgetVDI(self, vdiUuid): 

2664 SR.forgetVDI(self, vdiUuid) 

2665 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME) 

2666 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid) 

2667 

2668 def getFreeSpace(self): 

2669 stats = lvutil._getVGstats(self.vgName) 

2670 return stats['physical_size'] - stats['physical_utilisation'] 

2671 

2672 def cleanup(self): 

2673 if not self.lvActivator.deactivateAll(): 

2674 Util.log("ERROR deactivating LVs while cleaning up") 

2675 

2676 def needUpdateBlockInfo(self): 

2677 for vdi in self.vdis.values(): 

2678 if vdi.scanError or vdi.raw or len(vdi.children) == 0: 

2679 continue 

2680 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2681 return True 

2682 return False 

2683 

2684 def updateBlockInfo(self): 

2685 numUpdated = 0 

2686 for vdi in self.vdis.values(): 

2687 if vdi.scanError or vdi.raw or len(vdi.children) == 0: 

2688 continue 

2689 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2690 vdi.updateBlockInfo() 

2691 numUpdated += 1 

2692 if numUpdated: 

2693 # deactivate the LVs again sooner rather than later. If we don't do it 

2694 # now, by the time this thread gets to deactivations, another one 

2695 # might have leaf-coalesced a node and deleted it, making the child 

2696 # inherit the refcount value and preventing the correct decrement 

2697 self.cleanup() 

2698 

2699 def scan(self, force=False): 

2700 vdis = self._scan(force) 

2701 for uuid, vdiInfo in vdis.items(): 

2702 vdi = self.getVDI(uuid) 

2703 if not vdi: 

2704 self.logFilter.logNewVDI(uuid) 

2705 vdi = LVHDVDI(self, uuid, 

2706 vdiInfo.vdiType == vhdutil.VDI_TYPE_RAW) 

2707 self.vdis[uuid] = vdi 

2708 vdi.load(vdiInfo) 

2709 self._removeStaleVDIs(vdis.keys()) 

2710 self._buildTree(force) 

2711 self.logFilter.logState() 

2712 self._handleInterruptedCoalesceLeaf() 

2713 

2714 def _scan(self, force): 

2715 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

2716 error = False 

2717 self.lvmCache.refresh() 

2718 vdis = lvhdutil.getVDIInfo(self.lvmCache) 

2719 for uuid, vdiInfo in vdis.items(): 

2720 if vdiInfo.scanError: 

2721 error = True 

2722 break 

2723 if not error: 

2724 return vdis 

2725 Util.log("Scan error, retrying (%d)" % i) 

2726 if force: 

2727 return vdis 

2728 raise util.SMException("Scan error") 

2729 

2730 def _removeStaleVDIs(self, uuidsPresent): 

2731 for uuid in list(self.vdis.keys()): 

2732 if not uuid in uuidsPresent: 

2733 Util.log("VDI %s disappeared since last scan" % \ 

2734 self.vdis[uuid]) 

2735 del self.vdis[uuid] 

2736 if self.lvActivator.get(uuid, False): 

2737 self.lvActivator.remove(uuid, False) 

2738 

2739 def _liveLeafCoalesce(self, vdi): 

2740 """If the parent is raw and the child was resized (virt. size), then 

2741 we'll need to resize the parent, which can take a while due to zeroing 

2742 out of the extended portion of the LV. Do it before pausing the child 

2743 to avoid a protracted downtime""" 

2744 if vdi.parent.raw and vdi.sizeVirt > vdi.parent.sizeVirt: 

2745 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

2746 vdi.parent._increaseSizeVirt(vdi.sizeVirt) 

2747 

2748 return SR._liveLeafCoalesce(self, vdi) 

2749 

2750 def _prepareCoalesceLeaf(self, vdi): 

2751 vdi._activateChain() 

2752 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

2753 vdi.deflate() 

2754 vdi.inflateParentForCoalesce() 

2755 

2756 def _updateNode(self, vdi): 

2757 # fix the refcounts: the remaining node should inherit the binary 

2758 # refcount from the leaf (because if it was online, it should remain 

2759 # refcounted as such), but the normal refcount from the parent (because 

2760 # this node is really the parent node) - minus 1 if it is online (since 

2761 # non-leaf nodes increment their normal counts when they are online and 

2762 # we are now a leaf, storing that 1 in the binary refcount). 

2763 ns = lvhdutil.NS_PREFIX_LVM + self.uuid 

2764 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns) 

2765 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns) 

2766 pCnt = pCnt - cBcnt 

2767 assert(pCnt >= 0) 

2768 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns) 

2769 

2770 def _finishCoalesceLeaf(self, parent): 

2771 if not parent.isSnapshot() or parent.isAttachedRW(): 

2772 parent.inflateFully() 

2773 else: 

2774 parent.deflate() 

2775 

2776 def _calcExtraSpaceNeeded(self, child, parent): 
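# Coalescing into an LVHD parent requires inflating its LV to the fully
# allocated VHD size for the parent's virtual size; the extra space needed
# is that target size minus the LV's current size.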

2777 return lvhdutil.calcSizeVHDLV(parent.sizeVirt) - parent.sizeLV 

2778 

2779 def _handleInterruptedCoalesceLeaf(self): 

2780 entries = self.journaler.getAll(VDI.JRN_LEAF) 

2781 for uuid, parentUuid in entries.items(): 

2782 childLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + uuid 

2783 tmpChildLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ 

2784 self.TMP_RENAME_PREFIX + uuid 

2785 parentLV1 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + parentUuid 

2786 parentLV2 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + parentUuid 

2787 parentPresent = (self.lvmCache.checkLV(parentLV1) or \ 

2788 self.lvmCache.checkLV(parentLV2)) 

2789 if parentPresent or self.lvmCache.checkLV(tmpChildLV): 

2790 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

2791 else: 

2792 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

2793 self.journaler.remove(VDI.JRN_LEAF, uuid) 

2794 vdi = self.getVDI(uuid) 

2795 if vdi: 

2796 vdi.ensureUnpaused() 

2797 

2798 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2799 Util.log("*** UNDO LEAF-COALESCE") 

2800 parent = self.getVDI(parentUuid) 

2801 if not parent: 

2802 parent = self.getVDI(childUuid) 

2803 if not parent: 

2804 raise util.SMException("Neither %s nor %s found" % \ 

2805 (parentUuid, childUuid)) 

2806 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

2807 parent.rename(parentUuid) 

2808 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

2809 

2810 child = self.getVDI(childUuid) 

2811 if not child: 

2812 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

2813 if not child: 

2814 raise util.SMException("Neither %s nor %s found" % \ 

2815 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

2816 Util.log("Renaming child back to %s" % childUuid) 

2817 child.rename(childUuid) 

2818 Util.log("Updating the VDI record") 

2819 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

2820 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

2821 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

2822 

2823 # refcount (best effort - assume that it had succeeded if the 

2824 # second rename succeeded; if not, this adjustment will be wrong, 

2825 # leading to a non-deactivation of the LV) 

2826 ns = lvhdutil.NS_PREFIX_LVM + self.uuid 

2827 cCnt, cBcnt = RefCounter.check(child.uuid, ns) 

2828 pCnt, pBcnt = RefCounter.check(parent.uuid, ns) 

2829 pCnt = pCnt + cBcnt 

2830 RefCounter.set(parent.uuid, pCnt, 0, ns) 

2831 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid) 

2832 

2833 parent.deflate() 

2834 child.inflateFully() 

2835 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid) 

2836 if child.hidden: 

2837 child._setHidden(False) 

2838 if not parent.hidden: 

2839 parent._setHidden(True) 

2840 if not parent.lvReadonly: 

2841 self.lvmCache.setReadonly(parent.fileName, True) 

2842 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

2843 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

2844 Util.log("*** leaf-coalesce undo successful") 

2845 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

2846 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

2847 

2848 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2849 Util.log("*** FINISH LEAF-COALESCE") 

2850 vdi = self.getVDI(childUuid) 

2851 if not vdi: 

2852 raise util.SMException("VDI %s not found" % childUuid) 

2853 vdi.inflateFully() 

2854 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid) 

2855 try: 

2856 self.forgetVDI(parentUuid) 

2857 except XenAPI.Failure: 

2858 pass 

2859 self._updateSlavesOnResize(vdi) 

2860 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

2861 Util.log("*** finished leaf-coalesce successfully") 

2862 

2863 def _checkSlaves(self, vdi): 

2864 """Confirm with all slaves in the pool that 'vdi' is not in use. We 

2865 try to check all slaves, including those that the Agent believes are 

2866 offline, but ignore failures for offline hosts. This is to avoid cases 

2867 where the Agent thinks a host is offline but the host is up.""" 

2868 args = {"vgName": self.vgName, 

2869 "action1": "deactivateNoRefcount", 

2870 "lvName1": vdi.fileName, 

2871 "action2": "cleanupLockAndRefcount", 

2872 "uuid2": vdi.uuid, 

2873 "ns2": lvhdutil.NS_PREFIX_LVM + self.uuid} 

2874 onlineHosts = self.xapi.getOnlineHosts() 

2875 abortFlag = IPCFlag(self.uuid) 

2876 for pbdRecord in self.xapi.getAttachedPBDs(): 

2877 hostRef = pbdRecord["host"] 

2878 if hostRef == self.xapi._hostRef: 

2879 continue 

2880 if abortFlag.test(FLAG_TYPE_ABORT): 

2881 raise AbortException("Aborting due to signal") 

2882 Util.log("Checking with slave %s (path %s)" % ( 

2883 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path)) 

2884 try: 

2885 self.xapi.ensureInactive(hostRef, args) 

2886 except XenAPI.Failure: 

2887 if hostRef in onlineHosts: 

2888 raise 

2889 

2890 def _updateSlavesOnUndoLeafCoalesce(self, parent, child): 

2891 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid]) 

2892 if not slaves: 

2893 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \ 

2894 child) 

2895 return 

2896 

2897 tmpName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ 

2898 self.TMP_RENAME_PREFIX + child.uuid 

2899 args = {"vgName": self.vgName, 

2900 "action1": "deactivateNoRefcount", 

2901 "lvName1": tmpName, 

2902 "action2": "deactivateNoRefcount", 

2903 "lvName2": child.fileName, 

2904 "action3": "refresh", 

2905 "lvName3": child.fileName, 

2906 "action4": "refresh", 

2907 "lvName4": parent.fileName} 

2908 for slave in slaves: 

2909 Util.log("Updating %s, %s, %s on slave %s" % \ 

2910 (tmpName, child.fileName, parent.fileName, 

2911 self.xapi.getRecordHost(slave)['hostname'])) 

2912 text = self.xapi.session.xenapi.host.call_plugin( \ 

2913 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

2914 Util.log("call-plugin returned: '%s'" % text) 

2915 

2916 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid): 

2917 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid]) 

2918 if not slaves: 

2919 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi) 

2920 return 

2921 

2922 args = {"vgName": self.vgName, 

2923 "action1": "deactivateNoRefcount", 

2924 "lvName1": oldNameLV, 

2925 "action2": "refresh", 

2926 "lvName2": vdi.fileName, 

2927 "action3": "cleanupLockAndRefcount", 

2928 "uuid3": origParentUuid, 

2929 "ns3": lvhdutil.NS_PREFIX_LVM + self.uuid} 

2930 for slave in slaves: 

2931 Util.log("Updating %s to %s on slave %s" % \ 

2932 (oldNameLV, vdi.fileName, 

2933 self.xapi.getRecordHost(slave)['hostname'])) 

2934 text = self.xapi.session.xenapi.host.call_plugin( \ 

2935 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

2936 Util.log("call-plugin returned: '%s'" % text) 

2937 

2938 def _updateSlavesOnResize(self, vdi): 

2939 uuids = [x.uuid for x in vdi.getAllLeaves()] 

2940 slaves = util.get_slaves_attached_on(self.xapi.session, uuids) 

2941 if not slaves: 

2942 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi) 

2943 return 

2944 lvhdutil.lvRefreshOnSlaves(self.xapi.session, self.uuid, self.vgName, 

2945 vdi.fileName, vdi.uuid, slaves) 

2946 

2947 

2948class LinstorSR(SR): 

2949 TYPE = SR.TYPE_LINSTOR 

2950 

2951 def __init__(self, uuid, xapi, createLock, force): 

2952 if not LINSTOR_AVAILABLE: 

2953 raise util.SMException( 

2954 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing' 

2955 ) 

2956 

2957 SR.__init__(self, uuid, xapi, createLock, force) 

2958 self._master_uri = 'linstor://localhost' 

2959 self.path = LinstorVolumeManager.DEV_ROOT_PATH 

2960 self._reloadLinstor() 

2961 

2962 def deleteVDI(self, vdi): 

2963 self._checkSlaves(vdi) 

2964 SR.deleteVDI(self, vdi) 

2965 

2966 def getFreeSpace(self): 

2967 return self._linstor.max_volume_size_allowed 

2968 

2969 def scan(self, force=False): 

2970 all_vdi_info = self._scan(force) 

2971 for uuid, vdiInfo in all_vdi_info.items(): 

2972 # When vdiInfo is None, the VDI is RAW. 

2973 vdi = self.getVDI(uuid) 

2974 if not vdi: 

2975 self.logFilter.logNewVDI(uuid) 

2976 vdi = LinstorVDI(self, uuid, not vdiInfo) 

2977 self.vdis[uuid] = vdi 

2978 if vdiInfo: 

2979 vdi.load(vdiInfo) 

2980 self._removeStaleVDIs(all_vdi_info.keys()) 

2981 self._buildTree(force) 

2982 self.logFilter.logState() 

2983 self._handleInterruptedCoalesceLeaf() 

2984 

2985 def _reloadLinstor(self): 

2986 session = self.xapi.session 

2987 host_ref = util.get_this_host_ref(session) 

2988 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) 

2989 

2990 pbd = util.find_my_pbd(session, host_ref, sr_ref) 

2991 if pbd is None: 

2992 raise util.SMException('Failed to find PBD') 

2993 

2994 dconf = session.xenapi.PBD.get_device_config(pbd) 

2995 group_name = dconf['group-name'] 

2996 

2997 self.journaler = LinstorJournaler( 

2998 self._master_uri, group_name, logger=util.SMlog 

2999 ) 

3000 

3001 self._linstor = LinstorVolumeManager( 

3002 self._master_uri, 

3003 group_name, 

3004 repair=True, 

3005 logger=util.SMlog 

3006 ) 

3007 self._vhdutil = LinstorVhdUtil(session, self._linstor) 

3008 

3009 def _scan(self, force): 

3010 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3011 self._reloadLinstor() 

3012 error = False 

3013 try: 

3014 all_vdi_info = self._load_vdi_info() 

3015 for uuid, vdiInfo in all_vdi_info.items(): 

3016 if vdiInfo and vdiInfo.error: 

3017 error = True 

3018 break 

3019 if not error: 

3020 return all_vdi_info 

3021 Util.log('Scan error, retrying ({})'.format(i)) 

3022 except Exception as e: 

3023 Util.log('Scan exception, retrying ({}): {}'.format(i, e)) 

3024 Util.log(traceback.format_exc()) 

3025 

3026 if force: 

3027 return all_vdi_info 

3028 raise util.SMException('Scan error') 

3029 

3030 def _load_vdi_info(self): 
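# Build a {vdi_uuid: info} map from the LINSTOR volume metadata: VHD volumes
# get their VHD info via LinstorVhdUtil, RAW volumes are represented by
# None, and any lookup failure is recorded as a VHDInfo with error set.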

3031 all_vdi_info = {} 

3032 

3033 # TODO: Ensure metadata contains the right info. 

3034 

3035 all_volume_info = self._linstor.volumes_with_info 

3036 volumes_metadata = self._linstor.volumes_with_metadata 

3037 for vdi_uuid, volume_info in all_volume_info.items(): 

3038 try: 

3039 if not volume_info.name and \ 

3040 not list(volumes_metadata[vdi_uuid].items()): 

3041 continue # Ignore it, probably deleted. 

3042 

3043 vdi_type = volumes_metadata[vdi_uuid][VDI_TYPE_TAG] 

3044 if vdi_type == vhdutil.VDI_TYPE_VHD: 

3045 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3046 else: 

3047 info = None 

3048 except Exception as e: 

3049 Util.log( 

3050 ' [VDI {}: failed to load VDI info]: {}' 

3051 .format(vdi_uuid, e) 

3052 ) 

3053 info = vhdutil.VHDInfo(vdi_uuid) 

3054 info.error = 1 

3055 all_vdi_info[vdi_uuid] = info 

3056 return all_vdi_info 

3057 

3058 # TODO: Maybe implement _liveLeafCoalesce/_prepareCoalesceLeaf/ 

3059 # _finishCoalesceLeaf/_updateSlavesOnResize like LVM plugin. 

3060 

3061 def _calcExtraSpaceNeeded(self, child, parent): 

3062 meta_overhead = vhdutil.calcOverheadEmpty(LinstorVDI.MAX_SIZE) 

3063 bitmap_overhead = vhdutil.calcOverheadBitmap(parent.sizeVirt) 

3064 virtual_size = LinstorVolumeManager.round_up_volume_size( 

3065 parent.sizeVirt + meta_overhead + bitmap_overhead 

3066 ) 

3067 # TODO: Check result. 

3068 return virtual_size - self._linstor.get_volume_size(parent.uuid) 

3069 

3070 def _hasValidDevicePath(self, uuid): 

3071 try: 

3072 self._linstor.get_device_path(uuid) 

3073 except Exception: 

3074 # TODO: Maybe log exception. 

3075 return False 

3076 return True 

3077 

3078 def _handleInterruptedCoalesceLeaf(self): 

3079 entries = self.journaler.get_all(VDI.JRN_LEAF) 

3080 for uuid, parentUuid in entries.items(): 

3081 if self._hasValidDevicePath(parentUuid) or \ 

3082 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): 

3083 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3084 else: 

3085 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3086 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3087 vdi = self.getVDI(uuid) 

3088 if vdi: 

3089 vdi.ensureUnpaused() 

3090 

3091 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3092 Util.log('*** UNDO LEAF-COALESCE') 

3093 parent = self.getVDI(parentUuid) 

3094 if not parent: 

3095 parent = self.getVDI(childUuid) 

3096 if not parent: 

3097 raise util.SMException( 

3098 'Neither {} nor {} found'.format(parentUuid, childUuid) 

3099 ) 

3100 Util.log( 

3101 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) 

3102 ) 

3103 parent.rename(parentUuid) 

3104 util.fistpoint.activate('LVHDRT_coaleaf_undo_after_rename', self.uuid) 

3105 

3106 child = self.getVDI(childUuid) 

3107 if not child: 

3108 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3109 if not child: 

3110 raise util.SMException( 

3111 'Neither {} nor {} found'.format( 

3112 childUuid, self.TMP_RENAME_PREFIX + childUuid 

3113 ) 

3114 ) 

3115 Util.log('Renaming child back to {}'.format(childUuid)) 

3116 child.rename(childUuid) 

3117 Util.log('Updating the VDI record') 

3118 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

3119 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

3120 util.fistpoint.activate( 

3121 'LVHDRT_coaleaf_undo_after_rename2', self.uuid 

3122 ) 

3123 

3124 # TODO: Maybe deflate here. 

3125 

3126 if child.hidden: 

3127 child._setHidden(False) 

3128 if not parent.hidden: 

3129 parent._setHidden(True) 

3130 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3131 util.fistpoint.activate('LVHDRT_coaleaf_undo_end', self.uuid) 

3132 Util.log('*** leaf-coalesce undo successful') 

3133 if util.fistpoint.is_active('LVHDRT_coaleaf_stop_after_recovery'): 

3134 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

3135 

3136 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3137 Util.log('*** FINISH LEAF-COALESCE') 

3138 vdi = self.getVDI(childUuid) 

3139 if not vdi: 

3140 raise util.SMException('VDI {} not found'.format(childUuid)) 

3141 # TODO: Maybe inflate. 

3142 try: 

3143 self.forgetVDI(parentUuid) 

3144 except XenAPI.Failure: 

3145 pass 

3146 self._updateSlavesOnResize(vdi) 

3147 util.fistpoint.activate('LVHDRT_coaleaf_finish_end', self.uuid) 

3148 Util.log('*** finished leaf-coalesce successfully') 

3149 

3150 def _checkSlaves(self, vdi): 

3151 try: 

3152 states = self._linstor.get_usage_states(vdi.uuid) 

3153 for node_name, state in states.items(): 

3154 self._checkSlave(node_name, vdi, state) 

3155 except LinstorVolumeManagerError as e: 

3156 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: 

3157 raise 

3158 

3159 @staticmethod 

3160 def _checkSlave(node_name, vdi, state): 

3161 # If state is None, LINSTOR doesn't know the host state 

3162 # (bad connection?). 

3163 if state is None: 

3164 raise util.SMException( 

3165 'Unknown state for VDI {} on {}'.format(vdi.uuid, node_name) 

3166 ) 

3167 

3168 if state: 

3169 raise util.SMException( 

3170 'VDI {} is in use on {}'.format(vdi.uuid, node_name) 

3171 ) 

3172 

3173 

3174################################################################################ 

3175# 

3176# Helpers 

3177# 

3178def daemonize(): 
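# Classic double-fork daemonization: the first fork lets the caller reap the
# child and carry on, setsid() drops the controlling terminal, the second
# fork prevents reacquiring one, and stdio is reopened on /dev/null.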

3179 pid = os.fork() 

3180 if pid: 

3181 os.waitpid(pid, 0) 

3182 Util.log("New PID [%d]" % pid) 

3183 return False 

3184 os.chdir("/") 

3185 os.setsid() 

3186 pid = os.fork() 

3187 if pid: 

3188 Util.log("Will finish as PID [%d]" % pid) 

3189 os._exit(0) 

3190 for fd in [0, 1, 2]: 

3191 try: 

3192 os.close(fd) 

3193 except OSError: 

3194 pass 

3195 # we need to fill those special fd numbers or pread won't work 

3196 sys.stdin = open("/dev/null", 'r') 

3197 sys.stderr = open("/dev/null", 'w') 

3198 sys.stdout = open("/dev/null", 'w') 

3199 # As we're a new process we need to clear the lock objects 

3200 lock.Lock.clearAll() 

3201 return True 
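# daemonize() returns False in the original parent (after reaping the
# intermediate child) and True only in the fully detached grandchild, which
# runs in the new session with /dev/null on fds 0-2. A minimal caller sketch,
# mirroring the way gc() below uses it (illustrative only, not part of the
# original source; do_background_work is a hypothetical placeholder):
#
#     if daemonize():
#         try:
#             do_background_work()
#         finally:
#             os._exit(0)   # never return into the caller's stack
#     # ...the parent simply falls through and returns immediately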

3202 

3203 

3204def normalizeType(type): 

3205 if type in LVHDSR.SUBTYPES: 

3206 type = SR.TYPE_LVHD 

3207 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]: 

3208 # temporary while LVHD is symlinked as LVM 

3209 type = SR.TYPE_LVHD 

3210 if type in [ 

3211 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", 

3212 "moosefs", "xfs", "zfs", "ext4" 

3213 ]: 

3214 type = SR.TYPE_FILE 

3215 if type in ["linstor"]: 

3216 type = SR.TYPE_LINSTOR 

3217 if type not in SR.TYPES: 

3218 raise util.SMException("Unsupported SR type: %s" % type) 

3219 return type 
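# Illustrative mappings, derived from the branches above (not part of the
# original source):
#
#     normalizeType("lvmoiscsi")   # -> SR.TYPE_LVHD  (temporary LVM symlink case)
#     normalizeType("nfs")         # -> SR.TYPE_FILE
#     normalizeType("linstor")     # -> SR.TYPE_LINSTOR
#     normalizeType("foo")         # raises util.SMException("Unsupported SR type: foo")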

3220 

3221GCPAUSE_DEFAULT_SLEEP = 5 * 60 

3222 

3223 

3224def _gc_init_file(sr_uuid): 

3225 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init') 

3226 

3227 

3228def _create_init_file(sr_uuid): 

3229 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))) 

3230 with open(os.path.join( 

3231 NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init'), 'w+') as f: 

3232 f.write('1') 

3233 

3234 

3235def _gcLoopPause(sr, dryRun=False, immediate=False): 

3236 if immediate: 

3237 return 

3238 

3239 # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist 

3240 # point will just return. Otherwise, fall back on an abortable sleep. 

3241 

3242 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT): 

3243 

3244 util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT, 3244 ↛ exit   line 3244 didn't jump to the function exit 

3245 lambda *args: None) 

3246 elif os.path.exists(_gc_init_file(sr.uuid)): 

3247 def abortTest(): 

3248 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT) 

3249 

3250 # If time.sleep hangs we are in deep trouble, however for 

3251 # completeness we set the timeout of the abort thread to 

3252 # 110% of GCPAUSE_DEFAULT_SLEEP. 

3253 Util.log("GC active, about to go quiet") 

3254 Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP), 3254 ↛ exit   line 3254 didn't run the lambda on line 3254 

3255 None, sr.uuid, abortTest, VDI.POLL_INTERVAL, 

3256 GCPAUSE_DEFAULT_SLEEP * 1.1) 

3257 Util.log("GC active, quiet period ended") 

3258 

3259 

3260def _gcLoop(sr, dryRun=False, immediate=False): 

3261 if not lockActive.acquireNoblock(): 3261 ↛ 3262   line 3261 didn't jump to line 3262, because the condition on line 3261 was never true 

3262 Util.log("Another GC instance already active, exiting") 

3263 return 

3264 # Track how many we do 

3265 coalesced = 0 

3266 task_status = "success" 

3267 try: 

3268 # Check if any work needs to be done 

3269 sr.scanLocked() 

3270 if not sr.hasWork(): 

3271 Util.log("No work, exiting") 

3272 return 

3273 sr.xapi.create_task( 

3274 "Garbage Collection", 

3275 "Garbage collection for SR %s" % sr.uuid) 

3276 _gcLoopPause(sr, dryRun, immediate=immediate) 

3277 while True: 

3278 if not sr.xapi.isPluggedHere(): 3278 ↛ 3279   line 3278 didn't jump to line 3279, because the condition on line 3278 was never true 

3279 Util.log("SR no longer attached, exiting") 

3280 break 

3281 sr.scanLocked() 

3282 if not sr.hasWork(): 

3283 Util.log("No work, exiting") 

3284 break 

3285 

3286 if not lockRunning.acquireNoblock(): 3286 ↛ 3287   line 3286 didn't jump to line 3287, because the condition on line 3286 was never true 

3287 Util.log("Unable to acquire GC running lock.") 

3288 return 

3289 try: 

3290 if not sr.gcEnabled(): 3290 ↛ 3291   line 3290 didn't jump to line 3291, because the condition on line 3290 was never true 

3291 break 

3292 

3293 sr.xapi.update_task_progress("done", coalesced) 

3294 

3295 sr.cleanupCoalesceJournals() 

3296 # Create the init file here in case startup is waiting on it 

3297 _create_init_file(sr.uuid) 

3298 sr.scanLocked() 

3299 sr.updateBlockInfo() 

3300 

3301 howmany = len(sr.findGarbage()) 

3302 if howmany > 0: 

3303 Util.log("Found %d orphaned vdis" % howmany) 

3304 sr.lock() 

3305 try: 

3306 sr.garbageCollect(dryRun) 

3307 finally: 

3308 sr.unlock() 

3309 sr.xapi.srUpdate() 

3310 

3311 candidate = sr.findCoalesceable() 

3312 if candidate: 

3313 util.fistpoint.activate( 

3314 "LVHDRT_finding_a_suitable_pair", sr.uuid) 

3315 sr.coalesce(candidate, dryRun) 

3316 sr.xapi.srUpdate() 

3317 coalesced += 1 

3318 continue 

3319 

3320 candidate = sr.findLeafCoalesceable() 

3321 if candidate: 3321 ↛ 3328   line 3321 didn't jump to line 3328, because the condition on line 3321 was never false 

3322 sr.coalesceLeaf(candidate, dryRun) 

3323 sr.xapi.srUpdate() 

3324 coalesced += 1 

3325 continue 

3326 

3327 finally: 

3328 lockRunning.release() 3328 ↛ 3333   line 3328 didn't jump to line 3333, because the break on line 3291 wasn't executed 

3329 except: 

3330 task_status = "failure" 

3331 raise 

3332 finally: 

3333 sr.xapi.set_task_status(task_status) 

3334 Util.log("GC process exiting, no work left") 

3335 _create_init_file(sr.uuid) 

3336 lockActive.release() 
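# Lock discipline in _gcLoop above: lockActive is taken once on entry and
# released only here on exit, while lockRunning is acquired and released
# around every pass of the while loop. get_state() further down reports
# "running" by probing lockActive, and _abort() waits on the same lock after
# raising the abort flag.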

3337 

3338 

3339def _xapi_enabled(session, hostref): 

3340 host = session.xenapi.host.get_record(hostref) 

3341 return host['enabled'] 

3342 

3343 

3344def _ensure_xapi_initialised(session): 

3345 """ 

3346 Don't want to start GC until Xapi is fully initialised 

3347 """ 

3348 local_session = None 

3349 if session is None: 

3350 local_session = util.get_localAPI_session() 

3351 session = local_session 

3352 

3353 try: 

3354 hostref = session.xenapi.host.get_by_uuid(util.get_this_host()) 

3355 while not _xapi_enabled(session, hostref): 

3356 util.SMlog("Xapi not ready, GC waiting") 

3357 time.sleep(15) 

3358 finally: 

3359 if local_session is not None: 

3360 local_session.logout() 

3361 

3362def _gc(session, srUuid, dryRun=False, immediate=False): 

3363 init(srUuid) 

3364 _ensure_xapi_initialised(session) 

3365 sr = SR.getInstance(srUuid, session) 

3366 if not sr.gcEnabled(False): 3366 ↛ 3367   line 3366 didn't jump to line 3367, because the condition on line 3366 was never true 

3367 return 

3368 

3369 sr.cleanupCache() 

3370 try: 

3371 _gcLoop(sr, dryRun, immediate=immediate) 

3372 finally: 

3373 sr.cleanup() 

3374 sr.logFilter.logState() 

3375 del sr.xapi 

3376 

3377 

3378def _abort(srUuid, soft=False): 

3379 """Aborts an GC/coalesce. 

3380 

3381 srUuid: the UUID of the SR whose GC/coalesce must be aborted 

3382 soft: If set to True and there is a pending abort signal, the function 

3383 doesn't do anything. If set to False, a new abort signal is issued. 

3384 

3385 returns: If soft is set to False, we return True holding lockActive. If 

3386 soft is set to True and an abort signal is pending, we return False 

3387 without holding lockActive. An exception is raised in case of error.""" 

3388 Util.log("=== SR %s: abort ===" % (srUuid)) 

3389 init(srUuid) 

3390 if not lockActive.acquireNoblock(): 

3391 gotLock = False 

3392 Util.log("Aborting currently-running instance (SR %s)" % srUuid) 

3393 abortFlag = IPCFlag(srUuid) 

3394 if not abortFlag.set(FLAG_TYPE_ABORT, soft): 

3395 return False 

3396 for i in range(SR.LOCK_RETRY_ATTEMPTS): 

3397 gotLock = lockActive.acquireNoblock() 

3398 if gotLock: 

3399 break 

3400 time.sleep(SR.LOCK_RETRY_INTERVAL) 

3401 abortFlag.clear(FLAG_TYPE_ABORT) 

3402 if not gotLock: 

3403 raise util.CommandException(code=errno.ETIMEDOUT, 

3404 reason="SR %s: error aborting existing process" % srUuid) 

3405 return True 
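# Caller contract for _abort(): a True result means the caller now holds
# lockActive and must release it to let GC/coalesce resume, as abort() and
# abort_optional_reenable() below do. An illustrative sketch (sr_uuid is a
# placeholder, not part of the original source):
#
#     if _abort(sr_uuid):
#         try:
#             pass   # exclusive section: no GC/coalesce can run on this SR
#         finally:
#             lockActive.release()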

3406 

3407 

3408def init(srUuid): 

3409 global lockRunning 

3410 if not lockRunning: 3410 ↛ 3411   line 3410 didn't jump to line 3411, because the condition on line 3410 was never true 

3411 lockRunning = lock.Lock(LOCK_TYPE_RUNNING, srUuid) 

3412 global lockActive 

3413 if not lockActive: 3413 ↛ 3414   line 3413 didn't jump to line 3414, because the condition on line 3413 was never true 

3414 lockActive = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid) 

3415 

3416 

3417def usage(): 

3418 output = """Garbage collect and/or coalesce VHDs in a VHD-based SR 

3419 

3420Parameters: 

3421 -u --uuid UUID SR UUID 

3422 and one of: 

3423 -g --gc garbage collect, coalesce, and repeat while there is work 

3424 -G --gc_force garbage collect once, aborting any current operations 

3425 -c --clean_cache <max_age> clean up IntelliCache cache files older than 

3426 max_age hours 

3427 -a --abort abort any currently running operation (GC or coalesce) 

3428 -q --query query the current state (GC'ing, coalescing or not running) 

3429 -x --disable disable GC/coalesce (will be in effect until you exit) 

3430 -t --debug see Debug below 

3431 

3432Options: 

3433 -b --background run in background (return immediately) (valid for -g only) 

3434 -f --force continue in the presence of VHDs with errors (when doing 

3435 GC, this might cause removal of any such VHDs) (only valid 

3436 for -G) (DANGEROUS) 

3437 

3438Debug: 

3439 The --debug parameter enables manipulation of LVHD VDIs for debugging 

3440 purposes. ** NEVER USE IT ON A LIVE VM ** 

3441 The following parameters are required: 

3442 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate", 

3443 "deflate". 

3444 -v --vdi_uuid VDI UUID 

3445 """ 

3446 #-d --dry-run don't actually perform any SR-modifying operations 

3447 print(output) 

3448 Util.log("(Invalid usage)") 

3449 sys.exit(1) 
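# Example invocations, matching the option parsing in main() below
# (illustrative only; <SR_UUID>/<VDI_UUID> are placeholders and the script
# path depends on the installation):
#
#     <this-script> -u <SR_UUID> -g -b                       # GC + coalesce in the background
#     <this-script> -u <SR_UUID> -q                          # is GC/coalesce currently running?
#     <this-script> -u <SR_UUID> -c 24                       # clean cache files older than 24 hours
#     <this-script> -u <SR_UUID> -t deflate -v <VDI_UUID>    # debug only, never on a live VM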

3450 

3451 

3452############################################################################## 

3453# 

3454# API 

3455# 

3456def abort(srUuid, soft=False): 

3457 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair. 

3458 """ 

3459 if _abort(srUuid, soft): 

3460 Util.log("abort: releasing the process lock") 

3461 lockActive.release() 

3462 return True 

3463 else: 

3464 return False 

3465 

3466 

3467def gc(session, srUuid, inBackground, dryRun=False): 

3468 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return  

3469 immediately if inBackground=True.  

3470  

3471 The following algorithm is used: 

3472 1. If we are already GC'ing in this SR, return 

3473 2. If we are already coalescing a VDI pair: 

3474 a. Scan the SR and determine if the VDI pair is GC'able 

3475 b. If the pair is not GC'able, return 

3476 c. If the pair is GC'able, abort coalesce 

3477 3. Scan the SR 

3478 4. If there is nothing to collect, nor to coalesce, return 

3479 5. If there is something to collect, GC all, then goto 3 

3480 6. If there is something to coalesce, coalesce one pair, then goto 3 

3481 """ 

3482 Util.log("=== SR %s: gc ===" % srUuid) 

3483 if inBackground: 

3484 if daemonize(): 

3485 # we are now running in the background. Catch & log any errors 

3486 # because there is no other way to propagate them back at this 

3487 # point 

3488 

3489 try: 

3490 _gc(None, srUuid, dryRun) 

3491 except AbortException: 

3492 Util.log("Aborted") 

3493 except Exception: 

3494 Util.logException("gc") 

3495 Util.log("* * * * * SR %s: ERROR\n" % srUuid) 

3496 os._exit(0) 

3497 else: 

3498 _gc(session, srUuid, dryRun, immediate=True) 

3499 

3500 

3501def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False): 

3502 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure 

3503 the SR lock is held. 

3504 The following algorithm is used: 

3505 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce 

3506 2. Scan the SR 

3507 3. GC 

3508 4. return 

3509 """ 

3510 Util.log("=== SR %s: gc_force ===" % srUuid) 

3511 init(srUuid) 

3512 sr = SR.getInstance(srUuid, session, lockSR, True) 

3513 if not lockActive.acquireNoblock(): 

3514 abort(srUuid) 

3515 else: 

3516 Util.log("Nothing was running, clear to proceed") 

3517 

3518 if force: 

3519 Util.log("FORCED: will continue even if there are VHD errors") 

3520 sr.scanLocked(force) 

3521 sr.cleanupCoalesceJournals() 

3522 

3523 try: 

3524 sr.cleanupCache() 

3525 sr.garbageCollect(dryRun) 

3526 finally: 

3527 sr.cleanup() 

3528 sr.logFilter.logState() 

3529 lockActive.release() 

3530 

3531 

3532def get_state(srUuid): 

3533 """Return whether GC/coalesce is currently running or not. The information 

3534 is not guaranteed for any length of time if the call is not protected by 

3535 locking. 

3536 """ 

3537 init(srUuid) 

3538 if lockActive.acquireNoblock(): 

3539 lockActive.release() 

3540 return False 

3541 return True 
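# get_state() is a try-lock probe: if lockActive can be acquired nothing was
# running, so it is released again and False is returned; if the acquire
# fails, another process holds it and True is returned. As the docstring
# says, the answer is only a snapshot unless the caller does its own locking.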

3542 

3543 

3544def should_preempt(session, srUuid): 

3545 sr = SR.getInstance(srUuid, session) 

3546 entries = sr.journaler.getAll(VDI.JRN_COALESCE) 

3547 if len(entries) == 0: 

3548 return False 

3549 elif len(entries) > 1: 

3550 raise util.SMException("More than one coalesce entry: " + str(entries)) 

3551 sr.scanLocked() 

3552 coalescedUuid = entries.popitem()[0] 

3553 garbage = sr.findGarbage() 

3554 for vdi in garbage: 

3555 if vdi.uuid == coalescedUuid: 

3556 return True 

3557 return False 

3558 

3559 

3560def get_coalesceable_leaves(session, srUuid, vdiUuids): 

3561 coalesceable = [] 

3562 sr = SR.getInstance(srUuid, session) 

3563 sr.scanLocked() 

3564 for uuid in vdiUuids: 

3565 vdi = sr.getVDI(uuid) 

3566 if not vdi: 

3567 raise util.SMException("VDI %s not found" % uuid) 

3568 if vdi.isLeafCoalesceable(): 

3569 coalesceable.append(uuid) 

3570 return coalesceable 

3571 

3572 

3573def cache_cleanup(session, srUuid, maxAge): 

3574 sr = SR.getInstance(srUuid, session) 

3575 return sr.cleanupCache(maxAge) 

3576 

3577 

3578def debug(sr_uuid, cmd, vdi_uuid): 

3579 Util.log("Debug command: %s" % cmd) 

3580 sr = SR.getInstance(sr_uuid, None) 

3581 if not isinstance(sr, LVHDSR): 

3582 print("Error: not an LVHD SR") 

3583 return 

3584 sr.scanLocked() 

3585 vdi = sr.getVDI(vdi_uuid) 

3586 if not vdi: 

3587 print("Error: VDI %s not found") 

3588 return 

3589 print("Running %s on SR %s" % (cmd, sr)) 

3590 print("VDI before: %s" % vdi) 

3591 if cmd == "activate": 

3592 vdi._activate() 

3593 print("VDI file: %s" % vdi.path) 

3594 if cmd == "deactivate": 

3595 ns = lvhdutil.NS_PREFIX_LVM + sr.uuid 

3596 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False) 

3597 if cmd == "inflate": 

3598 vdi.inflateFully() 

3599 sr.cleanup() 

3600 if cmd == "deflate": 

3601 vdi.deflate() 

3602 sr.cleanup() 

3603 sr.scanLocked() 

3604 print("VDI after: %s" % vdi) 

3605 

3606 

3607def abort_optional_reenable(uuid): 

3608 print("Disabling GC/coalesce for %s" % uuid) 

3609 ret = _abort(uuid) 

3610 input("Press enter to re-enable...") 

3611 print("GC/coalesce re-enabled") 

3612 lockRunning.release() 

3613 if ret: 

3614 lockActive.release() 

3615 

3616 

3617############################################################################## 

3618# 

3619# CLI 

3620# 

3621def main(): 

3622 action = "" 

3623 uuid = "" 

3624 background = False 

3625 force = False 

3626 dryRun = False 

3627 debug_cmd = "" 

3628 vdi_uuid = "" 

3629 shortArgs = "gGc:aqxu:bfdt:v:" 

3630 longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable", 

3631 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="] 

3632 

3633 try: 

3634 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs) 

3635 except getopt.GetoptError: 

3636 usage() 

3637 for o, a in opts: 

3638 if o in ("-g", "--gc"): 

3639 action = "gc" 

3640 if o in ("-G", "--gc_force"): 

3641 action = "gc_force" 

3642 if o in ("-c", "--clean_cache"): 

3643 action = "clean_cache" 

3644 maxAge = int(a) 

3645 if o in ("-a", "--abort"): 

3646 action = "abort" 

3647 if o in ("-q", "--query"): 

3648 action = "query" 

3649 if o in ("-x", "--disable"): 

3650 action = "disable" 

3651 if o in ("-u", "--uuid"): 

3652 uuid = a 

3653 if o in ("-b", "--background"): 

3654 background = True 

3655 if o in ("-f", "--force"): 

3656 force = True 

3657 if o in ("-d", "--dry-run"): 

3658 Util.log("Dry run mode") 

3659 dryRun = True 

3660 if o in ("-t", "--debug"): 

3661 action = "debug" 

3662 debug_cmd = a 

3663 if o in ("-v", "--vdi_uuid"): 

3664 vdi_uuid = a 

3665 

3666 if not action or not uuid: 

3667 usage() 

3668 if action == "debug" and not (debug_cmd and vdi_uuid) or \ 

3669 action != "debug" and (debug_cmd or vdi_uuid): 

3670 usage() 

3671 

3672 if action != "query" and action != "debug": 

3673 print("All output goes to log") 

3674 

3675 if action == "gc": 

3676 gc(None, uuid, background, dryRun) 

3677 elif action == "gc_force": 

3678 gc_force(None, uuid, force, dryRun, True) 

3679 elif action == "clean_cache": 

3680 cache_cleanup(None, uuid, maxAge) 

3681 elif action == "abort": 

3682 abort(uuid) 

3683 elif action == "query": 

3684 print("Currently running: %s" % get_state(uuid)) 

3685 elif action == "disable": 

3686 abort_optional_reenable(uuid) 

3687 elif action == "debug": 

3688 debug(uuid, debug_cmd, vdi_uuid) 

3689 

3690 

3691 if __name__ == '__main__': 3691 ↛ 3692   line 3691 didn't jump to line 3692, because the condition on line 3691 was never true 

3692 main()