
1#!/usr/bin/python3 

2# 

3# Copyright (C) Citrix Systems Inc. 

4# 

5# This program is free software; you can redistribute it and/or modify 

6# it under the terms of the GNU Lesser General Public License as published 

7# by the Free Software Foundation; version 2.1 only. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program; if not, write to the Free Software Foundation, Inc., 

16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 

17# 

18# Script to coalesce and garbage collect VHD-based SRs in the background 

19# 

20 

21import os 

22import os.path 

23import sys 

24import time 

25import signal 

26import subprocess 

27import getopt 

28import datetime 

29import traceback 

30import base64 

31import zlib 

32import errno 

33import stat 

34 

35import XenAPI # pylint: disable=import-error 

36import util 

37import lvutil 

38import vhdutil 

39import lvhdutil 

40import lvmcache 

41import journaler 

42import fjournaler 

43import lock 

44import blktap2 

45import xs_errors 

46from refcounter import RefCounter 

47from ipc import IPCFlag 

48from lvmanager import LVActivator 

49from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG 

50from functools import reduce 

51from time import monotonic as _time 

52 

53try: 

54 from linstorjournaler import LinstorJournaler 

55 from linstorvhdutil import LinstorVhdUtil 

56 from linstorvolumemanager import get_controller_uri 

57 from linstorvolumemanager import LinstorVolumeManager 

58 from linstorvolumemanager import LinstorVolumeManagerError 

59 

60 LINSTOR_AVAILABLE = True 

61except ImportError: 

62 LINSTOR_AVAILABLE = False 

63 

64# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not 

65# possible due to lvhd_stop_using_() not working correctly. However, we leave 

66# this option available through the explicit LEAFCLSC_FORCE flag in the VDI 

67# record for use by the offline tool (which makes the operation safe by pausing 

68# the VM first) 

69AUTO_ONLINE_LEAF_COALESCE_ENABLED = True 

70 

71FLAG_TYPE_ABORT = "abort" # flag to request aborting of GC/coalesce 

72 

73# process "lock", used simply as an indicator that a process already exists 

74# that is doing GC/coalesce on this SR (such a process holds the lock, and we 

75# check for the fact by trying the lock). 

76LOCK_TYPE_RUNNING = "running" 

77lockRunning = None 

78 

79# process "lock" to indicate that the GC process has been activated but may not 

80# yet be running; it stops a second process from being started. 

81LOCK_TYPE_GC_ACTIVE = "gc_active" 

82lockActive = None 

83 

84# Default coalesce error rate limit, in messages per minute. A zero value 

85# disables throttling, and a negative value disables error reporting. 

86DEFAULT_COALESCE_ERR_RATE = 1.0 / 60 
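# Editor's note (worked example, not part of the original source): at the
# default rate of 1.0 / 60 messages per minute, _reportCoalesceError() below
# spaces messages by (1.0 / (1.0 / 60)) * 60 = 3600 seconds, i.e. at most one
# XenCenter coalesce-error message per hour per SR.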

87 

88COALESCE_LAST_ERR_TAG = 'last-coalesce-error' 

89COALESCE_ERR_RATE_TAG = 'coalesce-error-rate' 

90VAR_RUN = "/var/run/" 

91SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log" 

92 

93N_RUNNING_AVERAGE = 10 

94 

95NON_PERSISTENT_DIR = '/run/nonpersistent/sm' 

96 

97 

98class AbortException(util.SMException): 

99 pass 

100 

101 

102################################################################################ 

103# 

104# Util 

105# 

106class Util: 

107 RET_RC = 1 

108 RET_STDOUT = 2 

109 RET_STDERR = 4 

110 

111 UUID_LEN = 36 

112 

113 PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024} 

114 

115 def log(text): 

116 util.SMlog(text, ident="SMGC") 

117 log = staticmethod(log) 

118 

119 def logException(tag): 

120 info = sys.exc_info() 

121 if info[0] == SystemExit: 

122 # this should not be happening when catching "Exception", but it is 

123 sys.exit(0) 

124 tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2])) 

125 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

126 Util.log(" ***********************") 

127 Util.log(" * E X C E P T I O N *") 

128 Util.log(" ***********************") 

129 Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1])) 

130 Util.log(tb) 

131 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

132 logException = staticmethod(logException) 

133 

134 def doexec(args, expectedRC, inputtext=None, ret=None, log=True): 

135 "Execute a subprocess, then return its return code, stdout, stderr" 

136 proc = subprocess.Popen(args, 

137 stdin=subprocess.PIPE, \ 

138 stdout=subprocess.PIPE, \ 

139 stderr=subprocess.PIPE, \ 

140 shell=True, \ 

141 close_fds=True) 

142 (stdout, stderr) = proc.communicate(inputtext) 

143 stdout = str(stdout) 

144 stderr = str(stderr) 

145 rc = proc.returncode 

146 if log: 

147 Util.log("`%s`: %s" % (args, rc)) 

148 if type(expectedRC) != type([]): 

149 expectedRC = [expectedRC] 

150 if not rc in expectedRC: 

151 reason = stderr.strip() 

152 if stdout.strip(): 

153 reason = "%s (stdout: %s)" % (reason, stdout.strip()) 

154 Util.log("Failed: %s" % reason) 

155 raise util.CommandException(rc, args, reason) 

156 

157 if ret == Util.RET_RC: 

158 return rc 

159 if ret == Util.RET_STDERR: 

160 return stderr 

161 return stdout 

162 doexec = staticmethod(doexec) 

163 

164 def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut): 

165 """execute func in a separate thread and kill it if abortTest signals 

166 so""" 

167 abortSignaled = abortTest() # check now before we clear resultFlag 

168 resultFlag = IPCFlag(ns) 

169 resultFlag.clearAll() 

170 pid = os.fork() 

171 if pid: 

172 startTime = _time() 

173 try: 

174 while True: 

175 if resultFlag.test("success"): 

176 Util.log(" Child process completed successfully") 

177 resultFlag.clear("success") 

178 return 

179 if resultFlag.test("failure"): 

180 resultFlag.clear("failure") 

181 raise util.SMException("Child process exited with error") 

182 if abortTest() or abortSignaled: 

183 os.killpg(pid, signal.SIGKILL) 

184 raise AbortException("Aborting due to signal") 

185 if timeOut and _time() - startTime > timeOut: 

186 os.killpg(pid, signal.SIGKILL) 

187 resultFlag.clearAll() 

188 raise util.SMException("Timed out") 

189 time.sleep(pollInterval) 

190 finally: 

191 wait_pid = 0 

192 rc = -1 

193 count = 0 

194 while wait_pid == 0 and count < 10: 

195 wait_pid, rc = os.waitpid(pid, os.WNOHANG) 

196 if wait_pid == 0: 

197 time.sleep(2) 

198 count += 1 

199 

200 if wait_pid == 0: 

201 Util.log("runAbortable: wait for process completion timed out") 

202 else: 

203 os.setpgrp() 

204 try: 

205 if func() == ret: 

206 resultFlag.set("success") 

207 else: 

208 resultFlag.set("failure") 

209 except Exception as e: 

210 Util.log("Child process failed with : (%s)" % e) 

211 resultFlag.set("failure") 

212 Util.logException("This exception has occurred") 

213 os._exit(0) 

214 runAbortable = staticmethod(runAbortable) 
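# Editor's sketch (illustrative only, mirrors the call in _coalesceVHD below;
# 'vdi' and 'timeOut' are placeholders):
#   abortTest = lambda: IPCFlag(vdi.sr.uuid).test(FLAG_TYPE_ABORT)
#   Util.runAbortable(lambda: VDI._doCoalesceVHD(vdi), None,
#                     vdi.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)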

215 

216 def num2str(number): 

217 for prefix in ("G", "M", "K"): 

218 if number >= Util.PREFIX[prefix]: 

219 return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix) 

220 return "%s" % number 

221 num2str = staticmethod(num2str) 
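# Editor's example: Util.num2str(3 * 1024 ** 3) returns "3.000G"; values
# below 1 KiB come back unscaled, e.g. Util.num2str(512) == "512".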

222 

223 def numBits(val): 

224 count = 0 

225 while val: 

226 count += val & 1 

227 val = val >> 1 

228 return count 

229 numBits = staticmethod(numBits) 

230 

231 def countBits(bitmap1, bitmap2): 

232 """return bit count in the bitmap produced by ORing the two bitmaps""" 

233 len1 = len(bitmap1) 

234 len2 = len(bitmap2) 

235 lenLong = len1 

236 lenShort = len2 

237 bitmapLong = bitmap1 

238 if len2 > len1: 

239 lenLong = len2 

240 lenShort = len1 

241 bitmapLong = bitmap2 

242 

243 count = 0 

244 for i in range(lenShort): 

245 val = bitmap1[i] | bitmap2[i] 

246 count += Util.numBits(val) 

247 

248 for i in range(i + 1, lenLong): 

249 val = bitmapLong[i] 

250 count += Util.numBits(val) 

251 return count 

252 countBits = staticmethod(countBits) 
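# Editor's example: the bitmaps are ORed byte-by-byte over the shorter
# length and the longer one contributes its tail unchanged, e.g.
#   Util.countBits(b'\x0f', b'\xf0\x01') == 9   # 0xff -> 8 bits, 0x01 -> 1 bit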

253 

254 def getThisScript(): 

255 thisScript = util.get_real_path(__file__) 

256 if thisScript.endswith(".pyc"): 

257 thisScript = thisScript[:-1] 

258 return thisScript 

259 getThisScript = staticmethod(getThisScript) 

260 

261 

262################################################################################ 

263# 

264# XAPI 

265# 

266class XAPI: 

267 USER = "root" 

268 PLUGIN_ON_SLAVE = "on-slave" 

269 

270 CONFIG_SM = 0 

271 CONFIG_OTHER = 1 

272 CONFIG_ON_BOOT = 2 

273 CONFIG_ALLOW_CACHING = 3 

274 

275 CONFIG_NAME = { 

276 CONFIG_SM: "sm-config", 

277 CONFIG_OTHER: "other-config", 

278 CONFIG_ON_BOOT: "on-boot", 

279 CONFIG_ALLOW_CACHING: "allow_caching" 

280 } 

281 

282 class LookupError(util.SMException): 

283 pass 

284 

285 def getSession(): 

286 session = XenAPI.xapi_local() 

287 session.xenapi.login_with_password(XAPI.USER, '', '', 'SM') 

288 return session 

289 getSession = staticmethod(getSession) 

290 

291 def __init__(self, session, srUuid): 

292 self.sessionPrivate = False 

293 self.session = session 

294 if self.session is None: 

295 self.session = self.getSession() 

296 self.sessionPrivate = True 

297 self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid) 

298 self.srRecord = self.session.xenapi.SR.get_record(self._srRef) 

299 self.hostUuid = util.get_this_host() 

300 self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid) 

301 self.task = None 

302 self.task_progress = {"coalescable": 0, "done": 0} 

303 

304 def __del__(self): 

305 if self.sessionPrivate: 

306 self.session.xenapi.session.logout() 

307 

308 def isPluggedHere(self): 

309 pbds = self.getAttachedPBDs() 

310 for pbdRec in pbds: 

311 if pbdRec["host"] == self._hostRef: 

312 return True 

313 return False 

314 

315 def poolOK(self): 

316 host_recs = self.session.xenapi.host.get_all_records() 

317 for host_ref, host_rec in host_recs.items(): 

318 if not host_rec["enabled"]: 

319 Util.log("Host %s not enabled" % host_rec["uuid"]) 

320 return False 

321 return True 

322 

323 def isMaster(self): 

324 if self.srRecord["shared"]: 

325 pool = list(self.session.xenapi.pool.get_all_records().values())[0] 

326 return pool["master"] == self._hostRef 

327 else: 

328 pbds = self.getAttachedPBDs() 

329 if len(pbds) < 1: 

330 raise util.SMException("Local SR not attached") 

331 elif len(pbds) > 1: 

332 raise util.SMException("Local SR multiply attached") 

333 return pbds[0]["host"] == self._hostRef 

334 

335 def getAttachedPBDs(self): 

336 """Return PBD records for all PBDs of this SR that are currently 

337 attached""" 

338 attachedPBDs = [] 

339 pbds = self.session.xenapi.PBD.get_all_records() 

340 for pbdRec in pbds.values(): 

341 if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]: 

342 attachedPBDs.append(pbdRec) 

343 return attachedPBDs 

344 

345 def getOnlineHosts(self): 

346 return util.get_online_hosts(self.session) 

347 

348 def ensureInactive(self, hostRef, args): 

349 text = self.session.xenapi.host.call_plugin( \ 

350 hostRef, self.PLUGIN_ON_SLAVE, "multi", args) 

351 Util.log("call-plugin returned: '%s'" % text) 

352 

353 def getRecordHost(self, hostRef): 

354 return self.session.xenapi.host.get_record(hostRef) 

355 

356 def _getRefVDI(self, uuid): 

357 return self.session.xenapi.VDI.get_by_uuid(uuid) 

358 

359 def getRefVDI(self, vdi): 

360 return self._getRefVDI(vdi.uuid) 

361 

362 def getRecordVDI(self, uuid): 

363 try: 

364 ref = self._getRefVDI(uuid) 

365 return self.session.xenapi.VDI.get_record(ref) 

366 except XenAPI.Failure: 

367 return None 

368 

369 def singleSnapshotVDI(self, vdi): 

370 return self.session.xenapi.VDI.snapshot(vdi.getRef(), 

371 {"type": "internal"}) 

372 

373 def forgetVDI(self, srUuid, vdiUuid): 

374 """Forget the VDI, but handle the case where the VDI has already been 

375 forgotten (i.e. ignore errors)""" 

376 try: 

377 vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid) 

378 self.session.xenapi.VDI.forget(vdiRef) 

379 except XenAPI.Failure: 

380 pass 

381 

382 def getConfigVDI(self, vdi, key): 

383 kind = vdi.CONFIG_TYPE[key] 

384 if kind == self.CONFIG_SM: 

385 cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef()) 

386 elif kind == self.CONFIG_OTHER: 

387 cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef()) 

388 elif kind == self.CONFIG_ON_BOOT: 

389 cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef()) 

390 elif kind == self.CONFIG_ALLOW_CACHING: 

391 cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef()) 

392 else: 

393 assert(False) 

394 Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg))) 

395 return cfg 

396 

397 def removeFromConfigVDI(self, vdi, key): 

398 kind = vdi.CONFIG_TYPE[key] 

399 if kind == self.CONFIG_SM: 

400 self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key) 

401 elif kind == self.CONFIG_OTHER: 

402 self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key) 

403 else: 

404 assert(False) 

405 

406 def addToConfigVDI(self, vdi, key, val): 

407 kind = vdi.CONFIG_TYPE[key] 

408 if kind == self.CONFIG_SM: 

409 self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val) 

410 elif kind == self.CONFIG_OTHER: 

411 self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val) 

412 else: 

413 assert(False) 

414 

415 def isSnapshot(self, vdi): 

416 return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef()) 

417 

418 def markCacheSRsDirty(self): 

419 sr_refs = self.session.xenapi.SR.get_all_records_where( \ 

420 'field "local_cache_enabled" = "true"') 

421 for sr_ref in sr_refs: 

422 Util.log("Marking SR %s dirty" % sr_ref) 

423 util.set_dirty(self.session, sr_ref) 

424 

425 def srUpdate(self): 

426 Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"]) 

427 abortFlag = IPCFlag(self.srRecord["uuid"]) 

428 task = self.session.xenapi.Async.SR.update(self._srRef) 

429 cancelTask = True 

430 try: 

431 for i in range(60): 

432 status = self.session.xenapi.task.get_status(task) 

433 if not status == "pending": 

434 Util.log("SR.update_asynch status changed to [%s]" % status) 

435 cancelTask = False 

436 return 

437 if abortFlag.test(FLAG_TYPE_ABORT): 

438 Util.log("Abort signalled during srUpdate, cancelling task...") 

439 try: 

440 self.session.xenapi.task.cancel(task) 

441 cancelTask = False 

442 Util.log("Task cancelled") 

443 except: 

444 pass 

445 return 

446 time.sleep(1) 

447 finally: 

448 if cancelTask: 

449 self.session.xenapi.task.cancel(task) 

450 self.session.xenapi.task.destroy(task) 

451 Util.log("Asynch srUpdate still running, but timeout exceeded.") 

452 

453 def update_task(self): 

454 self.session.xenapi.task.set_other_config( 

455 self.task, 

456 { 

457 "applies_to": self._srRef 

458 }) 

459 total = self.task_progress['coalescable'] + self.task_progress['done'] 

460 if (total > 0): 

461 self.session.xenapi.task.set_progress( 

462 self.task, float(self.task_progress['done']) / total) 

463 

464 def create_task(self, label, description): 

465 self.task = self.session.xenapi.task.create(label, description) 

466 self.update_task() 

467 

468 def update_task_progress(self, key, value): 

469 self.task_progress[key] = value 

470 if self.task: 

471 self.update_task() 

472 

473 def set_task_status(self, status): 

474 if self.task: 

475 self.session.xenapi.task.set_status(self.task, status) 

476 

477 

478################################################################################ 

479# 

480# VDI 

481# 

482class VDI(object): 

483 """Object representing a VDI of a VHD-based SR""" 

484 

485 POLL_INTERVAL = 1 

486 POLL_TIMEOUT = 30 

487 DEVICE_MAJOR = 202 

488 DRIVER_NAME_VHD = "vhd" 

489 

490 # config keys & values 

491 DB_VHD_PARENT = "vhd-parent" 

492 DB_VDI_TYPE = "vdi_type" 

493 DB_VHD_BLOCKS = "vhd-blocks" 

494 DB_VDI_PAUSED = "paused" 

495 DB_VDI_RELINKING = "relinking" 

496 DB_VDI_ACTIVATING = "activating" 

497 DB_GC = "gc" 

498 DB_COALESCE = "coalesce" 

499 DB_LEAFCLSC = "leaf-coalesce" # config key 

500 LEAFCLSC_DISABLED = "false" # set by user; means do not leaf-coalesce 

501 LEAFCLSC_FORCE = "force" # set by user; means skip snap-coalesce 

502 LEAFCLSC_OFFLINE = "offline" # set here for informational purposes: means 

503 # no space to snap-coalesce or unable to keep 

504 # up with VDI. This is not used by the SM, it 

505 # might be used by external components. 

506 DB_ONBOOT = "on-boot" 

507 ONBOOT_RESET = "reset" 

508 DB_ALLOW_CACHING = "allow_caching" 

509 

510 CONFIG_TYPE = { 

511 DB_VHD_PARENT: XAPI.CONFIG_SM, 

512 DB_VDI_TYPE: XAPI.CONFIG_SM, 

513 DB_VHD_BLOCKS: XAPI.CONFIG_SM, 

514 DB_VDI_PAUSED: XAPI.CONFIG_SM, 

515 DB_VDI_RELINKING: XAPI.CONFIG_SM, 

516 DB_VDI_ACTIVATING: XAPI.CONFIG_SM, 

517 DB_GC: XAPI.CONFIG_OTHER, 

518 DB_COALESCE: XAPI.CONFIG_OTHER, 

519 DB_LEAFCLSC: XAPI.CONFIG_OTHER, 

520 DB_ONBOOT: XAPI.CONFIG_ON_BOOT, 

521 DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING, 

522 } 

523 

524 LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024 # bytes 

525 LIVE_LEAF_COALESCE_TIMEOUT = 10 # seconds 

526 TIMEOUT_SAFETY_MARGIN = 0.5 # extra margin when calculating 

527 # feasibility of leaf coalesce 

528 

529 JRN_RELINK = "relink" # journal entry type for relinking children 

530 JRN_COALESCE = "coalesce" # to communicate which VDI is being coalesced 

531 JRN_LEAF = "leaf" # used in coalesce-leaf 

532 

533 STR_TREE_INDENT = 4 

534 

535 def __init__(self, sr, uuid, raw): 

536 self.sr = sr 

537 self.scanError = True 

538 self.uuid = uuid 

539 self.raw = raw 

540 self.fileName = "" 

541 self.parentUuid = "" 

542 self.sizeVirt = -1 

543 self._sizeVHD = -1 

544 self.hidden = False 

545 self.parent = None 

546 self.children = [] 

547 self._vdiRef = None 

548 self._clearRef() 

549 

550 @staticmethod 

551 def extractUuid(path): 

552 raise NotImplementedError("Implement in sub class") 

553 

554 def load(self): 

555 """Load VDI info""" 

556 pass # abstract 

557 

558 def getDriverName(self): 

559 return self.DRIVER_NAME_VHD 

560 

561 def getRef(self): 

562 if self._vdiRef is None: 

563 self._vdiRef = self.sr.xapi.getRefVDI(self) 

564 return self._vdiRef 

565 

566 def getConfig(self, key, default=None): 

567 config = self.sr.xapi.getConfigVDI(self, key) 

568 if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING: 

569 val = config 

570 else: 

571 val = config.get(key) 

572 if val: 

573 return val 

574 return default 

575 

576 def setConfig(self, key, val): 

577 self.sr.xapi.removeFromConfigVDI(self, key) 

578 self.sr.xapi.addToConfigVDI(self, key, val) 

579 Util.log("Set %s = %s for %s" % (key, val, self)) 

580 

581 def delConfig(self, key): 

582 self.sr.xapi.removeFromConfigVDI(self, key) 

583 Util.log("Removed %s from %s" % (key, self)) 

584 

585 def ensureUnpaused(self): 

586 if self.getConfig(self.DB_VDI_PAUSED) == "true": 

587 Util.log("Unpausing VDI %s" % self) 

588 self.unpause() 

589 

590 def pause(self, failfast=False): 

591 if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid, 

592 self.uuid, failfast): 

593 raise util.SMException("Failed to pause VDI %s" % self) 

594 

595 def _report_tapdisk_unpause_error(self): 

596 try: 

597 xapi = self.sr.xapi.session.xenapi 

598 sr_ref = xapi.SR.get_by_uuid(self.sr.uuid) 

599 msg_name = "failed to unpause tapdisk" 

600 msg_body = "Failed to unpause tapdisk for VDI %s, " \ 

601 "VMs using this tapdisk have lost access " \ 

602 "to the corresponding disk(s)" % self.uuid 

603 xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body) 

604 except Exception as e: 

605 util.SMlog("failed to generate message: %s" % e) 

606 

607 def unpause(self): 

608 if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid, 

609 self.uuid): 

610 self._report_tapdisk_unpause_error() 

611 raise util.SMException("Failed to unpause VDI %s" % self) 

612 

613 def refresh(self, ignoreNonexistent=True): 

614 """Pause-unpause in one step""" 

615 self.sr.lock() 

616 try: 

617 try: 

618 if not blktap2.VDI.tap_refresh(self.sr.xapi.session, 

619 self.sr.uuid, self.uuid): 

620 self._report_tapdisk_unpause_error() 

621 raise util.SMException("Failed to refresh %s" % self) 

622 except XenAPI.Failure as e: 

623 if util.isInvalidVDI(e) and ignoreNonexistent: 

624 Util.log("VDI %s not found, ignoring" % self) 

625 return 

626 raise 

627 finally: 

628 self.sr.unlock() 

629 

630 def isSnapshot(self): 

631 return self.sr.xapi.isSnapshot(self) 

632 

633 def isAttachedRW(self): 

634 return util.is_attached_rw( 

635 self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef())) 

636 

637 def getVHDBlocks(self): 

638 val = self.updateBlockInfo() 

639 bitmap = zlib.decompress(base64.b64decode(val)) 

640 return bitmap 

641 

642 def isCoalesceable(self): 

643 """A VDI is coalesceable if it has no siblings and is not a leaf""" 

644 return not self.scanError and \ 

645 self.parent and \ 

646 len(self.parent.children) == 1 and \ 

647 self.hidden and \ 

648 len(self.children) > 0 

649 

650 def isLeafCoalesceable(self): 

651 """A VDI is leaf-coalesceable if it has no siblings and is a leaf""" 

652 return not self.scanError and \ 

653 self.parent and \ 

654 len(self.parent.children) == 1 and \ 

655 not self.hidden and \ 

656 len(self.children) == 0 

657 

658 def canLiveCoalesce(self, speed): 

659 """Can we stop-and-leaf-coalesce this VDI? The VDI must be 

660 isLeafCoalesceable() already""" 

661 feasibleSize = False 

662 allowedDownTime = \ 

663 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT 

664 if speed: 

665 feasibleSize = \ 

666 self.getSizeVHD() // speed < allowedDownTime 

667 else: 

668 feasibleSize = \ 

669 self.getSizeVHD() < self.LIVE_LEAF_COALESCE_MAX_SIZE 

670 

671 return (feasibleSize or 

672 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE) 
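# Editor's worked example: allowedDownTime = 0.5 * 10 = 5 seconds. With a
# measured coalesce speed of, say, 100 MiB/s the leaf qualifies while its VHD
# is under ~500 MiB; with no speed data the fallback threshold is
# LIVE_LEAF_COALESCE_MAX_SIZE (20 MiB). LEAFCLSC_FORCE bypasses both checks.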

673 

674 def getAllPrunable(self): 

675 if len(self.children) == 0: # base case 

676 # it is possible to have a hidden leaf that was recently coalesced 

677 # onto its parent, its children already relinked but not yet 

678 # reloaded - in which case it may not be garbage collected yet: 

679 # some tapdisks could still be using the file. 

680 if self.sr.journaler.get(self.JRN_RELINK, self.uuid): 

681 return [] 

682 if not self.scanError and self.hidden: 

683 return [self] 

684 return [] 

685 

686 thisPrunable = True 

687 vdiList = [] 

688 for child in self.children: 

689 childList = child.getAllPrunable() 

690 vdiList.extend(childList) 

691 if child not in childList: 

692 thisPrunable = False 

693 

694 # We can destroy the current VDI if all children are hidden BUT the 

695 # current VDI must be hidden too to do that! 

696 # Example in this case (after a failed live leaf coalesce): 

697 # 

698 # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees): 

699 # SMGC: [32436] b5458d61(1.000G/4.127M) 

700 # SMGC: [32436] *OLD_b545(1.000G/4.129M) 

701 # 

702 # OLD_b545 is hidden and must be removed, but b5458d61 must not be. 

703 # Normally we are not in this function when the delete action is 

704 # executed but in `_liveLeafCoalesce`. 

705 

706 if not self.scanError and not self.hidden and thisPrunable: 

707 vdiList.append(self) 

708 return vdiList 

709 

710 def getSizeVHD(self): 

711 return self._sizeVHD 

712 

713 def getTreeRoot(self): 

714 "Get the root of the tree that self belongs to" 

715 root = self 

716 while root.parent: 

717 root = root.parent 

718 return root 

719 

720 def getTreeHeight(self): 

721 "Get the height of the subtree rooted at self" 

722 if len(self.children) == 0: 

723 return 1 

724 

725 maxChildHeight = 0 

726 for child in self.children: 

727 childHeight = child.getTreeHeight() 

728 if childHeight > maxChildHeight: 

729 maxChildHeight = childHeight 

730 

731 return maxChildHeight + 1 

732 

733 def getAllLeaves(self): 

734 "Get all leaf nodes in the subtree rooted at self" 

735 if len(self.children) == 0: 

736 return [self] 

737 

738 leaves = [] 

739 for child in self.children: 

740 leaves.extend(child.getAllLeaves()) 

741 return leaves 

742 

743 def updateBlockInfo(self): 

744 val = base64.b64encode(self._queryVHDBlocks()).decode() 

745 self.setConfig(VDI.DB_VHD_BLOCKS, val) 

746 return val 

747 

748 def rename(self, uuid): 

749 "Rename the VDI file" 

750 assert(not self.sr.vdis.get(uuid)) 

751 self._clearRef() 

752 oldUuid = self.uuid 

753 self.uuid = uuid 

754 self.children = [] 

755 # updating the children themselves is the responsibility of the caller 

756 del self.sr.vdis[oldUuid] 

757 self.sr.vdis[self.uuid] = self 

758 

759 def delete(self): 

760 "Physically delete the VDI" 

761 lock.Lock.cleanup(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) 

762 lock.Lock.cleanupAll(self.uuid) 

763 self._clear() 

764 

765 def getParent(self): 

766 return vhdutil.getParent(self.path, lambda x: x.strip()) 

767 

768 def repair(self, parent): 

769 vhdutil.repair(parent) 

770 

771 def __str__(self): 

772 strHidden = "" 

773 if self.hidden: 

774 strHidden = "*" 

775 strSizeVirt = "?" 

776 if self.sizeVirt > 0: 

777 strSizeVirt = Util.num2str(self.sizeVirt) 

778 strSizeVHD = "?" 

779 if self._sizeVHD > 0: 

780 strSizeVHD = "/%s" % Util.num2str(self._sizeVHD) 

781 strType = "" 

782 if self.raw: 

783 strType = "[RAW]" 

784 strSizeVHD = "" 

785 

786 return "%s%s(%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt, 

787 strSizeVHD, strType) 

788 

789 def validate(self, fast=False): 

790 if not vhdutil.check(self.path, fast=fast): 

791 raise util.SMException("VHD %s corrupted" % self) 

792 

793 def _clear(self): 

794 self.uuid = "" 

795 self.path = "" 

796 self.parentUuid = "" 

797 self.parent = None 

798 self._clearRef() 

799 

800 def _clearRef(self): 

801 self._vdiRef = None 

802 

803 def _doCoalesce(self): 

804 """Coalesce self onto parent. Only perform the actual coalescing of 

805 VHD, but not the subsequent relinking. We'll do that as the next step, 

806 after reloading the entire SR in case things have changed while we 

807 were coalescing""" 

808 self.validate() 

809 self.parent.validate(True) 

810 self.parent._increaseSizeVirt(self.sizeVirt) 

811 self.sr._updateSlavesOnResize(self.parent) 

812 self._coalesceVHD(0) 

813 self.parent.validate(True) 

814 #self._verifyContents(0) 

815 self.parent.updateBlockInfo() 

816 

817 def _verifyContents(self, timeOut): 

818 Util.log(" Coalesce verification on %s" % self) 

819 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

820 Util.runAbortable(lambda: self._runTapdiskDiff(), True, 

821 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

822 Util.log(" Coalesce verification succeeded") 

823 

824 def _runTapdiskDiff(self): 

825 cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \ 

826 (self.getDriverName(), self.path, \ 

827 self.parent.getDriverName(), self.parent.path) 

828 Util.doexec(cmd, 0) 

829 return True 

830 

831 def _reportCoalesceError(vdi, ce): 

832 """Reports a coalesce error to XenCenter. 

833 

834 vdi: the VDI object on which the coalesce error occurred 

835 ce: the CommandException that was raised""" 

836 

837 msg_name = os.strerror(ce.code) 

838 if ce.code == errno.ENOSPC: 

839 # TODO We could add more information here, e.g. exactly how much 

840 # space is required for the particular coalesce, as well as actions 

841 # to be taken by the user and consequences of not taking these 

842 # actions. 

843 msg_body = 'Ran out of space while coalescing.' 

844 elif ce.code == errno.EIO: 

845 msg_body = 'I/O error while coalescing.' 

846 else: 

847 msg_body = '' 

848 util.SMlog('Coalesce failed on SR %s: %s (%s)' 

849 % (vdi.sr.uuid, msg_name, msg_body)) 

850 

851 # Create a XenCenter message, but don't spam. 

852 xapi = vdi.sr.xapi.session.xenapi 

853 sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid) 

854 oth_cfg = xapi.SR.get_other_config(sr_ref) 

855 if COALESCE_ERR_RATE_TAG in oth_cfg: 

856 coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG]) 

857 else: 

858 coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE 

859 

860 xcmsg = False 

861 if coalesce_err_rate == 0: 

862 xcmsg = True 

863 elif coalesce_err_rate > 0: 

864 now = datetime.datetime.now() 

865 sm_cfg = xapi.SR.get_sm_config(sr_ref) 

866 if COALESCE_LAST_ERR_TAG in sm_cfg: 

867 # seconds per message (minimum distance in time between two 

868 # messages in seconds) 

869 spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60) 

870 last = datetime.datetime.fromtimestamp( 

871 float(sm_cfg[COALESCE_LAST_ERR_TAG])) 

872 if now - last >= spm: 

873 xapi.SR.remove_from_sm_config(sr_ref, 

874 COALESCE_LAST_ERR_TAG) 

875 xcmsg = True 

876 else: 

877 xcmsg = True 

878 if xcmsg: 

879 xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG, 

880 str(now.strftime('%s'))) 

881 if xcmsg: 

882 xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body) 

883 _reportCoalesceError = staticmethod(_reportCoalesceError) 

884 

885 def coalesce(self): 

886 vhdutil.coalesce(self.path) 

887 

888 def _doCoalesceVHD(vdi): 

889 try: 

890 startTime = time.time() 

891 vhdSize = vdi.getSizeVHD() 

892 vdi.coalesce() 

893 endTime = time.time() 

894 vdi.sr.recordStorageSpeed(startTime, endTime, vhdSize) 

895 except util.CommandException as ce: 

896 # We use try/except for the following piece of code because it runs 

897 # in a separate process context and errors will not be caught and 

898 # reported by anyone. 

899 try: 

900 # Report coalesce errors back to user via XC 

901 VDI._reportCoalesceError(vdi, ce) 

902 except Exception as e: 

903 util.SMlog('failed to create XenCenter message: %s' % e) 

904 raise ce 

905 except: 

906 raise 

907 _doCoalesceVHD = staticmethod(_doCoalesceVHD) 

908 

909 def _vdi_is_raw(self, vdi_path): 

910 """ 

911 Given path to vdi determine if it is raw 

912 """ 

913 uuid = self.extractUuid(vdi_path) 

914 return self.sr.vdis[uuid].raw 

915 

916 def _coalesceVHD(self, timeOut): 

917 Util.log(" Running VHD coalesce on %s" % self) 

918 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

919 try: 

920 util.fistpoint.activate_custom_fn( 

921 "cleanup_coalesceVHD_inject_failure", 

922 util.inject_failure) 

923 Util.runAbortable(lambda: VDI._doCoalesceVHD(self), None, 

924 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

925 except: 

926 # An exception at this phase could indicate a failure in VHD coalesce 

927 # or a kill of vhd coalesce by runAbortable due to timeOut 

928 # Try a repair and reraise the exception 

929 parent = "" 

930 try: 

931 parent = self.getParent() 

932 if not self._vdi_is_raw(parent): 

933 # Repair error is logged and ignored. Error reraised later 

934 util.SMlog('Coalesce failed on %s, attempting repair on ' \ 

935 'parent %s' % (self.uuid, parent)) 

936 self.repair(parent) 

937 except Exception as e: 

938 util.SMlog('(error ignored) Failed to repair parent %s ' \ 

939 'after failed coalesce on %s, err: %s' % 

940 (parent, self.path, e)) 

941 raise 

942 

943 util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid) 

944 

945 def _relinkSkip(self): 

946 """Relink children of this VDI to point to the parent of this VDI""" 

947 abortFlag = IPCFlag(self.sr.uuid) 

948 for child in self.children: 

949 if abortFlag.test(FLAG_TYPE_ABORT): 

950 raise AbortException("Aborting due to signal") 

951 Util.log(" Relinking %s from %s to %s" % \ 

952 (child, self, self.parent)) 

953 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid) 

954 child._setParent(self.parent) 

955 self.children = [] 

956 

957 def _reloadChildren(self, vdiSkip): 

958 """Pause & unpause all VDIs in the subtree to cause blktap to reload 

959 the VHD metadata for this file in any online VDI""" 

960 abortFlag = IPCFlag(self.sr.uuid) 

961 for child in self.children: 

962 if child == vdiSkip: 

963 continue 

964 if abortFlag.test(FLAG_TYPE_ABORT): 

965 raise AbortException("Aborting due to signal") 

966 Util.log(" Reloading VDI %s" % child) 

967 child._reload() 

968 

969 def _reload(self): 

970 """Pause & unpause to cause blktap to reload the VHD metadata""" 

971 for child in self.children: 

972 child._reload() 

973 

974 # only leaves can be attached 

975 if len(self.children) == 0: 

976 try: 

977 self.delConfig(VDI.DB_VDI_RELINKING) 

978 except XenAPI.Failure as e: 

979 if not util.isInvalidVDI(e): 

980 raise 

981 self.refresh() 

982 

983 def _tagChildrenForRelink(self): 

984 if len(self.children) == 0: 

985 retries = 0 

986 try: 

987 while retries < 15: 

988 retries += 1 

989 if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None: 

990 Util.log("VDI %s is activating, wait to relink" % 

991 self.uuid) 

992 else: 

993 self.setConfig(VDI.DB_VDI_RELINKING, "True") 

994 

995 if self.getConfig(VDI.DB_VDI_ACTIVATING): 

996 self.delConfig(VDI.DB_VDI_RELINKING) 

997 Util.log("VDI %s started activating while tagging" % 

998 self.uuid) 

999 else: 

1000 return 

1001 time.sleep(2) 

1002 

1003 raise util.SMException("Failed to tag vdi %s for relink" % self) 

1004 except XenAPI.Failure as e: 

1005 if not util.isInvalidVDI(e): 

1006 raise 

1007 

1008 for child in self.children: 

1009 child._tagChildrenForRelink() 

1010 

1011 def _loadInfoParent(self): 

1012 ret = vhdutil.getParent(self.path, lvhdutil.extractUuid) 

1013 if ret: 

1014 self.parentUuid = ret 

1015 

1016 def _setParent(self, parent): 

1017 vhdutil.setParent(self.path, parent.path, False) 

1018 self.parent = parent 

1019 self.parentUuid = parent.uuid 

1020 parent.children.append(self) 

1021 try: 

1022 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1023 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1024 (self.uuid, self.parentUuid)) 

1025 except: 

1026 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1027 (self.uuid, self.parentUuid)) 

1028 

1029 def _loadInfoHidden(self): 

1030 hidden = vhdutil.getHidden(self.path) 

1031 self.hidden = (hidden != 0) 

1032 

1033 def _setHidden(self, hidden=True): 

1034 vhdutil.setHidden(self.path, hidden) 

1035 self.hidden = hidden 

1036 

1037 def _increaseSizeVirt(self, size, atomic=True): 

1038 """ensure the virtual size of 'self' is at least 'size'. Note that 

1039 resizing a VHD must always be offline and atomically: the file must 

1040 not be open by anyone and no concurrent operations may take place. 

1041 Thus we use the Agent API call for performing paused atomic 

1042 operations. If the caller is already in the atomic context, it must 

1043 call with atomic = False""" 

1044 if self.sizeVirt >= size: 

1045 return 

1046 Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \ 

1047 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1048 

1049 msize = vhdutil.getMaxResizeSize(self.path) * 1024 * 1024 

1050 if (size <= msize): 

1051 vhdutil.setSizeVirtFast(self.path, size) 

1052 else: 

1053 if atomic: 

1054 vdiList = self._getAllSubtree() 

1055 self.sr.lock() 

1056 try: 

1057 self.sr.pauseVDIs(vdiList) 

1058 try: 

1059 self._setSizeVirt(size) 

1060 finally: 

1061 self.sr.unpauseVDIs(vdiList) 

1062 finally: 

1063 self.sr.unlock() 

1064 else: 

1065 self._setSizeVirt(size) 

1066 

1067 self.sizeVirt = vhdutil.getSizeVirt(self.path) 

1068 

1069 def _setSizeVirt(self, size): 

1070 """WARNING: do not call this method directly unless all VDIs in the 

1071 subtree are guaranteed to be unplugged (and remain so for the duration 

1072 of the operation): this operation is only safe for offline VHDs""" 

1073 jFile = os.path.join(self.sr.path, self.uuid) 

1074 vhdutil.setSizeVirt(self.path, size, jFile) 

1075 

1076 def _queryVHDBlocks(self): 

1077 return vhdutil.getBlockBitmap(self.path) 

1078 

1079 def _getCoalescedSizeData(self): 

1080 """Get the data size of the resulting VHD if we coalesce self onto 

1081 parent. We calculate the actual size by using the VHD block allocation 

1082 information (as opposed to just adding up the two VHD sizes to get an 

1083 upper bound)""" 

1084 # make sure we don't use stale BAT info from vdi_rec since the child 

1085 # was writable all this time 

1086 self.delConfig(VDI.DB_VHD_BLOCKS) 

1087 blocksChild = self.getVHDBlocks() 

1088 blocksParent = self.parent.getVHDBlocks() 

1089 numBlocks = Util.countBits(blocksChild, blocksParent) 

1090 Util.log("Num combined blocks = %d" % numBlocks) 

1091 sizeData = numBlocks * vhdutil.VHD_BLOCK_SIZE 

1092 assert(sizeData <= self.sizeVirt) 

1093 return sizeData 

1094 

1095 def _calcExtraSpaceForCoalescing(self): 

1096 sizeData = self._getCoalescedSizeData() 

1097 sizeCoalesced = sizeData + vhdutil.calcOverheadBitmap(sizeData) + \ 

1098 vhdutil.calcOverheadEmpty(self.sizeVirt) 

1099 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1100 return sizeCoalesced - self.parent.getSizeVHD() 
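# Editor's worked example (assumes vhdutil.VHD_BLOCK_SIZE is the usual 2 MiB
# VHD block size): if the ORed block bitmaps report 1000 allocated blocks,
# sizeData = 1000 * 2 MiB ~= 1.95 GiB; the extra space needed is that figure
# plus the bitmap and empty-VHD overheads, minus the parent's current
# physical VHD size.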

1101 

1102 def _calcExtraSpaceForLeafCoalescing(self): 

1103 """How much extra space in the SR will be required to 

1104 [live-]leaf-coalesce this VDI""" 

1105 # the space requirements are the same as for inline coalesce 

1106 return self._calcExtraSpaceForCoalescing() 

1107 

1108 def _calcExtraSpaceForSnapshotCoalescing(self): 

1109 """How much extra space in the SR will be required to 

1110 snapshot-coalesce this VDI""" 

1111 return self._calcExtraSpaceForCoalescing() + \ 

1112 vhdutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf 

1113 

1114 def _getAllSubtree(self): 

1115 """Get self and all VDIs in the subtree of self as a flat list""" 

1116 vdiList = [self] 

1117 for child in self.children: 

1118 vdiList.extend(child._getAllSubtree()) 

1119 return vdiList 

1120 

1121 

1122class FileVDI(VDI): 

1123 """Object representing a VDI in a file-based SR (EXT or NFS)""" 

1124 

1125 @staticmethod 

1126 def extractUuid(path): 

1127 path = os.path.basename(path.strip()) 

1128 if not (path.endswith(vhdutil.FILE_EXTN_VHD) or \ 

1129 path.endswith(vhdutil.FILE_EXTN_RAW)): 

1130 return None 

1131 uuid = path.replace(vhdutil.FILE_EXTN_VHD, "").replace( \ 

1132 vhdutil.FILE_EXTN_RAW, "") 

1133 # TODO: validate UUID format 

1134 return uuid 
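# Editor's example: extractUuid("/some/path/<vdi-uuid>.vhd") strips the
# directory part and the ".vhd"/".raw" extension and returns "<vdi-uuid>";
# any other extension yields None.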

1135 

1136 def __init__(self, sr, uuid, raw): 

1137 VDI.__init__(self, sr, uuid, raw) 

1138 if self.raw: 

1139 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_RAW) 

1140 else: 

1141 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) 

1142 

1143 def load(self, info=None): 

1144 if not info: 

1145 if not util.pathexists(self.path): 

1146 raise util.SMException("%s not found" % self.path) 

1147 try: 

1148 info = vhdutil.getVHDInfo(self.path, self.extractUuid) 

1149 except util.SMException: 

1150 Util.log(" [VDI %s: failed to read VHD metadata]" % self.uuid) 

1151 return 

1152 self.parent = None 

1153 self.children = [] 

1154 self.parentUuid = info.parentUuid 

1155 self.sizeVirt = info.sizeVirt 

1156 self._sizeVHD = info.sizePhys 

1157 self.hidden = info.hidden 

1158 self.scanError = False 

1159 self.path = os.path.join(self.sr.path, "%s%s" % \ 

1160 (self.uuid, vhdutil.FILE_EXTN_VHD)) 

1161 

1162 def rename(self, uuid): 

1163 oldPath = self.path 

1164 VDI.rename(self, uuid) 

1165 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) 

1166 self.path = os.path.join(self.sr.path, self.fileName) 

1167 assert(not util.pathexists(self.path)) 

1168 Util.log("Renaming %s -> %s" % (oldPath, self.path)) 

1169 os.rename(oldPath, self.path) 

1170 

1171 def delete(self): 

1172 if len(self.children) > 0: 

1173 raise util.SMException("VDI %s has children, can't delete" % \ 

1174 self.uuid) 

1175 try: 

1176 self.sr.lock() 

1177 try: 

1178 os.unlink(self.path) 

1179 self.sr.forgetVDI(self.uuid) 

1180 finally: 

1181 self.sr.unlock() 

1182 except OSError: 

1183 raise util.SMException("os.unlink(%s) failed" % self.path) 

1184 VDI.delete(self) 

1185 

1186 

1187class LVHDVDI(VDI): 

1188 """Object representing a VDI in an LVHD SR""" 

1189 

1190 JRN_ZERO = "zero" # journal entry type for zeroing out end of parent 

1191 DRIVER_NAME_RAW = "aio" 

1192 

1193 def load(self, vdiInfo): 

1194 self.parent = None 

1195 self.children = [] 

1196 self._sizeVHD = -1 

1197 self.scanError = vdiInfo.scanError 

1198 self.sizeLV = vdiInfo.sizeLV 

1199 self.sizeVirt = vdiInfo.sizeVirt 

1200 self.fileName = vdiInfo.lvName 

1201 self.lvActive = vdiInfo.lvActive 

1202 self.lvOpen = vdiInfo.lvOpen 

1203 self.lvReadonly = vdiInfo.lvReadonly 

1204 self.hidden = vdiInfo.hidden 

1205 self.parentUuid = vdiInfo.parentUuid 

1206 self.path = os.path.join(self.sr.path, self.fileName) 

1207 

1208 @staticmethod 

1209 def extractUuid(path): 

1210 return lvhdutil.extractUuid(path) 

1211 

1212 def getDriverName(self): 

1213 if self.raw: 

1214 return self.DRIVER_NAME_RAW 

1215 return self.DRIVER_NAME_VHD 

1216 

1217 def inflate(self, size): 

1218 """inflate the LV containing the VHD to 'size'""" 

1219 if self.raw: 

1220 return 

1221 self._activate() 

1222 self.sr.lock() 

1223 try: 

1224 lvhdutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, size) 

1225 util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid) 

1226 finally: 

1227 self.sr.unlock() 

1228 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1229 self._sizeVHD = -1 

1230 

1231 def deflate(self): 

1232 """deflate the LV containing the VHD to minimum""" 

1233 if self.raw: 

1234 return 

1235 self._activate() 

1236 self.sr.lock() 

1237 try: 

1238 lvhdutil.deflate(self.sr.lvmCache, self.fileName, self.getSizeVHD()) 

1239 finally: 

1240 self.sr.unlock() 

1241 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1242 self._sizeVHD = -1 

1243 

1244 def inflateFully(self): 

1245 self.inflate(lvhdutil.calcSizeVHDLV(self.sizeVirt)) 

1246 

1247 def inflateParentForCoalesce(self): 

1248 """Inflate the parent only as much as needed for the purposes of 

1249 coalescing""" 

1250 if self.parent.raw: 

1251 return 

1252 inc = self._calcExtraSpaceForCoalescing() 

1253 if inc > 0: 

1254 util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid) 

1255 self.parent.inflate(self.parent.sizeLV + inc) 

1256 

1257 def updateBlockInfo(self): 

1258 if not self.raw: 

1259 return VDI.updateBlockInfo(self) 

1260 

1261 def rename(self, uuid): 

1262 oldUuid = self.uuid 

1263 oldLVName = self.fileName 

1264 VDI.rename(self, uuid) 

1265 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + self.uuid 

1266 if self.raw: 

1267 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + self.uuid 

1268 self.path = os.path.join(self.sr.path, self.fileName) 

1269 assert(not self.sr.lvmCache.checkLV(self.fileName)) 

1270 

1271 self.sr.lvmCache.rename(oldLVName, self.fileName) 

1272 if self.sr.lvActivator.get(oldUuid, False): 

1273 self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False) 

1274 

1275 ns = lvhdutil.NS_PREFIX_LVM + self.sr.uuid 

1276 (cnt, bcnt) = RefCounter.check(oldUuid, ns) 

1277 RefCounter.set(self.uuid, cnt, bcnt, ns) 

1278 RefCounter.reset(oldUuid, ns) 

1279 

1280 def delete(self): 

1281 if len(self.children) > 0: 

1282 raise util.SMException("VDI %s has children, can't delete" % \ 

1283 self.uuid) 

1284 self.sr.lock() 

1285 try: 

1286 self.sr.lvmCache.remove(self.fileName) 

1287 self.sr.forgetVDI(self.uuid) 

1288 finally: 

1289 self.sr.unlock() 

1290 RefCounter.reset(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) 

1291 VDI.delete(self) 

1292 

1293 def getSizeVHD(self): 

1294 if self._sizeVHD == -1: 

1295 self._loadInfoSizeVHD() 

1296 return self._sizeVHD 

1297 

1298 def _loadInfoSizeVHD(self): 

1299 """Get the physical utilization of the VHD file. We do it individually 

1300 (and not using the VHD batch scanner) as an optimization: this info is 

1301 relatively expensive and we need it only for VDIs involved in 

1302 coalescing.""" 

1303 if self.raw: 

1304 return 

1305 self._activate() 

1306 self._sizeVHD = vhdutil.getSizePhys(self.path) 

1307 if self._sizeVHD <= 0: 

1308 raise util.SMException("phys size of %s = %d" % \ 

1309 (self, self._sizeVHD)) 

1310 

1311 def _loadInfoHidden(self): 

1312 if self.raw: 

1313 self.hidden = self.sr.lvmCache.getHidden(self.fileName) 

1314 else: 

1315 VDI._loadInfoHidden(self) 

1316 

1317 def _setHidden(self, hidden=True): 

1318 if self.raw: 

1319 self.sr.lvmCache.setHidden(self.fileName, hidden) 

1320 self.hidden = hidden 

1321 else: 

1322 VDI._setHidden(self, hidden) 

1323 

1324 def __str__(self): 

1325 strType = "VHD" 

1326 if self.raw: 

1327 strType = "RAW" 

1328 strHidden = "" 

1329 if self.hidden: 

1330 strHidden = "*" 

1331 strSizeVHD = "" 

1332 if self._sizeVHD > 0: 

1333 strSizeVHD = Util.num2str(self._sizeVHD) 

1334 strActive = "n" 

1335 if self.lvActive: 

1336 strActive = "a" 

1337 if self.lvOpen: 

1338 strActive += "o" 

1339 return "%s%s[%s](%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType, 

1340 Util.num2str(self.sizeVirt), strSizeVHD, 

1341 Util.num2str(self.sizeLV), strActive) 

1342 

1343 def validate(self, fast=False): 

1344 if not self.raw: 

1345 VDI.validate(self, fast) 

1346 

1347 def _doCoalesce(self): 

1348 """LVHD parents must first be activated, inflated, and made writable""" 

1349 try: 

1350 self._activateChain() 

1351 self.sr.lvmCache.setReadonly(self.parent.fileName, False) 

1352 self.parent.validate() 

1353 self.inflateParentForCoalesce() 

1354 VDI._doCoalesce(self) 

1355 finally: 

1356 self.parent._loadInfoSizeVHD() 

1357 self.parent.deflate() 

1358 self.sr.lvmCache.setReadonly(self.parent.fileName, True) 

1359 

1360 def _setParent(self, parent): 

1361 self._activate() 

1362 if self.lvReadonly: 

1363 self.sr.lvmCache.setReadonly(self.fileName, False) 

1364 

1365 try: 

1366 vhdutil.setParent(self.path, parent.path, parent.raw) 

1367 finally: 

1368 if self.lvReadonly: 

1369 self.sr.lvmCache.setReadonly(self.fileName, True) 

1370 self._deactivate() 

1371 self.parent = parent 

1372 self.parentUuid = parent.uuid 

1373 parent.children.append(self) 

1374 try: 

1375 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1376 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1377 (self.uuid, self.parentUuid)) 

1378 except: 

1379 Util.log("Failed to update the vhd-parent with %s for child %s" % \ 

1380 (self.parentUuid, self.uuid)) 

1381 

1382 def _activate(self): 

1383 self.sr.lvActivator.activate(self.uuid, self.fileName, False) 

1384 

1385 def _activateChain(self): 

1386 vdi = self 

1387 while vdi: 

1388 vdi._activate() 

1389 vdi = vdi.parent 

1390 

1391 def _deactivate(self): 

1392 self.sr.lvActivator.deactivate(self.uuid, False) 

1393 

1394 def _increaseSizeVirt(self, size, atomic=True): 

1395 "ensure the virtual size of 'self' is at least 'size'" 

1396 self._activate() 

1397 if not self.raw: 

1398 VDI._increaseSizeVirt(self, size, atomic) 

1399 return 

1400 

1401 # raw VDI case 

1402 offset = self.sizeLV 

1403 if self.sizeVirt < size: 

1404 oldSize = self.sizeLV 

1405 self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size) 

1406 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV)) 

1407 self.sr.lvmCache.setSize(self.fileName, self.sizeLV) 

1408 offset = oldSize 

1409 unfinishedZero = False 

1410 jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid) 

1411 if jval: 

1412 unfinishedZero = True 

1413 offset = int(jval) 

1414 length = self.sizeLV - offset 

1415 if not length: 

1416 return 

1417 

1418 if unfinishedZero: 

1419 Util.log(" ==> Redoing unfinished zeroing out") 

1420 else: 

1421 self.sr.journaler.create(self.JRN_ZERO, self.uuid, \ 

1422 str(offset)) 

1423 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1424 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1425 func = lambda: util.zeroOut(self.path, offset, length) 

1426 Util.runAbortable(func, True, self.sr.uuid, abortTest, 

1427 VDI.POLL_INTERVAL, 0) 

1428 self.sr.journaler.remove(self.JRN_ZERO, self.uuid) 

1429 

1430 def _setSizeVirt(self, size): 

1431 """WARNING: do not call this method directly unless all VDIs in the 

1432 subtree are guaranteed to be unplugged (and remain so for the duration 

1433 of the operation): this operation is only safe for offline VHDs""" 

1434 self._activate() 

1435 jFile = lvhdutil.createVHDJournalLV(self.sr.lvmCache, self.uuid, 

1436 vhdutil.MAX_VHD_JOURNAL_SIZE) 

1437 try: 

1438 lvhdutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, 

1439 size, jFile) 

1440 finally: 

1441 lvhdutil.deleteVHDJournalLV(self.sr.lvmCache, self.uuid) 

1442 

1443 def _queryVHDBlocks(self): 

1444 self._activate() 

1445 return VDI._queryVHDBlocks(self) 

1446 

1447 def _calcExtraSpaceForCoalescing(self): 

1448 if self.parent.raw: 

1449 return 0 # raw parents are never deflated in the first place 

1450 sizeCoalesced = lvhdutil.calcSizeVHDLV(self._getCoalescedSizeData()) 

1451 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1452 return sizeCoalesced - self.parent.sizeLV 

1453 

1454 def _calcExtraSpaceForLeafCoalescing(self): 

1455 """How much extra space in the SR will be required to 

1456 [live-]leaf-coalesce this VDI""" 

1457 # we can deflate the leaf to minimize the space requirements 

1458 deflateDiff = self.sizeLV - lvhdutil.calcSizeLV(self.getSizeVHD()) 

1459 return self._calcExtraSpaceForCoalescing() - deflateDiff 

1460 

1461 def _calcExtraSpaceForSnapshotCoalescing(self): 

1462 return self._calcExtraSpaceForCoalescing() + \ 

1463 lvhdutil.calcSizeLV(self.getSizeVHD()) 

1464 

1465 

1466class LinstorVDI(VDI): 

1467 """Object representing a VDI in a LINSTOR SR""" 

1468 

1469 MAX_SIZE = 2 * 1024 * 1024 * 1024 * 1024 # Max VHD size. 

1470 

1471 VOLUME_LOCK_TIMEOUT = 30 

1472 

1473 def load(self, info=None): 

1474 self.parentUuid = info.parentUuid 

1475 self.scanError = True 

1476 self.parent = None 

1477 self.children = [] 

1478 

1479 self.fileName = self.sr._linstor.get_volume_name(self.uuid) 

1480 self.path = self.sr._linstor.build_device_path(self.fileName) 

1481 

1482 if not info: 

1483 try: 

1484 info = self.sr._vhdutil.get_vhd_info(self.uuid) 

1485 except util.SMException: 

1486 Util.log( 

1487 ' [VDI {}: failed to read VHD metadata]'.format(self.uuid) 

1488 ) 

1489 return 

1490 

1491 self.parentUuid = info.parentUuid 

1492 self.sizeVirt = info.sizeVirt 

1493 self._sizeVHD = info.sizePhys 

1494 self.hidden = info.hidden 

1495 self.scanError = False 

1496 

1497 def rename(self, uuid): 

1498 Util.log('Renaming {} -> {} (path={})'.format( 

1499 self.uuid, uuid, self.path 

1500 )) 

1501 self.sr._linstor.update_volume_uuid(self.uuid, uuid) 

1502 VDI.rename(self, uuid) 

1503 

1504 def delete(self): 

1505 if len(self.children) > 0: 

1506 raise util.SMException( 

1507 'VDI {} has children, can\'t delete'.format(self.uuid) 

1508 ) 

1509 self.sr.lock() 

1510 try: 

1511 self.sr._linstor.destroy_volume(self.uuid) 

1512 self.sr.forgetVDI(self.uuid) 

1513 finally: 

1514 self.sr.unlock() 

1515 VDI.delete(self) 

1516 

1517 def validate(self, fast=False): 

1518 if not self.sr._vhdutil.check(self.uuid, fast=fast): 

1519 raise util.SMException('VHD {} corrupted'.format(self)) 

1520 

1521 def pause(self, failfast=False): 

1522 self.sr._linstor.ensure_volume_is_not_locked( 

1523 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1524 ) 

1525 return super(LinstorVDI, self).pause(failfast) 

1526 

1527 def coalesce(self): 

1528 self.sr._vhdutil.force_coalesce(self.path) 

1529 

1530 def getParent(self): 

1531 return self.sr._vhdutil.get_parent( 

1532 self.sr._linstor.get_volume_uuid_from_device_path(self.path) 

1533 ) 

1534 

1535 def repair(self, parent_uuid): 

1536 self.sr._vhdutil.force_repair( 

1537 self.sr._linstor.get_device_path(parent_uuid) 

1538 ) 

1539 

1540 def _relinkSkip(self): 

1541 abortFlag = IPCFlag(self.sr.uuid) 

1542 for child in self.children: 

1543 if abortFlag.test(FLAG_TYPE_ABORT): 

1544 raise AbortException('Aborting due to signal') 

1545 Util.log( 

1546 ' Relinking {} from {} to {}'.format( 

1547 child, self, self.parent 

1548 ) 

1549 ) 

1550 

1551 session = child.sr.xapi.session 

1552 sr_uuid = child.sr.uuid 

1553 vdi_uuid = child.uuid 

1554 try: 

1555 self.sr._linstor.ensure_volume_is_not_locked( 

1556 vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1557 ) 

1558 blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid) 

1559 child._setParent(self.parent) 

1560 finally: 

1561 blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) 

1562 self.children = [] 

1563 

1564 def _setParent(self, parent): 

1565 self.sr._vhdutil.force_parent(self.path, parent.path) 

1566 self.parent = parent 

1567 self.parentUuid = parent.uuid 

1568 parent.children.append(self) 

1569 try: 

1570 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1571 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1572 (self.uuid, self.parentUuid)) 

1573 except: 

1574 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1575 (self.uuid, self.parentUuid)) 

1576 

1577 def _setHidden(self, hidden=True): 

1578 HIDDEN_TAG = 'hidden' 

1579 

1580 if self.raw: 

1581 self.sr._linstor.update_volume_metadata(self.uuid, { 

1582 HIDDEN_TAG: hidden 

1583 }) 

1584 self.hidden = hidden 

1585 else: 

1586 VDI._setHidden(self, hidden) 

1587 

1588 def _queryVHDBlocks(self): 

1589 return self.sr._vhdutil.get_block_bitmap(self.uuid) 

1590 

1591################################################################################ 

1592# 

1593# SR 

1594# 

1595class SR(object): 

1596 class LogFilter: 

1597 def __init__(self, sr): 

1598 self.sr = sr 

1599 self.stateLogged = False 

1600 self.prevState = {} 

1601 self.currState = {} 

1602 

1603 def logState(self): 

1604 changes = "" 

1605 self.currState.clear() 

1606 for vdi in self.sr.vdiTrees: 

1607 self.currState[vdi.uuid] = self._getTreeStr(vdi) 

1608 if not self.prevState.get(vdi.uuid) or \ 

1609 self.prevState[vdi.uuid] != self.currState[vdi.uuid]: 

1610 changes += self.currState[vdi.uuid] 

1611 

1612 for uuid in self.prevState: 

1613 if not self.currState.get(uuid): 

1614 changes += "Tree %s gone\n" % uuid 

1615 

1616 result = "SR %s (%d VDIs in %d VHD trees): " % \ 

1617 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees)) 

1618 

1619 if len(changes) > 0: 

1620 if self.stateLogged: 

1621 result += "showing only VHD trees that changed:" 

1622 result += "\n%s" % changes 

1623 else: 

1624 result += "no changes" 

1625 

1626 for line in result.split("\n"): 

1627 Util.log("%s" % line) 

1628 self.prevState.clear() 

1629 for key, val in self.currState.items(): 

1630 self.prevState[key] = val 

1631 self.stateLogged = True 

1632 

1633 def logNewVDI(self, uuid): 

1634 if self.stateLogged: 

1635 Util.log("Found new VDI when scanning: %s" % uuid) 

1636 

1637 def _getTreeStr(self, vdi, indent=8): 

1638 treeStr = "%s%s\n" % (" " * indent, vdi) 

1639 for child in vdi.children: 

1640 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT) 

1641 return treeStr 

1642 

1643 TYPE_FILE = "file" 

1644 TYPE_LVHD = "lvhd" 

1645 TYPE_LINSTOR = "linstor" 

1646 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR] 

1647 

1648 LOCK_RETRY_INTERVAL = 3 

1649 LOCK_RETRY_ATTEMPTS = 20 

1650 LOCK_RETRY_ATTEMPTS_LOCK = 100 

1651 

1652 SCAN_RETRY_ATTEMPTS = 3 

1653 

1654 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM) 

1655 TMP_RENAME_PREFIX = "OLD_" 

1656 

1657 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline" 

1658 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override" 

1659 

1660 def getInstance(uuid, xapiSession, createLock=True, force=False): 

1661 xapi = XAPI(xapiSession, uuid) 

1662 type = normalizeType(xapi.srRecord["type"]) 

1663 if type == SR.TYPE_FILE: 

1664 return FileSR(uuid, xapi, createLock, force) 

1665 elif type == SR.TYPE_LVHD: 

1666 return LVHDSR(uuid, xapi, createLock, force) 

1667 elif type == SR.TYPE_LINSTOR: 

1668 return LinstorSR(uuid, xapi, createLock, force) 

1669 raise util.SMException("SR type %s not recognized" % type) 

1670 getInstance = staticmethod(getInstance) 

1671 

1672 def __init__(self, uuid, xapi, createLock, force): 

1673 self.logFilter = self.LogFilter(self) 

1674 self.uuid = uuid 

1675 self.path = "" 

1676 self.name = "" 

1677 self.vdis = {} 

1678 self.vdiTrees = [] 

1679 self.journaler = None 

1680 self.xapi = xapi 

1681 self._locked = 0 

1682 self._srLock = None 

1683 if createLock: 

1684 self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, self.uuid) 

1685 else: 

1686 Util.log("Requested no SR locking") 

1687 self.name = self.xapi.srRecord["name_label"] 

1688 self._failedCoalesceTargets = [] 

1689 

1690 if not self.xapi.isPluggedHere(): 

1691 if force: 

1692 Util.log("SR %s not attached on this host, ignoring" % uuid) 

1693 else: 

1694 raise util.SMException("SR %s not attached on this host" % uuid) 

1695 

1696 if force: 

1697 Util.log("Not checking if we are Master (SR %s)" % uuid) 

1698 elif not self.xapi.isMaster(): 

1699 raise util.SMException("This host is NOT master, will not run") 

1700 

1701 def gcEnabled(self, refresh=True): 

1702 if refresh: 

1703 self.xapi.srRecord = \ 

1704 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) 

1705 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": 

1706 Util.log("GC is disabled for this SR, abort") 

1707 return False 

1708 return True 

1709 

1710 def scan(self, force=False): 

1711 """Scan the SR and load VDI info for each VDI. If called repeatedly, 

1712 update VDI objects if they already exist""" 

1713 pass # abstract 

1714 

1715 def scanLocked(self, force=False): 

1716 self.lock() 

1717 try: 

1718 self.scan(force) 

1719 finally: 

1720 self.unlock() 

1721 

1722 def getVDI(self, uuid): 

1723 return self.vdis.get(uuid) 

1724 

1725 def hasWork(self): 

1726 if len(self.findGarbage()) > 0: 

1727 return True 

1728 if self.findCoalesceable(): 

1729 return True 

1730 if self.findLeafCoalesceable(): 

1731 return True 

1732 if self.needUpdateBlockInfo(): 

1733 return True 

1734 return False 

1735 

1736 def findCoalesceable(self): 

1737 """Find a coalesceable VDI. Return a vdi that should be coalesced 

1738 (choosing one among all coalesceable candidates according to some 

1739 criteria) or None if there is no VDI that could be coalesced""" 

1740 

1741 candidates = [] 

1742 

1743 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE) 

1744 if srSwitch == "false": 

1745 Util.log("Coalesce disabled for this SR") 

1746 return candidates 

1747 

1748 # finish any VDI for which a relink journal entry exists first 

1749 journals = self.journaler.getAll(VDI.JRN_RELINK) 

1750 for uuid in journals: 

1751 vdi = self.getVDI(uuid) 

1752 if vdi and vdi not in self._failedCoalesceTargets: 

1753 return vdi 

1754 

1755 for vdi in self.vdis.values(): 

1756 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets: 

1757 candidates.append(vdi) 

1758 Util.log("%s is coalescable" % vdi.uuid) 

1759 

1760 self.xapi.update_task_progress("coalescable", len(candidates)) 

1761 

1762 # pick one in the tallest tree 

1763 treeHeight = dict() 

1764 for c in candidates: 

1765 height = c.getTreeRoot().getTreeHeight() 

1766 if treeHeight.get(height): 

1767 treeHeight[height].append(c) 

1768 else: 

1769 treeHeight[height] = [c] 

1770 

1771 freeSpace = self.getFreeSpace() 

1772 heights = list(treeHeight.keys()) 

1773 heights.sort(reverse=True) 

1774 for h in heights: 

1775 for c in treeHeight[h]: 

1776 spaceNeeded = c._calcExtraSpaceForCoalescing() 

1777 if spaceNeeded <= freeSpace: 

1778 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h)) 

1779 return c 

1780 else: 

1781 Util.log("No space to coalesce %s (free space: %d)" % \ 

1782 (c, freeSpace)) 

1783 return None 

1784 
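    # --- Editor's note: illustrative sketch, not used by the GC. ---
    # A minimal standalone rendering of the selection policy implemented in
    # findCoalesceable() above: group the candidates by the height of their
    # VHD tree, then walk the heights from tallest to shortest and return
    # the first candidate whose coalesce fits into the available free space.
    # The callables passed in (height_of, space_needed) are hypothetical
    # stand-ins for getTreeRoot().getTreeHeight() and
    # _calcExtraSpaceForCoalescing().
    def _example_pick_coalesce_candidate(candidates, free_space,
                                         height_of, space_needed):
        by_height = {}
        for c in candidates:
            by_height.setdefault(height_of(c), []).append(c)
        for height in sorted(by_height, reverse=True):
            for c in by_height[height]:
                if space_needed(c) <= free_space:
                    return c
        return None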

1785 def getSwitch(self, key): 

1786 return self.xapi.srRecord["other_config"].get(key) 

1787 

1788 def forbiddenBySwitch(self, switch, condition, fail_msg): 

1789 srSwitch = self.getSwitch(switch) 

1790 ret = False 

1791 if srSwitch: 

1792 ret = srSwitch == condition 

1793 

1794 if ret: 

1795 Util.log(fail_msg) 

1796 

1797 return ret 

1798 

1799 def leafCoalesceForbidden(self): 

1800 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

1801 "false", 

1802 "Coalesce disabled for this SR") or 

1803 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

1804 VDI.LEAFCLSC_DISABLED, 

1805 "Leaf-coalesce disabled for this SR")) 

1806 

1807 def findLeafCoalesceable(self): 

1808 """Find leaf-coalesceable VDIs in each VHD tree""" 

1809 

1810 candidates = [] 

1811 if self.leafCoalesceForbidden(): 

1812 return candidates 

1813 

1814 self.gatherLeafCoalesceable(candidates) 

1815 

1816 self.xapi.update_task_progress("coalescable", len(candidates)) 

1817 

1818 freeSpace = self.getFreeSpace() 

1819 for candidate in candidates: 

1820 # check the space constraints to see if leaf-coalesce is actually 

1821 # feasible for this candidate 

1822 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing() 

1823 spaceNeededLive = spaceNeeded 

1824 if spaceNeeded > freeSpace: 

1825 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing() 

1826 if candidate.canLiveCoalesce(self.getStorageSpeed()): 

1827 spaceNeeded = spaceNeededLive 

1828 

1829 if spaceNeeded <= freeSpace: 

1830 Util.log("Leaf-coalesce candidate: %s" % candidate) 

1831 return candidate 

1832 else: 

1833 Util.log("No space to leaf-coalesce %s (free space: %d)" % \ 

1834 (candidate, freeSpace)) 

1835 if spaceNeededLive <= freeSpace: 

1836 Util.log("...but enough space if we skip snap-coalesce") 

1837 candidate.setConfig(VDI.DB_LEAFCLSC, 

1838 VDI.LEAFCLSC_OFFLINE) 

1839 

1840 return None 

1841 

1842 def gatherLeafCoalesceable(self, candidates): 

1843 for vdi in self.vdis.values(): 

1844 if not vdi.isLeafCoalesceable(): 

1845 continue 

1846 if vdi in self._failedCoalesceTargets: 

1847 continue 

1848 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET: 

1849 Util.log("Skipping reset-on-boot %s" % vdi) 

1850 continue 

1851 if vdi.getConfig(vdi.DB_ALLOW_CACHING): 

1852 Util.log("Skipping allow_caching=true %s" % vdi) 

1853 continue 

1854 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED: 

1855 Util.log("Leaf-coalesce disabled for %s" % vdi) 

1856 continue 

1857 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or 

1858 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE): 

1859 continue 

1860 candidates.append(vdi) 

1861 

1862 def coalesce(self, vdi, dryRun=False): 

1863 """Coalesce vdi onto parent""" 

1864 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent)) 

1865 if dryRun: 

1866 return 

1867 

1868 try: 

1869 self._coalesce(vdi) 

1870 except util.SMException as e: 

1871 if isinstance(e, AbortException): 

1872 self.cleanup() 

1873 raise 

1874 else: 

1875 self._failedCoalesceTargets.append(vdi) 

1876 Util.logException("coalesce") 

1877 Util.log("Coalesce failed, skipping") 

1878 self.cleanup() 

1879 

1880 def coalesceLeaf(self, vdi, dryRun=False): 

1881 """Leaf-coalesce vdi onto parent""" 

1882 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent)) 

1883 if dryRun: 

1884 return 

1885 

1886 try: 

1887 uuid = vdi.uuid 

1888 try: 

1889 # "vdi" object will no longer be valid after this call 

1890 self._coalesceLeaf(vdi) 

1891 finally: 

1892 vdi = self.getVDI(uuid) 

1893 if vdi: 

1894 vdi.delConfig(vdi.DB_LEAFCLSC) 

1895 except AbortException: 

1896 self.cleanup() 

1897 raise 

1898 except (util.SMException, XenAPI.Failure) as e: 

1899 self._failedCoalesceTargets.append(vdi) 

1900 Util.logException("leaf-coalesce") 

1901 Util.log("Leaf-coalesce failed on %s, skipping" % vdi) 

1902 self.cleanup() 

1903 

1904 def garbageCollect(self, dryRun=False): 

1905 vdiList = self.findGarbage() 

1906 Util.log("Found %d VDIs for deletion:" % len(vdiList)) 

1907 for vdi in vdiList: 

1908 Util.log(" %s" % vdi) 

1909 if not dryRun: 

1910 self.deleteVDIs(vdiList) 

1911 self.cleanupJournals(dryRun) 

1912 

1913 def findGarbage(self): 

1914 vdiList = [] 

1915 for vdi in self.vdiTrees: 

1916 vdiList.extend(vdi.getAllPrunable()) 

1917 return vdiList 

1918 

1919 def deleteVDIs(self, vdiList): 

1920 for vdi in vdiList: 

1921 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT): 

1922 raise AbortException("Aborting due to signal") 

1923 Util.log("Deleting unlinked VDI %s" % vdi) 

1924 self.deleteVDI(vdi) 

1925 

1926 def deleteVDI(self, vdi): 

1927 assert(len(vdi.children) == 0) 

1928 del self.vdis[vdi.uuid] 

1929 if vdi.parent: 

1930 vdi.parent.children.remove(vdi) 

1931 if vdi in self.vdiTrees: 

1932 self.vdiTrees.remove(vdi) 

1933 vdi.delete() 

1934 

1935 def forgetVDI(self, vdiUuid): 

1936 self.xapi.forgetVDI(self.uuid, vdiUuid) 

1937 

1938 def pauseVDIs(self, vdiList): 

1939 paused = [] 

1940 failed = False 

1941 for vdi in vdiList: 

1942 try: 

1943 vdi.pause() 

1944 paused.append(vdi) 

1945 except: 

1946 Util.logException("pauseVDIs") 

1947 failed = True 

1948 break 

1949 

1950 if failed: 

1951 self.unpauseVDIs(paused) 

1952 raise util.SMException("Failed to pause VDIs") 

1953 

1954 def unpauseVDIs(self, vdiList): 

1955 failed = False 

1956 for vdi in vdiList: 

1957 try: 

1958 vdi.unpause() 

1959 except: 

1960 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

1961 failed = True 

1962 if failed: 

1963 raise util.SMException("Failed to unpause VDIs") 

1964 
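    # --- Editor's note: illustrative sketch, not used by the GC. ---
    # pauseVDIs()/unpauseVDIs() above implement an all-or-nothing pattern:
    # pause each VDI in turn and, if any pause fails, unpause the ones that
    # were already paused before reporting the failure. A generic rendering
    # of that pattern (hypothetical names; pause/unpause are assumed
    # callables):
    def _example_pause_all_or_rollback(items, pause, unpause):
        paused = []
        try:
            for item in items:
                pause(item)
                paused.append(item)
        except Exception:
            for item in paused:
                unpause(item)
            raise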

1965 def getFreeSpace(self): 

1966 return 0 

1967 

1968 def cleanup(self): 

1969 Util.log("In cleanup") 

1970 return 

1971 

1972 def __str__(self): 

1973 if self.name: 

1974 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

1975 else: 

1976 ret = "%s" % self.uuid 

1977 return ret 

1978 

1979 def lock(self): 

1980 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort 

1981 signal to avoid deadlocking (trying to acquire the SR lock while the 

1982 lock is held by a process that is trying to abort us)""" 

1983 if not self._srLock: 

1984 return 

1985 

1986 if self._locked == 0: 

1987 abortFlag = IPCFlag(self.uuid) 

1988 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK): 

1989 if self._srLock.acquireNoblock(): 

1990 self._locked += 1 

1991 return 

1992 if abortFlag.test(FLAG_TYPE_ABORT): 

1993 raise AbortException("Abort requested") 

1994 time.sleep(SR.LOCK_RETRY_INTERVAL) 

1995 raise util.SMException("Unable to acquire the SR lock") 

1996 

1997 self._locked += 1 

1998 

1999 def unlock(self): 

2000 if not self._srLock: 

2001 return 

2002 assert(self._locked > 0) 

2003 self._locked -= 1 

2004 if self._locked == 0: 

2005 self._srLock.release() 

2006 

2007 def needUpdateBlockInfo(self): 

2008 for vdi in self.vdis.values(): 

2009 if vdi.scanError or len(vdi.children) == 0: 

2010 continue 

2011 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2012 return True 

2013 return False 

2014 

2015 def updateBlockInfo(self): 

2016 for vdi in self.vdis.values(): 

2017 if vdi.scanError or len(vdi.children) == 0: 

2018 continue 

2019 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2020 vdi.updateBlockInfo() 

2021 

2022 def cleanupCoalesceJournals(self): 

2023 """Remove stale coalesce VDI indicators""" 

2024 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

2025 for uuid, jval in entries.items(): 

2026 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

2027 

2028 def cleanupJournals(self, dryRun=False): 

2029 """delete journal entries for non-existing VDIs""" 

2030 for t in [LVHDVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]: 

2031 entries = self.journaler.getAll(t) 

2032 for uuid, jval in entries.items(): 

2033 if self.getVDI(uuid): 

2034 continue 

2035 if t == SR.JRN_CLONE: 

2036 baseUuid, clonUuid = jval.split("_") 

2037 if self.getVDI(baseUuid): 

2038 continue 

2039 Util.log(" Deleting stale '%s' journal entry for %s " 

2040 "(%s)" % (t, uuid, jval)) 

2041 if not dryRun: 

2042 self.journaler.remove(t, uuid) 

2043 

2044 def cleanupCache(self, maxAge=-1): 

2045 return 0 

2046 

2047 def _coalesce(self, vdi): 

2048 if self.journaler.get(vdi.JRN_RELINK, vdi.uuid): 

2049 # this means we had done the actual coalescing already and just 

2050 # need to finish relinking and/or refreshing the children 

2051 Util.log("==> Coalesce apparently already done: skipping") 

2052 else: 

2053 # JRN_COALESCE is used to check which VDI is being coalesced in 

2054 # order to decide whether to abort the coalesce. We remove the 

2055 # journal as soon as the VHD coalesce step is done, because we 

2056 # don't expect the rest of the process to take long 

2057 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1") 

2058 vdi._doCoalesce() 

2059 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid) 

2060 

2061 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid) 

2062 

2063 # we now need to relink the children: lock the SR to prevent ops 

2064 # like SM.clone from manipulating the VDIs we'll be relinking and 

2065 # rescan the SR first in case the children changed since the last 

2066 # scan 

2067 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1") 

2068 

2069 self.lock() 

2070 try: 

2071 vdi.parent._tagChildrenForRelink() 

2072 self.scan() 

2073 vdi._relinkSkip() 

2074 finally: 

2075 self.unlock() 

2076 # Reload the children to leave things consistent 

2077 vdi.parent._reloadChildren(vdi) 

2078 

2079 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid) 

2080 self.deleteVDI(vdi) 

2081 

2082 class CoalesceTracker: 

2083 GRACE_ITERATIONS = 1 

2084 MAX_ITERATIONS_NO_PROGRESS = 3 

2085 MAX_ITERATIONS = 10 

2086 MAX_INCREASE_FROM_MINIMUM = 1.2 

2087 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \ 

2088 " --> Final size {finSize}" 

2089 

2090 def __init__(self, sr): 

2091 self.itsNoProgress = 0 

2092 self.its = 0 

2093 self.minSize = float("inf") 

2094 self.history = [] 

2095 self.reason = "" 

2096 self.startSize = None 

2097 self.finishSize = None 

2098 self.sr = sr 

2099 

2100 def abortCoalesce(self, prevSize, curSize): 

2101 res = False 

2102 

2103 self.its += 1 

2104 self.history.append(self.HISTORY_STRING.format(its=self.its, 

2105 initSize=prevSize, 

2106 finSize=curSize)) 

2107 

2108 self.finishSize = curSize 

2109 

2110 if self.startSize is None: 

2111 self.startSize = prevSize 

2112 

2113 if curSize < self.minSize: 

2114 self.minSize = curSize 

2115 

2116 if prevSize < self.minSize: 

2117 self.minSize = prevSize 

2118 

2119 if prevSize < curSize: 

2120 self.itsNoProgress += 1 

2121 Util.log("No progress, attempt:" 

2122 " {attempt}".format(attempt=self.itsNoProgress)) 

2123 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid) 

2124 

2125 if (not res) and (self.its > self.MAX_ITERATIONS): 

2126 max = self.MAX_ITERATIONS 

2127 self.reason = \ 

2128 "Max iterations ({max}) exceeded".format(max=max) 

2129 res = True 

2130 

2131 if (not res) and (self.itsNoProgress > 

2132 self.MAX_ITERATIONS_NO_PROGRESS): 

2133 max = self.MAX_ITERATIONS_NO_PROGRESS 

2134 self.reason = \ 

2135 "No progress made for {max} iterations".format(max=max) 

2136 res = True 

2137 

2138 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize 

2139 if (self.its > self.GRACE_ITERATIONS and 

2140 (not res) and (curSize > maxSizeFromMin)): 

2141 self.reason = "Unexpected bump in size," \ 

2142 " compared to minimum acheived" 

2143 res = True 

2144 

2145 return res 

2146 

2147 def printReasoning(self): 

2148 Util.log("Aborted coalesce") 

2149 for hist in self.history: 

2150 Util.log(hist) 

2151 Util.log(self.reason) 

2152 Util.log("Starting size was {size}" 

2153 .format(size=self.startSize)) 

2154 Util.log("Final size was {size}" 

2155 .format(size=self.finishSize)) 

2156 Util.log("Minimum size acheived was {size}" 

2157 .format(size=self.minSize)) 

2158 
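    # --- Editor's note: illustrative sketch, not used by the GC. ---
    # How the tracker above is meant to be consulted: after every
    # snapshot-coalesce pass the caller feeds it the VHD size before and
    # after the pass, and gives up as soon as abortCoalesce() says so
    # (this mirrors _coalesceLeaf() below). With the class constants as
    # defined, a hypothetical size sequence 100 -> 80 -> 60 keeps going,
    # but a later jump to anything above 1.2 * 60 = 72 aborts, as do more
    # than 3 passes without progress or more than 10 passes in total.
    def _example_tracker_loop(sr, sizes_after_each_pass):
        tracker = SR.CoalesceTracker(sr)
        prev = sizes_after_each_pass[0]
        for cur in sizes_after_each_pass[1:]:
            if tracker.abortCoalesce(prev, cur):
                tracker.printReasoning()
                return False
            prev = cur
        return True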

2159 def _coalesceLeaf(self, vdi): 

2160 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot 

2161 complete due to external changes, namely vdi_delete and vdi_snapshot 

2162 that alter leaf-coalescibility of vdi""" 

2163 tracker = self.CoalesceTracker(self) 

2164 while not vdi.canLiveCoalesce(self.getStorageSpeed()): 

2165 prevSizeVHD = vdi.getSizeVHD() 

2166 if not self._snapshotCoalesce(vdi): 

2167 return False 

2168 if tracker.abortCoalesce(prevSizeVHD, vdi.getSizeVHD()): 

2169 tracker.printReasoning() 

2170 raise util.SMException("VDI {uuid} could not be coalesced" 

2171 .format(uuid=vdi.uuid)) 

2172 return self._liveLeafCoalesce(vdi) 

2173 

2174 def calcStorageSpeed(self, startTime, endTime, vhdSize): 

2175 speed = None 

2176 total_time = endTime - startTime 

2177 if total_time > 0: 

2178 speed = float(vhdSize) / float(total_time) 

2179 return speed 

2180 

2181 def writeSpeedToFile(self, speed): 

2182 content = [] 

2183 speedFile = None 

2184 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2185 self.lock() 

2186 try: 

2187 Util.log("Writing to file: {myfile}".format(myfile=path)) 

2188 lines = "" 

2189 if not os.path.isfile(path): 

2190 lines = str(speed) + "\n" 

2191 else: 

2192 speedFile = open(path, "r+") 

2193 content = speedFile.readlines() 

2194 content.append(str(speed) + "\n") 

2195 if len(content) > N_RUNNING_AVERAGE: 

2196 del content[0] 

2197 lines = "".join(content) 

2198 

2199 util.atomicFileWrite(path, VAR_RUN, lines) 

2200 finally: 

2201 if speedFile is not None: 

2202 speedFile.close() 

2203 Util.log("Closing file: {myfile}".format(myfile=path)) 

2204 self.unlock() 

2205 

2206 def recordStorageSpeed(self, startTime, endTime, vhdSize): 

2207 speed = self.calcStorageSpeed(startTime, endTime, vhdSize) 

2208 if speed is None: 

2209 return 

2210 

2211 self.writeSpeedToFile(speed) 

2212 

2213 def getStorageSpeed(self): 

2214 speedFile = None 

2215 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2216 self.lock() 

2217 try: 

2218 speed = None 

2219 if os.path.isfile(path): 

2220 speedFile = open(path) 

2221 content = speedFile.readlines() 

2222 try: 

2223 content = [float(i) for i in content] 

2224 except ValueError: 

2225 Util.log("Something bad in the speed log:{log}". 

2226 format(log=speedFile.readlines())) 

2227 return speed 

2228 

2229 if len(content): 

2230 speed = sum(content) / float(len(content)) 

2231 if speed <= 0: 

2232 # Defensive, should be impossible. 

2233 Util.log("Bad speed: {speed} calculated for SR: {uuid}". 

2234 format(speed=speed, uuid=self.uuid)) 

2235 speed = None 

2236 else: 

2237 Util.log("Speed file empty for SR: {uuid}". 

2238 format(uuid=self.uuid)) 

2239 else: 

2240 Util.log("Speed log missing for SR: {uuid}". 

2241 format(uuid=self.uuid)) 

2242 return speed 

2243 finally: 

2244 if not (speedFile is None): 

2245 speedFile.close() 

2246 self.unlock() 

2247 
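    # --- Editor's note: illustrative sketch, not used by the GC. ---
    # The speed log handled above is a plain text file under /var/run with
    # one float (bytes coalesced per second) per line, capped at
    # N_RUNNING_AVERAGE entries; getStorageSpeed() simply averages whatever
    # is there. A minimal standalone equivalent of that bookkeeping
    # (hypothetical helper, no SR locking or atomic write):
    def _example_update_speed_log(path, new_speed,
                                  max_entries=N_RUNNING_AVERAGE):
        entries = []
        if os.path.isfile(path):
            with open(path) as f:
                entries = [float(line) for line in f if line.strip()]
        entries.append(float(new_speed))
        entries = entries[-max_entries:]
        with open(path, "w") as f:
            f.writelines("%s\n" % e for e in entries)
        return sum(entries) / len(entries)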

2248 def _snapshotCoalesce(self, vdi): 

2249 # Note that because we are not holding any locks here, concurrent SM 

2250 # operations may change this tree under our feet. In particular, vdi 

2251 # can be deleted, or it can be snapshotted. 

2252 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED) 

2253 Util.log("Single-snapshotting %s" % vdi) 

2254 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid) 

2255 try: 

2256 ret = self.xapi.singleSnapshotVDI(vdi) 

2257 Util.log("Single-snapshot returned: %s" % ret) 

2258 except XenAPI.Failure as e: 

2259 if util.isInvalidVDI(e): 

2260 Util.log("The VDI appears to have been concurrently deleted") 

2261 return False 

2262 raise 

2263 self.scanLocked() 

2264 tempSnap = vdi.parent 

2265 if not tempSnap.isCoalesceable(): 

2266 Util.log("The VDI appears to have been concurrently snapshotted") 

2267 return False 

2268 Util.log("Coalescing parent %s" % tempSnap) 

2269 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid) 

2270 vhdSize = vdi.getSizeVHD() 

2271 self._coalesce(tempSnap) 

2272 if not vdi.isLeafCoalesceable(): 

2273 Util.log("The VDI tree appears to have been altered since") 

2274 return False 

2275 return True 

2276 

2277 def _liveLeafCoalesce(self, vdi): 

2278 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid) 

2279 self.lock() 

2280 try: 

2281 self.scan() 

2282 if not self.getVDI(vdi.uuid): 

2283 Util.log("The VDI appears to have been deleted meanwhile") 

2284 return False 

2285 if not vdi.isLeafCoalesceable(): 

2286 Util.log("The VDI is no longer leaf-coalesceable") 

2287 return False 

2288 

2289 uuid = vdi.uuid 

2290 vdi.pause(failfast=True) 

2291 try: 

2292 try: 

2293 # "vdi" object will no longer be valid after this call 

2294 self._doCoalesceLeaf(vdi) 

2295 except: 

2296 Util.logException("_doCoalesceLeaf") 

2297 self._handleInterruptedCoalesceLeaf() 

2298 raise 

2299 finally: 

2300 vdi = self.getVDI(uuid) 

2301 if vdi: 

2302 vdi.ensureUnpaused() 

2303 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid) 

2304 if vdiOld: 

2305 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2306 self.deleteVDI(vdiOld) 

2307 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2308 finally: 

2309 self.cleanup() 

2310 self.unlock() 

2311 self.logFilter.logState() 

2312 return True 

2313 

2314 def _doCoalesceLeaf(self, vdi): 

2315 """Actual coalescing of a leaf VDI onto parent. Must be called in an 

2316 offline/atomic context""" 

2317 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid) 

2318 self._prepareCoalesceLeaf(vdi) 

2319 vdi.parent._setHidden(False) 

2320 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False) 

2321 vdi.validate(True) 

2322 vdi.parent.validate(True) 

2323 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid) 

2324 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT 

2325 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE: 

2326 Util.log("Leaf-coalesce forced, will not use timeout") 

2327 timeout = 0 

2328 vdi._coalesceVHD(timeout) 

2329 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid) 

2330 vdi.parent.validate(True) 

2331 #vdi._verifyContents(timeout / 2) 

2332 

2333 # rename 

2334 vdiUuid = vdi.uuid 

2335 oldName = vdi.fileName 

2336 origParentUuid = vdi.parent.uuid 

2337 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid) 

2338 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid) 

2339 vdi.parent.rename(vdiUuid) 

2340 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid) 

2341 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid) 

2342 

2343 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is 

2344 # garbage 

2345 

2346 # update the VDI record 

2347 vdi.parent.delConfig(VDI.DB_VHD_PARENT) 

2348 if vdi.parent.raw: 

2349 vdi.parent.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_RAW) 

2350 vdi.parent.delConfig(VDI.DB_VHD_BLOCKS) 

2351 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid) 

2352 

2353 self._updateNode(vdi) 

2354 

2355 # delete the obsolete leaf & inflate the parent (in that order, to 

2356 # minimize free space requirements) 

2357 parent = vdi.parent 

2358 vdi._setHidden(True) 

2359 vdi.parent.children = [] 

2360 vdi.parent = None 

2361 

2362 extraSpace = self._calcExtraSpaceNeeded(vdi, parent) 

2363 freeSpace = self.getFreeSpace() 

2364 if freeSpace < extraSpace: 

2365 # don't delete unless we need the space: deletion is time-consuming 

2366 # because it requires contacting the slaves, and we're paused here 

2367 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2368 self.deleteVDI(vdi) 

2369 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2370 

2371 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid) 

2372 self.journaler.remove(VDI.JRN_LEAF, vdiUuid) 

2373 

2374 self.forgetVDI(origParentUuid) 

2375 self._finishCoalesceLeaf(parent) 

2376 self._updateSlavesOnResize(parent) 

2377 

2378 def _calcExtraSpaceNeeded(self, child, parent): 

2379 assert(not parent.raw) # raw parents not supported 

2380 extra = child.getSizeVHD() - parent.getSizeVHD() 

2381 if extra < 0: 

2382 extra = 0 

2383 return extra 

2384 

2385 def _prepareCoalesceLeaf(self, vdi): 

2386 pass 

2387 

2388 def _updateNode(self, vdi): 

2389 pass 

2390 

2391 def _finishCoalesceLeaf(self, parent): 

2392 pass 

2393 

2394 def _updateSlavesOnUndoLeafCoalesce(self, parent, child): 

2395 pass 

2396 

2397 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid): 

2398 pass 

2399 

2400 def _updateSlavesOnResize(self, vdi): 

2401 pass 

2402 

2403 def _removeStaleVDIs(self, uuidsPresent): 

2404 for uuid in list(self.vdis.keys()): 

2405 if not uuid in uuidsPresent: 

2406 Util.log("VDI %s disappeared since last scan" % \ 

2407 self.vdis[uuid]) 

2408 del self.vdis[uuid] 

2409 

2410 def _handleInterruptedCoalesceLeaf(self): 

2411 """An interrupted leaf-coalesce operation may leave the VHD tree in an 

2412 inconsistent state. If the old-leaf VDI is still present, we revert the 

2413 operation (in case the original error is persistent); otherwise we must 

2414 finish the operation""" 

2415 # abstract 

2416 pass 

2417 

2418 def _buildTree(self, force): 

2419 self.vdiTrees = [] 

2420 for vdi in self.vdis.values(): 

2421 if vdi.parentUuid: 

2422 parent = self.getVDI(vdi.parentUuid) 

2423 if not parent: 

2424 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX): 

2425 self.vdiTrees.append(vdi) 

2426 continue 

2427 if force: 

2428 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \ 

2429 (vdi.parentUuid, vdi.uuid)) 

2430 self.vdiTrees.append(vdi) 

2431 continue 

2432 else: 

2433 raise util.SMException("Parent VDI %s of %s not " \ 

2434 "found" % (vdi.parentUuid, vdi.uuid)) 

2435 vdi.parent = parent 

2436 parent.children.append(vdi) 

2437 else: 

2438 self.vdiTrees.append(vdi) 

2439 

2440 
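# --- Editor's note: illustrative sketch, not part of the original module. ---
# A hedged sketch of how the SR factory and work discovery above might be
# driven by a caller: build the type-specific SR object via SR.getInstance(),
# honour the per-SR GC switch, and only act when hasWork() reports something
# to do. The real entry points of this script additionally handle locking,
# daemonizing and abort flags; this shows only the shape of the calls.
def _example_gc_pass(sr_uuid, xapi_session):
    sr = SR.getInstance(sr_uuid, xapi_session)
    if not sr.gcEnabled():
        return
    sr.scanLocked()
    if sr.hasWork():
        # dry run: only log what would be garbage collected
        sr.garbageCollect(dryRun=True)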

2441class FileSR(SR): 

2442 TYPE = SR.TYPE_FILE 

2443 CACHE_FILE_EXT = ".vhdcache" 

2444 # cache cleanup actions 

2445 CACHE_ACTION_KEEP = 0 

2446 CACHE_ACTION_REMOVE = 1 

2447 CACHE_ACTION_REMOVE_IF_INACTIVE = 2 

2448 

2449 def __init__(self, uuid, xapi, createLock, force): 

2450 SR.__init__(self, uuid, xapi, createLock, force) 

2451 self.path = "/var/run/sr-mount/%s" % self.uuid 

2452 self.journaler = fjournaler.Journaler(self.path) 

2453 

2454 def scan(self, force=False): 

2455 if not util.pathexists(self.path): 

2456 raise util.SMException("directory %s not found!" % self.uuid) 

2457 vhds = self._scan(force) 

2458 for uuid, vhdInfo in vhds.items(): 

2459 vdi = self.getVDI(uuid) 

2460 if not vdi: 

2461 self.logFilter.logNewVDI(uuid) 

2462 vdi = FileVDI(self, uuid, False) 

2463 self.vdis[uuid] = vdi 

2464 vdi.load(vhdInfo) 

2465 uuidsPresent = list(vhds.keys()) 

2466 rawList = [x for x in os.listdir(self.path) if x.endswith(vhdutil.FILE_EXTN_RAW)] 

2467 for rawName in rawList: 

2468 uuid = FileVDI.extractUuid(rawName) 

2469 uuidsPresent.append(uuid) 

2470 vdi = self.getVDI(uuid) 

2471 if not vdi: 

2472 self.logFilter.logNewVDI(uuid) 

2473 vdi = FileVDI(self, uuid, True) 

2474 self.vdis[uuid] = vdi 

2475 self._removeStaleVDIs(uuidsPresent) 

2476 self._buildTree(force) 

2477 self.logFilter.logState() 

2478 self._handleInterruptedCoalesceLeaf() 

2479 

2480 def getFreeSpace(self): 

2481 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path) 

2482 

2483 def deleteVDIs(self, vdiList): 

2484 rootDeleted = False 

2485 for vdi in vdiList: 

2486 if not vdi.parent: 

2487 rootDeleted = True 

2488 break 

2489 SR.deleteVDIs(self, vdiList) 

2490 if self.xapi.srRecord["type"] == "nfs" and rootDeleted: 

2491 self.xapi.markCacheSRsDirty() 

2492 

2493 def cleanupCache(self, maxAge=-1): 

2494 """Clean up IntelliCache cache files. Caches for leaf nodes are 

2495 removed when the leaf node no longer exists or its allow-caching 

2496 attribute is not set. Caches for parent nodes are removed when the 

2497 parent node no longer exists or it hasn't been used in more than 

2498 <maxAge> hours. 

2499 Return number of caches removed. 

2500 """ 

2501 numRemoved = 0 

2502 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)] 

2503 Util.log("Found %d cache files" % len(cacheFiles)) 

2504 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge) 

2505 for cacheFile in cacheFiles: 

2506 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)] 

2507 action = self.CACHE_ACTION_KEEP 

2508 rec = self.xapi.getRecordVDI(uuid) 

2509 if not rec: 

2510 Util.log("Cache %s: VDI doesn't exist" % uuid) 

2511 action = self.CACHE_ACTION_REMOVE 

2512 elif rec["managed"] and not rec["allow_caching"]: 

2513 Util.log("Cache %s: caching disabled" % uuid) 

2514 action = self.CACHE_ACTION_REMOVE 

2515 elif not rec["managed"] and maxAge >= 0: 

2516 lastAccess = datetime.datetime.fromtimestamp( \ 

2517 os.path.getatime(os.path.join(self.path, cacheFile))) 

2518 if lastAccess < cutoff: 

2519 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge)) 

2520 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE 

2521 

2522 if action == self.CACHE_ACTION_KEEP: 

2523 Util.log("Keeping cache %s" % uuid) 

2524 continue 

2525 

2526 lockId = uuid 

2527 parentUuid = None 

2528 if rec and rec["managed"]: 

2529 parentUuid = rec["sm_config"].get("vhd-parent") 

2530 if parentUuid: 

2531 lockId = parentUuid 

2532 

2533 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId) 

2534 cacheLock.acquire() 

2535 try: 

2536 if self._cleanupCache(uuid, action): 

2537 numRemoved += 1 

2538 finally: 

2539 cacheLock.release() 

2540 return numRemoved 

2541 

2542 def _cleanupCache(self, uuid, action): 

2543 assert(action != self.CACHE_ACTION_KEEP) 

2544 rec = self.xapi.getRecordVDI(uuid) 

2545 if rec and rec["allow_caching"]: 

2546 Util.log("Cache %s appears to have become valid" % uuid) 

2547 return False 

2548 

2549 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT) 

2550 tapdisk = blktap2.Tapdisk.find_by_path(fullPath) 

2551 if tapdisk: 

2552 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE: 

2553 Util.log("Cache %s still in use" % uuid) 

2554 return False 

2555 Util.log("Shutting down tapdisk for %s" % fullPath) 

2556 tapdisk.shutdown() 

2557 

2558 Util.log("Deleting file %s" % fullPath) 

2559 os.unlink(fullPath) 

2560 return True 

2561 

2562 def _isCacheFileName(self, name): 

2563 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \ 

2564 name.endswith(self.CACHE_FILE_EXT) 

2565 

2566 def _scan(self, force): 

2567 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

2568 error = False 

2569 pattern = os.path.join(self.path, "*%s" % vhdutil.FILE_EXTN_VHD) 

2570 vhds = vhdutil.getAllVHDs(pattern, FileVDI.extractUuid) 

2571 for uuid, vhdInfo in vhds.items(): 

2572 if vhdInfo.error: 

2573 error = True 

2574 break 

2575 if not error: 

2576 return vhds 

2577 Util.log("Scan error on attempt %d" % i) 

2578 if force: 

2579 return vhds 

2580 raise util.SMException("Scan error") 

2581 

2582 def deleteVDI(self, vdi): 

2583 self._checkSlaves(vdi) 

2584 SR.deleteVDI(self, vdi) 

2585 

2586 def _checkSlaves(self, vdi): 

2587 onlineHosts = self.xapi.getOnlineHosts() 

2588 abortFlag = IPCFlag(self.uuid) 

2589 for pbdRecord in self.xapi.getAttachedPBDs(): 

2590 hostRef = pbdRecord["host"] 

2591 if hostRef == self.xapi._hostRef: 

2592 continue 

2593 if abortFlag.test(FLAG_TYPE_ABORT): 

2594 raise AbortException("Aborting due to signal") 

2595 try: 

2596 self._checkSlave(hostRef, vdi) 

2597 except util.CommandException: 

2598 if hostRef in onlineHosts: 

2599 raise 

2600 

2601 def _checkSlave(self, hostRef, vdi): 

2602 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path}) 

2603 Util.log("Checking with slave: %s" % repr(call)) 

2604 _host = self.xapi.session.xenapi.host 

2605 text = _host.call_plugin( * call) 

2606 

2607 def _handleInterruptedCoalesceLeaf(self): 

2608 entries = self.journaler.getAll(VDI.JRN_LEAF) 

2609 for uuid, parentUuid in entries.items(): 

2610 fileList = os.listdir(self.path) 

2611 childName = uuid + vhdutil.FILE_EXTN_VHD 

2612 tmpChildName = self.TMP_RENAME_PREFIX + uuid + vhdutil.FILE_EXTN_VHD 

2613 parentName1 = parentUuid + vhdutil.FILE_EXTN_VHD 

2614 parentName2 = parentUuid + vhdutil.FILE_EXTN_RAW 

2615 parentPresent = (parentName1 in fileList or parentName2 in fileList) 

2616 if parentPresent or tmpChildName in fileList: 

2617 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

2618 else: 

2619 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

2620 self.journaler.remove(VDI.JRN_LEAF, uuid) 

2621 vdi = self.getVDI(uuid) 

2622 if vdi: 

2623 vdi.ensureUnpaused() 

2624 

2625 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2626 Util.log("*** UNDO LEAF-COALESCE") 

2627 parent = self.getVDI(parentUuid) 

2628 if not parent: 

2629 parent = self.getVDI(childUuid) 

2630 if not parent: 

2631 raise util.SMException("Neither %s nor %s found" % \ 

2632 (parentUuid, childUuid)) 

2633 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

2634 parent.rename(parentUuid) 

2635 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

2636 

2637 child = self.getVDI(childUuid) 

2638 if not child: 

2639 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

2640 if not child: 

2641 raise util.SMException("Neither %s nor %s found" % \ 

2642 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

2643 Util.log("Renaming child back to %s" % childUuid) 

2644 child.rename(childUuid) 

2645 Util.log("Updating the VDI record") 

2646 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

2647 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

2648 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

2649 

2650 if child.hidden: 

2651 child._setHidden(False) 

2652 if not parent.hidden: 

2653 parent._setHidden(True) 

2654 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

2655 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

2656 Util.log("*** leaf-coalesce undo successful") 

2657 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

2658 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

2659 

2660 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2661 Util.log("*** FINISH LEAF-COALESCE") 

2662 vdi = self.getVDI(childUuid) 

2663 if not vdi: 

2664 raise util.SMException("VDI %s not found" % childUuid) 

2665 try: 

2666 self.forgetVDI(parentUuid) 

2667 except XenAPI.Failure: 

2668 pass 

2669 self._updateSlavesOnResize(vdi) 

2670 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

2671 Util.log("*** finished leaf-coalesce successfully") 

2672 
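# --- Editor's note: illustrative sketch, not part of the original module. ---
# The recovery rule shared by the _handleInterruptedCoalesceLeaf()
# implementations, in isolation: each JRN_LEAF journal entry maps a child
# uuid to its old parent uuid, and the decision is simply "does the old
# parent (or the temporarily renamed child) still exist?" -- if so the
# interrupted leaf-coalesce is undone, otherwise it is finished. The
# callables exists/undo/finish are hypothetical stand-ins for the
# SR-type-specific checks and actions.
def _example_recover_leaf_coalesce(journal_entries, exists, undo, finish,
                                   tmp_prefix=SR.TMP_RENAME_PREFIX):
    for child_uuid, parent_uuid in journal_entries.items():
        if exists(parent_uuid) or exists(tmp_prefix + child_uuid):
            undo(child_uuid, parent_uuid)
        else:
            finish(child_uuid, parent_uuid)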

2673 

2674class LVHDSR(SR): 

2675 TYPE = SR.TYPE_LVHD 

2676 SUBTYPES = ["lvhdoiscsi", "lvhdohba"] 

2677 

2678 def __init__(self, uuid, xapi, createLock, force): 

2679 SR.__init__(self, uuid, xapi, createLock, force) 

2680 self.vgName = "%s%s" % (lvhdutil.VG_PREFIX, self.uuid) 

2681 self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgName) 

2682 self.lvmCache = lvmcache.LVMCache(self.vgName) 

2683 self.lvActivator = LVActivator(self.uuid, self.lvmCache) 

2684 self.journaler = journaler.Journaler(self.lvmCache) 

2685 

2686 def deleteVDI(self, vdi): 

2687 if self.lvActivator.get(vdi.uuid, False): 

2688 self.lvActivator.deactivate(vdi.uuid, False) 

2689 self._checkSlaves(vdi) 

2690 SR.deleteVDI(self, vdi) 

2691 

2692 def forgetVDI(self, vdiUuid): 

2693 SR.forgetVDI(self, vdiUuid) 

2694 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME) 

2695 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid) 

2696 

2697 def getFreeSpace(self): 

2698 stats = lvutil._getVGstats(self.vgName) 

2699 return stats['physical_size'] - stats['physical_utilisation'] 

2700 

2701 def cleanup(self): 

2702 if not self.lvActivator.deactivateAll(): 

2703 Util.log("ERROR deactivating LVs while cleaning up") 

2704 

2705 def needUpdateBlockInfo(self): 

2706 for vdi in self.vdis.values(): 

2707 if vdi.scanError or vdi.raw or len(vdi.children) == 0: 

2708 continue 

2709 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2710 return True 

2711 return False 

2712 

2713 def updateBlockInfo(self): 

2714 numUpdated = 0 

2715 for vdi in self.vdis.values(): 

2716 if vdi.scanError or vdi.raw or len(vdi.children) == 0: 

2717 continue 

2718 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2719 vdi.updateBlockInfo() 

2720 numUpdated += 1 

2721 if numUpdated: 

2722 # deactivate the LVs sooner rather than later. If we don't do it 

2723 # now, by the time this thread gets to deactivations, another one 

2724 # might have leaf-coalesced a node and deleted it, making the child 

2725 # inherit the refcount value and preventing the correct decrement 

2726 self.cleanup() 

2727 

2728 def scan(self, force=False): 

2729 vdis = self._scan(force) 

2730 for uuid, vdiInfo in vdis.items(): 

2731 vdi = self.getVDI(uuid) 

2732 if not vdi: 

2733 self.logFilter.logNewVDI(uuid) 

2734 vdi = LVHDVDI(self, uuid, 

2735 vdiInfo.vdiType == vhdutil.VDI_TYPE_RAW) 

2736 self.vdis[uuid] = vdi 

2737 vdi.load(vdiInfo) 

2738 self._removeStaleVDIs(vdis.keys()) 

2739 self._buildTree(force) 

2740 self.logFilter.logState() 

2741 self._handleInterruptedCoalesceLeaf() 

2742 

2743 def _scan(self, force): 

2744 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

2745 error = False 

2746 self.lvmCache.refresh() 

2747 vdis = lvhdutil.getVDIInfo(self.lvmCache) 

2748 for uuid, vdiInfo in vdis.items(): 

2749 if vdiInfo.scanError: 

2750 error = True 

2751 break 

2752 if not error: 

2753 return vdis 

2754 Util.log("Scan error, retrying (%d)" % i) 

2755 if force: 

2756 return vdis 

2757 raise util.SMException("Scan error") 

2758 

2759 def _removeStaleVDIs(self, uuidsPresent): 

2760 for uuid in list(self.vdis.keys()): 

2761 if not uuid in uuidsPresent: 

2762 Util.log("VDI %s disappeared since last scan" % \ 

2763 self.vdis[uuid]) 

2764 del self.vdis[uuid] 

2765 if self.lvActivator.get(uuid, False): 

2766 self.lvActivator.remove(uuid, False) 

2767 

2768 def _liveLeafCoalesce(self, vdi): 

2769 """If the parent is raw and the child was resized (virt. size), then 

2770 we'll need to resize the parent, which can take a while due to zeroing 

2771 out of the extended portion of the LV. Do it before pausing the child 

2772 to avoid a protracted downtime""" 

2773 if vdi.parent.raw and vdi.sizeVirt > vdi.parent.sizeVirt: 

2774 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

2775 vdi.parent._increaseSizeVirt(vdi.sizeVirt) 

2776 

2777 return SR._liveLeafCoalesce(self, vdi) 

2778 

2779 def _prepareCoalesceLeaf(self, vdi): 

2780 vdi._activateChain() 

2781 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

2782 vdi.deflate() 

2783 vdi.inflateParentForCoalesce() 

2784 

2785 def _updateNode(self, vdi): 

2786 # fix the refcounts: the remaining node should inherit the binary 

2787 # refcount from the leaf (because if it was online, it should remain 

2788 # refcounted as such), but the normal refcount from the parent (because 

2789 # this node is really the parent node) - minus 1 if it is online (since 

2790 # non-leaf nodes increment their normal counts when they are online and 

2791 # we are now a leaf, storing that 1 in the binary refcount). 

2792 ns = lvhdutil.NS_PREFIX_LVM + self.uuid 

2793 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns) 

2794 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns) 

2795 pCnt = pCnt - cBcnt 

2796 assert(pCnt >= 0) 

2797 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns) 

2798 
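    # --- Editor's note: worked example for the refcount fix-up above. ---
    # Suppose (hypothetically) the leaf child is checked out in the SR's LVM
    # refcount namespace as (count=0, binary=1) because it is attached
    # somewhere, and the parent as (count=3, binary=0) because three
    # non-leaf users, including that online child, hold it. After the
    # leaf-coalesce the parent *is* the leaf, so it should end up as
    # (count=3-1=2, binary=1): the child's "online" reference moves into
    # the binary slot and the parent's normal count drops by the same 1,
    # which is exactly RefCounter.set(parent, pCnt - cBcnt, cBcnt, ns).
    def _example_inherited_refcounts(parent_count, child_binary):
        # Hypothetical pure helper: (normal, binary) counts the surviving
        # node should carry after the leaf-coalesce.
        return parent_count - child_binary, child_binary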

2799 def _finishCoalesceLeaf(self, parent): 

2800 if not parent.isSnapshot() or parent.isAttachedRW(): 

2801 parent.inflateFully() 

2802 else: 

2803 parent.deflate() 

2804 

2805 def _calcExtraSpaceNeeded(self, child, parent): 

2806 return lvhdutil.calcSizeVHDLV(parent.sizeVirt) - parent.sizeLV 

2807 

2808 def _handleInterruptedCoalesceLeaf(self): 

2809 entries = self.journaler.getAll(VDI.JRN_LEAF) 

2810 for uuid, parentUuid in entries.items(): 

2811 childLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + uuid 

2812 tmpChildLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ 

2813 self.TMP_RENAME_PREFIX + uuid 

2814 parentLV1 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + parentUuid 

2815 parentLV2 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + parentUuid 

2816 parentPresent = (self.lvmCache.checkLV(parentLV1) or \ 

2817 self.lvmCache.checkLV(parentLV2)) 

2818 if parentPresent or self.lvmCache.checkLV(tmpChildLV): 

2819 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

2820 else: 

2821 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

2822 self.journaler.remove(VDI.JRN_LEAF, uuid) 

2823 vdi = self.getVDI(uuid) 

2824 if vdi: 

2825 vdi.ensureUnpaused() 

2826 

2827 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2828 Util.log("*** UNDO LEAF-COALESCE") 

2829 parent = self.getVDI(parentUuid) 

2830 if not parent: 

2831 parent = self.getVDI(childUuid) 

2832 if not parent: 

2833 raise util.SMException("Neither %s nor %s found" % \ 

2834 (parentUuid, childUuid)) 

2835 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

2836 parent.rename(parentUuid) 

2837 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

2838 

2839 child = self.getVDI(childUuid) 

2840 if not child: 

2841 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

2842 if not child: 

2843 raise util.SMException("Neither %s nor %s found" % \ 

2844 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

2845 Util.log("Renaming child back to %s" % childUuid) 

2846 child.rename(childUuid) 

2847 Util.log("Updating the VDI record") 

2848 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

2849 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

2850 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

2851 

2852 # refcount (best effort - assume that it had succeeded if the 

2853 # second rename succeeded; if not, this adjustment will be wrong, 

2854 # leading to a non-deactivation of the LV) 

2855 ns = lvhdutil.NS_PREFIX_LVM + self.uuid 

2856 cCnt, cBcnt = RefCounter.check(child.uuid, ns) 

2857 pCnt, pBcnt = RefCounter.check(parent.uuid, ns) 

2858 pCnt = pCnt + cBcnt 

2859 RefCounter.set(parent.uuid, pCnt, 0, ns) 

2860 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid) 

2861 

2862 parent.deflate() 

2863 child.inflateFully() 

2864 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid) 

2865 if child.hidden: 

2866 child._setHidden(False) 

2867 if not parent.hidden: 

2868 parent._setHidden(True) 

2869 if not parent.lvReadonly: 

2870 self.lvmCache.setReadonly(parent.fileName, True) 

2871 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

2872 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

2873 Util.log("*** leaf-coalesce undo successful") 

2874 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

2875 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

2876 

2877 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2878 Util.log("*** FINISH LEAF-COALESCE") 

2879 vdi = self.getVDI(childUuid) 

2880 if not vdi: 

2881 raise util.SMException("VDI %s not found" % childUuid) 

2882 vdi.inflateFully() 

2883 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid) 

2884 try: 

2885 self.forgetVDI(parentUuid) 

2886 except XenAPI.Failure: 

2887 pass 

2888 self._updateSlavesOnResize(vdi) 

2889 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

2890 Util.log("*** finished leaf-coalesce successfully") 

2891 

2892 def _checkSlaves(self, vdi): 

2893 """Confirm with all slaves in the pool that 'vdi' is not in use. We 

2894 try to check all slaves, including those that the Agent believes are 

2895 offline, but ignore failures for offline hosts. This is to avoid cases 

2896 where the Agent thinks a host is offline but the host is up.""" 

2897 args = {"vgName": self.vgName, 

2898 "action1": "deactivateNoRefcount", 

2899 "lvName1": vdi.fileName, 

2900 "action2": "cleanupLockAndRefcount", 

2901 "uuid2": vdi.uuid, 

2902 "ns2": lvhdutil.NS_PREFIX_LVM + self.uuid} 

2903 onlineHosts = self.xapi.getOnlineHosts() 

2904 abortFlag = IPCFlag(self.uuid) 

2905 for pbdRecord in self.xapi.getAttachedPBDs(): 

2906 hostRef = pbdRecord["host"] 

2907 if hostRef == self.xapi._hostRef: 

2908 continue 

2909 if abortFlag.test(FLAG_TYPE_ABORT): 

2910 raise AbortException("Aborting due to signal") 

2911 Util.log("Checking with slave %s (path %s)" % ( 

2912 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path)) 

2913 try: 

2914 self.xapi.ensureInactive(hostRef, args) 

2915 except XenAPI.Failure: 

2916 if hostRef in onlineHosts: 

2917 raise 

2918 

2919 def _updateSlavesOnUndoLeafCoalesce(self, parent, child): 

2920 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid]) 

2921 if not slaves: 

2922 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \ 

2923 child) 

2924 return 

2925 

2926 tmpName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ 

2927 self.TMP_RENAME_PREFIX + child.uuid 

2928 args = {"vgName": self.vgName, 

2929 "action1": "deactivateNoRefcount", 

2930 "lvName1": tmpName, 

2931 "action2": "deactivateNoRefcount", 

2932 "lvName2": child.fileName, 

2933 "action3": "refresh", 

2934 "lvName3": child.fileName, 

2935 "action4": "refresh", 

2936 "lvName4": parent.fileName} 

2937 for slave in slaves: 

2938 Util.log("Updating %s, %s, %s on slave %s" % \ 

2939 (tmpName, child.fileName, parent.fileName, 

2940 self.xapi.getRecordHost(slave)['hostname'])) 

2941 text = self.xapi.session.xenapi.host.call_plugin( \ 

2942 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

2943 Util.log("call-plugin returned: '%s'" % text) 

2944 

2945 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid): 

2946 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid]) 

2947 if not slaves: 

2948 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi) 

2949 return 

2950 

2951 args = {"vgName": self.vgName, 

2952 "action1": "deactivateNoRefcount", 

2953 "lvName1": oldNameLV, 

2954 "action2": "refresh", 

2955 "lvName2": vdi.fileName, 

2956 "action3": "cleanupLockAndRefcount", 

2957 "uuid3": origParentUuid, 

2958 "ns3": lvhdutil.NS_PREFIX_LVM + self.uuid} 

2959 for slave in slaves: 

2960 Util.log("Updating %s to %s on slave %s" % \ 

2961 (oldNameLV, vdi.fileName, 

2962 self.xapi.getRecordHost(slave)['hostname'])) 

2963 text = self.xapi.session.xenapi.host.call_plugin( \ 

2964 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

2965 Util.log("call-plugin returned: '%s'" % text) 

2966 

2967 def _updateSlavesOnResize(self, vdi): 

2968 uuids = [x.uuid for x in vdi.getAllLeaves()] 

2969 slaves = util.get_slaves_attached_on(self.xapi.session, uuids) 

2970 if not slaves: 

2971 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi) 

2972 return 

2973 lvhdutil.lvRefreshOnSlaves(self.xapi.session, self.uuid, self.vgName, 

2974 vdi.fileName, vdi.uuid, slaves) 

2975 

2976 
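# --- Editor's note: illustrative sketch, not part of the original module. ---
# The slave-update helpers above encode a batch of per-LV operations as a
# flat dict of numbered keys ("action1"/"lvName1", "action2"/"lvName2", ...)
# for the on-slave "multi" plugin call. A small hypothetical builder for
# that argument layout (it only shapes the dict and performs no XenAPI
# call):
def _example_build_multi_args(vg_name, actions):
    # 'actions' is a list of (action, key, value) tuples, e.g.
    # [("deactivateNoRefcount", "lvName", "OLD_..."),
    #  ("refresh", "lvName", "VHD-...")]
    args = {"vgName": vg_name}
    for i, (action, key, value) in enumerate(actions, start=1):
        args["action%d" % i] = action
        args["%s%d" % (key, i)] = value
    return args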

2977class LinstorSR(SR): 

2978 TYPE = SR.TYPE_LINSTOR 

2979 

2980 def __init__(self, uuid, xapi, createLock, force): 

2981 if not LINSTOR_AVAILABLE: 

2982 raise util.SMException( 

2983 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing' 

2984 ) 

2985 

2986 SR.__init__(self, uuid, xapi, createLock, force) 

2987 self.path = LinstorVolumeManager.DEV_ROOT_PATH 

2988 self._reloadLinstor() 

2989 

2990 def deleteVDI(self, vdi): 

2991 self._checkSlaves(vdi) 

2992 SR.deleteVDI(self, vdi) 

2993 

2994 def getFreeSpace(self): 

2995 return self._linstor.max_volume_size_allowed 

2996 

2997 def scan(self, force=False): 

2998 all_vdi_info = self._scan(force) 

2999 for uuid, vdiInfo in all_vdi_info.items(): 

3000 # When vdiInfo is None, the VDI is RAW. 

3001 vdi = self.getVDI(uuid) 

3002 if not vdi: 

3003 self.logFilter.logNewVDI(uuid) 

3004 vdi = LinstorVDI(self, uuid, not vdiInfo) 

3005 self.vdis[uuid] = vdi 

3006 if vdiInfo: 

3007 vdi.load(vdiInfo) 

3008 self._removeStaleVDIs(all_vdi_info.keys()) 

3009 self._buildTree(force) 

3010 self.logFilter.logState() 

3011 self._handleInterruptedCoalesceLeaf() 

3012 

3013 def pauseVDIs(self, vdiList): 

3014 self._linstor.ensure_volume_list_is_not_locked( 

3015 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3016 ) 

3017 return super(LinstorSR, self).pauseVDIs(vdiList) 

3018 

3019 def _reloadLinstor(self): 

3020 session = self.xapi.session 

3021 host_ref = util.get_this_host_ref(session) 

3022 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) 

3023 

3024 pbd = util.find_my_pbd(session, host_ref, sr_ref) 

3025 if pbd is None: 

3026 raise util.SMException('Failed to find PBD') 

3027 

3028 dconf = session.xenapi.PBD.get_device_config(pbd) 

3029 group_name = dconf['group-name'] 

3030 

3031 controller_uri = get_controller_uri() 

3032 self.journaler = LinstorJournaler( 

3033 controller_uri, group_name, logger=util.SMlog 

3034 ) 

3035 

3036 self._linstor = LinstorVolumeManager( 

3037 controller_uri, 

3038 group_name, 

3039 repair=True, 

3040 logger=util.SMlog 

3041 ) 

3042 self._vhdutil = LinstorVhdUtil(session, self._linstor) 

3043 

3044 def _scan(self, force): 

3045 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3046 self._reloadLinstor() 

3047 error = False 

3048 try: 

3049 all_vdi_info = self._load_vdi_info() 

3050 for uuid, vdiInfo in all_vdi_info.items(): 

3051 if vdiInfo and vdiInfo.error: 

3052 error = True 

3053 break 

3054 if not error: 

3055 return all_vdi_info 

3056 Util.log('Scan error, retrying ({})'.format(i)) 

3057 except Exception as e: 

3058 Util.log('Scan exception, retrying ({}): {}'.format(i, e)) 

3059 Util.log(traceback.format_exc()) 

3060 

3061 if force: 

3062 return all_vdi_info 

3063 raise util.SMException('Scan error') 

3064 

3065 def _load_vdi_info(self): 

3066 all_vdi_info = {} 

3067 

3068 # TODO: Ensure metadata contains the right info. 

3069 

3070 all_volume_info = self._linstor.get_volumes_with_info() 

3071 volumes_metadata = self._linstor.get_volumes_with_metadata() 

3072 for vdi_uuid, volume_info in all_volume_info.items(): 

3073 try: 

3074 if not volume_info.name and \ 

3075 not list(volumes_metadata[vdi_uuid].items()): 

3076 continue # Ignore it, probably deleted. 

3077 

3078 vdi_type = volumes_metadata[vdi_uuid][VDI_TYPE_TAG] 

3079 if vdi_type == vhdutil.VDI_TYPE_VHD: 

3080 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3081 else: 

3082 info = None 

3083 except Exception as e: 

3084 Util.log( 

3085 ' [VDI {}: failed to load VDI info]: {}' 

3086 .format(vdi_uuid, e) 

3087 ) 

3088 info = vhdutil.VHDInfo(vdi_uuid) 

3089 info.error = 1 

3090 all_vdi_info[vdi_uuid] = info 

3091 return all_vdi_info 

3092 

3093 # TODO: Maybe implement _liveLeafCoalesce/_prepareCoalesceLeaf/ 

3094 # _finishCoalesceLeaf/_updateSlavesOnResize like LVM plugin. 

3095 

3096 def _calcExtraSpaceNeeded(self, child, parent): 

3097 meta_overhead = vhdutil.calcOverheadEmpty(LinstorVDI.MAX_SIZE) 

3098 bitmap_overhead = vhdutil.calcOverheadBitmap(parent.sizeVirt) 

3099 virtual_size = LinstorVolumeManager.round_up_volume_size( 

3100 parent.sizeVirt + meta_overhead + bitmap_overhead 

3101 ) 

3102 volume_size = self._linstor.get_volume_size(parent.uuid) 

3103 return virtual_size - volume_size 

3104 

3105 def _hasValidDevicePath(self, uuid): 

3106 try: 

3107 self._linstor.get_device_path(uuid) 

3108 except Exception: 

3109 # TODO: Maybe log exception. 

3110 return False 

3111 return True 

3112 

3113 def _liveLeafCoalesce(self, vdi): 

3114 self.lock() 

3115 try: 

3116 self._linstor.ensure_volume_is_not_locked( 

3117 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3118 ) 

3119 return super(LinstorSR, self)._liveLeafCoalesce(vdi) 

3120 finally: 

3121 self.unlock() 

3122 

3123 def _prepareCoalesceLeaf(self, vdi): 

3124 # Move diskless path if necessary. We must have an access 

3125 # to modify locally the volume. 

3126 self._linstor.get_device_path(vdi.uuid) 

3127 

3128 def _handleInterruptedCoalesceLeaf(self): 

3129 entries = self.journaler.get_all(VDI.JRN_LEAF) 

3130 for uuid, parentUuid in entries.items(): 

3131 if self._hasValidDevicePath(parentUuid) or \ 

3132 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): 

3133 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3134 else: 

3135 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3136 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3137 vdi = self.getVDI(uuid) 

3138 if vdi: 

3139 vdi.ensureUnpaused() 

3140 

3141 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3142 Util.log('*** UNDO LEAF-COALESCE') 

3143 parent = self.getVDI(parentUuid) 

3144 if not parent: 

3145 parent = self.getVDI(childUuid) 

3146 if not parent: 

3147 raise util.SMException( 

3148 'Neither {} nor {} found'.format(parentUuid, childUuid) 

3149 ) 

3150 Util.log( 

3151 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) 

3152 ) 

3153 parent.rename(parentUuid) 

3154 

3155 child = self.getVDI(childUuid) 

3156 if not child: 

3157 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3158 if not child: 

3159 raise util.SMException( 

3160 'Neither {} nor {} found'.format( 

3161 childUuid, self.TMP_RENAME_PREFIX + childUuid 

3162 ) 

3163 ) 

3164 Util.log('Renaming child back to {}'.format(childUuid)) 

3165 child.rename(childUuid) 

3166 Util.log('Updating the VDI record') 

3167 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

3168 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

3169 

3170 # TODO: Maybe deflate here. 

3171 

3172 if child.hidden: 

3173 child._setHidden(False) 

3174 if not parent.hidden: 

3175 parent._setHidden(True) 

3176 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3177 Util.log('*** leaf-coalesce undo successful') 

3178 

3179 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3180 Util.log('*** FINISH LEAF-COALESCE') 

3181 vdi = self.getVDI(childUuid) 

3182 if not vdi: 

3183 raise util.SMException('VDI {} not found'.format(childUuid)) 

3184 # TODO: Maybe inflate. 

3185 try: 

3186 self.forgetVDI(parentUuid) 

3187 except XenAPI.Failure: 

3188 pass 

3189 self._updateSlavesOnResize(vdi) 

3190 Util.log('*** finished leaf-coalesce successfully') 

3191 

3192 def _checkSlaves(self, vdi): 

3193 try: 

3194 all_openers = self._linstor.get_volume_openers(vdi.uuid) 

3195 for openers in all_openers.values(): 

3196 for opener in openers.values(): 

3197 if opener['process-name'] != 'tapdisk': 

3198 raise util.SMException( 

3199 'VDI {} is in use: {}'.format(vdi.uuid, all_openers) 

3200 ) 

3201 except LinstorVolumeManagerError as e: 

3202 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: 

3203 raise 

3204 

3205 

3206################################################################################ 

3207# 

3208# Helpers 

3209# 

3210def daemonize(): 

3211 pid = os.fork() 

3212    if pid: 

3213 os.waitpid(pid, 0) 

3214 Util.log("New PID [%d]" % pid) 

3215 return False 

3216 os.chdir("/") 

3217 os.setsid() 

3218 pid = os.fork() 

3219 if pid: 

3220 Util.log("Will finish as PID [%d]" % pid) 

3221 os._exit(0) 

3222 for fd in [0, 1, 2]: 

3223 try: 

3224 os.close(fd) 

3225 except OSError: 

3226 pass 

3227 # we need to fill those special fd numbers or pread won't work 

3228 sys.stdin = open("/dev/null", 'r') 

3229 sys.stderr = open("/dev/null", 'w') 

3230 sys.stdout = open("/dev/null", 'w') 

3231 # As we're a new process we need to clear the lock objects 

3232 lock.Lock.clearAll() 

3233 return True 

3234 
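# Hedged usage sketch (not part of the original source): daemonize() performs
# the classic double fork, so callers must branch on its return value. The
# original parent gets False and simply returns; the detached grandchild gets
# True and carries on with the long-running work, e.g.:
#
#     if daemonize():
#         try:
#             run_background_job()   # hypothetical worker function
#         finally:
#             os._exit(0)            # never return into the caller's stack
#     # else: we are the foreground process and return immediately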

3235 

3236def normalizeType(type): 

3237 if type in LVHDSR.SUBTYPES: 

3238 type = SR.TYPE_LVHD 

3239 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]: 

3240 # temporary while LVHD is symlinked as LVM 

3241 type = SR.TYPE_LVHD 

3242 if type in [ 

3243 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", 

3244 "moosefs", "xfs", "zfs" 

3245 ]: 

3246 type = SR.TYPE_FILE 

3247 if type in ["linstor"]: 

3248 type = SR.TYPE_LINSTOR 

3249 if type not in SR.TYPES: 

3250 raise util.SMException("Unsupported SR type: %s" % type) 

3251 return type 

3252 
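# Illustrative examples, derived directly from the mapping above:
#
#     normalizeType("lvmoiscsi")  # -> SR.TYPE_LVHD (LVHD symlinked as LVM)
#     normalizeType("nfs")        # -> SR.TYPE_FILE
#     normalizeType("linstor")    # -> SR.TYPE_LINSTOR
#     normalizeType("unknown")    # raises util.SMException("Unsupported SR type: unknown")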

3253GCPAUSE_DEFAULT_SLEEP = 5 * 60 

3254 

3255 

3256def _gc_init_file(sr_uuid): 

3257 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init') 

3258 

3259 

3260def _create_init_file(sr_uuid): 

3261 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))) 

3262 with open(os.path.join( 

3263 NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init'), 'w+') as f: 

3264 f.write('1') 

3265 

3266 

3267def _gcLoopPause(sr, dryRun=False, immediate=False): 

3268 if immediate: 

3269 return 

3270 

3271 # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist 

3272 # point will just return. Otherwise, fall back on an abortable sleep. 

3273 

3274 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT): 

3275 

3276        util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT, 

3277 lambda *args: None) 

3278 elif os.path.exists(_gc_init_file(sr.uuid)): 

3279 def abortTest(): 

3280 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT) 

3281 

3282 # If time.sleep hangs we are in deep trouble, however for 

3283 # completeness we set the timeout of the abort thread to 

3284 # 110% of GCPAUSE_DEFAULT_SLEEP. 

3285 Util.log("GC active, about to go quiet") 

3286        Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP), 

3287 None, sr.uuid, abortTest, VDI.POLL_INTERVAL, 

3288 GCPAUSE_DEFAULT_SLEEP * 1.1) 

3289 Util.log("GC active, quiet period ended") 

3290 

3291 

3292def _gcLoop(sr, dryRun=False, immediate=False): 

3293    if not lockActive.acquireNoblock(): 

3294 Util.log("Another GC instance already active, exiting") 

3295 return 

3296 # Track how many we do 

3297 coalesced = 0 

3298 task_status = "success" 

3299 try: 

3300 # Check if any work needs to be done 

3301 sr.scanLocked() 

3302 if not sr.hasWork(): 

3303 Util.log("No work, exiting") 

3304 return 

3305 sr.xapi.create_task( 

3306 "Garbage Collection", 

3307 "Garbage collection for SR %s" % sr.uuid) 

3308 _gcLoopPause(sr, dryRun, immediate=immediate) 

3309 while True: 

3310            if not sr.xapi.isPluggedHere(): 

3311 Util.log("SR no longer attached, exiting") 

3312 break 

3313 sr.scanLocked() 

3314 if not sr.hasWork(): 

3315 Util.log("No work, exiting") 

3316 break 

3317 

3318            if not lockRunning.acquireNoblock(): 

3319 Util.log("Unable to acquire GC running lock.") 

3320 return 

3321 try: 

3322                if not sr.gcEnabled(): 

3323 break 

3324 

3325 sr.xapi.update_task_progress("done", coalesced) 

3326 

3327 sr.cleanupCoalesceJournals() 

3328 # Create the init file here in case startup is waiting on it 

3329 _create_init_file(sr.uuid) 

3330 sr.scanLocked() 

3331 sr.updateBlockInfo() 

3332 

3333 howmany = len(sr.findGarbage()) 

3334 if howmany > 0: 

3335 Util.log("Found %d orphaned vdis" % howmany) 

3336 sr.lock() 

3337 try: 

3338 sr.garbageCollect(dryRun) 

3339 finally: 

3340 sr.unlock() 

3341 sr.xapi.srUpdate() 

3342 

3343 candidate = sr.findCoalesceable() 

3344 if candidate: 

3345 util.fistpoint.activate( 

3346 "LVHDRT_finding_a_suitable_pair", sr.uuid) 

3347 sr.coalesce(candidate, dryRun) 

3348 sr.xapi.srUpdate() 

3349 coalesced += 1 

3350 continue 

3351 

3352 candidate = sr.findLeafCoalesceable() 

3353                if candidate: 

3354 sr.coalesceLeaf(candidate, dryRun) 

3355 sr.xapi.srUpdate() 

3356 coalesced += 1 

3357 continue 

3358 

3359 finally: 

3360                lockRunning.release() 

3361 except: 

3362 task_status = "failure" 

3363 raise 

3364 finally: 

3365 sr.xapi.set_task_status(task_status) 

3366 Util.log("GC process exiting, no work left") 

3367 _create_init_file(sr.uuid) 

3368 lockActive.release() 

3369 

3370 

3371def _xapi_enabled(session, hostref): 

3372 host = session.xenapi.host.get_record(hostref) 

3373 return host['enabled'] 

3374 

3375 

3376def _ensure_xapi_initialised(session): 

3377 """ 

3378 Don't want to start GC until Xapi is fully initialised 

3379 """ 

3380 local_session = None 

3381 if session is None: 

3382 local_session = util.get_localAPI_session() 

3383 session = local_session 

3384 

3385 try: 

3386 hostref = session.xenapi.host.get_by_uuid(util.get_this_host()) 

3387 while not _xapi_enabled(session, hostref): 

3388 util.SMlog("Xapi not ready, GC waiting") 

3389 time.sleep(15) 

3390 finally: 

3391 if local_session is not None: 

3392 local_session.logout() 

3393 

3394def _gc(session, srUuid, dryRun=False, immediate=False): 

3395 init(srUuid) 

3396 _ensure_xapi_initialised(session) 

3397 sr = SR.getInstance(srUuid, session) 

3398    if not sr.gcEnabled(False): 

3399 return 

3400 

3401 sr.cleanupCache() 

3402 try: 

3403 _gcLoop(sr, dryRun, immediate=immediate) 

3404 finally: 

3405 sr.cleanup() 

3406 sr.logFilter.logState() 

3407 del sr.xapi 

3408 

3409 

3410def _abort(srUuid, soft=False): 

3411    """Aborts a GC/coalesce. 

3412 

3413 srUuid: the UUID of the SR whose GC/coalesce must be aborted 

3414 soft: If set to True and there is a pending abort signal, the function 

3415 doesn't do anything. If set to False, a new abort signal is issued. 

3416 

3417 returns: If soft is set to False, we return True holding lockActive. If 

3418    soft is set to True and an abort signal is already pending, we return False 

3419 without holding lockActive. An exception is raised in case of error.""" 

3420 Util.log("=== SR %s: abort ===" % (srUuid)) 

3421 init(srUuid) 

3422 if not lockActive.acquireNoblock(): 

3423 gotLock = False 

3424 Util.log("Aborting currently-running instance (SR %s)" % srUuid) 

3425 abortFlag = IPCFlag(srUuid) 

3426 if not abortFlag.set(FLAG_TYPE_ABORT, soft): 

3427 return False 

3428 for i in range(SR.LOCK_RETRY_ATTEMPTS): 

3429 gotLock = lockActive.acquireNoblock() 

3430 if gotLock: 

3431 break 

3432 time.sleep(SR.LOCK_RETRY_INTERVAL) 

3433 abortFlag.clear(FLAG_TYPE_ABORT) 

3434 if not gotLock: 

3435 raise util.CommandException(code=errno.ETIMEDOUT, 

3436 reason="SR %s: error aborting existing process" % srUuid) 

3437 return True 

3438 
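# Hedged usage sketch: a hard abort (soft=False) blocks until the running GC
# instance gives up lockActive or the retries time out, whereas a soft abort
# backs off if another abort request is already pending:
#
#     if _abort(sr_uuid, soft=True):      # sr_uuid is a placeholder
#         try:
#             pass  # lockActive is held here; safe to do exclusive work
#         finally:
#             lockActive.release()
#     else:
#         pass  # an abort was already in flight; nothing was acquired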

3439 

3440def init(srUuid): 

3441 global lockRunning 

3442    if not lockRunning: 

3443 lockRunning = lock.Lock(LOCK_TYPE_RUNNING, srUuid) 

3444 global lockActive 

3445    if not lockActive: 

3446 lockActive = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid) 

3447 

3448 

3449def usage(): 

3450 output = """Garbage collect and/or coalesce VHDs in a VHD-based SR 

3451 

3452Parameters: 

3453 -u --uuid UUID SR UUID 

3454 and one of: 

3455 -g --gc garbage collect, coalesce, and repeat while there is work 

3456 -G --gc_force garbage collect once, aborting any current operations 

3457    -c --clean_cache <max_age> clean up IntelliCache cache files older than 

3458 max_age hours 

3459 -a --abort abort any currently running operation (GC or coalesce) 

3460 -q --query query the current state (GC'ing, coalescing or not running) 

3461 -x --disable disable GC/coalesce (will be in effect until you exit) 

3462 -t --debug see Debug below 

3463 

3464Options: 

3465 -b --background run in background (return immediately) (valid for -g only) 

3466 -f --force continue in the presence of VHDs with errors (when doing 

3467 GC, this might cause removal of any such VHDs) (only valid 

3468 for -G) (DANGEROUS) 

3469 

3470Debug: 

3471 The --debug parameter enables manipulation of LVHD VDIs for debugging 

3472 purposes. ** NEVER USE IT ON A LIVE VM ** 

3473 The following parameters are required: 

3474 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate", 

3475 "deflate". 

3476 -v --vdi_uuid VDI UUID 

3477 """ 

3478 #-d --dry-run don't actually perform any SR-modifying operations 

3479 print(output) 

3480 Util.log("(Invalid usage)") 

3481 sys.exit(1) 

3482 
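# Hedged invocation examples (the script path and SR UUID are placeholders;
# adjust to wherever this file is installed on the host):
#
#     ./cleanup.py -u <SR_UUID> -g -b     # GC + coalesce in the background
#     ./cleanup.py -u <SR_UUID> -q        # query whether GC is currently running
#     ./cleanup.py -u <SR_UUID> -a        # abort any running GC/coalesce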

3483 

3484############################################################################## 

3485# 

3486# API 

3487# 

3488def abort(srUuid, soft=False): 

3489 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair. 

3490 """ 

3491 if _abort(srUuid, soft): 

3492 Util.log("abort: releasing the process lock") 

3493 lockActive.release() 

3494 return True 

3495 else: 

3496 return False 

3497 

3498 

3499def gc(session, srUuid, inBackground, dryRun=False): 

3500 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return 

3501 immediately if inBackground=True. 

3502 

3503 The following algorithm is used: 

3504 1. If we are already GC'ing in this SR, return 

3505 2. If we are already coalescing a VDI pair: 

3506 a. Scan the SR and determine if the VDI pair is GC'able 

3507 b. If the pair is not GC'able, return 

3508 c. If the pair is GC'able, abort coalesce 

3509 3. Scan the SR 

3510 4. If there is nothing to collect, nor to coalesce, return 

3511 5. If there is something to collect, GC all, then goto 3 

3512 6. If there is something to coalesce, coalesce one pair, then goto 3 

3513 """ 

3514 Util.log("=== SR %s: gc ===" % srUuid) 

3515 if inBackground: 

3516 if daemonize(): 

3517 # we are now running in the background. Catch & log any errors 

3518 # because there is no other way to propagate them back at this 

3519 # point 

3520 

3521 try: 

3522 _gc(None, srUuid, dryRun) 

3523 except AbortException: 

3524 Util.log("Aborted") 

3525 except Exception: 

3526 Util.logException("gc") 

3527 Util.log("* * * * * SR %s: ERROR\n" % srUuid) 

3528 os._exit(0) 

3529 else: 

3530 _gc(session, srUuid, dryRun, immediate=True) 

3531 
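# Hedged usage sketch: storage drivers would typically trigger background GC
# after an operation that can leave garbage behind, e.g.
#
#     gc(session, sr_uuid, inBackground=True)    # fork and return immediately
#     gc(None, sr_uuid, inBackground=False)      # run inline in this process
#
# (session/sr_uuid are placeholders; when inBackground is True the forked
# child calls _gc with session=None rather than reusing the caller's session.)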

3532 

3533def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False): 

3534 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure 

3535 the SR lock is held. 

3536 The following algorithm is used: 

3537 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce 

3538 2. Scan the SR 

3539 3. GC 

3540 4. return 

3541 """ 

3542 Util.log("=== SR %s: gc_force ===" % srUuid) 

3543 init(srUuid) 

3544 sr = SR.getInstance(srUuid, session, lockSR, True) 

3545 if not lockActive.acquireNoblock(): 

3546 abort(srUuid) 

3547 else: 

3548 Util.log("Nothing was running, clear to proceed") 

3549 

3550 if force: 

3551 Util.log("FORCED: will continue even if there are VHD errors") 

3552 sr.scanLocked(force) 

3553 sr.cleanupCoalesceJournals() 

3554 

3555 try: 

3556 sr.cleanupCache() 

3557 sr.garbageCollect(dryRun) 

3558 finally: 

3559 sr.cleanup() 

3560 sr.logFilter.logState() 

3561 lockActive.release() 

3562 

3563 

3564def get_state(srUuid): 

3565 """Return whether GC/coalesce is currently running or not. The information 

3566 is not guaranteed for any length of time if the call is not protected by 

3567 locking. 

3568 """ 

3569 init(srUuid) 

3570 if lockActive.acquireNoblock(): 

3571 lockActive.release() 

3572 return False 

3573 return True 

3574 

3575 

3576def should_preempt(session, srUuid): 

3577 sr = SR.getInstance(srUuid, session) 

3578 entries = sr.journaler.getAll(VDI.JRN_COALESCE) 

3579 if len(entries) == 0: 

3580 return False 

3581 elif len(entries) > 1: 

3582 raise util.SMException("More than one coalesce entry: " + str(entries)) 

3583 sr.scanLocked() 

3584 coalescedUuid = entries.popitem()[0] 

3585 garbage = sr.findGarbage() 

3586 for vdi in garbage: 

3587 if vdi.uuid == coalescedUuid: 

3588 return True 

3589 return False 

3590 

3591 

3592def get_coalesceable_leaves(session, srUuid, vdiUuids): 

3593 coalesceable = [] 

3594 sr = SR.getInstance(srUuid, session) 

3595 sr.scanLocked() 

3596 for uuid in vdiUuids: 

3597 vdi = sr.getVDI(uuid) 

3598 if not vdi: 

3599 raise util.SMException("VDI %s not found" % uuid) 

3600 if vdi.isLeafCoalesceable(): 

3601 coalesceable.append(uuid) 

3602 return coalesceable 

3603 

3604 

3605def cache_cleanup(session, srUuid, maxAge): 

3606 sr = SR.getInstance(srUuid, session) 

3607 return sr.cleanupCache(maxAge) 

3608 

3609 

3610def debug(sr_uuid, cmd, vdi_uuid): 

3611 Util.log("Debug command: %s" % cmd) 

3612 sr = SR.getInstance(sr_uuid, None) 

3613 if not isinstance(sr, LVHDSR): 

3614 print("Error: not an LVHD SR") 

3615 return 

3616 sr.scanLocked() 

3617 vdi = sr.getVDI(vdi_uuid) 

3618 if not vdi: 

3619        print("Error: VDI %s not found" % vdi_uuid) 

3620 return 

3621 print("Running %s on SR %s" % (cmd, sr)) 

3622 print("VDI before: %s" % vdi) 

3623 if cmd == "activate": 

3624 vdi._activate() 

3625 print("VDI file: %s" % vdi.path) 

3626 if cmd == "deactivate": 

3627 ns = lvhdutil.NS_PREFIX_LVM + sr.uuid 

3628 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False) 

3629 if cmd == "inflate": 

3630 vdi.inflateFully() 

3631 sr.cleanup() 

3632 if cmd == "deflate": 

3633 vdi.deflate() 

3634 sr.cleanup() 

3635 sr.scanLocked() 

3636 print("VDI after: %s" % vdi) 

3637 

3638 

3639def abort_optional_reenable(uuid): 

3640 print("Disabling GC/coalesce for %s" % uuid) 

3641 ret = _abort(uuid) 

3642 input("Press enter to re-enable...") 

3643 print("GC/coalesce re-enabled") 

3644 lockRunning.release() 

3645 if ret: 

3646 lockActive.release() 

3647 

3648 

3649############################################################################## 

3650# 

3651# CLI 

3652# 

3653def main(): 

3654 action = "" 

3655 uuid = "" 

3656 background = False 

3657 force = False 

3658 dryRun = False 

3659 debug_cmd = "" 

3660 vdi_uuid = "" 

3661 shortArgs = "gGc:aqxu:bfdt:v:" 

3662 longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable", 

3663 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="] 

3664 

3665 try: 

3666 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs) 

3667 except getopt.GetoptError: 

3668 usage() 

3669 for o, a in opts: 

3670 if o in ("-g", "--gc"): 

3671 action = "gc" 

3672 if o in ("-G", "--gc_force"): 

3673 action = "gc_force" 

3674 if o in ("-c", "--clean_cache"): 

3675 action = "clean_cache" 

3676 maxAge = int(a) 

3677 if o in ("-a", "--abort"): 

3678 action = "abort" 

3679 if o in ("-q", "--query"): 

3680 action = "query" 

3681 if o in ("-x", "--disable"): 

3682 action = "disable" 

3683 if o in ("-u", "--uuid"): 

3684 uuid = a 

3685 if o in ("-b", "--background"): 

3686 background = True 

3687 if o in ("-f", "--force"): 

3688 force = True 

3689 if o in ("-d", "--dry-run"): 

3690 Util.log("Dry run mode") 

3691 dryRun = True 

3692 if o in ("-t", "--debug"): 

3693 action = "debug" 

3694 debug_cmd = a 

3695 if o in ("-v", "--vdi_uuid"): 

3696 vdi_uuid = a 

3697 

3698 if not action or not uuid: 

3699 usage() 

3700 if action == "debug" and not (debug_cmd and vdi_uuid) or \ 

3701 action != "debug" and (debug_cmd or vdi_uuid): 

3702 usage() 

3703 

3704 if action != "query" and action != "debug": 

3705 print("All output goes to log") 

3706 

3707 if action == "gc": 

3708 gc(None, uuid, background, dryRun) 

3709 elif action == "gc_force": 

3710 gc_force(None, uuid, force, dryRun, True) 

3711 elif action == "clean_cache": 

3712 cache_cleanup(None, uuid, maxAge) 

3713 elif action == "abort": 

3714 abort(uuid) 

3715 elif action == "query": 

3716 print("Currently running: %s" % get_state(uuid)) 

3717 elif action == "disable": 

3718 abort_optional_reenable(uuid) 

3719 elif action == "debug": 

3720 debug(uuid, debug_cmd, vdi_uuid) 

3721 

3722 

3723if __name__ == '__main__': 

3724 main()