
1#!/usr/bin/python3 

2# 

3# Copyright (C) Citrix Systems Inc. 

4# 

5# This program is free software; you can redistribute it and/or modify 

6# it under the terms of the GNU Lesser General Public License as published 

7# by the Free Software Foundation; version 2.1 only. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program; if not, write to the Free Software Foundation, Inc., 

16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 

17# 

18# Script to coalesce and garbage collect VHD-based SRs in the background 

19# 

20 

21import os 

22import os.path 

23import sys 

24import time 

25import signal 

26import subprocess 

27import getopt 

28import datetime 

29import traceback 

30import base64 

31import zlib 

32import errno 

33import stat 

34 

35import XenAPI 

36import util 

37import lvutil 

38import vhdutil 

39import lvhdutil 

40import lvmcache 

41import journaler 

42import fjournaler 

43import lock 

44import blktap2 

45import xs_errors 

46from refcounter import RefCounter 

47from ipc import IPCFlag 

48from lvmanager import LVActivator 

49from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG 

50from functools import reduce 

51from time import monotonic as _time 

52 

53try: 

54 from linstorjournaler import LinstorJournaler 

55 from linstorvhdutil import LinstorVhdUtil 

56 from linstorvolumemanager import get_controller_uri 

57 from linstorvolumemanager import LinstorVolumeManager 

58 from linstorvolumemanager import LinstorVolumeManagerError 

59 

60 LINSTOR_AVAILABLE = True 

61except ImportError: 

62 LINSTOR_AVAILABLE = False 

63 

64# Controls automatic online leaf-coalescing. Online leaf-coalesce used to be 

65# disabled here because lvhd_stop_using_() did not work correctly; the 

66# explicit LEAFCLSC_FORCE flag in the VDI record remains available for the 

67# offline tool (which makes the operation safe by pausing the VM 

68# first). 

69AUTO_ONLINE_LEAF_COALESCE_ENABLED = True 

70 

71FLAG_TYPE_ABORT = "abort" # flag to request aborting of GC/coalesce 

72 

73# process "lock", used simply as an indicator that a process already exists 

74# that is doing GC/coalesce on this SR (such a process holds the lock, and we 

75# check for the fact by trying the lock). 

76LOCK_TYPE_RUNNING = "running" 

77lockRunning = None 

78 

79# process "lock" to indicate that the GC process has been activated but may not 

80# yet be running; it stops a second process from being started. 

81LOCK_TYPE_GC_ACTIVE = "gc_active" 

82lockActive = None 

83 

84# Default coalesce error rate limit, in messages per minute. A zero value 

85# disables throttling, and a negative value disables error reporting. 

86DEFAULT_COALESCE_ERR_RATE = 1.0 / 60 
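# Worked example (illustrative): the default rate of 1.0 / 60 messages per
# minute amounts to at most one coalesce error message per hour, because the
# throttling code in _reportCoalesceError() below turns the rate into a
# minimum spacing of (1.0 / rate) * 60 seconds between messages:
#
#   >>> rate = 1.0 / 60        # messages per minute
#   >>> (1.0 / rate) * 60      # minimum seconds between two messages
#   3600.0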

87 

88COALESCE_LAST_ERR_TAG = 'last-coalesce-error' 

89COALESCE_ERR_RATE_TAG = 'coalesce-error-rate' 

90VAR_RUN = "/var/run/" 

91SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log" 
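# Illustrative example: the per-SR speed log lives directly under /var/run,
# e.g. SPEED_LOG_ROOT.format(uuid="<sr-uuid>") yields
# "/var/run/<sr-uuid>.speed_log".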

92 

93N_RUNNING_AVERAGE = 10 

94 

95NON_PERSISTENT_DIR = '/run/nonpersistent/sm' 

96 

97 

98class AbortException(util.SMException): 

99 pass 

100 

101 

102################################################################################ 

103# 

104# Util 

105# 

106class Util: 

107 RET_RC = 1 

108 RET_STDOUT = 2 

109 RET_STDERR = 4 

110 

111 UUID_LEN = 36 

112 

113 PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024} 

114 

115 def log(text): 

116 util.SMlog(text, ident="SMGC") 

117 log = staticmethod(log) 

118 

119 def logException(tag): 

120 info = sys.exc_info() 

121 if info[0] == SystemExit: 

122 # this should not be happening when catching "Exception", but it is 

123 sys.exit(0) 

124 tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2])) 

125 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

126 Util.log(" ***********************") 

127 Util.log(" * E X C E P T I O N *") 

128 Util.log(" ***********************") 

129 Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1])) 

130 Util.log(tb) 

131 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

132 logException = staticmethod(logException) 

133 

134 def doexec(args, expectedRC, inputtext=None, ret=None, log=True): 

135 "Execute a subprocess, then return its return code, stdout, stderr" 

136 proc = subprocess.Popen(args, 

137 stdin=subprocess.PIPE, \ 

138 stdout=subprocess.PIPE, \ 

139 stderr=subprocess.PIPE, \ 

140 shell=True, \ 

141 close_fds=True) 

142 (stdout, stderr) = proc.communicate(inputtext) 

143 stdout = str(stdout) 

144 stderr = str(stderr) 

145 rc = proc.returncode 

146 if log: 

147 Util.log("`%s`: %s" % (args, rc)) 

148 if type(expectedRC) != type([]): 

149 expectedRC = [expectedRC] 

150 if not rc in expectedRC: 

151 reason = stderr.strip() 

152 if stdout.strip(): 

153 reason = "%s (stdout: %s)" % (reason, stdout.strip()) 

154 Util.log("Failed: %s" % reason) 

155 raise util.CommandException(rc, args, reason) 

156 

157 if ret == Util.RET_RC: 

158 return rc 

159 if ret == Util.RET_STDERR: 

160 return stderr 

161 return stdout 

162 doexec = staticmethod(doexec) 
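# Usage sketch (the command strings are hypothetical): because doexec() runs
# with shell=True, callers pass a single command string. expectedRC may be a
# single code or a list; any other return code raises util.CommandException,
# and the 'ret' flag picks what is returned on success.
#
#   out = Util.doexec("vhd-util check --help", 0, ret=Util.RET_STDOUT)
#   rc = Util.doexec("some-tool --probe", [0, 1], ret=Util.RET_RC)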

163 

164 def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut): 

165 """execute func in a separate thread and kill it if abortTest signals 

166 so""" 

167 abortSignaled = abortTest() # check now before we clear resultFlag 

168 resultFlag = IPCFlag(ns) 

169 resultFlag.clearAll() 

170 pid = os.fork() 

171 if pid: 

172 startTime = _time() 

173 try: 

174 while True: 

175 if resultFlag.test("success"): 

176 Util.log(" Child process completed successfully") 

177 resultFlag.clear("success") 

178 return 

179 if resultFlag.test("failure"): 

180 resultFlag.clear("failure") 

181 raise util.SMException("Child process exited with error") 

182 if abortTest() or abortSignaled: 

183 os.killpg(pid, signal.SIGKILL) 

184 raise AbortException("Aborting due to signal") 

185 if timeOut and _time() - startTime > timeOut: 

186 os.killpg(pid, signal.SIGKILL) 

187 resultFlag.clearAll() 

188 raise util.SMException("Timed out") 

189 time.sleep(pollInterval) 

190 finally: 

191 wait_pid = 0 

192 rc = -1 

193 count = 0 

194 while wait_pid == 0 and count < 10: 

195 wait_pid, rc = os.waitpid(pid, os.WNOHANG) 

196 if wait_pid == 0: 

197 time.sleep(2) 

198 count += 1 

199 

200 if wait_pid == 0: 

201 Util.log("runAbortable: wait for process completion timed out") 

202 else: 

203 os.setpgrp() 

204 try: 

205 if func() == ret: 

206 resultFlag.set("success") 

207 else: 

208 resultFlag.set("failure") 

209 except Exception as e: 

210 Util.log("Child process failed with : (%s)" % e) 

211 resultFlag.set("failure") 

212 Util.logException("This exception has occurred") 

213 os._exit(0) 

214 runAbortable = staticmethod(runAbortable) 
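# Usage sketch, mirroring the calls made further down in this file: the
# callable runs in a forked child while the parent polls the abort flag and
# the optional timeout (a timeOut of 0 disables the timeout). 'ret' is the
# value the callable must return for the run to count as a success; here
# sr_uuid, path, offset and length are assumed to be in scope.
#
#   abortTest = lambda: IPCFlag(sr_uuid).test(FLAG_TYPE_ABORT)
#   Util.runAbortable(lambda: util.zeroOut(path, offset, length),
#                     True, sr_uuid, abortTest, VDI.POLL_INTERVAL, 0)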

215 

216 def num2str(number): 

217 for prefix in ("G", "M", "K"): 

218 if number >= Util.PREFIX[prefix]: 

219 return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix) 

220 return "%s" % number 

221 num2str = staticmethod(num2str) 
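# Doctest-style illustration of the formatting above:
#
#   >>> Util.num2str(512)
#   '512'
#   >>> Util.num2str(2048)
#   '2.000K'
#   >>> Util.num2str(3 * 1024 * 1024 * 1024)
#   '3.000G'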

222 

223 def numBits(val): 

224 count = 0 

225 while val: 

226 count += val & 1 

227 val = val >> 1 

228 return count 

229 numBits = staticmethod(numBits) 

230 

231 def countBits(bitmap1, bitmap2): 

232 """return bit count in the bitmap produced by ORing the two bitmaps""" 

233 len1 = len(bitmap1) 

234 len2 = len(bitmap2) 

235 lenLong = len1 

236 lenShort = len2 

237 bitmapLong = bitmap1 

238 if len2 > len1: 

239 lenLong = len2 

240 lenShort = len1 

241 bitmapLong = bitmap2 

242 

243 count = 0 

244 for i in range(lenShort): 

245 val = bitmap1[i] | bitmap2[i] 

246 count += Util.numBits(val) 

247 

248 for i in range(lenShort, lenLong): 

249 val = bitmapLong[i] 

250 count += Util.numBits(val) 

251 return count 

252 countBits = staticmethod(countBits) 
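# Doctest-style illustration: the overlapping bytes are ORed before counting,
# and the tail of the longer bitmap is counted as-is.
#
#   >>> Util.numBits(0x0f)
#   4
#   >>> Util.countBits(b'\x0f', b'\xf0\x01')   # (0x0f | 0xf0) has 8 bits, 0x01 adds one
#   9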

253 

254 def getThisScript(): 

255 thisScript = util.get_real_path(__file__) 

256 if thisScript.endswith(".pyc"): 

257 thisScript = thisScript[:-1] 

258 return thisScript 

259 getThisScript = staticmethod(getThisScript) 

260 

261 

262################################################################################ 

263# 

264# XAPI 

265# 

266class XAPI: 

267 USER = "root" 

268 PLUGIN_ON_SLAVE = "on-slave" 

269 

270 CONFIG_SM = 0 

271 CONFIG_OTHER = 1 

272 CONFIG_ON_BOOT = 2 

273 CONFIG_ALLOW_CACHING = 3 

274 

275 CONFIG_NAME = { 

276 CONFIG_SM: "sm-config", 

277 CONFIG_OTHER: "other-config", 

278 CONFIG_ON_BOOT: "on-boot", 

279 CONFIG_ALLOW_CACHING: "allow_caching" 

280 } 

281 

282 class LookupError(util.SMException): 

283 pass 

284 

285 def getSession(): 

286 session = XenAPI.xapi_local() 

287 session.xenapi.login_with_password(XAPI.USER, '', '', 'SM') 

288 return session 

289 getSession = staticmethod(getSession) 

290 

291 def __init__(self, session, srUuid): 

292 self.sessionPrivate = False 

293 self.session = session 

294 if self.session is None: 

295 self.session = self.getSession() 

296 self.sessionPrivate = True 

297 self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid) 

298 self.srRecord = self.session.xenapi.SR.get_record(self._srRef) 

299 self.hostUuid = util.get_this_host() 

300 self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid) 

301 self.task = None 

302 self.task_progress = {"coalescable": 0, "done": 0} 

303 

304 def __del__(self): 

305 if self.sessionPrivate: 

306 self.session.xenapi.session.logout() 

307 

308 def isPluggedHere(self): 

309 pbds = self.getAttachedPBDs() 

310 for pbdRec in pbds: 

311 if pbdRec["host"] == self._hostRef: 

312 return True 

313 return False 

314 

315 def poolOK(self): 

316 host_recs = self.session.xenapi.host.get_all_records() 

317 for host_ref, host_rec in host_recs.items(): 

318 if not host_rec["enabled"]: 

319 Util.log("Host %s not enabled" % host_rec["uuid"]) 

320 return False 

321 return True 

322 

323 def isMaster(self): 

324 if self.srRecord["shared"]: 

325 pool = list(self.session.xenapi.pool.get_all_records().values())[0] 

326 return pool["master"] == self._hostRef 

327 else: 

328 pbds = self.getAttachedPBDs() 

329 if len(pbds) < 1: 

330 raise util.SMException("Local SR not attached") 

331 elif len(pbds) > 1: 

332 raise util.SMException("Local SR multiply attached") 

333 return pbds[0]["host"] == self._hostRef 

334 

335 def getAttachedPBDs(self): 

336 """Return PBD records for all PBDs of this SR that are currently 

337 attached""" 

338 attachedPBDs = [] 

339 pbds = self.session.xenapi.PBD.get_all_records() 

340 for pbdRec in pbds.values(): 

341 if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]: 

342 attachedPBDs.append(pbdRec) 

343 return attachedPBDs 

344 

345 def getOnlineHosts(self): 

346 return util.get_online_hosts(self.session) 

347 

348 def ensureInactive(self, hostRef, args): 

349 text = self.session.xenapi.host.call_plugin( \ 

350 hostRef, self.PLUGIN_ON_SLAVE, "multi", args) 

351 Util.log("call-plugin returned: '%s'" % text) 

352 

353 def getRecordHost(self, hostRef): 

354 return self.session.xenapi.host.get_record(hostRef) 

355 

356 def _getRefVDI(self, uuid): 

357 return self.session.xenapi.VDI.get_by_uuid(uuid) 

358 

359 def getRefVDI(self, vdi): 

360 return self._getRefVDI(vdi.uuid) 

361 

362 def getRecordVDI(self, uuid): 

363 try: 

364 ref = self._getRefVDI(uuid) 

365 return self.session.xenapi.VDI.get_record(ref) 

366 except XenAPI.Failure: 

367 return None 

368 

369 def singleSnapshotVDI(self, vdi): 

370 return self.session.xenapi.VDI.snapshot(vdi.getRef(), 

371 {"type": "internal"}) 

372 

373 def forgetVDI(self, srUuid, vdiUuid): 

374 """Forget the VDI, but handle the case where the VDI has already been 

375 forgotten (i.e. ignore errors)""" 

376 try: 

377 vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid) 

378 self.session.xenapi.VDI.forget(vdiRef) 

379 except XenAPI.Failure: 

380 pass 

381 

382 def getConfigVDI(self, vdi, key): 

383 kind = vdi.CONFIG_TYPE[key] 

384 if kind == self.CONFIG_SM: 

385 cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef()) 

386 elif kind == self.CONFIG_OTHER: 

387 cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef()) 

388 elif kind == self.CONFIG_ON_BOOT: 

389 cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef()) 

390 elif kind == self.CONFIG_ALLOW_CACHING: 

391 cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef()) 

392 else: 

393 assert(False) 

394 Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg))) 

395 return cfg 

396 

397 def removeFromConfigVDI(self, vdi, key): 

398 kind = vdi.CONFIG_TYPE[key] 

399 if kind == self.CONFIG_SM: 

400 self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key) 

401 elif kind == self.CONFIG_OTHER: 

402 self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key) 

403 else: 

404 assert(False) 

405 

406 def addToConfigVDI(self, vdi, key, val): 

407 kind = vdi.CONFIG_TYPE[key] 

408 if kind == self.CONFIG_SM: 

409 self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val) 

410 elif kind == self.CONFIG_OTHER: 

411 self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val) 

412 else: 

413 assert(False) 

414 

415 def isSnapshot(self, vdi): 

416 return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef()) 

417 

418 def markCacheSRsDirty(self): 

419 sr_refs = self.session.xenapi.SR.get_all_records_where( \ 

420 'field "local_cache_enabled" = "true"') 

421 for sr_ref in sr_refs: 

422 Util.log("Marking SR %s dirty" % sr_ref) 

423 util.set_dirty(self.session, sr_ref) 

424 

425 def srUpdate(self): 

426 Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"]) 

427 abortFlag = IPCFlag(self.srRecord["uuid"]) 

428 task = self.session.xenapi.Async.SR.update(self._srRef) 

429 cancelTask = True 

430 try: 

431 for i in range(60): 

432 status = self.session.xenapi.task.get_status(task) 

433 if not status == "pending": 

434 Util.log("SR.update_asynch status changed to [%s]" % status) 

435 cancelTask = False 

436 return 

437 if abortFlag.test(FLAG_TYPE_ABORT): 

438 Util.log("Abort signalled during srUpdate, cancelling task...") 

439 try: 

440 self.session.xenapi.task.cancel(task) 

441 cancelTask = False 

442 Util.log("Task cancelled") 

443 except: 

444 pass 

445 return 

446 time.sleep(1) 

447 finally: 

448 if cancelTask: 

449 self.session.xenapi.task.cancel(task) 

450 self.session.xenapi.task.destroy(task) 

451 Util.log("Asynch srUpdate still running, but timeout exceeded.") 

452 

453 def update_task(self): 

454 self.session.xenapi.task.set_other_config( 

455 self.task, 

456 { 

457 "applies_to": self._srRef 

458 }) 

459 total = self.task_progress['coalescable'] + self.task_progress['done'] 

460 if (total > 0): 

461 self.session.xenapi.task.set_progress( 

462 self.task, float(self.task_progress['done']) / total) 

463 

464 def create_task(self, label, description): 

465 self.task = self.session.xenapi.task.create(label, description) 

466 self.update_task() 

467 

468 def update_task_progress(self, key, value): 

469 self.task_progress[key] = value 

470 if self.task: 

471 self.update_task() 

472 

473 def set_task_status(self, status): 

474 if self.task: 

475 self.session.xenapi.task.set_status(self.task, status) 

476 

477 

478################################################################################ 

479# 

480# VDI 

481# 

482class VDI(object): 

483 """Object representing a VDI of a VHD-based SR""" 

484 

485 POLL_INTERVAL = 1 

486 POLL_TIMEOUT = 30 

487 DEVICE_MAJOR = 202 

488 DRIVER_NAME_VHD = "vhd" 

489 

490 # config keys & values 

491 DB_VHD_PARENT = "vhd-parent" 

492 DB_VDI_TYPE = "vdi_type" 

493 DB_VHD_BLOCKS = "vhd-blocks" 

494 DB_VDI_PAUSED = "paused" 

495 DB_VDI_RELINKING = "relinking" 

496 DB_VDI_ACTIVATING = "activating" 

497 DB_GC = "gc" 

498 DB_COALESCE = "coalesce" 

499 DB_LEAFCLSC = "leaf-coalesce" # config key 

500 LEAFCLSC_DISABLED = "false" # set by user; means do not leaf-coalesce 

501 LEAFCLSC_FORCE = "force" # set by user; means skip snap-coalesce 

502 LEAFCLSC_OFFLINE = "offline" # set here for informational purposes: means 

503 # no space to snap-coalesce or unable to keep 

504 # up with VDI. This is not used by the SM, it 

505 # might be used by external components. 

506 DB_ONBOOT = "on-boot" 

507 ONBOOT_RESET = "reset" 

508 DB_ALLOW_CACHING = "allow_caching" 

509 

510 CONFIG_TYPE = { 

511 DB_VHD_PARENT: XAPI.CONFIG_SM, 

512 DB_VDI_TYPE: XAPI.CONFIG_SM, 

513 DB_VHD_BLOCKS: XAPI.CONFIG_SM, 

514 DB_VDI_PAUSED: XAPI.CONFIG_SM, 

515 DB_VDI_RELINKING: XAPI.CONFIG_SM, 

516 DB_VDI_ACTIVATING: XAPI.CONFIG_SM, 

517 DB_GC: XAPI.CONFIG_OTHER, 

518 DB_COALESCE: XAPI.CONFIG_OTHER, 

519 DB_LEAFCLSC: XAPI.CONFIG_OTHER, 

520 DB_ONBOOT: XAPI.CONFIG_ON_BOOT, 

521 DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING, 

522 } 
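# Illustrative note: this map tells getConfig()/setConfig() (via
# XAPI.getConfigVDI and friends) which XAPI field to read or write. For
# example, CONFIG_TYPE[DB_LEAFCLSC] is XAPI.CONFIG_OTHER, so the
# "leaf-coalesce" key lives in the VDI's other-config map, whereas
# DB_VHD_PARENT and DB_VHD_BLOCKS are kept in sm-config.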

523 

524 LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024 # bytes 

525 LIVE_LEAF_COALESCE_TIMEOUT = 10 # seconds 

526 TIMEOUT_SAFETY_MARGIN = 0.5 # extra margin when calculating 

527 # feasibility of leaf coalesce 

528 

529 JRN_RELINK = "relink" # journal entry type for relinking children 

530 JRN_COALESCE = "coalesce" # to communicate which VDI is being coalesced 

531 JRN_LEAF = "leaf" # used in coalesce-leaf 

532 

533 STR_TREE_INDENT = 4 

534 

535 def __init__(self, sr, uuid, raw): 

536 self.sr = sr 

537 self.scanError = True 

538 self.uuid = uuid 

539 self.raw = raw 

540 self.fileName = "" 

541 self.parentUuid = "" 

542 self.sizeVirt = -1 

543 self._sizeVHD = -1 

544 self.hidden = False 

545 self.parent = None 

546 self.children = [] 

547 self._vdiRef = None 

548 self._clearRef() 

549 

550 @staticmethod 

551 def extractUuid(path): 

552 raise NotImplementedError("Implement in sub class") 

553 

554 def load(self): 

555 """Load VDI info""" 

556 pass # abstract 

557 

558 def getDriverName(self): 

559 return self.DRIVER_NAME_VHD 

560 

561 def getRef(self): 

562 if self._vdiRef is None: 

563 self._vdiRef = self.sr.xapi.getRefVDI(self) 

564 return self._vdiRef 

565 

566 def getConfig(self, key, default=None): 

567 config = self.sr.xapi.getConfigVDI(self, key) 

568 if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING: 

569 val = config 

570 else: 

571 val = config.get(key) 

572 if val: 

573 return val 

574 return default 

575 

576 def setConfig(self, key, val): 

577 self.sr.xapi.removeFromConfigVDI(self, key) 

578 self.sr.xapi.addToConfigVDI(self, key, val) 

579 Util.log("Set %s = %s for %s" % (key, val, self)) 

580 

581 def delConfig(self, key): 

582 self.sr.xapi.removeFromConfigVDI(self, key) 

583 Util.log("Removed %s from %s" % (key, self)) 

584 

585 def ensureUnpaused(self): 

586 if self.getConfig(self.DB_VDI_PAUSED) == "true": 

587 Util.log("Unpausing VDI %s" % self) 

588 self.unpause() 

589 

590 def pause(self, failfast=False): 

591 if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid, 

592 self.uuid, failfast): 

593 raise util.SMException("Failed to pause VDI %s" % self) 

594 

595 def _report_tapdisk_unpause_error(self): 

596 try: 

597 xapi = self.sr.xapi.session.xenapi 

598 sr_ref = xapi.SR.get_by_uuid(self.sr.uuid) 

599 msg_name = "failed to unpause tapdisk" 

600 msg_body = "Failed to unpause tapdisk for VDI %s, " \ 

601 "VMs using this tapdisk have lost access " \ 

602 "to the corresponding disk(s)" % self.uuid 

603 xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body) 

604 except Exception as e: 

605 util.SMlog("failed to generate message: %s" % e) 

606 

607 def unpause(self): 

608 if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid, 

609 self.uuid): 

610 self._report_tapdisk_unpause_error() 

611 raise util.SMException("Failed to unpause VDI %s" % self) 

612 

613 def refresh(self, ignoreNonexistent=True): 

614 """Pause-unpause in one step""" 

615 self.sr.lock() 

616 try: 

617 try: 

618 if not blktap2.VDI.tap_refresh(self.sr.xapi.session, 

619 self.sr.uuid, self.uuid): 

620 self._report_tapdisk_unpause_error() 

621 raise util.SMException("Failed to refresh %s" % self) 

622 except XenAPI.Failure as e: 

623 if util.isInvalidVDI(e) and ignoreNonexistent: 

624 Util.log("VDI %s not found, ignoring" % self) 

625 return 

626 raise 

627 finally: 

628 self.sr.unlock() 

629 

630 def isSnapshot(self): 

631 return self.sr.xapi.isSnapshot(self) 

632 

633 def isAttachedRW(self): 

634 return util.is_attached_rw( 

635 self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef())) 

636 

637 def getVHDBlocks(self): 

638 val = self.updateBlockInfo() 

639 bitmap = zlib.decompress(base64.b64decode(val)) 

640 return bitmap 
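# Note on the encoding round trip: updateBlockInfo() stores the bitmap
# returned by _queryVHDBlocks() base64-encoded under the DB_VHD_BLOCKS
# sm-config key, and getVHDBlocks() refreshes that value and decodes it. The
# zlib.decompress() step assumes the underlying block-bitmap helper returns
# zlib-compressed data; the decoded result is the raw VHD block allocation
# bitmap that Util.countBits() consumes when estimating coalesced sizes.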

641 

642 def isCoalesceable(self): 

643 """A VDI is coalesceable if it has no siblings and is not a leaf""" 

644 return not self.scanError and \ 

645 self.parent and \ 

646 len(self.parent.children) == 1 and \ 

647 self.hidden and \ 

648 len(self.children) > 0 

649 

650 def isLeafCoalesceable(self): 

651 """A VDI is leaf-coalesceable if it has no siblings and is a leaf""" 

652 return not self.scanError and \ 

653 self.parent and \ 

654 len(self.parent.children) == 1 and \ 

655 not self.hidden and \ 

656 len(self.children) == 0 
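# Illustrative example of the two predicates above, for a hypothetical chain
# where P is a hidden base copy, C is its only (hidden) child and L is C's
# only child, a visible leaf:
#
#   P <- C <- L
#
# C is coalesceable (sibling-free, hidden, has children), while L is
# leaf-coalesceable (sibling-free, visible, has no children).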

657 

658 def canLiveCoalesce(self, speed): 

659 """Can we stop-and-leaf-coalesce this VDI? The VDI must be 

660 isLeafCoalesceable() already""" 

661 feasibleSize = False 

662 allowedDownTime = \ 

663 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT 

664 if speed: 

665 feasibleSize = \ 

666 self.getSizeVHD() // speed < allowedDownTime 

667 else: 

668 feasibleSize = \ 

669 self.getSizeVHD() < self.LIVE_LEAF_COALESCE_MAX_SIZE 

670 

671 return (feasibleSize or 

672 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE) 
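# Worked example (figures are illustrative): with the constants above, the
# allowed downtime is TIMEOUT_SAFETY_MARGIN * LIVE_LEAF_COALESCE_TIMEOUT =
# 0.5 * 10 = 5 seconds. Given a measured coalesce speed of, say, 100 MiB/s,
# a leaf of up to ~500 MiB passes the size check; without speed data the
# fallback is the flat LIVE_LEAF_COALESCE_MAX_SIZE cap of 20 MiB. In either
# case the LEAFCLSC_FORCE override still permits the coalesce.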

673 

674 def getAllPrunable(self): 

675 if len(self.children) == 0: # base case 

676 # it is possible to have a hidden leaf that was recently coalesced 

677 # onto its parent, its children already relinked but not yet 

678 # reloaded - in which case it may not be garbage collected yet: 

679 # some tapdisks could still be using the file. 

680 if self.sr.journaler.get(self.JRN_RELINK, self.uuid): 

681 return [] 

682 if not self.scanError and self.hidden: 

683 return [self] 

684 return [] 

685 

686 thisPrunable = True 

687 vdiList = [] 

688 for child in self.children: 

689 childList = child.getAllPrunable() 

690 vdiList.extend(childList) 

691 if child not in childList: 

692 thisPrunable = False 

693 

694 # We can destroy the current VDI if all of its children are hidden, BUT the 

695 # current VDI must be hidden too for that to be allowed! 

696 # Example in this case (after a failed live leaf coalesce): 

697 # 

698 # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees): 

699 # SMGC: [32436] b5458d61(1.000G/4.127M) 

700 # SMGC: [32436] *OLD_b545(1.000G/4.129M) 

701 # 

702 # OLD_b545 is hidden and must be removed, but b5458d61 must not be. 

703 # Normally we are not in this function when the delete action is 

704 # executed but in `_liveLeafCoalesce`. 

705 

706 if not self.scanError and not self.hidden and thisPrunable: 

707 vdiList.append(self) 

708 return vdiList 

709 

710 def getSizeVHD(self): 

711 return self._sizeVHD 

712 

713 def getTreeRoot(self): 

714 "Get the root of the tree that self belongs to" 

715 root = self 

716 while root.parent: 

717 root = root.parent 

718 return root 

719 

720 def getTreeHeight(self): 

721 "Get the height of the subtree rooted at self" 

722 if len(self.children) == 0: 

723 return 1 

724 

725 maxChildHeight = 0 

726 for child in self.children: 

727 childHeight = child.getTreeHeight() 

728 if childHeight > maxChildHeight: 

729 maxChildHeight = childHeight 

730 

731 return maxChildHeight + 1 

732 

733 def getAllLeaves(self): 

734 "Get all leaf nodes in the subtree rooted at self" 

735 if len(self.children) == 0: 

736 return [self] 

737 

738 leaves = [] 

739 for child in self.children: 

740 leaves.extend(child.getAllLeaves()) 

741 return leaves 

742 

743 def updateBlockInfo(self): 

744 val = base64.b64encode(self._queryVHDBlocks()).decode() 

745 self.setConfig(VDI.DB_VHD_BLOCKS, val) 

746 return val 

747 

748 def rename(self, uuid): 

749 "Rename the VDI file" 

750 assert(not self.sr.vdis.get(uuid)) 

751 self._clearRef() 

752 oldUuid = self.uuid 

753 self.uuid = uuid 

754 self.children = [] 

755 # updating the children themselves is the responsibility of the caller 

756 del self.sr.vdis[oldUuid] 

757 self.sr.vdis[self.uuid] = self 

758 

759 def delete(self): 

760 "Physically delete the VDI" 

761 lock.Lock.cleanup(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) 

762 lock.Lock.cleanupAll(self.uuid) 

763 self._clear() 

764 

765 def getParent(self): 

766 return vhdutil.getParent(self.path, lambda x: x.strip()) 

767 

768 def repair(self, parent): 

769 vhdutil.repair(parent) 

770 

771 def __str__(self): 

772 strHidden = "" 

773 if self.hidden: 

774 strHidden = "*" 

775 strSizeVirt = "?" 

776 if self.sizeVirt > 0: 

777 strSizeVirt = Util.num2str(self.sizeVirt) 

778 strSizeVHD = "?" 

779 if self._sizeVHD > 0: 

780 strSizeVHD = "/%s" % Util.num2str(self._sizeVHD) 

781 strType = "" 

782 if self.raw: 

783 strType = "[RAW]" 

784 strSizeVHD = "" 

785 

786 return "%s%s(%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt, 

787 strSizeVHD, strType) 

788 

789 def validate(self, fast=False): 

790 if not vhdutil.check(self.path, fast=fast): 

791 raise util.SMException("VHD %s corrupted" % self) 

792 

793 def _clear(self): 

794 self.uuid = "" 

795 self.path = "" 

796 self.parentUuid = "" 

797 self.parent = None 

798 self._clearRef() 

799 

800 def _clearRef(self): 

801 self._vdiRef = None 

802 

803 def _doCoalesce(self): 

804 """Coalesce self onto parent. Only perform the actual coalescing of 

805 VHD, but not the subsequent relinking. We'll do that as the next step, 

806 after reloading the entire SR in case things have changed while we 

807 were coalescing""" 

808 self.validate() 

809 self.parent.validate(True) 

810 self.parent._increaseSizeVirt(self.sizeVirt) 

811 self.sr._updateSlavesOnResize(self.parent) 

812 self._coalesceVHD(0) 

813 self.parent.validate(True) 

814 #self._verifyContents(0) 

815 self.parent.updateBlockInfo() 

816 

817 def _verifyContents(self, timeOut): 

818 Util.log(" Coalesce verification on %s" % self) 

819 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

820 Util.runAbortable(lambda: self._runTapdiskDiff(), True, 

821 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

822 Util.log(" Coalesce verification succeeded") 

823 

824 def _runTapdiskDiff(self): 

825 cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \ 

826 (self.getDriverName(), self.path, \ 

827 self.parent.getDriverName(), self.parent.path) 

828 Util.doexec(cmd, 0) 

829 return True 

830 

831 def _reportCoalesceError(vdi, ce): 

832 """Reports a coalesce error to XenCenter. 

833 

834 vdi: the VDI object on which the coalesce error occurred 

835 ce: the CommandException that was raised""" 

836 

837 msg_name = os.strerror(ce.code) 

838 if ce.code == errno.ENOSPC: 

839 # TODO We could add more information here, e.g. exactly how much 

840 # space is required for the particular coalesce, as well as actions 

841 # to be taken by the user and consequences of not taking these 

842 # actions. 

843 msg_body = 'Ran out of space while coalescing.' 

844 elif ce.code == errno.EIO: 

845 msg_body = 'I/O error while coalescing.' 

846 else: 

847 msg_body = '' 

848 util.SMlog('Coalesce failed on SR %s: %s (%s)' 

849 % (vdi.sr.uuid, msg_name, msg_body)) 

850 

851 # Create a XenCenter message, but don't spam. 

852 xapi = vdi.sr.xapi.session.xenapi 

853 sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid) 

854 oth_cfg = xapi.SR.get_other_config(sr_ref) 

855 if COALESCE_ERR_RATE_TAG in oth_cfg: 

856 coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG]) 

857 else: 

858 coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE 

859 

860 xcmsg = False 

861 if coalesce_err_rate == 0: 

862 xcmsg = True 

863 elif coalesce_err_rate > 0: 

864 now = datetime.datetime.now() 

865 sm_cfg = xapi.SR.get_sm_config(sr_ref) 

866 if COALESCE_LAST_ERR_TAG in sm_cfg: 

867 # seconds per message (minimum distance in time between two 

868 # messages in seconds) 

869 spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60) 

870 last = datetime.datetime.fromtimestamp( 

871 float(sm_cfg[COALESCE_LAST_ERR_TAG])) 

872 if now - last >= spm: 

873 xapi.SR.remove_from_sm_config(sr_ref, 

874 COALESCE_LAST_ERR_TAG) 

875 xcmsg = True 

876 else: 

877 xcmsg = True 

878 if xcmsg: 

879 xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG, 

880 str(now.strftime('%s'))) 

881 if xcmsg: 

882 xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body) 

883 _reportCoalesceError = staticmethod(_reportCoalesceError) 

884 

885 def coalesce(self): 

886 vhdutil.coalesce(self.path) 

887 

888 def _doCoalesceVHD(vdi): 

889 try: 

890 startTime = time.time() 

891 vhdSize = vdi.getSizeVHD() 

892 vdi.coalesce() 

893 endTime = time.time() 

894 vdi.sr.recordStorageSpeed(startTime, endTime, vhdSize) 

895 except util.CommandException as ce: 

896 # We use try/except for the following piece of code because it runs 

897 # in a separate process context and errors will not be caught and 

898 # reported by anyone. 

899 try: 

900 # Report coalesce errors back to user via XC 

901 VDI._reportCoalesceError(vdi, ce) 

902 except Exception as e: 

903 util.SMlog('failed to create XenCenter message: %s' % e) 

904 raise ce 

905 except: 

906 raise 

907 _doCoalesceVHD = staticmethod(_doCoalesceVHD) 

908 

909 def _vdi_is_raw(self, vdi_path): 

910 """ 

911 Given path to vdi determine if it is raw 

912 """ 

913 uuid = self.extractUuid(vdi_path) 

914 return self.sr.vdis[uuid].raw 

915 

916 def _coalesceVHD(self, timeOut): 

917 Util.log(" Running VHD coalesce on %s" % self) 

918 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

919 try: 

920 util.fistpoint.activate_custom_fn( 

921 "cleanup_coalesceVHD_inject_failure", 

922 util.inject_failure) 

923 Util.runAbortable(lambda: VDI._doCoalesceVHD(self), None, 

924 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

925 except: 

926 # An exception at this phase could indicate a failure in vhd coalesce 

927 # or a kill of vhd coalesce by runAbortable due to timeOut 

928 # Try a repair and reraise the exception 

929 parent = "" 

930 try: 

931 parent = self.getParent() 

932 if not self._vdi_is_raw(parent): 

933 # Repair error is logged and ignored. Error reraised later 

934 util.SMlog('Coalesce failed on %s, attempting repair on ' \ 

935 'parent %s' % (self.uuid, parent)) 

936 self.repair(parent) 

937 except Exception as e: 

938 util.SMlog('(error ignored) Failed to repair parent %s ' \ 

939 'after failed coalesce on %s, err: %s' % 

940 (parent, self.path, e)) 

941 raise 

942 

943 util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid) 

944 

945 def _relinkSkip(self): 

946 """Relink children of this VDI to point to the parent of this VDI""" 

947 abortFlag = IPCFlag(self.sr.uuid) 

948 for child in self.children: 

949 if abortFlag.test(FLAG_TYPE_ABORT): 

950 raise AbortException("Aborting due to signal") 

951 Util.log(" Relinking %s from %s to %s" % \ 

952 (child, self, self.parent)) 

953 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid) 

954 child._setParent(self.parent) 

955 self.children = [] 

956 

957 def _reloadChildren(self, vdiSkip): 

958 """Pause & unpause all VDIs in the subtree to cause blktap to reload 

959 the VHD metadata for this file in any online VDI""" 

960 abortFlag = IPCFlag(self.sr.uuid) 

961 for child in self.children: 

962 if child == vdiSkip: 

963 continue 

964 if abortFlag.test(FLAG_TYPE_ABORT): 

965 raise AbortException("Aborting due to signal") 

966 Util.log(" Reloading VDI %s" % child) 

967 child._reload() 

968 

969 def _reload(self): 

970 """Pause & unpause to cause blktap to reload the VHD metadata""" 

971 for child in self.children: 

972 child._reload() 

973 

974 # only leaves can be attached 

975 if len(self.children) == 0: 

976 try: 

977 self.delConfig(VDI.DB_VDI_RELINKING) 

978 except XenAPI.Failure as e: 

979 if not util.isInvalidVDI(e): 

980 raise 

981 self.refresh() 

982 

983 def _tagChildrenForRelink(self): 

984 if len(self.children) == 0: 

985 retries = 0 

986 try: 

987 while retries < 15: 

988 retries += 1 

989 if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None: 

990 Util.log("VDI %s is activating, wait to relink" % 

991 self.uuid) 

992 else: 

993 self.setConfig(VDI.DB_VDI_RELINKING, "True") 

994 

995 if self.getConfig(VDI.DB_VDI_ACTIVATING): 

996 self.delConfig(VDI.DB_VDI_RELINKING) 

997 Util.log("VDI %s started activating while tagging" % 

998 self.uuid) 

999 else: 

1000 return 

1001 time.sleep(2) 

1002 

1003 raise util.SMException("Failed to tag vdi %s for relink" % self) 

1004 except XenAPI.Failure as e: 

1005 if not util.isInvalidVDI(e): 

1006 raise 

1007 

1008 for child in self.children: 

1009 child._tagChildrenForRelink() 

1010 

1011 def _loadInfoParent(self): 

1012 ret = vhdutil.getParent(self.path, lvhdutil.extractUuid) 

1013 if ret: 

1014 self.parentUuid = ret 

1015 

1016 def _setParent(self, parent): 

1017 vhdutil.setParent(self.path, parent.path, False) 

1018 self.parent = parent 

1019 self.parentUuid = parent.uuid 

1020 parent.children.append(self) 

1021 try: 

1022 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1023 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1024 (self.uuid, self.parentUuid)) 

1025 except: 

1026 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1027 (self.uuid, self.parentUuid)) 

1028 

1029 def _loadInfoHidden(self): 

1030 hidden = vhdutil.getHidden(self.path) 

1031 self.hidden = (hidden != 0) 

1032 

1033 def _setHidden(self, hidden=True): 

1034 vhdutil.setHidden(self.path, hidden) 

1035 self.hidden = hidden 

1036 

1037 def _increaseSizeVirt(self, size, atomic=True): 

1038 """ensure the virtual size of 'self' is at least 'size'. Note that 

1039 resizing a VHD must always be offline and atomically: the file must 

1040 not be open by anyone and no concurrent operations may take place. 

1041 Thus we use the Agent API call for performing paused atomic 

1042 operations. If the caller is already in the atomic context, it must 

1043 call with atomic = False""" 

1044 if self.sizeVirt >= size: 

1045 return 

1046 Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \ 

1047 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1048 

1049 msize = vhdutil.getMaxResizeSize(self.path) * 1024 * 1024 

1050 if (size <= msize): 

1051 vhdutil.setSizeVirtFast(self.path, size) 

1052 else: 

1053 if atomic: 

1054 vdiList = self._getAllSubtree() 

1055 self.sr.lock() 

1056 try: 

1057 self.sr.pauseVDIs(vdiList) 

1058 try: 

1059 self._setSizeVirt(size) 

1060 finally: 

1061 self.sr.unpauseVDIs(vdiList) 

1062 finally: 

1063 self.sr.unlock() 

1064 else: 

1065 self._setSizeVirt(size) 

1066 

1067 self.sizeVirt = vhdutil.getSizeVirt(self.path) 

1068 

1069 def _setSizeVirt(self, size): 

1070 """WARNING: do not call this method directly unless all VDIs in the 

1071 subtree are guaranteed to be unplugged (and remain so for the duration 

1072 of the operation): this operation is only safe for offline VHDs""" 

1073 jFile = os.path.join(self.sr.path, self.uuid) 

1074 vhdutil.setSizeVirt(self.path, size, jFile) 

1075 

1076 def _queryVHDBlocks(self): 

1077 return vhdutil.getBlockBitmap(self.path) 

1078 

1079 def _getCoalescedSizeData(self): 

1080 """Get the data size of the resulting VHD if we coalesce self onto 

1081 parent. We calculate the actual size by using the VHD block allocation 

1082 information (as opposed to just adding up the two VHD sizes to get an 

1083 upper bound)""" 

1084 # make sure we don't use stale BAT info from vdi_rec since the child 

1085 # was writable all this time 

1086 self.delConfig(VDI.DB_VHD_BLOCKS) 

1087 blocksChild = self.getVHDBlocks() 

1088 blocksParent = self.parent.getVHDBlocks() 

1089 numBlocks = Util.countBits(blocksChild, blocksParent) 

1090 Util.log("Num combined blocks = %d" % numBlocks) 

1091 sizeData = numBlocks * vhdutil.VHD_BLOCK_SIZE 

1092 assert(sizeData <= self.sizeVirt) 

1093 return sizeData 

1094 

1095 def _calcExtraSpaceForCoalescing(self): 

1096 sizeData = self._getCoalescedSizeData() 

1097 sizeCoalesced = sizeData + vhdutil.calcOverheadBitmap(sizeData) + \ 

1098 vhdutil.calcOverheadEmpty(self.sizeVirt) 

1099 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1100 return sizeCoalesced - self.parent.getSizeVHD() 
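# Worked example (sizes are illustrative and assume the usual 2 MiB VHD block
# size exposed as vhdutil.VHD_BLOCK_SIZE): if the ORed bitmaps report 1000
# allocated blocks, sizeData comes to roughly 2000 MiB; the bitmap and
# empty-VHD overheads from vhdutil are added on top, and the parent's current
# physical size is subtracted, so a parent that already holds most of that
# data needs little extra room to absorb the coalesce.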

1101 

1102 def _calcExtraSpaceForLeafCoalescing(self): 

1103 """How much extra space in the SR will be required to 

1104 [live-]leaf-coalesce this VDI""" 

1105 # the space requirements are the same as for inline coalesce 

1106 return self._calcExtraSpaceForCoalescing() 

1107 

1108 def _calcExtraSpaceForSnapshotCoalescing(self): 

1109 """How much extra space in the SR will be required to 

1110 snapshot-coalesce this VDI""" 

1111 return self._calcExtraSpaceForCoalescing() + \ 

1112 vhdutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf 

1113 

1114 def _getAllSubtree(self): 

1115 """Get self and all VDIs in the subtree of self as a flat list""" 

1116 vdiList = [self] 

1117 for child in self.children: 

1118 vdiList.extend(child._getAllSubtree()) 

1119 return vdiList 

1120 

1121 

1122class FileVDI(VDI): 

1123 """Object representing a VDI in a file-based SR (EXT or NFS)""" 

1124 

1125 @staticmethod 

1126 def extractUuid(path): 

1127 path = os.path.basename(path.strip()) 

1128 if not (path.endswith(vhdutil.FILE_EXTN_VHD) or \ 

1129 path.endswith(vhdutil.FILE_EXTN_RAW)): 

1130 return None 

1131 uuid = path.replace(vhdutil.FILE_EXTN_VHD, "").replace( \ 

1132 vhdutil.FILE_EXTN_RAW, "") 

1133 # TODO: validate UUID format 

1134 return uuid 
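# Doctest-style illustration (assuming the usual ".vhd"/".raw" values of
# vhdutil.FILE_EXTN_VHD/FILE_EXTN_RAW; the path and UUID are hypothetical):
#
#   >>> FileVDI.extractUuid("/var/run/sr-mount/sr1/1234abcd-0000-0000-0000-000000000000.vhd")
#   '1234abcd-0000-0000-0000-000000000000'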

1135 

1136 def __init__(self, sr, uuid, raw): 

1137 VDI.__init__(self, sr, uuid, raw) 

1138 if self.raw: 

1139 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_RAW) 

1140 else: 

1141 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) 

1142 

1143 def load(self, info=None): 

1144 if not info: 

1145 if not util.pathexists(self.path): 

1146 raise util.SMException("%s not found" % self.path) 

1147 try: 

1148 info = vhdutil.getVHDInfo(self.path, self.extractUuid) 

1149 except util.SMException: 

1150 Util.log(" [VDI %s: failed to read VHD metadata]" % self.uuid) 

1151 return 

1152 self.parent = None 

1153 self.children = [] 

1154 self.parentUuid = info.parentUuid 

1155 self.sizeVirt = info.sizeVirt 

1156 self._sizeVHD = info.sizePhys 

1157 self.hidden = info.hidden 

1158 self.scanError = False 

1159 self.path = os.path.join(self.sr.path, "%s%s" % \ 

1160 (self.uuid, vhdutil.FILE_EXTN_VHD)) 

1161 

1162 def rename(self, uuid): 

1163 oldPath = self.path 

1164 VDI.rename(self, uuid) 

1165 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) 

1166 self.path = os.path.join(self.sr.path, self.fileName) 

1167 assert(not util.pathexists(self.path)) 

1168 Util.log("Renaming %s -> %s" % (oldPath, self.path)) 

1169 os.rename(oldPath, self.path) 

1170 

1171 def delete(self): 

1172 if len(self.children) > 0: 

1173 raise util.SMException("VDI %s has children, can't delete" % \ 

1174 self.uuid) 

1175 try: 

1176 self.sr.lock() 

1177 try: 

1178 os.unlink(self.path) 

1179 self.sr.forgetVDI(self.uuid) 

1180 finally: 

1181 self.sr.unlock() 

1182 except OSError: 

1183 raise util.SMException("os.unlink(%s) failed" % self.path) 

1184 VDI.delete(self) 

1185 

1186 

1187class LVHDVDI(VDI): 

1188 """Object representing a VDI in an LVHD SR""" 

1189 

1190 JRN_ZERO = "zero" # journal entry type for zeroing out end of parent 

1191 DRIVER_NAME_RAW = "aio" 

1192 

1193 def load(self, vdiInfo): 

1194 self.parent = None 

1195 self.children = [] 

1196 self._sizeVHD = -1 

1197 self.scanError = vdiInfo.scanError 

1198 self.sizeLV = vdiInfo.sizeLV 

1199 self.sizeVirt = vdiInfo.sizeVirt 

1200 self.fileName = vdiInfo.lvName 

1201 self.lvActive = vdiInfo.lvActive 

1202 self.lvOpen = vdiInfo.lvOpen 

1203 self.lvReadonly = vdiInfo.lvReadonly 

1204 self.hidden = vdiInfo.hidden 

1205 self.parentUuid = vdiInfo.parentUuid 

1206 self.path = os.path.join(self.sr.path, self.fileName) 

1207 

1208 @staticmethod 

1209 def extractUuid(path): 

1210 return lvhdutil.extractUuid(path) 

1211 

1212 def getDriverName(self): 

1213 if self.raw: 

1214 return self.DRIVER_NAME_RAW 

1215 return self.DRIVER_NAME_VHD 

1216 

1217 def inflate(self, size): 

1218 """inflate the LV containing the VHD to 'size'""" 

1219 if self.raw: 

1220 return 

1221 self._activate() 

1222 self.sr.lock() 

1223 try: 

1224 lvhdutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, size) 

1225 util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid) 

1226 finally: 

1227 self.sr.unlock() 

1228 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1229 self._sizeVHD = -1 

1230 

1231 def deflate(self): 

1232 """deflate the LV containing the VHD to minimum""" 

1233 if self.raw: 

1234 return 

1235 self._activate() 

1236 self.sr.lock() 

1237 try: 

1238 lvhdutil.deflate(self.sr.lvmCache, self.fileName, self.getSizeVHD()) 

1239 finally: 

1240 self.sr.unlock() 

1241 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1242 self._sizeVHD = -1 

1243 

1244 def inflateFully(self): 

1245 self.inflate(lvhdutil.calcSizeVHDLV(self.sizeVirt)) 

1246 

1247 def inflateParentForCoalesce(self): 

1248 """Inflate the parent only as much as needed for the purposes of 

1249 coalescing""" 

1250 if self.parent.raw: 

1251 return 

1252 inc = self._calcExtraSpaceForCoalescing() 

1253 if inc > 0: 

1254 util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid) 

1255 self.parent.inflate(self.parent.sizeLV + inc) 

1256 

1257 def updateBlockInfo(self): 

1258 if not self.raw: 

1259 return VDI.updateBlockInfo(self) 

1260 

1261 def rename(self, uuid): 

1262 oldUuid = self.uuid 

1263 oldLVName = self.fileName 

1264 VDI.rename(self, uuid) 

1265 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + self.uuid 

1266 if self.raw: 

1267 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + self.uuid 

1268 self.path = os.path.join(self.sr.path, self.fileName) 

1269 assert(not self.sr.lvmCache.checkLV(self.fileName)) 

1270 

1271 self.sr.lvmCache.rename(oldLVName, self.fileName) 

1272 if self.sr.lvActivator.get(oldUuid, False): 

1273 self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False) 

1274 

1275 ns = lvhdutil.NS_PREFIX_LVM + self.sr.uuid 

1276 (cnt, bcnt) = RefCounter.check(oldUuid, ns) 

1277 RefCounter.set(self.uuid, cnt, bcnt, ns) 

1278 RefCounter.reset(oldUuid, ns) 

1279 

1280 def delete(self): 

1281 if len(self.children) > 0: 

1282 raise util.SMException("VDI %s has children, can't delete" % \ 

1283 self.uuid) 

1284 self.sr.lock() 

1285 try: 

1286 self.sr.lvmCache.remove(self.fileName) 

1287 self.sr.forgetVDI(self.uuid) 

1288 finally: 

1289 self.sr.unlock() 

1290 RefCounter.reset(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) 

1291 VDI.delete(self) 

1292 

1293 def getSizeVHD(self): 

1294 if self._sizeVHD == -1: 

1295 self._loadInfoSizeVHD() 

1296 return self._sizeVHD 

1297 

1298 def _loadInfoSizeVHD(self): 

1299 """Get the physical utilization of the VHD file. We do it individually 

1300 (and not using the VHD batch scanner) as an optimization: this info is 

1301 relatively expensive and we need it only for VDIs involved in 

1302 coalescing.""" 

1303 if self.raw: 

1304 return 

1305 self._activate() 

1306 self._sizeVHD = vhdutil.getSizePhys(self.path) 

1307 if self._sizeVHD <= 0: 

1308 raise util.SMException("phys size of %s = %d" % \ 

1309 (self, self._sizeVHD)) 

1310 

1311 def _loadInfoHidden(self): 

1312 if self.raw: 

1313 self.hidden = self.sr.lvmCache.getHidden(self.fileName) 

1314 else: 

1315 VDI._loadInfoHidden(self) 

1316 

1317 def _setHidden(self, hidden=True): 

1318 if self.raw: 

1319 self.sr.lvmCache.setHidden(self.fileName, hidden) 

1320 self.hidden = hidden 

1321 else: 

1322 VDI._setHidden(self, hidden) 

1323 

1324 def __str__(self): 

1325 strType = "VHD" 

1326 if self.raw: 

1327 strType = "RAW" 

1328 strHidden = "" 

1329 if self.hidden: 

1330 strHidden = "*" 

1331 strSizeVHD = "" 

1332 if self._sizeVHD > 0: 

1333 strSizeVHD = Util.num2str(self._sizeVHD) 

1334 strActive = "n" 

1335 if self.lvActive: 

1336 strActive = "a" 

1337 if self.lvOpen: 

1338 strActive += "o" 

1339 return "%s%s[%s](%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType, 

1340 Util.num2str(self.sizeVirt), strSizeVHD, 

1341 Util.num2str(self.sizeLV), strActive) 

1342 

1343 def validate(self, fast=False): 

1344 if not self.raw: 

1345 VDI.validate(self, fast) 

1346 

1347 def _doCoalesce(self): 

1348 """LVHD parents must first be activated, inflated, and made writable""" 

1349 try: 

1350 self._activateChain() 

1351 self.sr.lvmCache.setReadonly(self.parent.fileName, False) 

1352 self.parent.validate() 

1353 self.inflateParentForCoalesce() 

1354 VDI._doCoalesce(self) 

1355 finally: 

1356 self.parent._loadInfoSizeVHD() 

1357 self.parent.deflate() 

1358 self.sr.lvmCache.setReadonly(self.parent.fileName, True) 

1359 

1360 def _setParent(self, parent): 

1361 self._activate() 

1362 if self.lvReadonly: 

1363 self.sr.lvmCache.setReadonly(self.fileName, False) 

1364 

1365 try: 

1366 vhdutil.setParent(self.path, parent.path, parent.raw) 

1367 finally: 

1368 if self.lvReadonly: 

1369 self.sr.lvmCache.setReadonly(self.fileName, True) 

1370 self._deactivate() 

1371 self.parent = parent 

1372 self.parentUuid = parent.uuid 

1373 parent.children.append(self) 

1374 try: 

1375 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1376 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1377 (self.uuid, self.parentUuid)) 

1378 except: 

1379 Util.log("Failed to update the vhd-parent with %s for child %s" % \ 

1380 (self.parentUuid, self.uuid)) 

1381 

1382 def _activate(self): 

1383 self.sr.lvActivator.activate(self.uuid, self.fileName, False) 

1384 

1385 def _activateChain(self): 

1386 vdi = self 

1387 while vdi: 

1388 vdi._activate() 

1389 vdi = vdi.parent 

1390 

1391 def _deactivate(self): 

1392 self.sr.lvActivator.deactivate(self.uuid, False) 

1393 

1394 def _increaseSizeVirt(self, size, atomic=True): 

1395 "ensure the virtual size of 'self' is at least 'size'" 

1396 self._activate() 

1397 if not self.raw: 

1398 VDI._increaseSizeVirt(self, size, atomic) 

1399 return 

1400 

1401 # raw VDI case 

1402 offset = self.sizeLV 

1403 if self.sizeVirt < size: 

1404 oldSize = self.sizeLV 

1405 self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size) 

1406 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV)) 

1407 self.sr.lvmCache.setSize(self.fileName, self.sizeLV) 

1408 offset = oldSize 

1409 unfinishedZero = False 

1410 jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid) 

1411 if jval: 

1412 unfinishedZero = True 

1413 offset = int(jval) 

1414 length = self.sizeLV - offset 

1415 if not length: 

1416 return 

1417 

1418 if unfinishedZero: 

1419 Util.log(" ==> Redoing unfinished zeroing out") 

1420 else: 

1421 self.sr.journaler.create(self.JRN_ZERO, self.uuid, \ 

1422 str(offset)) 

1423 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1424 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1425 func = lambda: util.zeroOut(self.path, offset, length) 

1426 Util.runAbortable(func, True, self.sr.uuid, abortTest, 

1427 VDI.POLL_INTERVAL, 0) 

1428 self.sr.journaler.remove(self.JRN_ZERO, self.uuid) 

1429 

1430 def _setSizeVirt(self, size): 

1431 """WARNING: do not call this method directly unless all VDIs in the 

1432 subtree are guaranteed to be unplugged (and remain so for the duration 

1433 of the operation): this operation is only safe for offline VHDs""" 

1434 self._activate() 

1435 jFile = lvhdutil.createVHDJournalLV(self.sr.lvmCache, self.uuid, 

1436 vhdutil.MAX_VHD_JOURNAL_SIZE) 

1437 try: 

1438 lvhdutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, 

1439 size, jFile) 

1440 finally: 

1441 lvhdutil.deleteVHDJournalLV(self.sr.lvmCache, self.uuid) 

1442 

1443 def _queryVHDBlocks(self): 

1444 self._activate() 

1445 return VDI._queryVHDBlocks(self) 

1446 

1447 def _calcExtraSpaceForCoalescing(self): 

1448 if self.parent.raw: 

1449 return 0 # raw parents are never deflated in the first place 

1450 sizeCoalesced = lvhdutil.calcSizeVHDLV(self._getCoalescedSizeData()) 

1451 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1452 return sizeCoalesced - self.parent.sizeLV 

1453 

1454 def _calcExtraSpaceForLeafCoalescing(self): 

1455 """How much extra space in the SR will be required to 

1456 [live-]leaf-coalesce this VDI""" 

1457 # we can deflate the leaf to minimize the space requirements 

1458 deflateDiff = self.sizeLV - lvhdutil.calcSizeLV(self.getSizeVHD()) 

1459 return self._calcExtraSpaceForCoalescing() - deflateDiff 

1460 

1461 def _calcExtraSpaceForSnapshotCoalescing(self): 

1462 return self._calcExtraSpaceForCoalescing() + \ 

1463 lvhdutil.calcSizeLV(self.getSizeVHD()) 

1464 

1465 

1466class LinstorVDI(VDI): 

1467 """Object representing a VDI in a LINSTOR SR""" 

1468 

1469 MAX_SIZE = 2 * 1024 * 1024 * 1024 * 1024 # Max VHD size. 

1470 

1471 VOLUME_LOCK_TIMEOUT = 30 

1472 

1473 def load(self, info=None): 

1474 self.parentUuid = info.parentUuid 

1475 self.scanError = True 

1476 self.parent = None 

1477 self.children = [] 

1478 

1479 self.fileName = self.sr._linstor.get_volume_name(self.uuid) 

1480 self.path = self.sr._linstor.build_device_path(self.fileName) 

1481 if not util.pathexists(self.path): 

1482 raise util.SMException( 

1483 '{} of {} not found' 

1484 .format(self.fileName, self.uuid) 

1485 ) 

1486 

1487 if not info: 

1488 try: 

1489 info = self.sr._vhdutil.get_vhd_info(self.uuid) 

1490 except util.SMException: 

1491 Util.log( 

1492 ' [VDI {}: failed to read VHD metadata]'.format(self.uuid) 

1493 ) 

1494 return 

1495 

1496 self.parentUuid = info.parentUuid 

1497 self.sizeVirt = info.sizeVirt 

1498 self._sizeVHD = info.sizePhys 

1499 self.hidden = info.hidden 

1500 self.scanError = False 

1501 

1502 def rename(self, uuid): 

1503 Util.log('Renaming {} -> {} (path={})'.format( 

1504 self.uuid, uuid, self.path 

1505 )) 

1506 self.sr._linstor.update_volume_uuid(self.uuid, uuid) 

1507 VDI.rename(self, uuid) 

1508 

1509 def delete(self): 

1510 if len(self.children) > 0: 

1511 raise util.SMException( 

1512 'VDI {} has children, can\'t delete'.format(self.uuid) 

1513 ) 

1514 self.sr.lock() 

1515 try: 

1516 self.sr._linstor.destroy_volume(self.uuid) 

1517 self.sr.forgetVDI(self.uuid) 

1518 finally: 

1519 self.sr.unlock() 

1520 VDI.delete(self) 

1521 

1522 def validate(self, fast=False): 

1523 if not self.sr._vhdutil.check(self.uuid, fast=fast): 

1524 raise util.SMException('VHD {} corrupted'.format(self)) 

1525 

1526 def pause(self, failfast=False): 

1527 self.sr._linstor.ensure_volume_is_not_locked( 

1528 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1529 ) 

1530 return super(LinstorVDI, self).pause(failfast) 

1531 

1532 def coalesce(self): 

1533 self.sr._vhdutil.force_coalesce(self.path) 

1534 

1535 def getParent(self): 

1536 return self.sr._vhdutil.get_parent( 

1537 self.sr._linstor.get_volume_uuid_from_device_path(self.path) 

1538 ) 

1539 

1540 def repair(self, parent_uuid): 

1541 self.sr._vhdutil.force_repair( 

1542 self.sr._linstor.get_device_path(parent_uuid) 

1543 ) 

1544 

1545 def _relinkSkip(self): 

1546 abortFlag = IPCFlag(self.sr.uuid) 

1547 for child in self.children: 

1548 if abortFlag.test(FLAG_TYPE_ABORT): 

1549 raise AbortException('Aborting due to signal') 

1550 Util.log( 

1551 ' Relinking {} from {} to {}'.format( 

1552 child, self, self.parent 

1553 ) 

1554 ) 

1555 

1556 session = child.sr.xapi.session 

1557 sr_uuid = child.sr.uuid 

1558 vdi_uuid = child.uuid 

1559 try: 

1560 self.sr._linstor.ensure_volume_is_not_locked( 

1561 vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1562 ) 

1563 blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid) 

1564 child._setParent(self.parent) 

1565 finally: 

1566 blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) 

1567 self.children = [] 

1568 

1569 def _setParent(self, parent): 

1570 self.sr._vhdutil.force_parent(self.path, parent.path) 

1571 self.parent = parent 

1572 self.parentUuid = parent.uuid 

1573 parent.children.append(self) 

1574 try: 

1575 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1576 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1577 (self.uuid, self.parentUuid)) 

1578 except: 

1579 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1580 (self.uuid, self.parentUuid)) 

1581 

1582 def _setHidden(self, hidden=True): 

1583 HIDDEN_TAG = 'hidden' 

1584 

1585 if self.raw: 

1586 self.sr._linstor.update_volume_metadata(self.uuid, { 

1587 HIDDEN_TAG: hidden 

1588 }) 

1589 self.hidden = hidden 

1590 else: 

1591 VDI._setHidden(self, hidden) 

1592 

1593 def _queryVHDBlocks(self): 

1594 return self.sr._vhdutil.get_block_bitmap(self.uuid) 

1595 

1596################################################################################ 

1597# 

1598# SR 

1599# 

1600class SR(object): 

1601 class LogFilter: 

1602 def __init__(self, sr): 

1603 self.sr = sr 

1604 self.stateLogged = False 

1605 self.prevState = {} 

1606 self.currState = {} 

1607 

1608 def logState(self): 

1609 changes = "" 

1610 self.currState.clear() 

1611 for vdi in self.sr.vdiTrees: 

1612 self.currState[vdi.uuid] = self._getTreeStr(vdi) 

1613 if not self.prevState.get(vdi.uuid) or \ 

1614 self.prevState[vdi.uuid] != self.currState[vdi.uuid]: 

1615 changes += self.currState[vdi.uuid] 

1616 

1617 for uuid in self.prevState: 

1618 if not self.currState.get(uuid): 

1619 changes += "Tree %s gone\n" % uuid 

1620 

1621 result = "SR %s (%d VDIs in %d VHD trees): " % \ 

1622 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees)) 

1623 

1624 if len(changes) > 0: 

1625 if self.stateLogged: 

1626 result += "showing only VHD trees that changed:" 

1627 result += "\n%s" % changes 

1628 else: 

1629 result += "no changes" 

1630 

1631 for line in result.split("\n"): 

1632 Util.log("%s" % line) 

1633 self.prevState.clear() 

1634 for key, val in self.currState.items(): 

1635 self.prevState[key] = val 

1636 self.stateLogged = True 

1637 

1638 def logNewVDI(self, uuid): 

1639 if self.stateLogged: 

1640 Util.log("Found new VDI when scanning: %s" % uuid) 

1641 

1642 def _getTreeStr(self, vdi, indent=8): 

1643 treeStr = "%s%s\n" % (" " * indent, vdi) 

1644 for child in vdi.children: 

1645 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT) 

1646 return treeStr 

1647 

1648 TYPE_FILE = "file" 

1649 TYPE_LVHD = "lvhd" 

1650 TYPE_LINSTOR = "linstor" 

1651 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR] 

1652 

1653 LOCK_RETRY_INTERVAL = 3 

1654 LOCK_RETRY_ATTEMPTS = 20 

1655 LOCK_RETRY_ATTEMPTS_LOCK = 100 

1656 

1657 SCAN_RETRY_ATTEMPTS = 3 

1658 

1659 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM) 

1660 TMP_RENAME_PREFIX = "OLD_" 

1661 

1662 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline" 

1663 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override" 

1664 

1665 def getInstance(uuid, xapiSession, createLock=True, force=False): 

1666 xapi = XAPI(xapiSession, uuid) 

1667 type = normalizeType(xapi.srRecord["type"]) 

1668 if type == SR.TYPE_FILE: 

1669 return FileSR(uuid, xapi, createLock, force) 

1670 elif type == SR.TYPE_LVHD: 

1671 return LVHDSR(uuid, xapi, createLock, force) 

1672 elif type == SR.TYPE_LINSTOR: 

1673 return LinstorSR(uuid, xapi, createLock, force) 

1674 raise util.SMException("SR type %s not recognized" % type) 

1675 getInstance = staticmethod(getInstance) 

1676 

1677 def __init__(self, uuid, xapi, createLock, force): 

1678 self.logFilter = self.LogFilter(self) 

1679 self.uuid = uuid 

1680 self.path = "" 

1681 self.name = "" 

1682 self.vdis = {} 

1683 self.vdiTrees = [] 

1684 self.journaler = None 

1685 self.xapi = xapi 

1686 self._locked = 0 

1687 self._srLock = None 

1688 if createLock: 1688 ↛ 1689 line 1688 didn't jump to line 1689, because the condition on line 1688 was never true

1689 self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, self.uuid) 

1690 else: 

1691 Util.log("Requested no SR locking") 

1692 self.name = self.xapi.srRecord["name_label"] 

1693 self._failedCoalesceTargets = [] 

1694 

1695 if not self.xapi.isPluggedHere(): 1695 ↛ 1696 line 1695 didn't jump to line 1696, because the condition on line 1695 was never true

1696 if force: 

1697 Util.log("SR %s not attached on this host, ignoring" % uuid) 

1698 else: 

1699 raise util.SMException("SR %s not attached on this host" % uuid) 

1700 

1701 if force: 1701 ↛ 1702 line 1701 didn't jump to line 1702, because the condition on line 1701 was never true

1702 Util.log("Not checking if we are Master (SR %s)" % uuid) 

1703 elif not self.xapi.isMaster(): 1703 ↛ 1704 line 1703 didn't jump to line 1704, because the condition on line 1703 was never true

1704 raise util.SMException("This host is NOT master, will not run") 

1705 

1706 def gcEnabled(self, refresh=True): 

1707 if refresh: 

1708 self.xapi.srRecord = \ 

1709 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) 

1710 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": 

1711 Util.log("GC is disabled for this SR, abort") 

1712 return False 

1713 return True 

1714 

1715 def scan(self, force=False): 

1716 """Scan the SR and load VDI info for each VDI. If called repeatedly, 

1717 update VDI objects if they already exist""" 

1718 pass # abstract 

1719 

1720 def scanLocked(self, force=False): 

1721 self.lock() 

1722 try: 

1723 self.scan(force) 

1724 finally: 

1725 self.unlock() 

1726 

1727 def getVDI(self, uuid): 

1728 return self.vdis.get(uuid) 

1729 

1730 def hasWork(self): 

1731 if len(self.findGarbage()) > 0: 

1732 return True 

1733 if self.findCoalesceable(): 

1734 return True 

1735 if self.findLeafCoalesceable(): 

1736 return True 

1737 if self.needUpdateBlockInfo(): 

1738 return True 

1739 return False 

1740 

1741 def findCoalesceable(self): 

1742 """Find a coalesceable VDI. Return a vdi that should be coalesced 

1743 (choosing one among all coalesceable candidates according to some 

1744 criteria) or None if there is no VDI that could be coalesced""" 

1745 

1746 candidates = [] 

1747 

1748 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE) 

1749 if srSwitch == "false": 

1750 Util.log("Coalesce disabled for this SR") 

1751 return candidates 

1752 

1753 # finish any VDI for which a relink journal entry exists first 

1754 journals = self.journaler.getAll(VDI.JRN_RELINK) 

1755 for uuid in journals: 

1756 vdi = self.getVDI(uuid) 

1757 if vdi and vdi not in self._failedCoalesceTargets: 

1758 return vdi 

1759 

1760 for vdi in self.vdis.values(): 

1761 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets: 

1762 candidates.append(vdi) 

1763 Util.log("%s is coalescable" % vdi.uuid) 

1764 

1765 self.xapi.update_task_progress("coalescable", len(candidates)) 

1766 

1767 # pick one in the tallest tree 

1768 treeHeight = dict() 

1769 for c in candidates: 

1770 height = c.getTreeRoot().getTreeHeight() 

1771 if treeHeight.get(height): 

1772 treeHeight[height].append(c) 

1773 else: 

1774 treeHeight[height] = [c] 

1775 

1776 freeSpace = self.getFreeSpace() 

1777 heights = list(treeHeight.keys()) 

1778 heights.sort(reverse=True) 

1779 for h in heights: 

1780 for c in treeHeight[h]: 

1781 spaceNeeded = c._calcExtraSpaceForCoalescing() 

1782 if spaceNeeded <= freeSpace: 

1783 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h)) 

1784 return c 

1785 else: 

1786 Util.log("No space to coalesce %s (free space: %d)" % \ 

1787 (c, freeSpace)) 

1788 return None 

1789 
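# Illustrative note, not part of the original module: findCoalesceable()
# prefers (1) any VDI with an outstanding JRN_RELINK journal entry, so an
# interrupted coalesce is finished first, then (2) candidates in the
# tallest VHD tree, and it only returns one whose
# _calcExtraSpaceForCoalescing() fits in the SR's current free space.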

1790 def getSwitch(self, key): 

1791 return self.xapi.srRecord["other_config"].get(key) 

1792 

1793 def forbiddenBySwitch(self, switch, condition, fail_msg): 

1794 srSwitch = self.getSwitch(switch) 

1795 ret = False 

1796 if srSwitch: 

1797 ret = srSwitch == condition 

1798 

1799 if ret: 

1800 Util.log(fail_msg) 

1801 

1802 return ret 

1803 

1804 def leafCoalesceForbidden(self): 

1805 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

1806 "false", 

1807 "Coalesce disabled for this SR") or 

1808 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

1809 VDI.LEAFCLSC_DISABLED, 

1810 "Leaf-coalesce disabled for this SR")) 

1811 

1812 def findLeafCoalesceable(self): 

1813 """Find leaf-coalesceable VDIs in each VHD tree""" 

1814 

1815 candidates = [] 

1816 if self.leafCoalesceForbidden(): 

1817 return candidates 

1818 

1819 self.gatherLeafCoalesceable(candidates) 

1820 

1821 self.xapi.update_task_progress("coalescable", len(candidates)) 

1822 

1823 freeSpace = self.getFreeSpace() 

1824 for candidate in candidates: 

1825 # check the space constraints to see if leaf-coalesce is actually 

1826 # feasible for this candidate 

1827 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing() 

1828 spaceNeededLive = spaceNeeded 

1829 if spaceNeeded > freeSpace: 

1830 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing() 

1831 if candidate.canLiveCoalesce(self.getStorageSpeed()): 

1832 spaceNeeded = spaceNeededLive 

1833 

1834 if spaceNeeded <= freeSpace: 

1835 Util.log("Leaf-coalesce candidate: %s" % candidate) 

1836 return candidate 

1837 else: 

1838 Util.log("No space to leaf-coalesce %s (free space: %d)" % \ 

1839 (candidate, freeSpace)) 

1840 if spaceNeededLive <= freeSpace: 

1841 Util.log("...but enough space if skip snap-coalesce") 

1842 candidate.setConfig(VDI.DB_LEAFCLSC, 

1843 VDI.LEAFCLSC_OFFLINE) 

1844 

1845 return None 

1846 

1847 def gatherLeafCoalesceable(self, candidates): 

1848 for vdi in self.vdis.values(): 

1849 if not vdi.isLeafCoalesceable(): 

1850 continue 

1851 if vdi in self._failedCoalesceTargets: 

1852 continue 

1853 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET: 

1854 Util.log("Skipping reset-on-boot %s" % vdi) 

1855 continue 

1856 if vdi.getConfig(vdi.DB_ALLOW_CACHING): 

1857 Util.log("Skipping allow_caching=true %s" % vdi) 

1858 continue 

1859 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED: 

1860 Util.log("Leaf-coalesce disabled for %s" % vdi) 

1861 continue 

1862 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or 

1863 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE): 

1864 continue 

1865 candidates.append(vdi) 

1866 

1867 def coalesce(self, vdi, dryRun=False): 

1868 """Coalesce vdi onto parent""" 

1869 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent)) 

1870 if dryRun: 1870 ↛ 1871 line 1870 didn't jump to line 1871, because the condition on line 1870 was never true

1871 return 

1872 

1873 try: 

1874 self._coalesce(vdi) 

1875 except util.SMException as e: 

1876 if isinstance(e, AbortException): 1876 ↛ 1877 line 1876 didn't jump to line 1877, because the condition on line 1876 was never true

1877 self.cleanup() 

1878 raise 

1879 else: 

1880 self._failedCoalesceTargets.append(vdi) 

1881 Util.logException("coalesce") 

1882 Util.log("Coalesce failed, skipping") 

1883 self.cleanup() 

1884 

1885 def coalesceLeaf(self, vdi, dryRun=False): 

1886 """Leaf-coalesce vdi onto parent""" 

1887 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent)) 

1888 if dryRun: 

1889 return 

1890 

1891 try: 

1892 uuid = vdi.uuid 

1893 try: 

1894 # "vdi" object will no longer be valid after this call 

1895 self._coalesceLeaf(vdi) 

1896 finally: 

1897 vdi = self.getVDI(uuid) 

1898 if vdi: 

1899 vdi.delConfig(vdi.DB_LEAFCLSC) 

1900 except AbortException: 

1901 self.cleanup() 

1902 raise 

1903 except (util.SMException, XenAPI.Failure) as e: 

1904 self._failedCoalesceTargets.append(vdi) 

1905 Util.logException("leaf-coalesce") 

1906 Util.log("Leaf-coalesce failed on %s, skipping" % vdi) 

1907 self.cleanup() 

1908 

1909 def garbageCollect(self, dryRun=False): 

1910 vdiList = self.findGarbage() 

1911 Util.log("Found %d VDIs for deletion:" % len(vdiList)) 

1912 for vdi in vdiList: 

1913 Util.log(" %s" % vdi) 

1914 if not dryRun: 

1915 self.deleteVDIs(vdiList) 

1916 self.cleanupJournals(dryRun) 

1917 

1918 def findGarbage(self): 

1919 vdiList = [] 

1920 for vdi in self.vdiTrees: 

1921 vdiList.extend(vdi.getAllPrunable()) 

1922 return vdiList 

1923 

1924 def deleteVDIs(self, vdiList): 

1925 for vdi in vdiList: 

1926 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT): 

1927 raise AbortException("Aborting due to signal") 

1928 Util.log("Deleting unlinked VDI %s" % vdi) 

1929 self.deleteVDI(vdi) 

1930 

1931 def deleteVDI(self, vdi): 

1932 assert(len(vdi.children) == 0) 

1933 del self.vdis[vdi.uuid] 

1934 if vdi.parent: 1934 ↛ 1936 line 1934 didn't jump to line 1936, because the condition on line 1934 was never false

1935 vdi.parent.children.remove(vdi) 

1936 if vdi in self.vdiTrees: 1936 ↛ 1937 line 1936 didn't jump to line 1937, because the condition on line 1936 was never true

1937 self.vdiTrees.remove(vdi) 

1938 vdi.delete() 

1939 

1940 def forgetVDI(self, vdiUuid): 

1941 self.xapi.forgetVDI(self.uuid, vdiUuid) 

1942 

1943 def pauseVDIs(self, vdiList): 

1944 paused = [] 

1945 failed = False 

1946 for vdi in vdiList: 

1947 try: 

1948 vdi.pause() 

1949 paused.append(vdi) 

1950 except: 

1951 Util.logException("pauseVDIs") 

1952 failed = True 

1953 break 

1954 

1955 if failed: 

1956 self.unpauseVDIs(paused) 

1957 raise util.SMException("Failed to pause VDIs") 

1958 

1959 def unpauseVDIs(self, vdiList): 

1960 failed = False 

1961 for vdi in vdiList: 

1962 try: 

1963 vdi.unpause() 

1964 except: 

1965 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

1966 failed = True 

1967 if failed: 

1968 raise util.SMException("Failed to unpause VDIs") 

1969 

1970 def getFreeSpace(self): 

1971 return 0 

1972 

1973 def cleanup(self): 

1974 Util.log("In cleanup") 

1975 return 

1976 

1977 def __str__(self): 

1978 if self.name: 

1979 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

1980 else: 

1981 ret = "%s" % self.uuid 

1982 return ret 

1983 

1984 def lock(self): 

1985 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort 

1986 signal to avoid deadlocking (trying to acquire the SR lock while the 

1987 lock is held by a process that is trying to abort us)""" 

1988 if not self._srLock: 

1989 return 

1990 

1991 if self._locked == 0: 

1992 abortFlag = IPCFlag(self.uuid) 

1993 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK): 

1994 if self._srLock.acquireNoblock(): 

1995 self._locked += 1 

1996 return 

1997 if abortFlag.test(FLAG_TYPE_ABORT): 

1998 raise AbortException("Abort requested") 

1999 time.sleep(SR.LOCK_RETRY_INTERVAL) 

2000 raise util.SMException("Unable to acquire the SR lock") 

2001 

2002 self._locked += 1 

2003 

2004 def unlock(self): 

2005 if not self._srLock: 2005 ↛ 2007 line 2005 didn't jump to line 2007, because the condition on line 2005 was never false

2006 return 

2007 assert(self._locked > 0) 

2008 self._locked -= 1 

2009 if self._locked == 0: 

2010 self._srLock.release() 

2011 

2012 def needUpdateBlockInfo(self): 

2013 for vdi in self.vdis.values(): 

2014 if vdi.scanError or len(vdi.children) == 0: 

2015 continue 

2016 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2017 return True 

2018 return False 

2019 

2020 def updateBlockInfo(self): 

2021 for vdi in self.vdis.values(): 

2022 if vdi.scanError or len(vdi.children) == 0: 

2023 continue 

2024 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2025 vdi.updateBlockInfo() 

2026 

2027 def cleanupCoalesceJournals(self): 

2028 """Remove stale coalesce VDI indicators""" 

2029 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

2030 for uuid, jval in entries.items(): 

2031 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

2032 

2033 def cleanupJournals(self, dryRun=False): 

2034 """delete journal entries for non-existing VDIs""" 

2035 for t in [LVHDVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]: 

2036 entries = self.journaler.getAll(t) 

2037 for uuid, jval in entries.items(): 

2038 if self.getVDI(uuid): 

2039 continue 

2040 if t == SR.JRN_CLONE: 

2041 baseUuid, clonUuid = jval.split("_") 

2042 if self.getVDI(baseUuid): 

2043 continue 

2044 Util.log(" Deleting stale '%s' journal entry for %s " 

2045 "(%s)" % (t, uuid, jval)) 

2046 if not dryRun: 

2047 self.journaler.remove(t, uuid) 

2048 

2049 def cleanupCache(self, maxAge=-1): 

2050 return 0 

2051 

2052 def _coalesce(self, vdi): 

2053 if self.journaler.get(vdi.JRN_RELINK, vdi.uuid): 2053 ↛ 2056 line 2053 didn't jump to line 2056, because the condition on line 2053 was never true

2054 # this means we had done the actual coalescing already and just 

2055 # need to finish relinking and/or refreshing the children 

2056 Util.log("==> Coalesce apparently already done: skipping") 

2057 else: 

2058 # JRN_COALESCE is used to check which VDI is being coalesced in 

2059 # order to decide whether to abort the coalesce. We remove the 

2060 # journal as soon as the VHD coalesce step is done, because we 

2061 # don't expect the rest of the process to take long 

2062 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1") 

2063 vdi._doCoalesce() 

2064 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid) 

2065 

2066 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid) 

2067 

2068 # we now need to relink the children: lock the SR to prevent ops 

2069 # like SM.clone from manipulating the VDIs we'll be relinking and 

2070 # rescan the SR first in case the children changed since the last 

2071 # scan 

2072 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1") 

2073 

2074 self.lock() 

2075 try: 

2076 vdi.parent._tagChildrenForRelink() 

2077 self.scan() 

2078 vdi._relinkSkip() 

2079 finally: 

2080 self.unlock() 

2081 # Reload the children to leave things consistent 

2082 vdi.parent._reloadChildren(vdi) 

2083 

2084 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid) 

2085 self.deleteVDI(vdi) 

2086 

2087 class CoalesceTracker: 

2088 GRACE_ITERATIONS = 1 

2089 MAX_ITERATIONS_NO_PROGRESS = 3 

2090 MAX_ITERATIONS = 10 

2091 MAX_INCREASE_FROM_MINIMUM = 1.2 

2092 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \ 

2093 " --> Final size {finSize}" 

2094 

2095 def __init__(self, sr): 

2096 self.itsNoProgress = 0 

2097 self.its = 0 

2098 self.minSize = float("inf") 

2099 self.history = [] 

2100 self.reason = "" 

2101 self.startSize = None 

2102 self.finishSize = None 

2103 self.sr = sr 

2104 

2105 def abortCoalesce(self, prevSize, curSize): 

2106 res = False 

2107 

2108 self.its += 1 

2109 self.history.append(self.HISTORY_STRING.format(its=self.its, 

2110 initSize=prevSize, 

2111 finSize=curSize)) 

2112 

2113 self.finishSize = curSize 

2114 

2115 if self.startSize is None: 

2116 self.startSize = prevSize 

2117 

2118 if curSize < self.minSize: 

2119 self.minSize = curSize 

2120 

2121 if prevSize < self.minSize: 

2122 self.minSize = prevSize 

2123 

2124 if prevSize < curSize: 

2125 self.itsNoProgress += 1 

2126 Util.log("No progress, attempt:" 

2127 " {attempt}".format(attempt=self.itsNoProgress)) 

2128 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid) 

2129 

2130 if (not res) and (self.its > self.MAX_ITERATIONS): 

2131 max = self.MAX_ITERATIONS 

2132 self.reason = \ 

2133 "Max iterations ({max}) exceeded".format(max=max) 

2134 res = True 

2135 

2136 if (not res) and (self.itsNoProgress > 

2137 self.MAX_ITERATIONS_NO_PROGRESS): 

2138 max = self.MAX_ITERATIONS_NO_PROGRESS 

2139 self.reason = \ 

2140 "No progress made for {max} iterations".format(max=max) 

2141 res = True 

2142 

2143 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize 

2144 if (self.its > self.GRACE_ITERATIONS and 

2145 (not res) and (curSize > maxSizeFromMin)): 

2146 self.reason = "Unexpected bump in size," \ 

2147 " compared to minimum acheived" 

2148 res = True 

2149 

2150 return res 

2151 

2152 def printReasoning(self): 

2153 Util.log("Aborted coalesce") 

2154 for hist in self.history: 

2155 Util.log(hist) 

2156 Util.log(self.reason) 

2157 Util.log("Starting size was {size}" 

2158 .format(size=self.startSize)) 

2159 Util.log("Final size was {size}" 

2160 .format(size=self.finishSize)) 

2161 Util.log("Minimum size acheived was {size}" 

2162 .format(size=self.minSize)) 

2163 

2164 def _coalesceLeaf(self, vdi): 

2165 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot 

2166 complete due to external changes, namely vdi_delete and vdi_snapshot 

2167 that alter leaf-coalescibility of vdi""" 

2168 tracker = self.CoalesceTracker(self) 

2169 while not vdi.canLiveCoalesce(self.getStorageSpeed()): 

2170 prevSizeVHD = vdi.getSizeVHD() 

2171 if not self._snapshotCoalesce(vdi): 2171 ↛ 2172 line 2171 didn't jump to line 2172, because the condition on line 2171 was never true

2172 return False 

2173 if tracker.abortCoalesce(prevSizeVHD, vdi.getSizeVHD()): 

2174 tracker.printReasoning() 

2175 raise util.SMException("VDI {uuid} could not be coalesced" 

2176 .format(uuid=vdi.uuid)) 

2177 return self._liveLeafCoalesce(vdi) 

2178 
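# Illustrative note, not part of the original module: _coalesceLeaf()
# repeatedly snapshot-coalesces the leaf, which roughly shrinks the data
# the active leaf still holds, until canLiveCoalesce() judges the
# remainder small enough for a brief pause; CoalesceTracker aborts the
# loop after too many iterations, repeated lack of progress, or an
# unexpected size bump. A sketch with hypothetical sizes:
#     leaf ~20 GiB -> snapshot-coalesce -> ~2 GiB -> snapshot-coalesce
#          -> ~200 MiB -> canLiveCoalesce() True -> _liveLeafCoalesce()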

2179 def calcStorageSpeed(self, startTime, endTime, vhdSize): 

2180 speed = None 

2181 total_time = endTime - startTime 

2182 if total_time > 0: 

2183 speed = float(vhdSize) / float(total_time) 

2184 return speed 

2185 
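# Illustrative note, not part of the original module: calcStorageSpeed()
# returns plain bytes per second, e.g. coalescing a 4 GiB VHD in 200 s
# gives calcStorageSpeed(t, t + 200, 4 * 1024 ** 3) ~= 21474836.48
# (about 20 MiB/s); a non-positive elapsed time returns None so that
# sample is simply discarded.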

2186 def writeSpeedToFile(self, speed): 

2187 content = [] 

2188 speedFile = None 

2189 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2190 self.lock() 

2191 try: 

2192 Util.log("Writing to file: {myfile}".format(myfile=path)) 

2193 lines = "" 

2194 if not os.path.isfile(path): 

2195 lines = str(speed) + "\n" 

2196 else: 

2197 speedFile = open(path, "r+") 

2198 content = speedFile.readlines() 

2199 content.append(str(speed) + "\n") 

2200 if len(content) > N_RUNNING_AVERAGE: 

2201 del content[0] 

2202 lines = "".join(content) 

2203 

2204 util.atomicFileWrite(path, VAR_RUN, lines) 

2205 finally: 

2206 if speedFile is not None: 

2207 speedFile.close() 

2208 Util.log("Closing file: {myfile}".format(myfile=path)) 

2209 self.unlock() 

2210 

2211 def recordStorageSpeed(self, startTime, endTime, vhdSize): 

2212 speed = self.calcStorageSpeed(startTime, endTime, vhdSize) 

2213 if speed is None: 

2214 return 

2215 

2216 self.writeSpeedToFile(speed) 

2217 

2218 def getStorageSpeed(self): 

2219 speedFile = None 

2220 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2221 self.lock() 

2222 try: 

2223 speed = None 

2224 if os.path.isfile(path): 

2225 speedFile = open(path) 

2226 content = speedFile.readlines() 

2227 try: 

2228 content = [float(i) for i in content] 

2229 except ValueError: 

2230 Util.log("Something bad in the speed log:{log}". 

2231 format(log=speedFile.readlines())) 

2232 return speed 

2233 

2234 if len(content): 

2235 speed = sum(content) / float(len(content)) 

2236 if speed <= 0: 2236 ↛ 2238 line 2236 didn't jump to line 2238, because the condition on line 2236 was never true

2237 # Defensive, should be impossible. 

2238 Util.log("Bad speed: {speed} calculated for SR: {uuid}". 

2239 format(speed=speed, uuid=self.uuid)) 

2240 speed = None 

2241 else: 

2242 Util.log("Speed file empty for SR: {uuid}". 

2243 format(uuid=self.uuid)) 

2244 else: 

2245 Util.log("Speed log missing for SR: {uuid}". 

2246 format(uuid=self.uuid)) 

2247 return speed 

2248 finally: 

2249 if not (speedFile is None): 

2250 speedFile.close() 

2251 self.unlock() 

2252 
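# Illustrative note, not part of the original module: recordStorageSpeed()
# appends one sample per coalesce to /var/run/<sr-uuid>.speed_log, keeping
# at most N_RUNNING_AVERAGE (10) entries, and getStorageSpeed() returns
# their mean, or None when the log is missing, empty or unparseable. That
# mean is what _coalesceLeaf() feeds into canLiveCoalesce().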

2253 def _snapshotCoalesce(self, vdi): 

2254 # Note that because we are not holding any locks here, concurrent SM 

2255 # operations may change this tree under our feet. In particular, vdi 

2256 # can be deleted, or it can be snapshotted. 

2257 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED) 

2258 Util.log("Single-snapshotting %s" % vdi) 

2259 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid) 

2260 try: 

2261 ret = self.xapi.singleSnapshotVDI(vdi) 

2262 Util.log("Single-snapshot returned: %s" % ret) 

2263 except XenAPI.Failure as e: 

2264 if util.isInvalidVDI(e): 

2265 Util.log("The VDI appears to have been concurrently deleted") 

2266 return False 

2267 raise 

2268 self.scanLocked() 

2269 tempSnap = vdi.parent 

2270 if not tempSnap.isCoalesceable(): 

2271 Util.log("The VDI appears to have been concurrently snapshotted") 

2272 return False 

2273 Util.log("Coalescing parent %s" % tempSnap) 

2274 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid) 

2275 vhdSize = vdi.getSizeVHD() 

2276 self._coalesce(tempSnap) 

2277 if not vdi.isLeafCoalesceable(): 

2278 Util.log("The VDI tree appears to have been altered since") 

2279 return False 

2280 return True 

2281 

2282 def _liveLeafCoalesce(self, vdi): 

2283 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid) 

2284 self.lock() 

2285 try: 

2286 self.scan() 

2287 if not self.getVDI(vdi.uuid): 

2288 Util.log("The VDI appears to have been deleted meanwhile") 

2289 return False 

2290 if not vdi.isLeafCoalesceable(): 

2291 Util.log("The VDI is no longer leaf-coalesceable") 

2292 return False 

2293 

2294 uuid = vdi.uuid 

2295 vdi.pause(failfast=True) 

2296 try: 

2297 try: 

2298 # "vdi" object will no longer be valid after this call 

2299 self._doCoalesceLeaf(vdi) 

2300 except: 

2301 Util.logException("_doCoalesceLeaf") 

2302 self._handleInterruptedCoalesceLeaf() 

2303 raise 

2304 finally: 

2305 vdi = self.getVDI(uuid) 

2306 if vdi: 

2307 vdi.ensureUnpaused() 

2308 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid) 

2309 if vdiOld: 

2310 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2311 self.deleteVDI(vdiOld) 

2312 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2313 finally: 

2314 self.cleanup() 

2315 self.unlock() 

2316 self.logFilter.logState() 

2317 return True 

2318 

2319 def _doCoalesceLeaf(self, vdi): 

2320 """Actual coalescing of a leaf VDI onto parent. Must be called in an 

2321 offline/atomic context""" 

2322 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid) 

2323 self._prepareCoalesceLeaf(vdi) 

2324 vdi.parent._setHidden(False) 

2325 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False) 

2326 vdi.validate(True) 

2327 vdi.parent.validate(True) 

2328 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid) 

2329 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT 

2330 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE: 

2331 Util.log("Leaf-coalesce forced, will not use timeout") 

2332 timeout = 0 

2333 vdi._coalesceVHD(timeout) 

2334 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid) 

2335 vdi.parent.validate(True) 

2336 #vdi._verifyContents(timeout / 2) 

2337 

2338 # rename 

2339 vdiUuid = vdi.uuid 

2340 oldName = vdi.fileName 

2341 origParentUuid = vdi.parent.uuid 

2342 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid) 

2343 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid) 

2344 vdi.parent.rename(vdiUuid) 

2345 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid) 

2346 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid) 

2347 

2348 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is 

2349 # garbage 

2350 

2351 # update the VDI record 

2352 vdi.parent.delConfig(VDI.DB_VHD_PARENT) 

2353 if vdi.parent.raw: 

2354 vdi.parent.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_RAW) 

2355 vdi.parent.delConfig(VDI.DB_VHD_BLOCKS) 

2356 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid) 

2357 

2358 self._updateNode(vdi) 

2359 

2360 # delete the obsolete leaf & inflate the parent (in that order, to 

2361 # minimize free space requirements) 

2362 parent = vdi.parent 

2363 vdi._setHidden(True) 

2364 vdi.parent.children = [] 

2365 vdi.parent = None 

2366 

2367 extraSpace = self._calcExtraSpaceNeeded(vdi, parent) 

2368 freeSpace = self.getFreeSpace() 

2369 if freeSpace < extraSpace: 

2370 # don't delete unless we need the space: deletion is time-consuming 

2371 # because it requires contacting the slaves, and we're paused here 

2372 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2373 self.deleteVDI(vdi) 

2374 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2375 

2376 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid) 

2377 self.journaler.remove(VDI.JRN_LEAF, vdiUuid) 

2378 

2379 self.forgetVDI(origParentUuid) 

2380 self._finishCoalesceLeaf(parent) 

2381 self._updateSlavesOnResize(parent) 

2382 

2383 def _calcExtraSpaceNeeded(self, child, parent): 

2384 assert(not parent.raw) # raw parents not supported 

2385 extra = child.getSizeVHD() - parent.getSizeVHD() 

2386 if extra < 0: 

2387 extra = 0 

2388 return extra 

2389 
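# Illustrative note, not part of the original module: this default
# implementation (used as-is by FileSR) only needs the amount by which the
# obsolete leaf's VHD exceeds the coalesced parent's, clamped at zero;
# e.g. a 10 GiB leaf over a 6 GiB parent needs 4 GiB free, and
# _doCoalesceLeaf() deletes the obsolete leaf early only when free space
# falls short of that figure.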

2390 def _prepareCoalesceLeaf(self, vdi): 

2391 pass 

2392 

2393 def _updateNode(self, vdi): 

2394 pass 

2395 

2396 def _finishCoalesceLeaf(self, parent): 

2397 pass 

2398 

2399 def _updateSlavesOnUndoLeafCoalesce(self, parent, child): 

2400 pass 

2401 

2402 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid): 

2403 pass 

2404 

2405 def _updateSlavesOnResize(self, vdi): 

2406 pass 

2407 

2408 def _removeStaleVDIs(self, uuidsPresent): 

2409 for uuid in list(self.vdis.keys()): 

2410 if not uuid in uuidsPresent: 

2411 Util.log("VDI %s disappeared since last scan" % \ 

2412 self.vdis[uuid]) 

2413 del self.vdis[uuid] 

2414 

2415 def _handleInterruptedCoalesceLeaf(self): 

2416 """An interrupted leaf-coalesce operation may leave the VHD tree in an 

2417 inconsistent state. If the old-leaf VDI is still present, we revert the 

2418 operation (in case the original error is persistent); otherwise we must 

2419 finish the operation""" 

2420 # abstract 

2421 pass 

2422 

2423 def _buildTree(self, force): 

2424 self.vdiTrees = [] 

2425 for vdi in self.vdis.values(): 

2426 if vdi.parentUuid: 

2427 parent = self.getVDI(vdi.parentUuid) 

2428 if not parent: 

2429 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX): 

2430 self.vdiTrees.append(vdi) 

2431 continue 

2432 if force: 

2433 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \ 

2434 (vdi.parentUuid, vdi.uuid)) 

2435 self.vdiTrees.append(vdi) 

2436 continue 

2437 else: 

2438 raise util.SMException("Parent VDI %s of %s not " \ 

2439 "found" % (vdi.parentUuid, vdi.uuid)) 

2440 vdi.parent = parent 

2441 parent.children.append(vdi) 

2442 else: 

2443 self.vdiTrees.append(vdi) 

2444 

2445 

2446class FileSR(SR): 

2447 TYPE = SR.TYPE_FILE 

2448 CACHE_FILE_EXT = ".vhdcache" 

2449 # cache cleanup actions 

2450 CACHE_ACTION_KEEP = 0 

2451 CACHE_ACTION_REMOVE = 1 

2452 CACHE_ACTION_REMOVE_IF_INACTIVE = 2 

2453 

2454 def __init__(self, uuid, xapi, createLock, force): 

2455 SR.__init__(self, uuid, xapi, createLock, force) 

2456 self.path = "/var/run/sr-mount/%s" % self.uuid 

2457 self.journaler = fjournaler.Journaler(self.path) 

2458 

2459 def scan(self, force=False): 

2460 if not util.pathexists(self.path): 

2461 raise util.SMException("directory %s not found!" % self.uuid) 

2462 vhds = self._scan(force) 

2463 for uuid, vhdInfo in vhds.items(): 

2464 vdi = self.getVDI(uuid) 

2465 if not vdi: 

2466 self.logFilter.logNewVDI(uuid) 

2467 vdi = FileVDI(self, uuid, False) 

2468 self.vdis[uuid] = vdi 

2469 vdi.load(vhdInfo) 

2470 uuidsPresent = list(vhds.keys()) 

2471 rawList = [x for x in os.listdir(self.path) if x.endswith(vhdutil.FILE_EXTN_RAW)] 

2472 for rawName in rawList: 

2473 uuid = FileVDI.extractUuid(rawName) 

2474 uuidsPresent.append(uuid) 

2475 vdi = self.getVDI(uuid) 

2476 if not vdi: 

2477 self.logFilter.logNewVDI(uuid) 

2478 vdi = FileVDI(self, uuid, True) 

2479 self.vdis[uuid] = vdi 

2480 self._removeStaleVDIs(uuidsPresent) 

2481 self._buildTree(force) 

2482 self.logFilter.logState() 

2483 self._handleInterruptedCoalesceLeaf() 

2484 

2485 def getFreeSpace(self): 

2486 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path) 

2487 

2488 def deleteVDIs(self, vdiList): 

2489 rootDeleted = False 

2490 for vdi in vdiList: 

2491 if not vdi.parent: 

2492 rootDeleted = True 

2493 break 

2494 SR.deleteVDIs(self, vdiList) 

2495 if self.xapi.srRecord["type"] == "nfs" and rootDeleted: 

2496 self.xapi.markCacheSRsDirty() 

2497 

2498 def cleanupCache(self, maxAge=-1): 

2499 """Clean up IntelliCache cache files. Caches for leaf nodes are 

2500 removed when the leaf node no longer exists or its allow-caching 

2501 attribute is not set. Caches for parent nodes are removed when the 

2502 parent node no longer exists or it hasn't been used in more than 

2503 <maxAge> hours. 

2504 Return number of caches removed. 

2505 """ 

2506 numRemoved = 0 

2507 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)] 

2508 Util.log("Found %d cache files" % len(cacheFiles)) 

2509 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge) 

2510 for cacheFile in cacheFiles: 

2511 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)] 

2512 action = self.CACHE_ACTION_KEEP 

2513 rec = self.xapi.getRecordVDI(uuid) 

2514 if not rec: 

2515 Util.log("Cache %s: VDI doesn't exist" % uuid) 

2516 action = self.CACHE_ACTION_REMOVE 

2517 elif rec["managed"] and not rec["allow_caching"]: 

2518 Util.log("Cache %s: caching disabled" % uuid) 

2519 action = self.CACHE_ACTION_REMOVE 

2520 elif not rec["managed"] and maxAge >= 0: 

2521 lastAccess = datetime.datetime.fromtimestamp( \ 

2522 os.path.getatime(os.path.join(self.path, cacheFile))) 

2523 if lastAccess < cutoff: 

2524 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge)) 

2525 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE 

2526 

2527 if action == self.CACHE_ACTION_KEEP: 

2528 Util.log("Keeping cache %s" % uuid) 

2529 continue 

2530 

2531 lockId = uuid 

2532 parentUuid = None 

2533 if rec and rec["managed"]: 

2534 parentUuid = rec["sm_config"].get("vhd-parent") 

2535 if parentUuid: 

2536 lockId = parentUuid 

2537 

2538 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId) 

2539 cacheLock.acquire() 

2540 try: 

2541 if self._cleanupCache(uuid, action): 

2542 numRemoved += 1 

2543 finally: 

2544 cacheLock.release() 

2545 return numRemoved 

2546 

2547 def _cleanupCache(self, uuid, action): 

2548 assert(action != self.CACHE_ACTION_KEEP) 

2549 rec = self.xapi.getRecordVDI(uuid) 

2550 if rec and rec["allow_caching"]: 

2551 Util.log("Cache %s appears to have become valid" % uuid) 

2552 return False 

2553 

2554 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT) 

2555 tapdisk = blktap2.Tapdisk.find_by_path(fullPath) 

2556 if tapdisk: 

2557 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE: 

2558 Util.log("Cache %s still in use" % uuid) 

2559 return False 

2560 Util.log("Shutting down tapdisk for %s" % fullPath) 

2561 tapdisk.shutdown() 

2562 

2563 Util.log("Deleting file %s" % fullPath) 

2564 os.unlink(fullPath) 

2565 return True 

2566 

2567 def _isCacheFileName(self, name): 

2568 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \ 

2569 name.endswith(self.CACHE_FILE_EXT) 

2570 

2571 def _scan(self, force): 

2572 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

2573 error = False 

2574 pattern = os.path.join(self.path, "*%s" % vhdutil.FILE_EXTN_VHD) 

2575 vhds = vhdutil.getAllVHDs(pattern, FileVDI.extractUuid) 

2576 for uuid, vhdInfo in vhds.items(): 

2577 if vhdInfo.error: 

2578 error = True 

2579 break 

2580 if not error: 

2581 return vhds 

2582 Util.log("Scan error on attempt %d" % i) 

2583 if force: 

2584 return vhds 

2585 raise util.SMException("Scan error") 

2586 

2587 def deleteVDI(self, vdi): 

2588 self._checkSlaves(vdi) 

2589 SR.deleteVDI(self, vdi) 

2590 

2591 def _checkSlaves(self, vdi): 

2592 onlineHosts = self.xapi.getOnlineHosts() 

2593 abortFlag = IPCFlag(self.uuid) 

2594 for pbdRecord in self.xapi.getAttachedPBDs(): 

2595 hostRef = pbdRecord["host"] 

2596 if hostRef == self.xapi._hostRef: 

2597 continue 

2598 if abortFlag.test(FLAG_TYPE_ABORT): 

2599 raise AbortException("Aborting due to signal") 

2600 try: 

2601 self._checkSlave(hostRef, vdi) 

2602 except util.CommandException: 

2603 if hostRef in onlineHosts: 

2604 raise 

2605 

2606 def _checkSlave(self, hostRef, vdi): 

2607 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path}) 

2608 Util.log("Checking with slave: %s" % repr(call)) 

2609 _host = self.xapi.session.xenapi.host 

2610 text = _host.call_plugin( * call) 

2611 

2612 def _handleInterruptedCoalesceLeaf(self): 

2613 entries = self.journaler.getAll(VDI.JRN_LEAF) 

2614 for uuid, parentUuid in entries.items(): 

2615 fileList = os.listdir(self.path) 

2616 childName = uuid + vhdutil.FILE_EXTN_VHD 

2617 tmpChildName = self.TMP_RENAME_PREFIX + uuid + vhdutil.FILE_EXTN_VHD 

2618 parentName1 = parentUuid + vhdutil.FILE_EXTN_VHD 

2619 parentName2 = parentUuid + vhdutil.FILE_EXTN_RAW 

2620 parentPresent = (parentName1 in fileList or parentName2 in fileList) 

2621 if parentPresent or tmpChildName in fileList: 

2622 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

2623 else: 

2624 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

2625 self.journaler.remove(VDI.JRN_LEAF, uuid) 

2626 vdi = self.getVDI(uuid) 

2627 if vdi: 

2628 vdi.ensureUnpaused() 

2629 

2630 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2631 Util.log("*** UNDO LEAF-COALESCE") 

2632 parent = self.getVDI(parentUuid) 

2633 if not parent: 

2634 parent = self.getVDI(childUuid) 

2635 if not parent: 

2636 raise util.SMException("Neither %s nor %s found" % \ 

2637 (parentUuid, childUuid)) 

2638 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

2639 parent.rename(parentUuid) 

2640 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

2641 

2642 child = self.getVDI(childUuid) 

2643 if not child: 

2644 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

2645 if not child: 

2646 raise util.SMException("Neither %s nor %s found" % \ 

2647 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

2648 Util.log("Renaming child back to %s" % childUuid) 

2649 child.rename(childUuid) 

2650 Util.log("Updating the VDI record") 

2651 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

2652 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

2653 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

2654 

2655 if child.hidden: 

2656 child._setHidden(False) 

2657 if not parent.hidden: 

2658 parent._setHidden(True) 

2659 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

2660 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

2661 Util.log("*** leaf-coalesce undo successful") 

2662 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

2663 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

2664 

2665 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2666 Util.log("*** FINISH LEAF-COALESCE") 

2667 vdi = self.getVDI(childUuid) 

2668 if not vdi: 

2669 raise util.SMException("VDI %s not found" % childUuid) 

2670 try: 

2671 self.forgetVDI(parentUuid) 

2672 except XenAPI.Failure: 

2673 pass 

2674 self._updateSlavesOnResize(vdi) 

2675 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

2676 Util.log("*** finished leaf-coalesce successfully") 

2677 

2678 

2679class LVHDSR(SR): 

2680 TYPE = SR.TYPE_LVHD 

2681 SUBTYPES = ["lvhdoiscsi", "lvhdohba"] 

2682 

2683 def __init__(self, uuid, xapi, createLock, force): 

2684 SR.__init__(self, uuid, xapi, createLock, force) 

2685 self.vgName = "%s%s" % (lvhdutil.VG_PREFIX, self.uuid) 

2686 self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgName) 

2687 self.lvmCache = lvmcache.LVMCache(self.vgName) 

2688 self.lvActivator = LVActivator(self.uuid, self.lvmCache) 

2689 self.journaler = journaler.Journaler(self.lvmCache) 

2690 

2691 def deleteVDI(self, vdi): 

2692 if self.lvActivator.get(vdi.uuid, False): 

2693 self.lvActivator.deactivate(vdi.uuid, False) 

2694 self._checkSlaves(vdi) 

2695 SR.deleteVDI(self, vdi) 

2696 

2697 def forgetVDI(self, vdiUuid): 

2698 SR.forgetVDI(self, vdiUuid) 

2699 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME) 

2700 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid) 

2701 

2702 def getFreeSpace(self): 

2703 stats = lvutil._getVGstats(self.vgName) 

2704 return stats['physical_size'] - stats['physical_utilisation'] 

2705 

2706 def cleanup(self): 

2707 if not self.lvActivator.deactivateAll(): 

2708 Util.log("ERROR deactivating LVs while cleaning up") 

2709 

2710 def needUpdateBlockInfo(self): 

2711 for vdi in self.vdis.values(): 

2712 if vdi.scanError or vdi.raw or len(vdi.children) == 0: 

2713 continue 

2714 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2715 return True 

2716 return False 

2717 

2718 def updateBlockInfo(self): 

2719 numUpdated = 0 

2720 for vdi in self.vdis.values(): 

2721 if vdi.scanError or vdi.raw or len(vdi.children) == 0: 

2722 continue 

2723 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2724 vdi.updateBlockInfo() 

2725 numUpdated += 1 

2726 if numUpdated: 

2727 # deactivate the LVs again sooner rather than later. If we don't do it 

2728 # now, by the time this thread gets to deactivations, another one 

2729 # might have leaf-coalesced a node and deleted it, making the child 

2730 # inherit the refcount value and preventing the correct decrement 

2731 self.cleanup() 

2732 

2733 def scan(self, force=False): 

2734 vdis = self._scan(force) 

2735 for uuid, vdiInfo in vdis.items(): 

2736 vdi = self.getVDI(uuid) 

2737 if not vdi: 

2738 self.logFilter.logNewVDI(uuid) 

2739 vdi = LVHDVDI(self, uuid, 

2740 vdiInfo.vdiType == vhdutil.VDI_TYPE_RAW) 

2741 self.vdis[uuid] = vdi 

2742 vdi.load(vdiInfo) 

2743 self._removeStaleVDIs(vdis.keys()) 

2744 self._buildTree(force) 

2745 self.logFilter.logState() 

2746 self._handleInterruptedCoalesceLeaf() 

2747 

2748 def _scan(self, force): 

2749 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

2750 error = False 

2751 self.lvmCache.refresh() 

2752 vdis = lvhdutil.getVDIInfo(self.lvmCache) 

2753 for uuid, vdiInfo in vdis.items(): 

2754 if vdiInfo.scanError: 

2755 error = True 

2756 break 

2757 if not error: 

2758 return vdis 

2759 Util.log("Scan error, retrying (%d)" % i) 

2760 if force: 

2761 return vdis 

2762 raise util.SMException("Scan error") 

2763 

2764 def _removeStaleVDIs(self, uuidsPresent): 

2765 for uuid in list(self.vdis.keys()): 

2766 if not uuid in uuidsPresent: 

2767 Util.log("VDI %s disappeared since last scan" % \ 

2768 self.vdis[uuid]) 

2769 del self.vdis[uuid] 

2770 if self.lvActivator.get(uuid, False): 

2771 self.lvActivator.remove(uuid, False) 

2772 

2773 def _liveLeafCoalesce(self, vdi): 

2774 """If the parent is raw and the child was resized (virt. size), then 

2775 we'll need to resize the parent, which can take a while due to zeroing 

2776 out of the extended portion of the LV. Do it before pausing the child 

2777 to avoid a protracted downtime""" 

2778 if vdi.parent.raw and vdi.sizeVirt > vdi.parent.sizeVirt: 

2779 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

2780 vdi.parent._increaseSizeVirt(vdi.sizeVirt) 

2781 

2782 return SR._liveLeafCoalesce(self, vdi) 

2783 

2784 def _prepareCoalesceLeaf(self, vdi): 

2785 vdi._activateChain() 

2786 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

2787 vdi.deflate() 

2788 vdi.inflateParentForCoalesce() 

2789 

2790 def _updateNode(self, vdi): 

2791 # fix the refcounts: the remaining node should inherit the binary 

2792 # refcount from the leaf (because if it was online, it should remain 

2793 # refcounted as such), but the normal refcount from the parent (because 

2794 # this node is really the parent node) - minus 1 if it is online (since 

2795 # non-leaf nodes increment their normal counts when they are online and 

2796 # we are now a leaf, storing that 1 in the binary refcount). 

2797 ns = lvhdutil.NS_PREFIX_LVM + self.uuid 

2798 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns) 

2799 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns) 

2800 pCnt = pCnt - cBcnt 

2801 assert(pCnt >= 0) 

2802 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns) 

2803 

2804 def _finishCoalesceLeaf(self, parent): 

2805 if not parent.isSnapshot() or parent.isAttachedRW(): 

2806 parent.inflateFully() 

2807 else: 

2808 parent.deflate() 

2809 

2810 def _calcExtraSpaceNeeded(self, child, parent): 

2811 return lvhdutil.calcSizeVHDLV(parent.sizeVirt) - parent.sizeLV 

2812 

2813 def _handleInterruptedCoalesceLeaf(self): 

2814 entries = self.journaler.getAll(VDI.JRN_LEAF) 

2815 for uuid, parentUuid in entries.items(): 

2816 childLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + uuid 

2817 tmpChildLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ 

2818 self.TMP_RENAME_PREFIX + uuid 

2819 parentLV1 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + parentUuid 

2820 parentLV2 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + parentUuid 

2821 parentPresent = (self.lvmCache.checkLV(parentLV1) or \ 

2822 self.lvmCache.checkLV(parentLV2)) 

2823 if parentPresent or self.lvmCache.checkLV(tmpChildLV): 

2824 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

2825 else: 

2826 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

2827 self.journaler.remove(VDI.JRN_LEAF, uuid) 

2828 vdi = self.getVDI(uuid) 

2829 if vdi: 

2830 vdi.ensureUnpaused() 

2831 

2832 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2833 Util.log("*** UNDO LEAF-COALESCE") 

2834 parent = self.getVDI(parentUuid) 

2835 if not parent: 

2836 parent = self.getVDI(childUuid) 

2837 if not parent: 

2838 raise util.SMException("Neither %s nor %s found" % \ 

2839 (parentUuid, childUuid)) 

2840 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

2841 parent.rename(parentUuid) 

2842 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

2843 

2844 child = self.getVDI(childUuid) 

2845 if not child: 

2846 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

2847 if not child: 

2848 raise util.SMException("Neither %s nor %s found" % \ 

2849 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

2850 Util.log("Renaming child back to %s" % childUuid) 

2851 child.rename(childUuid) 

2852 Util.log("Updating the VDI record") 

2853 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

2854 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

2855 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

2856 

2857 # refcount (best effort - assume that it had succeeded if the 

2858 # second rename succeeded; if not, this adjustment will be wrong, 

2859 # leading to a non-deactivation of the LV) 

2860 ns = lvhdutil.NS_PREFIX_LVM + self.uuid 

2861 cCnt, cBcnt = RefCounter.check(child.uuid, ns) 

2862 pCnt, pBcnt = RefCounter.check(parent.uuid, ns) 

2863 pCnt = pCnt + cBcnt 

2864 RefCounter.set(parent.uuid, pCnt, 0, ns) 

2865 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid) 

2866 

2867 parent.deflate() 

2868 child.inflateFully() 

2869 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid) 

2870 if child.hidden: 

2871 child._setHidden(False) 

2872 if not parent.hidden: 

2873 parent._setHidden(True) 

2874 if not parent.lvReadonly: 

2875 self.lvmCache.setReadonly(parent.fileName, True) 

2876 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

2877 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

2878 Util.log("*** leaf-coalesce undo successful") 

2879 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

2880 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

2881 

2882 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2883 Util.log("*** FINISH LEAF-COALESCE") 

2884 vdi = self.getVDI(childUuid) 

2885 if not vdi: 

2886 raise util.SMException("VDI %s not found" % childUuid) 

2887 vdi.inflateFully() 

2888 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid) 

2889 try: 

2890 self.forgetVDI(parentUuid) 

2891 except XenAPI.Failure: 

2892 pass 

2893 self._updateSlavesOnResize(vdi) 

2894 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

2895 Util.log("*** finished leaf-coalesce successfully") 

2896 

2897 def _checkSlaves(self, vdi): 

2898 """Confirm with all slaves in the pool that 'vdi' is not in use. We 

2899 try to check all slaves, including those that the Agent believes are 

2900 offline, but ignore failures for offline hosts. This is to avoid cases 

2901 where the Agent thinks a host is offline but the host is up.""" 

2902 args = {"vgName": self.vgName, 

2903 "action1": "deactivateNoRefcount", 

2904 "lvName1": vdi.fileName, 

2905 "action2": "cleanupLockAndRefcount", 

2906 "uuid2": vdi.uuid, 

2907 "ns2": lvhdutil.NS_PREFIX_LVM + self.uuid} 

2908 onlineHosts = self.xapi.getOnlineHosts() 

2909 abortFlag = IPCFlag(self.uuid) 

2910 for pbdRecord in self.xapi.getAttachedPBDs(): 

2911 hostRef = pbdRecord["host"] 

2912 if hostRef == self.xapi._hostRef: 

2913 continue 

2914 if abortFlag.test(FLAG_TYPE_ABORT): 

2915 raise AbortException("Aborting due to signal") 

2916 Util.log("Checking with slave %s (path %s)" % ( 

2917 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path)) 

2918 try: 

2919 self.xapi.ensureInactive(hostRef, args) 

2920 except XenAPI.Failure: 

2921 if hostRef in onlineHosts: 

2922 raise 

2923 

2924 def _updateSlavesOnUndoLeafCoalesce(self, parent, child): 

2925 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid]) 

2926 if not slaves: 

2927 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \ 

2928 child) 

2929 return 

2930 

2931 tmpName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ 

2932 self.TMP_RENAME_PREFIX + child.uuid 

2933 args = {"vgName": self.vgName, 

2934 "action1": "deactivateNoRefcount", 

2935 "lvName1": tmpName, 

2936 "action2": "deactivateNoRefcount", 

2937 "lvName2": child.fileName, 

2938 "action3": "refresh", 

2939 "lvName3": child.fileName, 

2940 "action4": "refresh", 

2941 "lvName4": parent.fileName} 

2942 for slave in slaves: 

2943 Util.log("Updating %s, %s, %s on slave %s" % \ 

2944 (tmpName, child.fileName, parent.fileName, 

2945 self.xapi.getRecordHost(slave)['hostname'])) 

2946 text = self.xapi.session.xenapi.host.call_plugin( \ 

2947 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

2948 Util.log("call-plugin returned: '%s'" % text) 

2949 

2950 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid): 

2951 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid]) 

2952 if not slaves: 

2953 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi) 

2954 return 

2955 

2956 args = {"vgName": self.vgName, 

2957 "action1": "deactivateNoRefcount", 

2958 "lvName1": oldNameLV, 

2959 "action2": "refresh", 

2960 "lvName2": vdi.fileName, 

2961 "action3": "cleanupLockAndRefcount", 

2962 "uuid3": origParentUuid, 

2963 "ns3": lvhdutil.NS_PREFIX_LVM + self.uuid} 

2964 for slave in slaves: 

2965 Util.log("Updating %s to %s on slave %s" % \ 

2966 (oldNameLV, vdi.fileName, 

2967 self.xapi.getRecordHost(slave)['hostname'])) 

2968 text = self.xapi.session.xenapi.host.call_plugin( \ 

2969 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

2970 Util.log("call-plugin returned: '%s'" % text) 

2971 

2972 def _updateSlavesOnResize(self, vdi): 

2973 uuids = [x.uuid for x in vdi.getAllLeaves()] 

2974 slaves = util.get_slaves_attached_on(self.xapi.session, uuids) 

2975 if not slaves: 

2976 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi) 

2977 return 

2978 lvhdutil.lvRefreshOnSlaves(self.xapi.session, self.uuid, self.vgName, 

2979 vdi.fileName, vdi.uuid, slaves) 

2980 

2981 

2982class LinstorSR(SR): 

2983 TYPE = SR.TYPE_LINSTOR 

2984 

2985 def __init__(self, uuid, xapi, createLock, force): 

2986 if not LINSTOR_AVAILABLE: 

2987 raise util.SMException( 

2988 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing' 

2989 ) 

2990 

2991 SR.__init__(self, uuid, xapi, createLock, force) 

2992 self.path = LinstorVolumeManager.DEV_ROOT_PATH 

2993 self._reloadLinstor() 

2994 

2995 def deleteVDI(self, vdi): 

2996 self._checkSlaves(vdi) 

2997 SR.deleteVDI(self, vdi) 

2998 

2999 def getFreeSpace(self): 

3000 return self._linstor.max_volume_size_allowed 

3001 

3002 def scan(self, force=False): 

3003 all_vdi_info = self._scan(force) 

3004 for uuid, vdiInfo in all_vdi_info.items(): 

3005 # When vdiInfo is None, the VDI is RAW. 

3006 vdi = self.getVDI(uuid) 

3007 if not vdi: 

3008 self.logFilter.logNewVDI(uuid) 

3009 vdi = LinstorVDI(self, uuid, not vdiInfo) 

3010 self.vdis[uuid] = vdi 

3011 if vdiInfo: 

3012 vdi.load(vdiInfo) 

3013 self._removeStaleVDIs(all_vdi_info.keys()) 

3014 self._buildTree(force) 

3015 self.logFilter.logState() 

3016 self._handleInterruptedCoalesceLeaf() 

3017 

3018 def pauseVDIs(self, vdiList): 

3019 self._linstor.ensure_volume_list_is_not_locked( 

3020 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3021 ) 

3022 return super(LinstorSR, self).pauseVDIs(vdiList) 

3023 

3024 def _reloadLinstor(self): 

3025 session = self.xapi.session 

3026 host_ref = util.get_this_host_ref(session) 

3027 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) 

3028 

3029 pbd = util.find_my_pbd(session, host_ref, sr_ref) 

3030 if pbd is None: 

3031 raise util.SMException('Failed to find PBD') 

3032 

3033 dconf = session.xenapi.PBD.get_device_config(pbd) 

3034 group_name = dconf['group-name'] 

3035 

3036 controller_uri = get_controller_uri() 

3037 self.journaler = LinstorJournaler( 

3038 controller_uri, group_name, logger=util.SMlog 

3039 ) 

3040 

3041 self._linstor = LinstorVolumeManager( 

3042 controller_uri, 

3043 group_name, 

3044 repair=True, 

3045 logger=util.SMlog 

3046 ) 

3047 self._vhdutil = LinstorVhdUtil(session, self._linstor) 

3048 

3049 def _scan(self, force): 

3050 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3051 self._reloadLinstor() 

3052 error = False 

3053 try: 

3054 all_vdi_info = self._load_vdi_info() 

3055 for uuid, vdiInfo in all_vdi_info.items(): 

3056 if vdiInfo and vdiInfo.error: 

3057 error = True 

3058 break 

3059 if not error: 

3060 return all_vdi_info 

3061 Util.log('Scan error, retrying ({})'.format(i)) 

3062 except Exception as e: 

3063 Util.log('Scan exception, retrying ({}): {}'.format(i, e)) 

3064 Util.log(traceback.format_exc()) 

3065 

3066 if force: 

3067 return all_vdi_info 

3068 raise util.SMException('Scan error') 

3069 

3070 def _load_vdi_info(self): 

3071 all_vdi_info = {} 

3072 

3073 # TODO: Ensure metadata contains the right info. 

3074 

3075 all_volume_info = self._linstor.get_volumes_with_info() 

3076 volumes_metadata = self._linstor.get_volumes_with_metadata() 

3077 for vdi_uuid, volume_info in all_volume_info.items(): 

3078 try: 

3079 if not volume_info.name and \ 

3080 not list(volumes_metadata[vdi_uuid].items()): 

3081 continue # Ignore it, probably deleted. 

3082 

3083 vdi_type = volumes_metadata[vdi_uuid][VDI_TYPE_TAG] 

3084 if vdi_type == vhdutil.VDI_TYPE_VHD: 

3085 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3086 else: 

3087 info = None 

3088 except Exception as e: 

3089 Util.log( 

3090 ' [VDI {}: failed to load VDI info]: {}' 

3091 .format(vdi_uuid, e) 

3092 ) 

3093 info = vhdutil.VHDInfo(vdi_uuid) 

3094 info.error = 1 

3095 all_vdi_info[vdi_uuid] = info 

3096 return all_vdi_info 

3097 

3098 # TODO: Maybe implement _liveLeafCoalesce/_prepareCoalesceLeaf/ 

3099 # _finishCoalesceLeaf/_updateSlavesOnResize like LVM plugin. 

3100 

3101 def _calcExtraSpaceNeeded(self, child, parent): 

3102 meta_overhead = vhdutil.calcOverheadEmpty(LinstorVDI.MAX_SIZE) 

3103 bitmap_overhead = vhdutil.calcOverheadBitmap(parent.sizeVirt) 

3104 virtual_size = LinstorVolumeManager.round_up_volume_size( 

3105 parent.sizeVirt + meta_overhead + bitmap_overhead 

3106 ) 

3107 volume_size = self._linstor.get_volume_size(parent.uuid) 

3108 

3109 assert virtual_size >= volume_size 

3110 return virtual_size - volume_size 

3111 

3112 def _hasValidDevicePath(self, uuid): 

3113 try: 

3114 self._linstor.get_device_path(uuid) 

3115 except Exception: 

3116 # TODO: Maybe log exception. 

3117 return False 

3118 return True 

3119 

3120 def _liveLeafCoalesce(self, vdi): 

3121 self.lock() 

3122 try: 

3123 self._linstor.ensure_volume_is_not_locked( 

3124 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3125 ) 

3126 return super(LinstorSR, self)._liveLeafCoalesce(vdi) 

3127 finally: 

3128 self.unlock() 

3129 

3130 def _handleInterruptedCoalesceLeaf(self): 

3131 entries = self.journaler.get_all(VDI.JRN_LEAF) 

3132 for uuid, parentUuid in entries.items(): 

3133 if self._hasValidDevicePath(parentUuid) or \ 

3134 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): 

3135 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3136 else: 

3137 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3138 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3139 vdi = self.getVDI(uuid) 

3140 if vdi: 

3141 vdi.ensureUnpaused() 

3142 

3143 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3144 Util.log('*** UNDO LEAF-COALESCE') 

3145 parent = self.getVDI(parentUuid) 

3146 if not parent: 

3147 parent = self.getVDI(childUuid) 

3148 if not parent: 

3149 raise util.SMException( 

3150 'Neither {} nor {} found'.format(parentUuid, childUuid) 

3151 ) 

3152 Util.log( 

3153 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) 

3154 ) 

3155 parent.rename(parentUuid) 

3156 

3157 child = self.getVDI(childUuid) 

3158 if not child: 

3159 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3160 if not child: 

3161 raise util.SMException( 

3162 'Neither {} nor {} found'.format( 

3163 childUuid, self.TMP_RENAME_PREFIX + childUuid 

3164 ) 

3165 ) 

3166 Util.log('Renaming child back to {}'.format(childUuid)) 

3167 child.rename(childUuid) 

3168 Util.log('Updating the VDI record') 

3169 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

3170 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

3171 

3172 # TODO: Maybe deflate here. 

3173 

3174 if child.hidden: 

3175 child._setHidden(False) 

3176 if not parent.hidden: 

3177 parent._setHidden(True) 

3178 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3179 Util.log('*** leaf-coalesce undo successful') 

3180 

3181 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3182 Util.log('*** FINISH LEAF-COALESCE') 

3183 vdi = self.getVDI(childUuid) 

3184 if not vdi: 

3185 raise util.SMException('VDI {} not found'.format(childUuid)) 

3186 # TODO: Maybe inflate. 

3187 try: 

3188 self.forgetVDI(parentUuid) 

3189 except XenAPI.Failure: 

3190 pass 

3191 self._updateSlavesOnResize(vdi) 

3192 Util.log('*** finished leaf-coalesce successfully') 

3193 

3194 def _checkSlaves(self, vdi): 

3195 try: 

3196 all_openers = self._linstor.get_volume_openers(vdi.uuid) 

3197 for openers in all_openers.values(): 

3198 for opener in openers.values(): 

3199 if opener['process-name'] != 'tapdisk': 

3200 raise util.SMException( 

3201 'VDI {} is in use: {}'.format(vdi.uuid, all_openers) 

3202 ) 

3203 except LinstorVolumeManagerError as e: 

3204 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: 

3205 raise 

3206 

3207 

3208################################################################################ 

3209# 

3210# Helpers 

3211# 

3212def daemonize(): 

3213 pid = os.fork() 

3214 if pid: 

3215 os.waitpid(pid, 0) 

3216 Util.log("New PID [%d]" % pid) 

3217 return False 

3218 os.chdir("/") 

3219 os.setsid() 

3220 pid = os.fork() 

3221 if pid: 

3222 Util.log("Will finish as PID [%d]" % pid) 

3223 os._exit(0) 

3224 for fd in [0, 1, 2]: 

3225 try: 

3226 os.close(fd) 

3227 except OSError: 

3228 pass 

3229 # we need to fill those special fd numbers or pread won't work 

3230 sys.stdin = open("/dev/null", 'r') 

3231 sys.stderr = open("/dev/null", 'w') 

3232 sys.stdout = open("/dev/null", 'w') 

3233 # As we're a new process we need to clear the lock objects 

3234 lock.Lock.clearAll() 

3235 return True 

3236 

3237 
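# Illustrative sketch (not part of the original module): the calling pattern
# daemonize() expects, mirroring how gc() uses it further below. The parent
# gets False back and simply carries on; the daemonized grandchild gets True
# and must end with os._exit() so control never returns into the caller's
# stack. The helper name and work_fn parameter are placeholders.
def _example_run_detached(work_fn):
    if daemonize():
        try:
            # Child: cwd is "/", we own a new session, stdio points at
            # /dev/null and all lock objects have been cleared.
            work_fn()
        finally:
            os._exit(0)
    # Parent: daemonize() returned False, continue immediately.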

3238def normalizeType(type): 

3239 if type in LVHDSR.SUBTYPES: 

3240 type = SR.TYPE_LVHD 

3241 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]: 

3242 # temporary while LVHD is symlinked as LVM 

3243 type = SR.TYPE_LVHD 

3244 if type in [ 

3245 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", 

3246 "moosefs", "xfs", "zfs", "ext4" 

3247 ]: 

3248 type = SR.TYPE_FILE 

3249 if type in ["linstor"]: 

3250 type = SR.TYPE_LINSTOR 

3251 if type not in SR.TYPES: 

3252 raise util.SMException("Unsupported SR type: %s" % type) 

3253 return type 

3254 
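# Illustrative sketch (not part of the original module): normalizeType()
# collapses the concrete SR driver names into the three families this script
# knows how to handle, and rejects everything else. The helper name is a
# placeholder.
def _example_normalize_types():
    assert normalizeType("lvmoiscsi") == SR.TYPE_LVHD   # LVM flavours -> LVHD
    assert normalizeType("nfs") == SR.TYPE_FILE         # file-based drivers
    assert normalizeType("linstor") == SR.TYPE_LINSTOR
    # Any other string raises util.SMException("Unsupported SR type: ...").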

3255GCPAUSE_DEFAULT_SLEEP = 5 * 60 

3256 

3257 

3258def _gc_init_file(sr_uuid): 

3259 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init') 

3260 

3261 

3262def _create_init_file(sr_uuid): 

3263 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))) 

3264 with open(os.path.join( 

3265 NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init'), 'w+') as f: 

3266 f.write('1') 

3267 

3268 

3269def _gcLoopPause(sr, dryRun=False, immediate=False): 

3270 if immediate: 

3271 return 

3272 

3273 # Check to see if the GCPAUSE_FISTPOINT is present. If so, the fist 

3274 # point will just return. Otherwise, fall back on an abortable sleep. 

3275 

3276 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT): 

3277 

3278 util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT, 

3279 lambda *args: None) 

3280 elif os.path.exists(_gc_init_file(sr.uuid)): 

3281 def abortTest(): 

3282 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT) 

3283 

3284 # If time.sleep hangs we are in deep trouble; however, for 

3285 # completeness we set the timeout of the abort thread to 

3286 # 110% of GCPAUSE_DEFAULT_SLEEP. 

3287 Util.log("GC active, about to go quiet") 

3288 Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP), 

3289 None, sr.uuid, abortTest, VDI.POLL_INTERVAL, 

3290 GCPAUSE_DEFAULT_SLEEP * 1.1) 

3291 Util.log("GC active, quiet period ended") 

3292 

3293 
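# Illustrative sketch (not part of the original module): the quiet period in
# _gcLoopPause() is abortable because abortTest() polls the same IPC flag
# that _abort() sets. A hypothetical caller could cut the sleep (or a running
# GC pass) short like this; the helper name is a placeholder and the flag
# handling mirrors _abort() further below.
def _example_request_abort(sr_uuid):
    # The second argument is the "soft" switch, passed positionally as in
    # _abort(): a soft set leaves an already-pending request untouched and
    # returns False.
    if IPCFlag(sr_uuid).set(FLAG_TYPE_ABORT, True):
        Util.log("Abort requested for SR %s" % sr_uuid)
    else:
        Util.log("An abort request was already pending for SR %s" % sr_uuid)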

3294def _gcLoop(sr, dryRun=False, immediate=False): 

3295 if not lockActive.acquireNoblock(): 

3296 Util.log("Another GC instance already active, exiting") 

3297 return 

3299 # Track how many coalesces we do 

3299 coalesced = 0 

3300 task_status = "success" 

3301 try: 

3302 # Check if any work needs to be done 

3303 sr.scanLocked() 

3304 if not sr.hasWork(): 

3305 Util.log("No work, exiting") 

3306 return 

3307 sr.xapi.create_task( 

3308 "Garbage Collection", 

3309 "Garbage collection for SR %s" % sr.uuid) 

3310 _gcLoopPause(sr, dryRun, immediate=immediate) 

3311 while True: 

3312 if not sr.xapi.isPluggedHere(): 

3313 Util.log("SR no longer attached, exiting") 

3314 break 

3315 sr.scanLocked() 

3316 if not sr.hasWork(): 

3317 Util.log("No work, exiting") 

3318 break 

3319 

3320 if not lockRunning.acquireNoblock(): 

3321 Util.log("Unable to acquire GC running lock.") 

3322 return 

3323 try: 

3324 if not sr.gcEnabled(): 

3325 break 

3326 

3327 sr.xapi.update_task_progress("done", coalesced) 

3328 

3329 sr.cleanupCoalesceJournals() 

3330 # Create the init file here in case startup is waiting on it 

3331 _create_init_file(sr.uuid) 

3332 sr.scanLocked() 

3333 sr.updateBlockInfo() 

3334 

3335 howmany = len(sr.findGarbage()) 

3336 if howmany > 0: 

3337 Util.log("Found %d orphaned vdis" % howmany) 

3338 sr.lock() 

3339 try: 

3340 sr.garbageCollect(dryRun) 

3341 finally: 

3342 sr.unlock() 

3343 sr.xapi.srUpdate() 

3344 

3345 candidate = sr.findCoalesceable() 

3346 if candidate: 

3347 util.fistpoint.activate( 

3348 "LVHDRT_finding_a_suitable_pair", sr.uuid) 

3349 sr.coalesce(candidate, dryRun) 

3350 sr.xapi.srUpdate() 

3351 coalesced += 1 

3352 continue 

3353 

3354 candidate = sr.findLeafCoalesceable() 

3355 if candidate: 

3356 sr.coalesceLeaf(candidate, dryRun) 

3357 sr.xapi.srUpdate() 

3358 coalesced += 1 

3359 continue 

3360 

3361 finally: 

3362 lockRunning.release() 

3363 except: 

3364 task_status = "failure" 

3365 raise 

3366 finally: 

3367 sr.xapi.set_task_status(task_status) 

3368 Util.log("GC process exiting, no work left") 

3369 _create_init_file(sr.uuid) 

3370 lockActive.release() 

3371 

3372 

3373def _xapi_enabled(session, hostref): 

3374 host = session.xenapi.host.get_record(hostref) 

3375 return host['enabled'] 

3376 

3377 

3378def _ensure_xapi_initialised(session): 

3379 """ 

3380 Don't want to start GC until Xapi is fully initialised 

3381 """ 

3382 local_session = None 

3383 if session is None: 

3384 local_session = util.get_localAPI_session() 

3385 session = local_session 

3386 

3387 try: 

3388 hostref = session.xenapi.host.get_by_uuid(util.get_this_host()) 

3389 while not _xapi_enabled(session, hostref): 

3390 util.SMlog("Xapi not ready, GC waiting") 

3391 time.sleep(15) 

3392 finally: 

3393 if local_session is not None: 

3394 local_session.logout() 

3395 

3396def _gc(session, srUuid, dryRun=False, immediate=False): 

3397 init(srUuid) 

3398 _ensure_xapi_initialised(session) 

3399 sr = SR.getInstance(srUuid, session) 

3400 if not sr.gcEnabled(False): 

3401 return 

3402 

3403 sr.cleanupCache() 

3404 try: 

3405 _gcLoop(sr, dryRun, immediate=immediate) 

3406 finally: 

3407 sr.cleanup() 

3408 sr.logFilter.logState() 

3409 del sr.xapi 

3410 

3411 

3412def _abort(srUuid, soft=False): 

3413 """Aborts an GC/coalesce. 

3414 

3415 srUuid: the UUID of the SR whose GC/coalesce must be aborted 

3416 soft: If set to True and there is a pending abort signal, the function 

3417 doesn't do anything. If set to False, a new abort signal is issued. 

3418 

3419 returns: If soft is set to False, we return True holding lockActive. If 

3420 soft is set to True and an abort signal is already pending, we return False 

3421 without holding lockActive. An exception is raised in case of error.""" 

3422 Util.log("=== SR %s: abort ===" % (srUuid)) 

3423 init(srUuid) 

3424 if not lockActive.acquireNoblock(): 

3425 gotLock = False 

3426 Util.log("Aborting currently-running instance (SR %s)" % srUuid) 

3427 abortFlag = IPCFlag(srUuid) 

3428 if not abortFlag.set(FLAG_TYPE_ABORT, soft): 

3429 return False 

3430 for i in range(SR.LOCK_RETRY_ATTEMPTS): 

3431 gotLock = lockActive.acquireNoblock() 

3432 if gotLock: 

3433 break 

3434 time.sleep(SR.LOCK_RETRY_INTERVAL) 

3435 abortFlag.clear(FLAG_TYPE_ABORT) 

3436 if not gotLock: 

3437 raise util.CommandException(code=errno.ETIMEDOUT, 

3438 reason="SR %s: error aborting existing process" % srUuid) 

3439 return True 

3440 

3441 
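# Illustrative sketch (not part of the original module): what the return
# contract of _abort() means for a caller. After a successful hard abort the
# caller holds lockActive and is responsible for releasing it, which is
# exactly what the public abort() wrapper below does. The helper name is a
# placeholder.
def _example_hard_abort(sr_uuid):
    if _abort(sr_uuid, soft=False):
        try:
            pass  # GC/coalesce is stopped and lockActive is held here
        finally:
            lockActive.release()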

3442def init(srUuid): 

3443 global lockRunning 

3444 if not lockRunning: 

3445 lockRunning = lock.Lock(LOCK_TYPE_RUNNING, srUuid) 

3446 global lockActive 

3447 if not lockActive: 

3448 lockActive = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid) 

3449 

3450 

3451def usage(): 

3452 output = """Garbage collect and/or coalesce VHDs in a VHD-based SR 

3453 

3454Parameters: 

3455 -u --uuid UUID SR UUID 

3456 and one of: 

3457 -g --gc garbage collect, coalesce, and repeat while there is work 

3458 -G --gc_force garbage collect once, aborting any current operations 

3459 -c --clean_cache <max_age> clean up IntelliCache cache files older than 

3460 max_age hours 

3461 -a --abort abort any currently running operation (GC or coalesce) 

3462 -q --query query the current state (GC'ing, coalescing or not running) 

3463 -x --disable disable GC/coalesce (will be in effect until you exit) 

3464 -t --debug see Debug below 

3465 

3466Options: 

3467 -b --background run in background (return immediately) (valid for -g only) 

3468 -f --force continue in the presence of VHDs with errors (when doing 

3469 GC, this might cause removal of any such VHDs) (only valid 

3470 for -G) (DANGEROUS) 

3471 

3472Debug: 

3473 The --debug parameter enables manipulation of LVHD VDIs for debugging 

3474 purposes. ** NEVER USE IT ON A LIVE VM ** 

3475 The following parameters are required: 

3476 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate", 

3477 "deflate". 

3478 -v --vdi_uuid VDI UUID 

3479 """ 

3480 #-d --dry-run don't actually perform any SR-modifying operations 

3481 print(output) 

3482 Util.log("(Invalid usage)") 

3483 sys.exit(1) 

3484 

3485 
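# Example invocations (illustrative only; "<script>" stands for however this
# file is installed and invoked, and <SR_UUID> is a placeholder):
#   <script> --uuid <SR_UUID> --gc --background   # start GC/coalesce, return at once
#   <script> --uuid <SR_UUID> --query             # report whether GC is running
#   <script> --uuid <SR_UUID> --abort             # stop any running GC/coalesce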

3486############################################################################## 

3487# 

3488# API 

3489# 

3490def abort(srUuid, soft=False): 

3491 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair. 

3492 """ 

3493 if _abort(srUuid, soft): 

3494 Util.log("abort: releasing the process lock") 

3495 lockActive.release() 

3496 return True 

3497 else: 

3498 return False 

3499 

3500 
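# Illustrative sketch (not part of the original module): how a storage
# operation might use the soft flavour of abort() before touching VDIs.
# With soft=True the call returns False, rather than issuing a new request,
# when an abort is already pending. The helper name is a placeholder.
def _example_stop_gc_politely(sr_uuid):
    if abort(sr_uuid, soft=True):
        Util.log("GC/coalesce stopped for SR %s" % sr_uuid)
    else:
        Util.log("Abort already pending for SR %s, nothing to do" % sr_uuid)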

3501def gc(session, srUuid, inBackground, dryRun=False): 

3502 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return 

3503 immediately if inBackground=True. 

3504 

3505 The following algorithm is used: 

3506 1. If we are already GC'ing in this SR, return 

3507 2. If we are already coalescing a VDI pair: 

3508 a. Scan the SR and determine if the VDI pair is GC'able 

3509 b. If the pair is not GC'able, return 

3510 c. If the pair is GC'able, abort coalesce 

3511 3. Scan the SR 

3512 4. If there is nothing to collect, nor to coalesce, return 

3513 5. If there is something to collect, GC all, then goto 3 

3514 6. If there is something to coalesce, coalesce one pair, then goto 3 

3515 """ 

3516 Util.log("=== SR %s: gc ===" % srUuid) 

3517 if inBackground: 

3518 if daemonize(): 

3519 # we are now running in the background. Catch & log any errors 

3520 # because there is no other way to propagate them back at this 

3521 # point 

3522 

3523 try: 

3524 _gc(None, srUuid, dryRun) 

3525 except AbortException: 

3526 Util.log("Aborted") 

3527 except Exception: 

3528 Util.logException("gc") 

3529 Util.log("* * * * * SR %s: ERROR\n" % srUuid) 

3530 os._exit(0) 

3531 else: 

3532 _gc(session, srUuid, dryRun, immediate=True) 

3533 

3534 
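# Illustrative sketch (not part of the original module): the fire-and-forget
# way to kick off collection after an operation that creates garbage, e.g. a
# VDI delete. The helper name is a placeholder.
def _example_kick_gc(session, sr_uuid):
    # Fork a background GC and return immediately; pass dryRun=True to only
    # log what would be done without modifying the SR.
    gc(session, sr_uuid, inBackground=True, dryRun=False)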

3535def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False): 

3536 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure 

3537 the SR lock is held. 

3538 The following algorithm is used: 

3539 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce 

3540 2. Scan the SR 

3541 3. GC 

3542 4. return 

3543 """ 

3544 Util.log("=== SR %s: gc_force ===" % srUuid) 

3545 init(srUuid) 

3546 sr = SR.getInstance(srUuid, session, lockSR, True) 

3547 if not lockActive.acquireNoblock(): 

3548 abort(srUuid) 

3549 else: 

3550 Util.log("Nothing was running, clear to proceed") 

3551 

3552 if force: 

3553 Util.log("FORCED: will continue even if there are VHD errors") 

3554 sr.scanLocked(force) 

3555 sr.cleanupCoalesceJournals() 

3556 

3557 try: 

3558 sr.cleanupCache() 

3559 sr.garbageCollect(dryRun) 

3560 finally: 

3561 sr.cleanup() 

3562 sr.logFilter.logState() 

3563 lockActive.release() 

3564 

3565 
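# Illustrative sketch (not part of the original module): a one-shot forced
# collection. gc_force() expects the caller to hold the SR lock unless
# lockSR=True is passed, which is what the CLI path in main() does. The
# helper name is a placeholder.
def _example_force_collect(session, sr_uuid):
    gc_force(session, sr_uuid, force=False, dryRun=False, lockSR=True)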

3566def get_state(srUuid): 

3567 """Return whether GC/coalesce is currently running or not. The information 

3568 is not guaranteed for any length of time if the call is not protected by 

3569 locking. 

3570 """ 

3571 init(srUuid) 

3572 if lockActive.acquireNoblock(): 

3573 lockActive.release() 

3574 return False 

3575 return True 

3576 

3577 

3578def should_preempt(session, srUuid): 

3579 sr = SR.getInstance(srUuid, session) 

3580 entries = sr.journaler.getAll(VDI.JRN_COALESCE) 

3581 if len(entries) == 0: 

3582 return False 

3583 elif len(entries) > 1: 

3584 raise util.SMException("More than one coalesce entry: " + str(entries)) 

3585 sr.scanLocked() 

3586 coalescedUuid = entries.popitem()[0] 

3587 garbage = sr.findGarbage() 

3588 for vdi in garbage: 

3589 if vdi.uuid == coalescedUuid: 

3590 return True 

3591 return False 

3592 

3593 
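# Illustrative sketch (not part of the original module): should_preempt()
# tells a caller whether the VDI currently being coalesced has itself become
# garbage, in which case aborting the coalesce and collecting it instead is
# worthwhile. The helper name is a placeholder.
def _example_preempt_if_worthwhile(session, sr_uuid):
    if should_preempt(session, sr_uuid):
        abort(sr_uuid)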

3594def get_coalesceable_leaves(session, srUuid, vdiUuids): 

3595 coalesceable = [] 

3596 sr = SR.getInstance(srUuid, session) 

3597 sr.scanLocked() 

3598 for uuid in vdiUuids: 

3599 vdi = sr.getVDI(uuid) 

3600 if not vdi: 

3601 raise util.SMException("VDI %s not found" % uuid) 

3602 if vdi.isLeafCoalesceable(): 

3603 coalesceable.append(uuid) 

3604 return coalesceable 

3605 

3606 

3607def cache_cleanup(session, srUuid, maxAge): 

3608 sr = SR.getInstance(srUuid, session) 

3609 return sr.cleanupCache(maxAge) 

3610 

3611 

3612def debug(sr_uuid, cmd, vdi_uuid): 

3613 Util.log("Debug command: %s" % cmd) 

3614 sr = SR.getInstance(sr_uuid, None) 

3615 if not isinstance(sr, LVHDSR): 

3616 print("Error: not an LVHD SR") 

3617 return 

3618 sr.scanLocked() 

3619 vdi = sr.getVDI(vdi_uuid) 

3620 if not vdi: 

3621 print("Error: VDI %s not found") 

3622 return 

3623 print("Running %s on SR %s" % (cmd, sr)) 

3624 print("VDI before: %s" % vdi) 

3625 if cmd == "activate": 

3626 vdi._activate() 

3627 print("VDI file: %s" % vdi.path) 

3628 if cmd == "deactivate": 

3629 ns = lvhdutil.NS_PREFIX_LVM + sr.uuid 

3630 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False) 

3631 if cmd == "inflate": 

3632 vdi.inflateFully() 

3633 sr.cleanup() 

3634 if cmd == "deflate": 

3635 vdi.deflate() 

3636 sr.cleanup() 

3637 sr.scanLocked() 

3638 print("VDI after: %s" % vdi) 

3639 

3640 

3641def abort_optional_reenable(uuid): 

3642 print("Disabling GC/coalesce for %s" % uuid) 

3643 ret = _abort(uuid) 

3644 input("Press enter to re-enable...") 

3645 print("GC/coalesce re-enabled") 

3646 lockRunning.release() 

3647 if ret: 

3648 lockActive.release() 

3649 

3650 

3651############################################################################## 

3652# 

3653# CLI 

3654# 

3655def main(): 

3656 action = "" 

3657 uuid = "" 

3658 background = False 

3659 force = False 

3660 dryRun = False 

3661 debug_cmd = "" 

3662 vdi_uuid = "" 

3663 shortArgs = "gGc:aqxu:bfdt:v:" 

3664 longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable", 

3665 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="] 

3666 

3667 try: 

3668 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs) 

3669 except getopt.GetoptError: 

3670 usage() 

3671 for o, a in opts: 

3672 if o in ("-g", "--gc"): 

3673 action = "gc" 

3674 if o in ("-G", "--gc_force"): 

3675 action = "gc_force" 

3676 if o in ("-c", "--clean_cache"): 

3677 action = "clean_cache" 

3678 maxAge = int(a) 

3679 if o in ("-a", "--abort"): 

3680 action = "abort" 

3681 if o in ("-q", "--query"): 

3682 action = "query" 

3683 if o in ("-x", "--disable"): 

3684 action = "disable" 

3685 if o in ("-u", "--uuid"): 

3686 uuid = a 

3687 if o in ("-b", "--background"): 

3688 background = True 

3689 if o in ("-f", "--force"): 

3690 force = True 

3691 if o in ("-d", "--dry-run"): 

3692 Util.log("Dry run mode") 

3693 dryRun = True 

3694 if o in ("-t", "--debug"): 

3695 action = "debug" 

3696 debug_cmd = a 

3697 if o in ("-v", "--vdi_uuid"): 

3698 vdi_uuid = a 

3699 

3700 if not action or not uuid: 

3701 usage() 

3702 if action == "debug" and not (debug_cmd and vdi_uuid) or \ 

3703 action != "debug" and (debug_cmd or vdi_uuid): 

3704 usage() 

3705 

3706 if action != "query" and action != "debug": 

3707 print("All output goes to log") 

3708 

3709 if action == "gc": 

3710 gc(None, uuid, background, dryRun) 

3711 elif action == "gc_force": 

3712 gc_force(None, uuid, force, dryRun, True) 

3713 elif action == "clean_cache": 

3714 cache_cleanup(None, uuid, maxAge) 

3715 elif action == "abort": 

3716 abort(uuid) 

3717 elif action == "query": 

3718 print("Currently running: %s" % get_state(uuid)) 

3719 elif action == "disable": 

3720 abort_optional_reenable(uuid) 

3721 elif action == "debug": 

3722 debug(uuid, debug_cmd, vdi_uuid) 

3723 

3724 

3725if __name__ == '__main__': 

3726 main()