Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/python3 

2# 

3# Copyright (C) Citrix Systems Inc. 

4# 

5# This program is free software; you can redistribute it and/or modify 

6# it under the terms of the GNU Lesser General Public License as published 

7# by the Free Software Foundation; version 2.1 only. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program; if not, write to the Free Software Foundation, Inc., 

16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 

17# 

18# Script to coalesce and garbage collect VHD-based SR's in the background 

19# 

20 

21import os 

22import os.path 

23import sys 

24import time 

25import signal 

26import subprocess 

27import getopt 

28import datetime 

29import traceback 

30import base64 

31import zlib 

32import errno 

33import stat 

34 

35import XenAPI # pylint: disable=import-error 

36import util 

37import lvutil 

38import vhdutil 

39import lvhdutil 

40import lvmcache 

41import journaler 

42import fjournaler 

43import lock 

44import blktap2 

45import xs_errors 

46from refcounter import RefCounter 

47from ipc import IPCFlag 

48from lvmanager import LVActivator 

49from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG 

50from functools import reduce 

51from time import monotonic as _time 

52 

53try: 

54 from linstorjournaler import LinstorJournaler 

55 from linstorvhdutil import LinstorVhdUtil 

56 from linstorvolumemanager import get_controller_uri 

57 from linstorvolumemanager import LinstorVolumeManager 

58 from linstorvolumemanager import LinstorVolumeManagerError 

59 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX 

60 

61 LINSTOR_AVAILABLE = True 

62except ImportError: 

63 LINSTOR_AVAILABLE = False 

64 

65# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not 

66# possible due to lvhd_stop_using_() not working correctly. However, we leave 

67# this option available through the explicit LEAFCLSC_FORCE flag in the VDI 

68# record for use by the offline tool (which makes the operation safe by pausing 

69# the VM first) 

70AUTO_ONLINE_LEAF_COALESCE_ENABLED = True 

71 

72FLAG_TYPE_ABORT = "abort" # flag to request aborting of GC/coalesce 

73 

74# process "lock", used simply as an indicator that a process already exists 

75# that is doing GC/coalesce on this SR (such a process holds the lock, and we 

76# check for the fact by trying the lock). 

77LOCK_TYPE_RUNNING = "running" 

78lockRunning = None 

79 

80# process "lock" to indicate that the GC process has been activated but may not 

81# yet be running, stops a second process from being started. 

82LOCK_TYPE_GC_ACTIVE = "gc_active" 

83lockActive = None 

84 

85# Default coalesce error rate limit, in messages per minute. A zero value 

86# disables throttling, and a negative value disables error reporting. 

87DEFAULT_COALESCE_ERR_RATE = 1.0 / 60 

88 

89COALESCE_LAST_ERR_TAG = 'last-coalesce-error' 

90COALESCE_ERR_RATE_TAG = 'coalesce-error-rate' 

91VAR_RUN = "/var/run/" 

92SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log" 

93 

94N_RUNNING_AVERAGE = 10 

95 

96NON_PERSISTENT_DIR = '/run/nonpersistent/sm' 

97 

98 

99class AbortException(util.SMException): 

100 pass 

101 

102 

103################################################################################ 

104# 

105# Util 

106# 

107class Util: 

108 RET_RC = 1 

109 RET_STDOUT = 2 

110 RET_STDERR = 4 

111 

112 UUID_LEN = 36 

113 

114 PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024} 

115 

116 def log(text): 

117 util.SMlog(text, ident="SMGC") 

118 log = staticmethod(log) 

119 

120 def logException(tag): 

121 info = sys.exc_info() 

122 if info[0] == SystemExit: 122 ↛ 124line 122 didn't jump to line 124, because the condition on line 122 was never true

123 # this should not be happening when catching "Exception", but it is 

124 sys.exit(0) 

125 tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2])) 

126 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

127 Util.log(" ***********************") 

128 Util.log(" * E X C E P T I O N *") 

129 Util.log(" ***********************") 

130 Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1])) 

131 Util.log(tb) 

132 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

133 logException = staticmethod(logException) 

134 

135 def doexec(args, expectedRC, inputtext=None, ret=None, log=True): 

136 "Execute a subprocess, then return its return code, stdout, stderr" 

137 proc = subprocess.Popen(args, 

138 stdin=subprocess.PIPE, \ 

139 stdout=subprocess.PIPE, \ 

140 stderr=subprocess.PIPE, \ 

141 shell=True, \ 

142 close_fds=True) 

143 (stdout, stderr) = proc.communicate(inputtext) 

144 stdout = str(stdout) 

145 stderr = str(stderr) 

146 rc = proc.returncode 

147 if log: 

148 Util.log("`%s`: %s" % (args, rc)) 

149 if type(expectedRC) != type([]): 

150 expectedRC = [expectedRC] 

151 if not rc in expectedRC: 

152 reason = stderr.strip() 

153 if stdout.strip(): 

154 reason = "%s (stdout: %s)" % (reason, stdout.strip()) 

155 Util.log("Failed: %s" % reason) 

156 raise util.CommandException(rc, args, reason) 

157 

158 if ret == Util.RET_RC: 

159 return rc 

160 if ret == Util.RET_STDERR: 

161 return stderr 

162 return stdout 

163 doexec = staticmethod(doexec) 

164 

165 def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut): 

166 """execute func in a separate thread and kill it if abortTest signals 

167 so""" 

168 abortSignaled = abortTest() # check now before we clear resultFlag 

169 resultFlag = IPCFlag(ns) 

170 resultFlag.clearAll() 

171 pid = os.fork() 

172 if pid: 

173 startTime = _time() 

174 try: 

175 while True: 

176 if resultFlag.test("success"): 

177 Util.log(" Child process completed successfully") 

178 resultFlag.clear("success") 

179 return 

180 if resultFlag.test("failure"): 

181 resultFlag.clear("failure") 

182 raise util.SMException("Child process exited with error") 

183 if abortTest() or abortSignaled: 

184 os.killpg(pid, signal.SIGKILL) 

185 raise AbortException("Aborting due to signal") 

186 if timeOut and _time() - startTime > timeOut: 

187 os.killpg(pid, signal.SIGKILL) 

188 resultFlag.clearAll() 

189 raise util.SMException("Timed out") 

190 time.sleep(pollInterval) 

191 finally: 

192 wait_pid = 0 

193 rc = -1 

194 count = 0 

195 while wait_pid == 0 and count < 10: 

196 wait_pid, rc = os.waitpid(pid, os.WNOHANG) 

197 if wait_pid == 0: 

198 time.sleep(2) 

199 count += 1 

200 

201 if wait_pid == 0: 

202 Util.log("runAbortable: wait for process completion timed out") 

203 else: 

204 os.setpgrp() 

205 try: 

206 if func() == ret: 

207 resultFlag.set("success") 

208 else: 

209 resultFlag.set("failure") 

210 except Exception as e: 

211 Util.log("Child process failed with : (%s)" % e) 

212 resultFlag.set("failure") 

213 Util.logException("This exception has occured") 

214 os._exit(0) 

215 runAbortable = staticmethod(runAbortable) 

216 

217 def num2str(number): 

218 for prefix in ("G", "M", "K"): 

219 if number >= Util.PREFIX[prefix]: 

220 return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix) 

221 return "%s" % number 

222 num2str = staticmethod(num2str) 

223 

224 def numBits(val): 

225 count = 0 

226 while val: 

227 count += val & 1 

228 val = val >> 1 

229 return count 

230 numBits = staticmethod(numBits) 

231 

232 def countBits(bitmap1, bitmap2): 

233 """return bit count in the bitmap produced by ORing the two bitmaps""" 

234 len1 = len(bitmap1) 

235 len2 = len(bitmap2) 

236 lenLong = len1 

237 lenShort = len2 

238 bitmapLong = bitmap1 

239 if len2 > len1: 

240 lenLong = len2 

241 lenShort = len1 

242 bitmapLong = bitmap2 

243 

244 count = 0 

245 for i in range(lenShort): 

246 val = bitmap1[i] | bitmap2[i] 

247 count += Util.numBits(val) 

248 

249 for i in range(i + 1, lenLong): 

250 val = bitmapLong[i] 

251 count += Util.numBits(val) 

252 return count 

253 countBits = staticmethod(countBits) 

254 

255 def getThisScript(): 

256 thisScript = util.get_real_path(__file__) 

257 if thisScript.endswith(".pyc"): 

258 thisScript = thisScript[:-1] 

259 return thisScript 

260 getThisScript = staticmethod(getThisScript) 

261 

262 

263################################################################################ 

264# 

265# XAPI 

266# 

267class XAPI: 

268 USER = "root" 

269 PLUGIN_ON_SLAVE = "on-slave" 

270 

271 CONFIG_SM = 0 

272 CONFIG_OTHER = 1 

273 CONFIG_ON_BOOT = 2 

274 CONFIG_ALLOW_CACHING = 3 

275 

276 CONFIG_NAME = { 

277 CONFIG_SM: "sm-config", 

278 CONFIG_OTHER: "other-config", 

279 CONFIG_ON_BOOT: "on-boot", 

280 CONFIG_ALLOW_CACHING: "allow_caching" 

281 } 

282 

283 class LookupError(util.SMException): 

284 pass 

285 

286 def getSession(): 

287 session = XenAPI.xapi_local() 

288 session.xenapi.login_with_password(XAPI.USER, '', '', 'SM') 

289 return session 

290 getSession = staticmethod(getSession) 

291 

292 def __init__(self, session, srUuid): 

293 self.sessionPrivate = False 

294 self.session = session 

295 if self.session is None: 

296 self.session = self.getSession() 

297 self.sessionPrivate = True 

298 self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid) 

299 self.srRecord = self.session.xenapi.SR.get_record(self._srRef) 

300 self.hostUuid = util.get_this_host() 

301 self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid) 

302 self.task = None 

303 self.task_progress = {"coalescable": 0, "done": 0} 

304 

305 def __del__(self): 

306 if self.sessionPrivate: 

307 self.session.xenapi.session.logout() 

308 

309 def isPluggedHere(self): 

310 pbds = self.getAttachedPBDs() 

311 for pbdRec in pbds: 

312 if pbdRec["host"] == self._hostRef: 

313 return True 

314 return False 

315 

316 def poolOK(self): 

317 host_recs = self.session.xenapi.host.get_all_records() 

318 for host_ref, host_rec in host_recs.items(): 

319 if not host_rec["enabled"]: 

320 Util.log("Host %s not enabled" % host_rec["uuid"]) 

321 return False 

322 return True 

323 

324 def isMaster(self): 

325 if self.srRecord["shared"]: 

326 pool = list(self.session.xenapi.pool.get_all_records().values())[0] 

327 return pool["master"] == self._hostRef 

328 else: 

329 pbds = self.getAttachedPBDs() 

330 if len(pbds) < 1: 

331 raise util.SMException("Local SR not attached") 

332 elif len(pbds) > 1: 

333 raise util.SMException("Local SR multiply attached") 

334 return pbds[0]["host"] == self._hostRef 

335 

336 def getAttachedPBDs(self): 

337 """Return PBD records for all PBDs of this SR that are currently 

338 attached""" 

339 attachedPBDs = [] 

340 pbds = self.session.xenapi.PBD.get_all_records() 

341 for pbdRec in pbds.values(): 

342 if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]: 

343 attachedPBDs.append(pbdRec) 

344 return attachedPBDs 

345 

346 def getOnlineHosts(self): 

347 return util.get_online_hosts(self.session) 

348 

349 def ensureInactive(self, hostRef, args): 

350 text = self.session.xenapi.host.call_plugin( \ 

351 hostRef, self.PLUGIN_ON_SLAVE, "multi", args) 

352 Util.log("call-plugin returned: '%s'" % text) 

353 

354 def getRecordHost(self, hostRef): 

355 return self.session.xenapi.host.get_record(hostRef) 

356 

357 def _getRefVDI(self, uuid): 

358 return self.session.xenapi.VDI.get_by_uuid(uuid) 

359 

360 def getRefVDI(self, vdi): 

361 return self._getRefVDI(vdi.uuid) 

362 

363 def getRecordVDI(self, uuid): 

364 try: 

365 ref = self._getRefVDI(uuid) 

366 return self.session.xenapi.VDI.get_record(ref) 

367 except XenAPI.Failure: 

368 return None 

369 

370 def singleSnapshotVDI(self, vdi): 

371 return self.session.xenapi.VDI.snapshot(vdi.getRef(), 

372 {"type": "internal"}) 

373 

374 def forgetVDI(self, srUuid, vdiUuid): 

375 """Forget the VDI, but handle the case where the VDI has already been 

376 forgotten (i.e. ignore errors)""" 

377 try: 

378 vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid) 

379 self.session.xenapi.VDI.forget(vdiRef) 

380 except XenAPI.Failure: 

381 pass 

382 

383 def getConfigVDI(self, vdi, key): 

384 kind = vdi.CONFIG_TYPE[key] 

385 if kind == self.CONFIG_SM: 

386 cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef()) 

387 elif kind == self.CONFIG_OTHER: 

388 cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef()) 

389 elif kind == self.CONFIG_ON_BOOT: 

390 cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef()) 

391 elif kind == self.CONFIG_ALLOW_CACHING: 

392 cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef()) 

393 else: 

394 assert(False) 

395 Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg))) 

396 return cfg 

397 

398 def removeFromConfigVDI(self, vdi, key): 

399 kind = vdi.CONFIG_TYPE[key] 

400 if kind == self.CONFIG_SM: 

401 self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key) 

402 elif kind == self.CONFIG_OTHER: 

403 self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key) 

404 else: 

405 assert(False) 

406 

407 def addToConfigVDI(self, vdi, key, val): 

408 kind = vdi.CONFIG_TYPE[key] 

409 if kind == self.CONFIG_SM: 

410 self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val) 

411 elif kind == self.CONFIG_OTHER: 

412 self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val) 

413 else: 

414 assert(False) 

415 

416 def isSnapshot(self, vdi): 

417 return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef()) 

418 

419 def markCacheSRsDirty(self): 

420 sr_refs = self.session.xenapi.SR.get_all_records_where( \ 

421 'field "local_cache_enabled" = "true"') 

422 for sr_ref in sr_refs: 

423 Util.log("Marking SR %s dirty" % sr_ref) 

424 util.set_dirty(self.session, sr_ref) 

425 

426 def srUpdate(self): 

427 Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"]) 

428 abortFlag = IPCFlag(self.srRecord["uuid"]) 

429 task = self.session.xenapi.Async.SR.update(self._srRef) 

430 cancelTask = True 

431 try: 

432 for i in range(60): 

433 status = self.session.xenapi.task.get_status(task) 

434 if not status == "pending": 

435 Util.log("SR.update_asynch status changed to [%s]" % status) 

436 cancelTask = False 

437 return 

438 if abortFlag.test(FLAG_TYPE_ABORT): 

439 Util.log("Abort signalled during srUpdate, cancelling task...") 

440 try: 

441 self.session.xenapi.task.cancel(task) 

442 cancelTask = False 

443 Util.log("Task cancelled") 

444 except: 

445 pass 

446 return 

447 time.sleep(1) 

448 finally: 

449 if cancelTask: 

450 self.session.xenapi.task.cancel(task) 

451 self.session.xenapi.task.destroy(task) 

452 Util.log("Asynch srUpdate still running, but timeout exceeded.") 

453 

454 def update_task(self): 

455 self.session.xenapi.task.set_other_config( 

456 self.task, 

457 { 

458 "applies_to": self._srRef 

459 }) 

460 total = self.task_progress['coalescable'] + self.task_progress['done'] 

461 if (total > 0): 

462 self.session.xenapi.task.set_progress( 

463 self.task, float(self.task_progress['done']) / total) 

464 

465 def create_task(self, label, description): 

466 self.task = self.session.xenapi.task.create(label, description) 

467 self.update_task() 

468 

469 def update_task_progress(self, key, value): 

470 self.task_progress[key] = value 

471 if self.task: 

472 self.update_task() 

473 

474 def set_task_status(self, status): 

475 if self.task: 

476 self.session.xenapi.task.set_status(self.task, status) 

477 

478 

479################################################################################ 

480# 

481# VDI 

482# 

483class VDI(object): 

484 """Object representing a VDI of a VHD-based SR""" 

485 

486 POLL_INTERVAL = 1 

487 POLL_TIMEOUT = 30 

488 DEVICE_MAJOR = 202 

489 DRIVER_NAME_VHD = "vhd" 

490 

491 # config keys & values 

492 DB_VHD_PARENT = "vhd-parent" 

493 DB_VDI_TYPE = "vdi_type" 

494 DB_VHD_BLOCKS = "vhd-blocks" 

495 DB_VDI_PAUSED = "paused" 

496 DB_VDI_RELINKING = "relinking" 

497 DB_VDI_ACTIVATING = "activating" 

498 DB_GC = "gc" 

499 DB_COALESCE = "coalesce" 

500 DB_LEAFCLSC = "leaf-coalesce" # config key 

501 LEAFCLSC_DISABLED = "false" # set by user; means do not leaf-coalesce 

502 LEAFCLSC_FORCE = "force" # set by user; means skip snap-coalesce 

503 LEAFCLSC_OFFLINE = "offline" # set here for informational purposes: means 

504 # no space to snap-coalesce or unable to keep 

505 # up with VDI. This is not used by the SM, it 

506 # might be used by external components. 

507 DB_ONBOOT = "on-boot" 

508 ONBOOT_RESET = "reset" 

509 DB_ALLOW_CACHING = "allow_caching" 

510 

511 CONFIG_TYPE = { 

512 DB_VHD_PARENT: XAPI.CONFIG_SM, 

513 DB_VDI_TYPE: XAPI.CONFIG_SM, 

514 DB_VHD_BLOCKS: XAPI.CONFIG_SM, 

515 DB_VDI_PAUSED: XAPI.CONFIG_SM, 

516 DB_VDI_RELINKING: XAPI.CONFIG_SM, 

517 DB_VDI_ACTIVATING: XAPI.CONFIG_SM, 

518 DB_GC: XAPI.CONFIG_OTHER, 

519 DB_COALESCE: XAPI.CONFIG_OTHER, 

520 DB_LEAFCLSC: XAPI.CONFIG_OTHER, 

521 DB_ONBOOT: XAPI.CONFIG_ON_BOOT, 

522 DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING, 

523 } 

524 

525 LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024 # bytes 

526 LIVE_LEAF_COALESCE_TIMEOUT = 10 # seconds 

527 TIMEOUT_SAFETY_MARGIN = 0.5 # extra margin when calculating 

528 # feasibility of leaf coalesce 

529 

530 JRN_RELINK = "relink" # journal entry type for relinking children 

531 JRN_COALESCE = "coalesce" # to communicate which VDI is being coalesced 

532 JRN_LEAF = "leaf" # used in coalesce-leaf 

533 

534 STR_TREE_INDENT = 4 

535 

536 def __init__(self, sr, uuid, raw): 

537 self.sr = sr 

538 self.scanError = True 

539 self.uuid = uuid 

540 self.raw = raw 

541 self.fileName = "" 

542 self.parentUuid = "" 

543 self.sizeVirt = -1 

544 self._sizeVHD = -1 

545 self.hidden = False 

546 self.parent = None 

547 self.children = [] 

548 self._vdiRef = None 

549 self._clearRef() 

550 

551 @staticmethod 

552 def extractUuid(path): 

553 raise NotImplementedError("Implement in sub class") 

554 

555 def load(self): 

556 """Load VDI info""" 

557 pass # abstract 

558 

559 def getDriverName(self): 

560 return self.DRIVER_NAME_VHD 

561 

562 def getRef(self): 

563 if self._vdiRef is None: 

564 self._vdiRef = self.sr.xapi.getRefVDI(self) 

565 return self._vdiRef 

566 

567 def getConfig(self, key, default=None): 

568 config = self.sr.xapi.getConfigVDI(self, key) 

569 if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING: 569 ↛ 570line 569 didn't jump to line 570, because the condition on line 569 was never true

570 val = config 

571 else: 

572 val = config.get(key) 

573 if val: 

574 return val 

575 return default 

576 

577 def setConfig(self, key, val): 

578 self.sr.xapi.removeFromConfigVDI(self, key) 

579 self.sr.xapi.addToConfigVDI(self, key, val) 

580 Util.log("Set %s = %s for %s" % (key, val, self)) 

581 

582 def delConfig(self, key): 

583 self.sr.xapi.removeFromConfigVDI(self, key) 

584 Util.log("Removed %s from %s" % (key, self)) 

585 

586 def ensureUnpaused(self): 

587 if self.getConfig(self.DB_VDI_PAUSED) == "true": 

588 Util.log("Unpausing VDI %s" % self) 

589 self.unpause() 

590 

591 def pause(self, failfast=False): 

592 if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid, 

593 self.uuid, failfast): 

594 raise util.SMException("Failed to pause VDI %s" % self) 

595 

596 def _report_tapdisk_unpause_error(self): 

597 try: 

598 xapi = self.sr.xapi.session.xenapi 

599 sr_ref = xapi.SR.get_by_uuid(self.sr.uuid) 

600 msg_name = "failed to unpause tapdisk" 

601 msg_body = "Failed to unpause tapdisk for VDI %s, " \ 

602 "VMs using this tapdisk have lost access " \ 

603 "to the corresponding disk(s)" % self.uuid 

604 xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body) 

605 except Exception as e: 

606 util.SMlog("failed to generate message: %s" % e) 

607 

608 def unpause(self): 

609 if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid, 

610 self.uuid): 

611 self._report_tapdisk_unpause_error() 

612 raise util.SMException("Failed to unpause VDI %s" % self) 

613 

614 def refresh(self, ignoreNonexistent=True): 

615 """Pause-unpause in one step""" 

616 self.sr.lock() 

617 try: 

618 try: 

619 if not blktap2.VDI.tap_refresh(self.sr.xapi.session, 619 ↛ 621line 619 didn't jump to line 621, because the condition on line 619 was never true

620 self.sr.uuid, self.uuid): 

621 self._report_tapdisk_unpause_error() 

622 raise util.SMException("Failed to refresh %s" % self) 

623 except XenAPI.Failure as e: 

624 if util.isInvalidVDI(e) and ignoreNonexistent: 

625 Util.log("VDI %s not found, ignoring" % self) 

626 return 

627 raise 

628 finally: 

629 self.sr.unlock() 

630 

631 def isSnapshot(self): 

632 return self.sr.xapi.isSnapshot(self) 

633 

634 def isAttachedRW(self): 

635 return util.is_attached_rw( 

636 self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef())) 

637 

638 def getVHDBlocks(self): 

639 val = self.updateBlockInfo() 

640 bitmap = zlib.decompress(base64.b64decode(val)) 

641 return bitmap 

642 

643 def isCoalesceable(self): 

644 """A VDI is coalesceable if it has no siblings and is not a leaf""" 

645 return not self.scanError and \ 

646 self.parent and \ 

647 len(self.parent.children) == 1 and \ 

648 self.hidden and \ 

649 len(self.children) > 0 

650 

651 def isLeafCoalesceable(self): 

652 """A VDI is leaf-coalesceable if it has no siblings and is a leaf""" 

653 return not self.scanError and \ 

654 self.parent and \ 

655 len(self.parent.children) == 1 and \ 

656 not self.hidden and \ 

657 len(self.children) == 0 

658 

659 def canLiveCoalesce(self, speed): 

660 """Can we stop-and-leaf-coalesce this VDI? The VDI must be 

661 isLeafCoalesceable() already""" 

662 feasibleSize = False 

663 allowedDownTime = \ 

664 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT 

665 if speed: 

666 feasibleSize = \ 

667 self.getSizeVHD() // speed < allowedDownTime 

668 else: 

669 feasibleSize = \ 

670 self.getSizeVHD() < self.LIVE_LEAF_COALESCE_MAX_SIZE 

671 

672 return (feasibleSize or 

673 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE) 

674 

675 def getAllPrunable(self): 

676 if len(self.children) == 0: # base case 

677 # it is possible to have a hidden leaf that was recently coalesced 

678 # onto its parent, its children already relinked but not yet 

679 # reloaded - in which case it may not be garbage collected yet: 

680 # some tapdisks could still be using the file. 

681 if self.sr.journaler.get(self.JRN_RELINK, self.uuid): 

682 return [] 

683 if not self.scanError and self.hidden: 

684 return [self] 

685 return [] 

686 

687 thisPrunable = True 

688 vdiList = [] 

689 for child in self.children: 

690 childList = child.getAllPrunable() 

691 vdiList.extend(childList) 

692 if child not in childList: 

693 thisPrunable = False 

694 

695 # We can destroy the current VDI if all childs are hidden BUT the 

696 # current VDI must be hidden too to do that! 

697 # Example in this case (after a failed live leaf coalesce): 

698 # 

699 # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees): 

700 # SMGC: [32436] b5458d61(1.000G/4.127M) 

701 # SMGC: [32436] *OLD_b545(1.000G/4.129M) 

702 # 

703 # OLD_b545 is hidden and must be removed, but b5458d61 not. 

704 # Normally we are not in this function when the delete action is 

705 # executed but in `_liveLeafCoalesce`. 

706 

707 if not self.scanError and not self.hidden and thisPrunable: 

708 vdiList.append(self) 

709 return vdiList 

710 

711 def getSizeVHD(self): 

712 return self._sizeVHD 

713 

714 def getTreeRoot(self): 

715 "Get the root of the tree that self belongs to" 

716 root = self 

717 while root.parent: 

718 root = root.parent 

719 return root 

720 

721 def getTreeHeight(self): 

722 "Get the height of the subtree rooted at self" 

723 if len(self.children) == 0: 

724 return 1 

725 

726 maxChildHeight = 0 

727 for child in self.children: 

728 childHeight = child.getTreeHeight() 

729 if childHeight > maxChildHeight: 

730 maxChildHeight = childHeight 

731 

732 return maxChildHeight + 1 

733 

734 def getAllLeaves(self): 

735 "Get all leaf nodes in the subtree rooted at self" 

736 if len(self.children) == 0: 

737 return [self] 

738 

739 leaves = [] 

740 for child in self.children: 

741 leaves.extend(child.getAllLeaves()) 

742 return leaves 

743 

744 def updateBlockInfo(self): 

745 val = base64.b64encode(self._queryVHDBlocks()).decode() 

746 self.setConfig(VDI.DB_VHD_BLOCKS, val) 

747 return val 

748 

749 def rename(self, uuid): 

750 "Rename the VDI file" 

751 assert(not self.sr.vdis.get(uuid)) 

752 self._clearRef() 

753 oldUuid = self.uuid 

754 self.uuid = uuid 

755 self.children = [] 

756 # updating the children themselves is the responsibility of the caller 

757 del self.sr.vdis[oldUuid] 

758 self.sr.vdis[self.uuid] = self 

759 

760 def delete(self): 

761 "Physically delete the VDI" 

762 lock.Lock.cleanup(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) 

763 lock.Lock.cleanupAll(self.uuid) 

764 self._clear() 

765 

766 def getParent(self): 

767 return vhdutil.getParent(self.path, lambda x: x.strip()) 767 ↛ exitline 767 didn't run the lambda on line 767

768 

769 def repair(self, parent): 

770 vhdutil.repair(parent) 

771 

772 def __str__(self): 

773 strHidden = "" 

774 if self.hidden: 774 ↛ 775line 774 didn't jump to line 775, because the condition on line 774 was never true

775 strHidden = "*" 

776 strSizeVirt = "?" 

777 if self.sizeVirt > 0: 777 ↛ 778line 777 didn't jump to line 778, because the condition on line 777 was never true

778 strSizeVirt = Util.num2str(self.sizeVirt) 

779 strSizeVHD = "?" 

780 if self._sizeVHD > 0: 780 ↛ 781line 780 didn't jump to line 781, because the condition on line 780 was never true

781 strSizeVHD = "/%s" % Util.num2str(self._sizeVHD) 

782 strType = "" 

783 if self.raw: 

784 strType = "[RAW]" 

785 strSizeVHD = "" 

786 

787 return "%s%s(%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt, 

788 strSizeVHD, strType) 

789 

790 def validate(self, fast=False): 

791 if not vhdutil.check(self.path, fast=fast): 791 ↛ 792line 791 didn't jump to line 792, because the condition on line 791 was never true

792 raise util.SMException("VHD %s corrupted" % self) 

793 

794 def _clear(self): 

795 self.uuid = "" 

796 self.path = "" 

797 self.parentUuid = "" 

798 self.parent = None 

799 self._clearRef() 

800 

801 def _clearRef(self): 

802 self._vdiRef = None 

803 

804 def _doCoalesce(self): 

805 """Coalesce self onto parent. Only perform the actual coalescing of 

806 VHD, but not the subsequent relinking. We'll do that as the next step, 

807 after reloading the entire SR in case things have changed while we 

808 were coalescing""" 

809 self.validate() 

810 self.parent.validate(True) 

811 self.parent._increaseSizeVirt(self.sizeVirt) 

812 self.sr._updateSlavesOnResize(self.parent) 

813 self._coalesceVHD(0) 

814 self.parent.validate(True) 

815 #self._verifyContents(0) 

816 self.parent.updateBlockInfo() 

817 

818 def _verifyContents(self, timeOut): 

819 Util.log(" Coalesce verification on %s" % self) 

820 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

821 Util.runAbortable(lambda: self._runTapdiskDiff(), True, 

822 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

823 Util.log(" Coalesce verification succeeded") 

824 

825 def _runTapdiskDiff(self): 

826 cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \ 

827 (self.getDriverName(), self.path, \ 

828 self.parent.getDriverName(), self.parent.path) 

829 Util.doexec(cmd, 0) 

830 return True 

831 

832 def _reportCoalesceError(vdi, ce): 

833 """Reports a coalesce error to XenCenter. 

834 

835 vdi: the VDI object on which the coalesce error occured 

836 ce: the CommandException that was raised""" 

837 

838 msg_name = os.strerror(ce.code) 

839 if ce.code == errno.ENOSPC: 

840 # TODO We could add more information here, e.g. exactly how much 

841 # space is required for the particular coalesce, as well as actions 

842 # to be taken by the user and consequences of not taking these 

843 # actions. 

844 msg_body = 'Run out of space while coalescing.' 

845 elif ce.code == errno.EIO: 

846 msg_body = 'I/O error while coalescing.' 

847 else: 

848 msg_body = '' 

849 util.SMlog('Coalesce failed on SR %s: %s (%s)' 

850 % (vdi.sr.uuid, msg_name, msg_body)) 

851 

852 # Create a XenCenter message, but don't spam. 

853 xapi = vdi.sr.xapi.session.xenapi 

854 sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid) 

855 oth_cfg = xapi.SR.get_other_config(sr_ref) 

856 if COALESCE_ERR_RATE_TAG in oth_cfg: 

857 coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG]) 

858 else: 

859 coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE 

860 

861 xcmsg = False 

862 if coalesce_err_rate == 0: 

863 xcmsg = True 

864 elif coalesce_err_rate > 0: 

865 now = datetime.datetime.now() 

866 sm_cfg = xapi.SR.get_sm_config(sr_ref) 

867 if COALESCE_LAST_ERR_TAG in sm_cfg: 

868 # seconds per message (minimum distance in time between two 

869 # messages in seconds) 

870 spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60) 

871 last = datetime.datetime.fromtimestamp( 

872 float(sm_cfg[COALESCE_LAST_ERR_TAG])) 

873 if now - last >= spm: 

874 xapi.SR.remove_from_sm_config(sr_ref, 

875 COALESCE_LAST_ERR_TAG) 

876 xcmsg = True 

877 else: 

878 xcmsg = True 

879 if xcmsg: 

880 xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG, 

881 str(now.strftime('%s'))) 

882 if xcmsg: 

883 xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body) 

884 _reportCoalesceError = staticmethod(_reportCoalesceError) 

885 

886 def coalesce(self): 

887 vhdutil.coalesce(self.path) 

888 

889 def _doCoalesceVHD(vdi): 

890 try: 

891 startTime = time.time() 

892 vhdSize = vdi.getSizeVHD() 

893 vdi.coalesce() 

894 endTime = time.time() 

895 vdi.sr.recordStorageSpeed(startTime, endTime, vhdSize) 

896 except util.CommandException as ce: 

897 # We use try/except for the following piece of code because it runs 

898 # in a separate process context and errors will not be caught and 

899 # reported by anyone. 

900 try: 

901 # Report coalesce errors back to user via XC 

902 VDI._reportCoalesceError(vdi, ce) 

903 except Exception as e: 

904 util.SMlog('failed to create XenCenter message: %s' % e) 

905 raise ce 

906 except: 

907 raise 

908 _doCoalesceVHD = staticmethod(_doCoalesceVHD) 

909 

910 def _vdi_is_raw(self, vdi_path): 

911 """ 

912 Given path to vdi determine if it is raw 

913 """ 

914 uuid = self.extractUuid(vdi_path) 

915 return self.sr.vdis[uuid].raw 

916 

917 def _coalesceVHD(self, timeOut): 

918 Util.log(" Running VHD coalesce on %s" % self) 

919 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 919 ↛ exitline 919 didn't run the lambda on line 919

920 try: 

921 util.fistpoint.activate_custom_fn( 

922 "cleanup_coalesceVHD_inject_failure", 

923 util.inject_failure) 

924 Util.runAbortable(lambda: VDI._doCoalesceVHD(self), None, 

925 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

926 except: 

927 #exception at this phase could indicate a failure in vhd coalesce 

928 # or a kill of vhd coalesce by runAbortable due to timeOut 

929 # Try a repair and reraise the exception 

930 parent = "" 

931 try: 

932 parent = self.getParent() 

933 if not self._vdi_is_raw(parent): 

934 # Repair error is logged and ignored. Error reraised later 

935 util.SMlog('Coalesce failed on %s, attempting repair on ' \ 

936 'parent %s' % (self.uuid, parent)) 

937 self.repair(parent) 

938 except Exception as e: 

939 util.SMlog('(error ignored) Failed to repair parent %s ' \ 

940 'after failed coalesce on %s, err: %s' % 

941 (parent, self.path, e)) 

942 raise 

943 

944 util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid) 

945 

946 def _relinkSkip(self): 

947 """Relink children of this VDI to point to the parent of this VDI""" 

948 abortFlag = IPCFlag(self.sr.uuid) 

949 for child in self.children: 

950 if abortFlag.test(FLAG_TYPE_ABORT): 950 ↛ 951line 950 didn't jump to line 951, because the condition on line 950 was never true

951 raise AbortException("Aborting due to signal") 

952 Util.log(" Relinking %s from %s to %s" % \ 

953 (child, self, self.parent)) 

954 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid) 

955 child._setParent(self.parent) 

956 self.children = [] 

957 

958 def _reloadChildren(self, vdiSkip): 

959 """Pause & unpause all VDIs in the subtree to cause blktap to reload 

960 the VHD metadata for this file in any online VDI""" 

961 abortFlag = IPCFlag(self.sr.uuid) 

962 for child in self.children: 

963 if child == vdiSkip: 

964 continue 

965 if abortFlag.test(FLAG_TYPE_ABORT): 965 ↛ 966line 965 didn't jump to line 966, because the condition on line 965 was never true

966 raise AbortException("Aborting due to signal") 

967 Util.log(" Reloading VDI %s" % child) 

968 child._reload() 

969 

970 def _reload(self): 

971 """Pause & unpause to cause blktap to reload the VHD metadata""" 

972 for child in self.children: 972 ↛ 973line 972 didn't jump to line 973, because the loop on line 972 never started

973 child._reload() 

974 

975 # only leaves can be attached 

976 if len(self.children) == 0: 976 ↛ exitline 976 didn't return from function '_reload', because the condition on line 976 was never false

977 try: 

978 self.delConfig(VDI.DB_VDI_RELINKING) 

979 except XenAPI.Failure as e: 

980 if not util.isInvalidVDI(e): 

981 raise 

982 self.refresh() 

983 

984 def _tagChildrenForRelink(self): 

985 if len(self.children) == 0: 

986 retries = 0 

987 try: 

988 while retries < 15: 

989 retries += 1 

990 if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None: 

991 Util.log("VDI %s is activating, wait to relink" % 

992 self.uuid) 

993 else: 

994 self.setConfig(VDI.DB_VDI_RELINKING, "True") 

995 

996 if self.getConfig(VDI.DB_VDI_ACTIVATING): 

997 self.delConfig(VDI.DB_VDI_RELINKING) 

998 Util.log("VDI %s started activating while tagging" % 

999 self.uuid) 

1000 else: 

1001 return 

1002 time.sleep(2) 

1003 

1004 raise util.SMException("Failed to tag vdi %s for relink" % self) 

1005 except XenAPI.Failure as e: 

1006 if not util.isInvalidVDI(e): 

1007 raise 

1008 

1009 for child in self.children: 

1010 child._tagChildrenForRelink() 

1011 

1012 def _loadInfoParent(self): 

1013 ret = vhdutil.getParent(self.path, lvhdutil.extractUuid) 

1014 if ret: 

1015 self.parentUuid = ret 

1016 

1017 def _setParent(self, parent): 

1018 vhdutil.setParent(self.path, parent.path, False) 

1019 self.parent = parent 

1020 self.parentUuid = parent.uuid 

1021 parent.children.append(self) 

1022 try: 

1023 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1024 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1025 (self.uuid, self.parentUuid)) 

1026 except: 

1027 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1028 (self.uuid, self.parentUuid)) 

1029 

1030 def _loadInfoHidden(self): 

1031 hidden = vhdutil.getHidden(self.path) 

1032 self.hidden = (hidden != 0) 

1033 

1034 def _setHidden(self, hidden=True): 

1035 vhdutil.setHidden(self.path, hidden) 

1036 self.hidden = hidden 

1037 

1038 def _increaseSizeVirt(self, size, atomic=True): 

1039 """ensure the virtual size of 'self' is at least 'size'. Note that 

1040 resizing a VHD must always be offline and atomically: the file must 

1041 not be open by anyone and no concurrent operations may take place. 

1042 Thus we use the Agent API call for performing paused atomic 

1043 operations. If the caller is already in the atomic context, it must 

1044 call with atomic = False""" 

1045 if self.sizeVirt >= size: 1045 ↛ 1047line 1045 didn't jump to line 1047, because the condition on line 1045 was never false

1046 return 

1047 Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \ 

1048 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1049 

1050 msize = vhdutil.getMaxResizeSize(self.path) * 1024 * 1024 

1051 if (size <= msize): 

1052 vhdutil.setSizeVirtFast(self.path, size) 

1053 else: 

1054 if atomic: 

1055 vdiList = self._getAllSubtree() 

1056 self.sr.lock() 

1057 try: 

1058 self.sr.pauseVDIs(vdiList) 

1059 try: 

1060 self._setSizeVirt(size) 

1061 finally: 

1062 self.sr.unpauseVDIs(vdiList) 

1063 finally: 

1064 self.sr.unlock() 

1065 else: 

1066 self._setSizeVirt(size) 

1067 

1068 self.sizeVirt = vhdutil.getSizeVirt(self.path) 

1069 

1070 def _setSizeVirt(self, size): 

1071 """WARNING: do not call this method directly unless all VDIs in the 

1072 subtree are guaranteed to be unplugged (and remain so for the duration 

1073 of the operation): this operation is only safe for offline VHDs""" 

1074 jFile = os.path.join(self.sr.path, self.uuid) 

1075 vhdutil.setSizeVirt(self.path, size, jFile) 

1076 

1077 def _queryVHDBlocks(self): 

1078 return vhdutil.getBlockBitmap(self.path) 

1079 

1080 def _getCoalescedSizeData(self): 

1081 """Get the data size of the resulting VHD if we coalesce self onto 

1082 parent. We calculate the actual size by using the VHD block allocation 

1083 information (as opposed to just adding up the two VHD sizes to get an 

1084 upper bound)""" 

1085 # make sure we don't use stale BAT info from vdi_rec since the child 

1086 # was writable all this time 

1087 self.delConfig(VDI.DB_VHD_BLOCKS) 

1088 blocksChild = self.getVHDBlocks() 

1089 blocksParent = self.parent.getVHDBlocks() 

1090 numBlocks = Util.countBits(blocksChild, blocksParent) 

1091 Util.log("Num combined blocks = %d" % numBlocks) 

1092 sizeData = numBlocks * vhdutil.VHD_BLOCK_SIZE 

1093 assert(sizeData <= self.sizeVirt) 

1094 return sizeData 

1095 

1096 def _calcExtraSpaceForCoalescing(self): 

1097 sizeData = self._getCoalescedSizeData() 

1098 sizeCoalesced = sizeData + vhdutil.calcOverheadBitmap(sizeData) + \ 

1099 vhdutil.calcOverheadEmpty(self.sizeVirt) 

1100 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1101 return sizeCoalesced - self.parent.getSizeVHD() 

1102 

1103 def _calcExtraSpaceForLeafCoalescing(self): 

1104 """How much extra space in the SR will be required to 

1105 [live-]leaf-coalesce this VDI""" 

1106 # the space requirements are the same as for inline coalesce 

1107 return self._calcExtraSpaceForCoalescing() 

1108 

1109 def _calcExtraSpaceForSnapshotCoalescing(self): 

1110 """How much extra space in the SR will be required to 

1111 snapshot-coalesce this VDI""" 

1112 return self._calcExtraSpaceForCoalescing() + \ 

1113 vhdutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf 

1114 

1115 def _getAllSubtree(self): 

1116 """Get self and all VDIs in the subtree of self as a flat list""" 

1117 vdiList = [self] 

1118 for child in self.children: 

1119 vdiList.extend(child._getAllSubtree()) 

1120 return vdiList 

1121 

1122 

1123class FileVDI(VDI): 

1124 """Object representing a VDI in a file-based SR (EXT or NFS)""" 

1125 

1126 @staticmethod 

1127 def extractUuid(path): 

1128 path = os.path.basename(path.strip()) 

1129 if not (path.endswith(vhdutil.FILE_EXTN_VHD) or \ 1129 ↛ 1131line 1129 didn't jump to line 1131, because the condition on line 1129 was never true

1130 path.endswith(vhdutil.FILE_EXTN_RAW)): 

1131 return None 

1132 uuid = path.replace(vhdutil.FILE_EXTN_VHD, "").replace( \ 

1133 vhdutil.FILE_EXTN_RAW, "") 

1134 # TODO: validate UUID format 

1135 return uuid 

1136 

1137 def __init__(self, sr, uuid, raw): 

1138 VDI.__init__(self, sr, uuid, raw) 

1139 if self.raw: 1139 ↛ 1140line 1139 didn't jump to line 1140, because the condition on line 1139 was never true

1140 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_RAW) 

1141 else: 

1142 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) 

1143 

1144 def load(self, info=None): 

1145 if not info: 

1146 if not util.pathexists(self.path): 

1147 raise util.SMException("%s not found" % self.path) 

1148 try: 

1149 info = vhdutil.getVHDInfo(self.path, self.extractUuid) 

1150 except util.SMException: 

1151 Util.log(" [VDI %s: failed to read VHD metadata]" % self.uuid) 

1152 return 

1153 self.parent = None 

1154 self.children = [] 

1155 self.parentUuid = info.parentUuid 

1156 self.sizeVirt = info.sizeVirt 

1157 self._sizeVHD = info.sizePhys 

1158 self.hidden = info.hidden 

1159 self.scanError = False 

1160 self.path = os.path.join(self.sr.path, "%s%s" % \ 

1161 (self.uuid, vhdutil.FILE_EXTN_VHD)) 

1162 

1163 def rename(self, uuid): 

1164 oldPath = self.path 

1165 VDI.rename(self, uuid) 

1166 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) 

1167 self.path = os.path.join(self.sr.path, self.fileName) 

1168 assert(not util.pathexists(self.path)) 

1169 Util.log("Renaming %s -> %s" % (oldPath, self.path)) 

1170 os.rename(oldPath, self.path) 

1171 

1172 def delete(self): 

1173 if len(self.children) > 0: 1173 ↛ 1174line 1173 didn't jump to line 1174, because the condition on line 1173 was never true

1174 raise util.SMException("VDI %s has children, can't delete" % \ 

1175 self.uuid) 

1176 try: 

1177 self.sr.lock() 

1178 try: 

1179 os.unlink(self.path) 

1180 self.sr.forgetVDI(self.uuid) 

1181 finally: 

1182 self.sr.unlock() 

1183 except OSError: 

1184 raise util.SMException("os.unlink(%s) failed" % self.path) 

1185 VDI.delete(self) 

1186 

1187 

1188class LVHDVDI(VDI): 

1189 """Object representing a VDI in an LVHD SR""" 

1190 

1191 JRN_ZERO = "zero" # journal entry type for zeroing out end of parent 

1192 DRIVER_NAME_RAW = "aio" 

1193 

1194 def load(self, vdiInfo): 

1195 self.parent = None 

1196 self.children = [] 

1197 self._sizeVHD = -1 

1198 self.scanError = vdiInfo.scanError 

1199 self.sizeLV = vdiInfo.sizeLV 

1200 self.sizeVirt = vdiInfo.sizeVirt 

1201 self.fileName = vdiInfo.lvName 

1202 self.lvActive = vdiInfo.lvActive 

1203 self.lvOpen = vdiInfo.lvOpen 

1204 self.lvReadonly = vdiInfo.lvReadonly 

1205 self.hidden = vdiInfo.hidden 

1206 self.parentUuid = vdiInfo.parentUuid 

1207 self.path = os.path.join(self.sr.path, self.fileName) 

1208 

1209 @staticmethod 

1210 def extractUuid(path): 

1211 return lvhdutil.extractUuid(path) 

1212 

1213 def getDriverName(self): 

1214 if self.raw: 

1215 return self.DRIVER_NAME_RAW 

1216 return self.DRIVER_NAME_VHD 

1217 

1218 def inflate(self, size): 

1219 """inflate the LV containing the VHD to 'size'""" 

1220 if self.raw: 

1221 return 

1222 self._activate() 

1223 self.sr.lock() 

1224 try: 

1225 lvhdutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, size) 

1226 util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid) 

1227 finally: 

1228 self.sr.unlock() 

1229 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1230 self._sizeVHD = -1 

1231 

1232 def deflate(self): 

1233 """deflate the LV containing the VHD to minimum""" 

1234 if self.raw: 

1235 return 

1236 self._activate() 

1237 self.sr.lock() 

1238 try: 

1239 lvhdutil.deflate(self.sr.lvmCache, self.fileName, self.getSizeVHD()) 

1240 finally: 

1241 self.sr.unlock() 

1242 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1243 self._sizeVHD = -1 

1244 

1245 def inflateFully(self): 

1246 self.inflate(lvhdutil.calcSizeVHDLV(self.sizeVirt)) 

1247 

1248 def inflateParentForCoalesce(self): 

1249 """Inflate the parent only as much as needed for the purposes of 

1250 coalescing""" 

1251 if self.parent.raw: 

1252 return 

1253 inc = self._calcExtraSpaceForCoalescing() 

1254 if inc > 0: 

1255 util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid) 

1256 self.parent.inflate(self.parent.sizeLV + inc) 

1257 

1258 def updateBlockInfo(self): 

1259 if not self.raw: 

1260 return VDI.updateBlockInfo(self) 

1261 

1262 def rename(self, uuid): 

1263 oldUuid = self.uuid 

1264 oldLVName = self.fileName 

1265 VDI.rename(self, uuid) 

1266 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + self.uuid 

1267 if self.raw: 

1268 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + self.uuid 

1269 self.path = os.path.join(self.sr.path, self.fileName) 

1270 assert(not self.sr.lvmCache.checkLV(self.fileName)) 

1271 

1272 self.sr.lvmCache.rename(oldLVName, self.fileName) 

1273 if self.sr.lvActivator.get(oldUuid, False): 

1274 self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False) 

1275 

1276 ns = lvhdutil.NS_PREFIX_LVM + self.sr.uuid 

1277 (cnt, bcnt) = RefCounter.check(oldUuid, ns) 

1278 RefCounter.set(self.uuid, cnt, bcnt, ns) 

1279 RefCounter.reset(oldUuid, ns) 

1280 

1281 def delete(self): 

1282 if len(self.children) > 0: 

1283 raise util.SMException("VDI %s has children, can't delete" % \ 

1284 self.uuid) 

1285 self.sr.lock() 

1286 try: 

1287 self.sr.lvmCache.remove(self.fileName) 

1288 self.sr.forgetVDI(self.uuid) 

1289 finally: 

1290 self.sr.unlock() 

1291 RefCounter.reset(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) 

1292 VDI.delete(self) 

1293 

1294 def getSizeVHD(self): 

1295 if self._sizeVHD == -1: 

1296 self._loadInfoSizeVHD() 

1297 return self._sizeVHD 

1298 

1299 def _loadInfoSizeVHD(self): 

1300 """Get the physical utilization of the VHD file. We do it individually 

1301 (and not using the VHD batch scanner) as an optimization: this info is 

1302 relatively expensive and we need it only for VDI's involved in 

1303 coalescing.""" 

1304 if self.raw: 

1305 return 

1306 self._activate() 

1307 self._sizeVHD = vhdutil.getSizePhys(self.path) 

1308 if self._sizeVHD <= 0: 

1309 raise util.SMException("phys size of %s = %d" % \ 

1310 (self, self._sizeVHD)) 

1311 

1312 def _loadInfoHidden(self): 

1313 if self.raw: 

1314 self.hidden = self.sr.lvmCache.getHidden(self.fileName) 

1315 else: 

1316 VDI._loadInfoHidden(self) 

1317 

1318 def _setHidden(self, hidden=True): 

1319 if self.raw: 

1320 self.sr.lvmCache.setHidden(self.fileName, hidden) 

1321 self.hidden = hidden 

1322 else: 

1323 VDI._setHidden(self, hidden) 

1324 

1325 def __str__(self): 

1326 strType = "VHD" 

1327 if self.raw: 

1328 strType = "RAW" 

1329 strHidden = "" 

1330 if self.hidden: 

1331 strHidden = "*" 

1332 strSizeVHD = "" 

1333 if self._sizeVHD > 0: 

1334 strSizeVHD = Util.num2str(self._sizeVHD) 

1335 strActive = "n" 

1336 if self.lvActive: 

1337 strActive = "a" 

1338 if self.lvOpen: 

1339 strActive += "o" 

1340 return "%s%s[%s](%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType, 

1341 Util.num2str(self.sizeVirt), strSizeVHD, 

1342 Util.num2str(self.sizeLV), strActive) 

1343 

1344 def validate(self, fast=False): 

1345 if not self.raw: 

1346 VDI.validate(self, fast) 

1347 

1348 def _doCoalesce(self): 

1349 """LVHD parents must first be activated, inflated, and made writable""" 

1350 try: 

1351 self._activateChain() 

1352 self.sr.lvmCache.setReadonly(self.parent.fileName, False) 

1353 self.parent.validate() 

1354 self.inflateParentForCoalesce() 

1355 VDI._doCoalesce(self) 

1356 finally: 

1357 self.parent._loadInfoSizeVHD() 

1358 self.parent.deflate() 

1359 self.sr.lvmCache.setReadonly(self.parent.fileName, True) 

1360 

1361 def _setParent(self, parent): 

1362 self._activate() 

1363 if self.lvReadonly: 

1364 self.sr.lvmCache.setReadonly(self.fileName, False) 

1365 

1366 try: 

1367 vhdutil.setParent(self.path, parent.path, parent.raw) 

1368 finally: 

1369 if self.lvReadonly: 

1370 self.sr.lvmCache.setReadonly(self.fileName, True) 

1371 self._deactivate() 

1372 self.parent = parent 

1373 self.parentUuid = parent.uuid 

1374 parent.children.append(self) 

1375 try: 

1376 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1377 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1378 (self.uuid, self.parentUuid)) 

1379 except: 

1380 Util.log("Failed to update the vhd-parent with %s for child %s" % \ 

1381 (self.parentUuid, self.uuid)) 

1382 

1383 def _activate(self): 

1384 self.sr.lvActivator.activate(self.uuid, self.fileName, False) 

1385 

1386 def _activateChain(self): 

1387 vdi = self 

1388 while vdi: 

1389 vdi._activate() 

1390 vdi = vdi.parent 

1391 

1392 def _deactivate(self): 

1393 self.sr.lvActivator.deactivate(self.uuid, False) 

1394 

1395 def _increaseSizeVirt(self, size, atomic=True): 

1396 "ensure the virtual size of 'self' is at least 'size'" 

1397 self._activate() 

1398 if not self.raw: 

1399 VDI._increaseSizeVirt(self, size, atomic) 

1400 return 

1401 

1402 # raw VDI case 

1403 offset = self.sizeLV 

1404 if self.sizeVirt < size: 

1405 oldSize = self.sizeLV 

1406 self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size) 

1407 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV)) 

1408 self.sr.lvmCache.setSize(self.fileName, self.sizeLV) 

1409 offset = oldSize 

1410 unfinishedZero = False 

1411 jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid) 

1412 if jval: 

1413 unfinishedZero = True 

1414 offset = int(jval) 

1415 length = self.sizeLV - offset 

1416 if not length: 

1417 return 

1418 

1419 if unfinishedZero: 

1420 Util.log(" ==> Redoing unfinished zeroing out") 

1421 else: 

1422 self.sr.journaler.create(self.JRN_ZERO, self.uuid, \ 

1423 str(offset)) 

1424 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1425 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1426 func = lambda: util.zeroOut(self.path, offset, length) 

1427 Util.runAbortable(func, True, self.sr.uuid, abortTest, 

1428 VDI.POLL_INTERVAL, 0) 

1429 self.sr.journaler.remove(self.JRN_ZERO, self.uuid) 

1430 

1431 def _setSizeVirt(self, size): 

1432 """WARNING: do not call this method directly unless all VDIs in the 

1433 subtree are guaranteed to be unplugged (and remain so for the duration 

1434 of the operation): this operation is only safe for offline VHDs""" 

1435 self._activate() 

1436 jFile = lvhdutil.createVHDJournalLV(self.sr.lvmCache, self.uuid, 

1437 vhdutil.MAX_VHD_JOURNAL_SIZE) 

1438 try: 

1439 lvhdutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, 

1440 size, jFile) 

1441 finally: 

1442 lvhdutil.deleteVHDJournalLV(self.sr.lvmCache, self.uuid) 

1443 

1444 def _queryVHDBlocks(self): 

1445 self._activate() 

1446 return VDI._queryVHDBlocks(self) 

1447 

1448 def _calcExtraSpaceForCoalescing(self): 

1449 if self.parent.raw: 

1450 return 0 # raw parents are never deflated in the first place 

1451 sizeCoalesced = lvhdutil.calcSizeVHDLV(self._getCoalescedSizeData()) 

1452 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1453 return sizeCoalesced - self.parent.sizeLV 

1454 

1455 def _calcExtraSpaceForLeafCoalescing(self): 

1456 """How much extra space in the SR will be required to 

1457 [live-]leaf-coalesce this VDI""" 

1458 # we can deflate the leaf to minimize the space requirements 

1459 deflateDiff = self.sizeLV - lvhdutil.calcSizeLV(self.getSizeVHD()) 

1460 return self._calcExtraSpaceForCoalescing() - deflateDiff 

1461 

1462 def _calcExtraSpaceForSnapshotCoalescing(self): 

1463 return self._calcExtraSpaceForCoalescing() + \ 

1464 lvhdutil.calcSizeLV(self.getSizeVHD()) 

1465 

1466 

1467class LinstorVDI(VDI): 

1468 """Object representing a VDI in a LINSTOR SR""" 

1469 

1470 VOLUME_LOCK_TIMEOUT = 30 

1471 

1472 def load(self, info=None): 

1473 self.parentUuid = info.parentUuid 

1474 self.scanError = True 

1475 self.parent = None 

1476 self.children = [] 

1477 

1478 self.fileName = self.sr._linstor.get_volume_name(self.uuid) 

1479 self.path = self.sr._linstor.build_device_path(self.fileName) 

1480 

1481 if not info: 

1482 try: 

1483 info = self.sr._vhdutil.get_vhd_info(self.uuid) 

1484 except util.SMException: 

1485 Util.log( 

1486 ' [VDI {}: failed to read VHD metadata]'.format(self.uuid) 

1487 ) 

1488 return 

1489 

1490 self.parentUuid = info.parentUuid 

1491 self.sizeVirt = info.sizeVirt 

1492 self._sizeVHD = -1 

1493 self.drbd_size = -1 

1494 self.hidden = info.hidden 

1495 self.scanError = False 

1496 self.vdi_type = vhdutil.VDI_TYPE_VHD 

1497 

1498 def getSizeVHD(self, fetch=False): 

1499 if self._sizeVHD < 0 or fetch: 

1500 self._sizeVHD = self.sr._vhdutil.get_size_phys(self.uuid) 

1501 return self._sizeVHD 

1502 

1503 def getDrbdSize(self, fetch=False): 

1504 if self.drbd_size < 0 or fetch: 

1505 self.drbd_size = self.sr._vhdutil.get_drbd_size(self.uuid) 

1506 return self.drbd_size 

1507 

1508 def inflate(self, size): 

1509 if self.raw: 

1510 return 

1511 self.sr.lock() 

1512 try: 

1513 # Ensure we use the real DRBD size and not the cached one. 

1514 # Why? Because this attribute can be changed if volume is resized by user. 

1515 self.drbd_size = self.getDrbdSize(fetch=True) 

1516 self.sr._vhdutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size) 

1517 finally: 

1518 self.sr.unlock() 

1519 self.drbd_size = -1 

1520 self._sizeVHD = -1 

1521 

1522 def deflate(self): 

1523 if self.raw: 

1524 return 

1525 self.sr.lock() 

1526 try: 

1527 # Ensure we use the real sizes and not the cached info. 

1528 self.drbd_size = self.getDrbdSize(fetch=True) 

1529 self._sizeVHD = self.getSizeVHD(fetch=True) 

1530 self.sr._vhdutil.force_deflate(self.path, self._sizeVHD, self.drbd_size, zeroize=False) 

1531 finally: 

1532 self.sr.unlock() 

1533 self.drbd_size = -1 

1534 self._sizeVHD = -1 

1535 

1536 def inflateFully(self): 

1537 if not self.raw: 

1538 self.inflate(LinstorVhdUtil.compute_volume_size(self.sizeVirt, self.vdi_type)) 

1539 

1540 def rename(self, uuid): 

1541 Util.log('Renaming {} -> {} (path={})'.format( 

1542 self.uuid, uuid, self.path 

1543 )) 

1544 self.sr._linstor.update_volume_uuid(self.uuid, uuid) 

1545 VDI.rename(self, uuid) 

1546 

1547 def delete(self): 

1548 if len(self.children) > 0: 

1549 raise util.SMException( 

1550 'VDI {} has children, can\'t delete'.format(self.uuid) 

1551 ) 

1552 self.sr.lock() 

1553 try: 

1554 self.sr._linstor.destroy_volume(self.uuid) 

1555 self.sr.forgetVDI(self.uuid) 

1556 finally: 

1557 self.sr.unlock() 

1558 VDI.delete(self) 

1559 

1560 def validate(self, fast=False): 

1561 if not self.raw and not self.sr._vhdutil.check(self.uuid, fast=fast): 

1562 raise util.SMException('VHD {} corrupted'.format(self)) 

1563 

1564 def pause(self, failfast=False): 

1565 self.sr._linstor.ensure_volume_is_not_locked( 

1566 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1567 ) 

1568 return super(LinstorVDI, self).pause(failfast) 

1569 

1570 def coalesce(self): 

1571 # Note: We raise `SMException` here to skip the current coalesce in case of failure. 

1572 # Using another exception we can't execute the next coalesce calls. 

1573 self.sr._vhdutil.force_coalesce(self.path) 

1574 

1575 def getParent(self): 

1576 return self.sr._vhdutil.get_parent( 

1577 self.sr._linstor.get_volume_uuid_from_device_path(self.path) 

1578 ) 

1579 

1580 def repair(self, parent_uuid): 

1581 self.sr._vhdutil.force_repair( 

1582 self.sr._linstor.get_device_path(parent_uuid) 

1583 ) 

1584 

1585 def _relinkSkip(self): 

1586 abortFlag = IPCFlag(self.sr.uuid) 

1587 for child in self.children: 

1588 if abortFlag.test(FLAG_TYPE_ABORT): 

1589 raise AbortException('Aborting due to signal') 

1590 Util.log( 

1591 ' Relinking {} from {} to {}'.format( 

1592 child, self, self.parent 

1593 ) 

1594 ) 

1595 

1596 session = child.sr.xapi.session 

1597 sr_uuid = child.sr.uuid 

1598 vdi_uuid = child.uuid 

1599 try: 

1600 self.sr._linstor.ensure_volume_is_not_locked( 

1601 vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1602 ) 

1603 blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid) 

1604 child._setParent(self.parent) 

1605 finally: 

1606 blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) 

1607 self.children = [] 

1608 

1609 def _setParent(self, parent): 

1610 self.sr._linstor.get_device_path(self.uuid) 

1611 self.sr._vhdutil.force_parent(self.path, parent.path) 

1612 self.parent = parent 

1613 self.parentUuid = parent.uuid 

1614 parent.children.append(self) 

1615 try: 

1616 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1617 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1618 (self.uuid, self.parentUuid)) 

1619 except: 

1620 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1621 (self.uuid, self.parentUuid)) 

1622 

1623 def _doCoalesce(self): 

1624 try: 

1625 self._activateChain() 

1626 self.parent.validate() 

1627 self._inflateParentForCoalesce() 

1628 VDI._doCoalesce(self) 

1629 finally: 

1630 self.parent.deflate() 

1631 

1632 def _activateChain(self): 

1633 vdi = self 

1634 while vdi: 

1635 try: 

1636 p = self.sr._linstor.get_device_path(vdi.uuid) 

1637 except Exception as e: 

1638 # Use SMException to skip coalesce. 

1639 # Otherwise the GC is stopped... 

1640 raise util.SMException(str(e)) 

1641 vdi = vdi.parent 

1642 

1643 def _setHidden(self, hidden=True): 

1644 HIDDEN_TAG = 'hidden' 

1645 

1646 if self.raw: 

1647 self.sr._linstor.update_volume_metadata(self.uuid, { 

1648 HIDDEN_TAG: hidden 

1649 }) 

1650 self.hidden = hidden 

1651 else: 

1652 VDI._setHidden(self, hidden) 

1653 

1654 def _setSizeVirt(self, size): 

1655 jfile = self.uuid + '-jvhd' 

1656 self.sr._linstor.create_volume( 

1657 jfile, vhdutil.MAX_VHD_JOURNAL_SIZE, persistent=False, volume_name=jfile 

1658 ) 

1659 try: 

1660 self.inflate(LinstorVhdUtil.compute_volume_size(size, self.vdi_type)) 

1661 self.sr._vhdutil.set_size_virt(size, jfile) 

1662 finally: 

1663 try: 

1664 self.sr._linstor.destroy_volume(jfile) 

1665 except Exception: 

1666 # We can ignore it, in any case this volume is not persistent. 

1667 pass 

1668 

1669 def _queryVHDBlocks(self): 

1670 return self.sr._vhdutil.get_block_bitmap(self.uuid) 

1671 

1672 def _inflateParentForCoalesce(self): 

1673 if self.parent.raw: 

1674 return 

1675 inc = self._calcExtraSpaceForCoalescing() 

1676 if inc > 0: 

1677 self.parent.inflate(self.parent.getDrbdSize() + inc) 

1678 

1679 def _calcExtraSpaceForCoalescing(self): 

1680 if self.parent.raw: 

1681 return 0 

1682 size_coalesced = LinstorVhdUtil.compute_volume_size( 

1683 self._getCoalescedSizeData(), self.vdi_type 

1684 ) 

1685 Util.log("Coalesced size = %s" % Util.num2str(size_coalesced)) 

1686 return size_coalesced - self.parent.getDrbdSize() 

1687 

1688 def _calcExtraSpaceForLeafCoalescing(self): 

1689 assert self.getDrbdSize() > 0 

1690 assert self.getSizeVHD() > 0 

1691 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizeVHD()) 

1692 assert deflate_diff >= 0 

1693 return self._calcExtraSpaceForCoalescing() - deflate_diff 

1694 

1695 def _calcExtraSpaceForSnapshotCoalescing(self): 

1696 assert self.getSizeVHD() > 0 

1697 return self._calcExtraSpaceForCoalescing() + \ 

1698 LinstorVolumeManager.round_up_volume_size(self.getSizeVHD()) 

1699 

1700################################################################################ 

1701# 

1702# SR 

1703# 

1704class SR(object): 

1705 class LogFilter: 

1706 def __init__(self, sr): 

1707 self.sr = sr 

1708 self.stateLogged = False 

1709 self.prevState = {} 

1710 self.currState = {} 

1711 

1712 def logState(self): 

1713 changes = "" 

1714 self.currState.clear() 

1715 for vdi in self.sr.vdiTrees: 

1716 self.currState[vdi.uuid] = self._getTreeStr(vdi) 

1717 if not self.prevState.get(vdi.uuid) or \ 

1718 self.prevState[vdi.uuid] != self.currState[vdi.uuid]: 

1719 changes += self.currState[vdi.uuid] 

1720 

1721 for uuid in self.prevState: 

1722 if not self.currState.get(uuid): 

1723 changes += "Tree %s gone\n" % uuid 

1724 

1725 result = "SR %s (%d VDIs in %d VHD trees): " % \ 

1726 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees)) 

1727 

1728 if len(changes) > 0: 

1729 if self.stateLogged: 

1730 result += "showing only VHD trees that changed:" 

1731 result += "\n%s" % changes 

1732 else: 

1733 result += "no changes" 

1734 

1735 for line in result.split("\n"): 

1736 Util.log("%s" % line) 

1737 self.prevState.clear() 

1738 for key, val in self.currState.items(): 

1739 self.prevState[key] = val 

1740 self.stateLogged = True 

1741 

1742 def logNewVDI(self, uuid): 

1743 if self.stateLogged: 

1744 Util.log("Found new VDI when scanning: %s" % uuid) 

1745 

1746 def _getTreeStr(self, vdi, indent=8): 

1747 treeStr = "%s%s\n" % (" " * indent, vdi) 

1748 for child in vdi.children: 

1749 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT) 

1750 return treeStr 

1751 

1752 TYPE_FILE = "file" 

1753 TYPE_LVHD = "lvhd" 

1754 TYPE_LINSTOR = "linstor" 

1755 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR] 

1756 

1757 LOCK_RETRY_INTERVAL = 3 

1758 LOCK_RETRY_ATTEMPTS = 20 

1759 LOCK_RETRY_ATTEMPTS_LOCK = 100 

1760 

1761 SCAN_RETRY_ATTEMPTS = 3 

1762 

1763 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM) 

1764 TMP_RENAME_PREFIX = "OLD_" 

1765 

1766 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline" 

1767 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override" 

1768 

1769 def getInstance(uuid, xapiSession, createLock=True, force=False): 

1770 xapi = XAPI(xapiSession, uuid) 

1771 type = normalizeType(xapi.srRecord["type"]) 

1772 if type == SR.TYPE_FILE: 

1773 return FileSR(uuid, xapi, createLock, force) 

1774 elif type == SR.TYPE_LVHD: 

1775 return LVHDSR(uuid, xapi, createLock, force) 

1776 elif type == SR.TYPE_LINSTOR: 

1777 return LinstorSR(uuid, xapi, createLock, force) 

1778 raise util.SMException("SR type %s not recognized" % type) 

1779 getInstance = staticmethod(getInstance) 

1780 

1781 def __init__(self, uuid, xapi, createLock, force): 

1782 self.logFilter = self.LogFilter(self) 

1783 self.uuid = uuid 

1784 self.path = "" 

1785 self.name = "" 

1786 self.vdis = {} 

1787 self.vdiTrees = [] 

1788 self.journaler = None 

1789 self.xapi = xapi 

1790 self._locked = 0 

1791 self._srLock = None 

1792 if createLock: 1792 ↛ 1793line 1792 didn't jump to line 1793, because the condition on line 1792 was never true

1793 self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, self.uuid) 

1794 else: 

1795 Util.log("Requested no SR locking") 

1796 self.name = self.xapi.srRecord["name_label"] 

1797 self._failedCoalesceTargets = [] 

1798 

1799 if not self.xapi.isPluggedHere(): 1799 ↛ 1800line 1799 didn't jump to line 1800, because the condition on line 1799 was never true

1800 if force: 

1801 Util.log("SR %s not attached on this host, ignoring" % uuid) 

1802 else: 

1803 raise util.SMException("SR %s not attached on this host" % uuid) 

1804 

1805 if force: 1805 ↛ 1806line 1805 didn't jump to line 1806, because the condition on line 1805 was never true

1806 Util.log("Not checking if we are Master (SR %s)" % uuid) 

1807 elif not self.xapi.isMaster(): 1807 ↛ 1808line 1807 didn't jump to line 1808, because the condition on line 1807 was never true

1808 raise util.SMException("This host is NOT master, will not run") 

1809 

1810 def gcEnabled(self, refresh=True): 

1811 if refresh: 

1812 self.xapi.srRecord = \ 

1813 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) 

1814 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": 

1815 Util.log("GC is disabled for this SR, abort") 

1816 return False 

1817 return True 

1818 

1819 def scan(self, force=False): 

1820 """Scan the SR and load VDI info for each VDI. If called repeatedly, 

1821 update VDI objects if they already exist""" 

1822 pass # abstract 

1823 

1824 def scanLocked(self, force=False): 

1825 self.lock() 

1826 try: 

1827 self.scan(force) 

1828 finally: 

1829 self.unlock() 

1830 

1831 def getVDI(self, uuid): 

1832 return self.vdis.get(uuid) 

1833 

1834 def hasWork(self): 

1835 if len(self.findGarbage()) > 0: 

1836 return True 

1837 if self.findCoalesceable(): 

1838 return True 

1839 if self.findLeafCoalesceable(): 

1840 return True 

1841 if self.needUpdateBlockInfo(): 

1842 return True 

1843 return False 

1844 

1845 def findCoalesceable(self): 

1846 """Find a coalesceable VDI. Return a vdi that should be coalesced 

1847 (choosing one among all coalesceable candidates according to some 

1848 criteria) or None if there is no VDI that could be coalesced""" 

1849 

1850 candidates = [] 

1851 

1852 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE) 

1853 if srSwitch == "false": 

1854 Util.log("Coalesce disabled for this SR") 

1855 return candidates 

1856 

1857 # finish any VDI for which a relink journal entry exists first 

1858 journals = self.journaler.getAll(VDI.JRN_RELINK) 

1859 for uuid in journals: 

1860 vdi = self.getVDI(uuid) 

1861 if vdi and vdi not in self._failedCoalesceTargets: 

1862 return vdi 

1863 

1864 for vdi in self.vdis.values(): 

1865 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets: 

1866 candidates.append(vdi) 

1867 Util.log("%s is coalescable" % vdi.uuid) 

1868 

1869 self.xapi.update_task_progress("coalescable", len(candidates)) 

1870 

1871 # pick one in the tallest tree 

1872 treeHeight = dict() 

1873 for c in candidates: 

1874 height = c.getTreeRoot().getTreeHeight() 

1875 if treeHeight.get(height): 

1876 treeHeight[height].append(c) 

1877 else: 

1878 treeHeight[height] = [c] 

1879 

1880 freeSpace = self.getFreeSpace() 

1881 heights = list(treeHeight.keys()) 

1882 heights.sort(reverse=True) 

1883 for h in heights: 

1884 for c in treeHeight[h]: 

1885 spaceNeeded = c._calcExtraSpaceForCoalescing() 

1886 if spaceNeeded <= freeSpace: 

1887 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h)) 

1888 return c 

1889 else: 

1890 Util.log("No space to coalesce %s (free space: %d)" % \ 

1891 (c, freeSpace)) 

1892 return None 

1893 

1894 def getSwitch(self, key): 

1895 return self.xapi.srRecord["other_config"].get(key) 

1896 

1897 def forbiddenBySwitch(self, switch, condition, fail_msg): 

1898 srSwitch = self.getSwitch(switch) 

1899 ret = False 

1900 if srSwitch: 

1901 ret = srSwitch == condition 

1902 

1903 if ret: 

1904 Util.log(fail_msg) 

1905 

1906 return ret 

1907 

1908 def leafCoalesceForbidden(self): 

1909 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

1910 "false", 

1911 "Coalesce disabled for this SR") or 

1912 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

1913 VDI.LEAFCLSC_DISABLED, 

1914 "Leaf-coalesce disabled for this SR")) 

1915 

1916 def findLeafCoalesceable(self): 

1917 """Find leaf-coalesceable VDIs in each VHD tree""" 

1918 

1919 candidates = [] 

1920 if self.leafCoalesceForbidden(): 

1921 return candidates 

1922 

1923 self.gatherLeafCoalesceable(candidates) 

1924 

1925 self.xapi.update_task_progress("coalescable", len(candidates)) 

1926 

1927 freeSpace = self.getFreeSpace() 

1928 for candidate in candidates: 

1929 # check the space constraints to see if leaf-coalesce is actually 

1930 # feasible for this candidate 

1931 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing() 

1932 spaceNeededLive = spaceNeeded 

1933 if spaceNeeded > freeSpace: 

1934 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing() 

1935 if candidate.canLiveCoalesce(self.getStorageSpeed()): 

1936 spaceNeeded = spaceNeededLive 

1937 

1938 if spaceNeeded <= freeSpace: 

1939 Util.log("Leaf-coalesce candidate: %s" % candidate) 

1940 return candidate 

1941 else: 

1942 Util.log("No space to leaf-coalesce %s (free space: %d)" % \ 

1943 (candidate, freeSpace)) 

1944 if spaceNeededLive <= freeSpace: 

1945 Util.log("...but enough space if skip snap-coalesce") 

1946 candidate.setConfig(VDI.DB_LEAFCLSC, 

1947 VDI.LEAFCLSC_OFFLINE) 

1948 

1949 return None 

1950 

1951 def gatherLeafCoalesceable(self, candidates): 

1952 for vdi in self.vdis.values(): 

1953 if not vdi.isLeafCoalesceable(): 

1954 continue 

1955 if vdi in self._failedCoalesceTargets: 

1956 continue 

1957 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET: 

1958 Util.log("Skipping reset-on-boot %s" % vdi) 

1959 continue 

1960 if vdi.getConfig(vdi.DB_ALLOW_CACHING): 

1961 Util.log("Skipping allow_caching=true %s" % vdi) 

1962 continue 

1963 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED: 

1964 Util.log("Leaf-coalesce disabled for %s" % vdi) 

1965 continue 

1966 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or 

1967 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE): 

1968 continue 

1969 candidates.append(vdi) 

1970 

1971 def coalesce(self, vdi, dryRun=False): 

1972 """Coalesce vdi onto parent""" 

1973 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent)) 

1974 if dryRun: 1974 ↛ 1975line 1974 didn't jump to line 1975, because the condition on line 1974 was never true

1975 return 

1976 

1977 try: 

1978 self._coalesce(vdi) 

1979 except util.SMException as e: 

1980 if isinstance(e, AbortException): 1980 ↛ 1981line 1980 didn't jump to line 1981, because the condition on line 1980 was never true

1981 self.cleanup() 

1982 raise 

1983 else: 

1984 self._failedCoalesceTargets.append(vdi) 

1985 Util.logException("coalesce") 

1986 Util.log("Coalesce failed, skipping") 

1987 self.cleanup() 

1988 

1989 def coalesceLeaf(self, vdi, dryRun=False): 

1990 """Leaf-coalesce vdi onto parent""" 

1991 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent)) 

1992 if dryRun: 

1993 return 

1994 

1995 try: 

1996 uuid = vdi.uuid 

1997 try: 

1998 # "vdi" object will no longer be valid after this call 

1999 self._coalesceLeaf(vdi) 

2000 finally: 

2001 vdi = self.getVDI(uuid) 

2002 if vdi: 

2003 vdi.delConfig(vdi.DB_LEAFCLSC) 

2004 except AbortException: 

2005 self.cleanup() 

2006 raise 

2007 except (util.SMException, XenAPI.Failure) as e: 

2008 self._failedCoalesceTargets.append(vdi) 

2009 Util.logException("leaf-coalesce") 

2010 Util.log("Leaf-coalesce failed on %s, skipping" % vdi) 

2011 self.cleanup() 

2012 

2013 def garbageCollect(self, dryRun=False): 

2014 vdiList = self.findGarbage() 

2015 Util.log("Found %d VDIs for deletion:" % len(vdiList)) 

2016 for vdi in vdiList: 

2017 Util.log(" %s" % vdi) 

2018 if not dryRun: 

2019 self.deleteVDIs(vdiList) 

2020 self.cleanupJournals(dryRun) 

2021 

2022 def findGarbage(self): 

2023 vdiList = [] 

2024 for vdi in self.vdiTrees: 

2025 vdiList.extend(vdi.getAllPrunable()) 

2026 return vdiList 

2027 

2028 def deleteVDIs(self, vdiList): 

2029 for vdi in vdiList: 

2030 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT): 

2031 raise AbortException("Aborting due to signal") 

2032 Util.log("Deleting unlinked VDI %s" % vdi) 

2033 self.deleteVDI(vdi) 

2034 

2035 def deleteVDI(self, vdi): 

2036 assert(len(vdi.children) == 0) 

2037 del self.vdis[vdi.uuid] 

2038 if vdi.parent: 2038 ↛ 2040line 2038 didn't jump to line 2040, because the condition on line 2038 was never false

2039 vdi.parent.children.remove(vdi) 

2040 if vdi in self.vdiTrees: 2040 ↛ 2041line 2040 didn't jump to line 2041, because the condition on line 2040 was never true

2041 self.vdiTrees.remove(vdi) 

2042 vdi.delete() 

2043 

2044 def forgetVDI(self, vdiUuid): 

2045 self.xapi.forgetVDI(self.uuid, vdiUuid) 

2046 

2047 def pauseVDIs(self, vdiList): 

2048 paused = [] 

2049 failed = False 

2050 for vdi in vdiList: 

2051 try: 

2052 vdi.pause() 

2053 paused.append(vdi) 

2054 except: 

2055 Util.logException("pauseVDIs") 

2056 failed = True 

2057 break 

2058 

2059 if failed: 

2060 self.unpauseVDIs(paused) 

2061 raise util.SMException("Failed to pause VDIs") 

2062 

2063 def unpauseVDIs(self, vdiList): 

2064 failed = False 

2065 for vdi in vdiList: 

2066 try: 

2067 vdi.unpause() 

2068 except: 

2069 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

2070 failed = True 

2071 if failed: 

2072 raise util.SMException("Failed to unpause VDIs") 

2073 

2074 def getFreeSpace(self): 

2075 return 0 

2076 

2077 def cleanup(self): 

2078 Util.log("In cleanup") 

2079 return 

2080 

2081 def __str__(self): 

2082 if self.name: 

2083 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

2084 else: 

2085 ret = "%s" % self.uuid 

2086 return ret 

2087 

2088 def lock(self): 

2089 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort 

2090 signal to avoid deadlocking (trying to acquire the SR lock while the 

2091 lock is held by a process that is trying to abort us)""" 

2092 if not self._srLock: 

2093 return 

2094 

2095 if self._locked == 0: 

2096 abortFlag = IPCFlag(self.uuid) 

2097 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK): 

2098 if self._srLock.acquireNoblock(): 

2099 self._locked += 1 

2100 return 

2101 if abortFlag.test(FLAG_TYPE_ABORT): 

2102 raise AbortException("Abort requested") 

2103 time.sleep(SR.LOCK_RETRY_INTERVAL) 

2104 raise util.SMException("Unable to acquire the SR lock") 

2105 

2106 self._locked += 1 

2107 

2108 def unlock(self): 

2109 if not self._srLock: 2109 ↛ 2111line 2109 didn't jump to line 2111, because the condition on line 2109 was never false

2110 return 

2111 assert(self._locked > 0) 

2112 self._locked -= 1 

2113 if self._locked == 0: 

2114 self._srLock.release() 

2115 

2116 def needUpdateBlockInfo(self): 

2117 for vdi in self.vdis.values(): 

2118 if vdi.scanError or len(vdi.children) == 0: 

2119 continue 

2120 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2121 return True 

2122 return False 

2123 

2124 def updateBlockInfo(self): 

2125 for vdi in self.vdis.values(): 

2126 if vdi.scanError or len(vdi.children) == 0: 

2127 continue 

2128 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2129 vdi.updateBlockInfo() 

2130 

2131 def cleanupCoalesceJournals(self): 

2132 """Remove stale coalesce VDI indicators""" 

2133 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

2134 for uuid, jval in entries.items(): 

2135 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

2136 

2137 def cleanupJournals(self, dryRun=False): 

2138 """delete journal entries for non-existing VDIs""" 

2139 for t in [LVHDVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]: 

2140 entries = self.journaler.getAll(t) 

2141 for uuid, jval in entries.items(): 

2142 if self.getVDI(uuid): 

2143 continue 

2144 if t == SR.JRN_CLONE: 

2145 baseUuid, clonUuid = jval.split("_") 

2146 if self.getVDI(baseUuid): 

2147 continue 

2148 Util.log(" Deleting stale '%s' journal entry for %s " 

2149 "(%s)" % (t, uuid, jval)) 

2150 if not dryRun: 

2151 self.journaler.remove(t, uuid) 

2152 

2153 def cleanupCache(self, maxAge=-1): 

2154 return 0 

2155 

2156 def _coalesce(self, vdi): 

2157 if self.journaler.get(vdi.JRN_RELINK, vdi.uuid): 2157 ↛ 2160line 2157 didn't jump to line 2160, because the condition on line 2157 was never true

2158 # this means we had done the actual coalescing already and just 

2159 # need to finish relinking and/or refreshing the children 

2160 Util.log("==> Coalesce apparently already done: skipping") 

2161 else: 

2162 # JRN_COALESCE is used to check which VDI is being coalesced in 

2163 # order to decide whether to abort the coalesce. We remove the 

2164 # journal as soon as the VHD coalesce step is done, because we 

2165 # don't expect the rest of the process to take long 

2166 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1") 

2167 vdi._doCoalesce() 

2168 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid) 

2169 

2170 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid) 

2171 

2172 # we now need to relink the children: lock the SR to prevent ops 

2173 # like SM.clone from manipulating the VDIs we'll be relinking and 

2174 # rescan the SR first in case the children changed since the last 

2175 # scan 

2176 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1") 

2177 

2178 self.lock() 

2179 try: 

2180 vdi.parent._tagChildrenForRelink() 

2181 self.scan() 

2182 vdi._relinkSkip() 

2183 finally: 

2184 self.unlock() 

2185 # Reload the children to leave things consistent 

2186 vdi.parent._reloadChildren(vdi) 

2187 

2188 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid) 

2189 self.deleteVDI(vdi) 

2190 

2191 class CoalesceTracker: 

2192 GRACE_ITERATIONS = 1 

2193 MAX_ITERATIONS_NO_PROGRESS = 3 

2194 MAX_ITERATIONS = 10 

2195 MAX_INCREASE_FROM_MINIMUM = 1.2 

2196 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \ 

2197 " --> Final size {finSize}" 

2198 

2199 def __init__(self, sr): 

2200 self.itsNoProgress = 0 

2201 self.its = 0 

2202 self.minSize = float("inf") 

2203 self.history = [] 

2204 self.reason = "" 

2205 self.startSize = None 

2206 self.finishSize = None 

2207 self.sr = sr 

2208 

2209 def abortCoalesce(self, prevSize, curSize): 

2210 res = False 

2211 

2212 self.its += 1 

2213 self.history.append(self.HISTORY_STRING.format(its=self.its, 

2214 initSize=prevSize, 

2215 finSize=curSize)) 

2216 

2217 self.finishSize = curSize 

2218 

2219 if self.startSize is None: 

2220 self.startSize = prevSize 

2221 

2222 if curSize < self.minSize: 

2223 self.minSize = curSize 

2224 

2225 if prevSize < self.minSize: 

2226 self.minSize = prevSize 

2227 

2228 if prevSize < curSize: 

2229 self.itsNoProgress += 1 

2230 Util.log("No progress, attempt:" 

2231 " {attempt}".format(attempt=self.itsNoProgress)) 

2232 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid) 

2233 

2234 if (not res) and (self.its > self.MAX_ITERATIONS): 

2235 max = self.MAX_ITERATIONS 

2236 self.reason = \ 

2237 "Max iterations ({max}) exceeded".format(max=max) 

2238 res = True 

2239 

2240 if (not res) and (self.itsNoProgress > 

2241 self.MAX_ITERATIONS_NO_PROGRESS): 

2242 max = self.MAX_ITERATIONS_NO_PROGRESS 

2243 self.reason = \ 

2244 "No progress made for {max} iterations".format(max=max) 

2245 res = True 

2246 

2247 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize 

2248 if (self.its > self.GRACE_ITERATIONS and 

2249 (not res) and (curSize > maxSizeFromMin)): 

2250 self.reason = "Unexpected bump in size," \ 

2251 " compared to minimum acheived" 

2252 res = True 

2253 

2254 return res 

2255 

2256 def printReasoning(self): 

2257 Util.log("Aborted coalesce") 

2258 for hist in self.history: 

2259 Util.log(hist) 

2260 Util.log(self.reason) 

2261 Util.log("Starting size was {size}" 

2262 .format(size=self.startSize)) 

2263 Util.log("Final size was {size}" 

2264 .format(size=self.finishSize)) 

2265 Util.log("Minimum size acheived was {size}" 

2266 .format(size=self.minSize)) 

2267 

2268 def _coalesceLeaf(self, vdi): 

2269 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot 

2270 complete due to external changes, namely vdi_delete and vdi_snapshot 

2271 that alter leaf-coalescibility of vdi""" 

2272 tracker = self.CoalesceTracker(self) 

2273 while not vdi.canLiveCoalesce(self.getStorageSpeed()): 

2274 prevSizeVHD = vdi.getSizeVHD() 

2275 if not self._snapshotCoalesce(vdi): 2275 ↛ 2276line 2275 didn't jump to line 2276, because the condition on line 2275 was never true

2276 return False 

2277 if tracker.abortCoalesce(prevSizeVHD, vdi.getSizeVHD()): 

2278 tracker.printReasoning() 

2279 raise util.SMException("VDI {uuid} could not be coalesced" 

2280 .format(uuid=vdi.uuid)) 

2281 return self._liveLeafCoalesce(vdi) 

2282 

2283 def calcStorageSpeed(self, startTime, endTime, vhdSize): 

2284 speed = None 

2285 total_time = endTime - startTime 

2286 if total_time > 0: 

2287 speed = float(vhdSize) / float(total_time) 

2288 return speed 

2289 

2290 def writeSpeedToFile(self, speed): 

2291 content = [] 

2292 speedFile = None 

2293 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2294 self.lock() 

2295 try: 

2296 Util.log("Writing to file: {myfile}".format(myfile=path)) 

2297 lines = "" 

2298 if not os.path.isfile(path): 

2299 lines = str(speed) + "\n" 

2300 else: 

2301 speedFile = open(path, "r+") 

2302 content = speedFile.readlines() 

2303 content.append(str(speed) + "\n") 

2304 if len(content) > N_RUNNING_AVERAGE: 

2305 del content[0] 

2306 lines = "".join(content) 

2307 

2308 util.atomicFileWrite(path, VAR_RUN, lines) 

2309 finally: 

2310 if speedFile is not None: 

2311 speedFile.close() 

2312 Util.log("Closing file: {myfile}".format(myfile=path)) 

2313 self.unlock() 

2314 

2315 def recordStorageSpeed(self, startTime, endTime, vhdSize): 

2316 speed = self.calcStorageSpeed(startTime, endTime, vhdSize) 

2317 if speed is None: 

2318 return 

2319 

2320 self.writeSpeedToFile(speed) 

2321 

2322 def getStorageSpeed(self): 

2323 speedFile = None 

2324 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2325 self.lock() 

2326 try: 

2327 speed = None 

2328 if os.path.isfile(path): 

2329 speedFile = open(path) 

2330 content = speedFile.readlines() 

2331 try: 

2332 content = [float(i) for i in content] 

2333 except ValueError: 

2334 Util.log("Something bad in the speed log:{log}". 

2335 format(log=speedFile.readlines())) 

2336 return speed 

2337 

2338 if len(content): 

2339 speed = sum(content) / float(len(content)) 

2340 if speed <= 0: 2340 ↛ 2342line 2340 didn't jump to line 2342, because the condition on line 2340 was never true

2341 # Defensive, should be impossible. 

2342 Util.log("Bad speed: {speed} calculated for SR: {uuid}". 

2343 format(speed=speed, uuid=self.uuid)) 

2344 speed = None 

2345 else: 

2346 Util.log("Speed file empty for SR: {uuid}". 

2347 format(uuid=self.uuid)) 

2348 else: 

2349 Util.log("Speed log missing for SR: {uuid}". 

2350 format(uuid=self.uuid)) 

2351 return speed 

2352 finally: 

2353 if not (speedFile is None): 

2354 speedFile.close() 

2355 self.unlock() 

2356 

2357 def _snapshotCoalesce(self, vdi): 

2358 # Note that because we are not holding any locks here, concurrent SM 

2359 # operations may change this tree under our feet. In particular, vdi 

2360 # can be deleted, or it can be snapshotted. 

2361 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED) 

2362 Util.log("Single-snapshotting %s" % vdi) 

2363 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid) 

2364 try: 

2365 ret = self.xapi.singleSnapshotVDI(vdi) 

2366 Util.log("Single-snapshot returned: %s" % ret) 

2367 except XenAPI.Failure as e: 

2368 if util.isInvalidVDI(e): 

2369 Util.log("The VDI appears to have been concurrently deleted") 

2370 return False 

2371 raise 

2372 self.scanLocked() 

2373 tempSnap = vdi.parent 

2374 if not tempSnap.isCoalesceable(): 

2375 Util.log("The VDI appears to have been concurrently snapshotted") 

2376 return False 

2377 Util.log("Coalescing parent %s" % tempSnap) 

2378 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid) 

2379 vhdSize = vdi.getSizeVHD() 

2380 self._coalesce(tempSnap) 

2381 if not vdi.isLeafCoalesceable(): 

2382 Util.log("The VDI tree appears to have been altered since") 

2383 return False 

2384 return True 

2385 

2386 def _liveLeafCoalesce(self, vdi): 

2387 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid) 

2388 self.lock() 

2389 try: 

2390 self.scan() 

2391 if not self.getVDI(vdi.uuid): 

2392 Util.log("The VDI appears to have been deleted meanwhile") 

2393 return False 

2394 if not vdi.isLeafCoalesceable(): 

2395 Util.log("The VDI is no longer leaf-coalesceable") 

2396 return False 

2397 

2398 uuid = vdi.uuid 

2399 vdi.pause(failfast=True) 

2400 try: 

2401 try: 

2402 # "vdi" object will no longer be valid after this call 

2403 self._doCoalesceLeaf(vdi) 

2404 except: 

2405 Util.logException("_doCoalesceLeaf") 

2406 self._handleInterruptedCoalesceLeaf() 

2407 raise 

2408 finally: 

2409 vdi = self.getVDI(uuid) 

2410 if vdi: 

2411 vdi.ensureUnpaused() 

2412 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid) 

2413 if vdiOld: 

2414 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2415 self.deleteVDI(vdiOld) 

2416 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2417 finally: 

2418 self.cleanup() 

2419 self.unlock() 

2420 self.logFilter.logState() 

2421 return True 

2422 

2423 def _doCoalesceLeaf(self, vdi): 

2424 """Actual coalescing of a leaf VDI onto parent. Must be called in an 

2425 offline/atomic context""" 

2426 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid) 

2427 self._prepareCoalesceLeaf(vdi) 

2428 vdi.parent._setHidden(False) 

2429 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False) 

2430 vdi.validate(True) 

2431 vdi.parent.validate(True) 

2432 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid) 

2433 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT 

2434 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE: 

2435 Util.log("Leaf-coalesce forced, will not use timeout") 

2436 timeout = 0 

2437 vdi._coalesceVHD(timeout) 

2438 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid) 

2439 vdi.parent.validate(True) 

2440 #vdi._verifyContents(timeout / 2) 

2441 

2442 # rename 

2443 vdiUuid = vdi.uuid 

2444 oldName = vdi.fileName 

2445 origParentUuid = vdi.parent.uuid 

2446 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid) 

2447 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid) 

2448 vdi.parent.rename(vdiUuid) 

2449 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid) 

2450 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid) 

2451 

2452 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is 

2453 # garbage 

2454 

2455 # update the VDI record 

2456 vdi.parent.delConfig(VDI.DB_VHD_PARENT) 

2457 if vdi.parent.raw: 

2458 vdi.parent.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_RAW) 

2459 vdi.parent.delConfig(VDI.DB_VHD_BLOCKS) 

2460 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid) 

2461 

2462 self._updateNode(vdi) 

2463 

2464 # delete the obsolete leaf & inflate the parent (in that order, to 

2465 # minimize free space requirements) 

2466 parent = vdi.parent 

2467 vdi._setHidden(True) 

2468 vdi.parent.children = [] 

2469 vdi.parent = None 

2470 

2471 extraSpace = self._calcExtraSpaceNeeded(vdi, parent) 

2472 freeSpace = self.getFreeSpace() 

2473 if freeSpace < extraSpace: 

2474 # don't delete unless we need the space: deletion is time-consuming 

2475 # because it requires contacting the slaves, and we're paused here 

2476 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2477 self.deleteVDI(vdi) 

2478 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2479 

2480 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid) 

2481 self.journaler.remove(VDI.JRN_LEAF, vdiUuid) 

2482 

2483 self.forgetVDI(origParentUuid) 

2484 self._finishCoalesceLeaf(parent) 

2485 self._updateSlavesOnResize(parent) 

2486 

2487 def _calcExtraSpaceNeeded(self, child, parent): 

2488 assert(not parent.raw) # raw parents not supported 

2489 extra = child.getSizeVHD() - parent.getSizeVHD() 

2490 if extra < 0: 

2491 extra = 0 

2492 return extra 

2493 

2494 def _prepareCoalesceLeaf(self, vdi): 

2495 pass 

2496 

2497 def _updateNode(self, vdi): 

2498 pass 

2499 

2500 def _finishCoalesceLeaf(self, parent): 

2501 pass 

2502 

2503 def _updateSlavesOnUndoLeafCoalesce(self, parent, child): 

2504 pass 

2505 

2506 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid): 

2507 pass 

2508 

2509 def _updateSlavesOnResize(self, vdi): 

2510 pass 

2511 

2512 def _removeStaleVDIs(self, uuidsPresent): 

2513 for uuid in list(self.vdis.keys()): 

2514 if not uuid in uuidsPresent: 

2515 Util.log("VDI %s disappeared since last scan" % \ 

2516 self.vdis[uuid]) 

2517 del self.vdis[uuid] 

2518 

2519 def _handleInterruptedCoalesceLeaf(self): 

2520 """An interrupted leaf-coalesce operation may leave the VHD tree in an 

2521 inconsistent state. If the old-leaf VDI is still present, we revert the 

2522 operation (in case the original error is persistent); otherwise we must 

2523 finish the operation""" 

2524 # abstract 

2525 pass 

2526 

2527 def _buildTree(self, force): 

2528 self.vdiTrees = [] 

2529 for vdi in self.vdis.values(): 

2530 if vdi.parentUuid: 

2531 parent = self.getVDI(vdi.parentUuid) 

2532 if not parent: 

2533 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX): 

2534 self.vdiTrees.append(vdi) 

2535 continue 

2536 if force: 

2537 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \ 

2538 (vdi.parentUuid, vdi.uuid)) 

2539 self.vdiTrees.append(vdi) 

2540 continue 

2541 else: 

2542 raise util.SMException("Parent VDI %s of %s not " \ 

2543 "found" % (vdi.parentUuid, vdi.uuid)) 

2544 vdi.parent = parent 

2545 parent.children.append(vdi) 

2546 else: 

2547 self.vdiTrees.append(vdi) 

2548 

2549 

2550class FileSR(SR): 

2551 TYPE = SR.TYPE_FILE 

2552 CACHE_FILE_EXT = ".vhdcache" 

2553 # cache cleanup actions 

2554 CACHE_ACTION_KEEP = 0 

2555 CACHE_ACTION_REMOVE = 1 

2556 CACHE_ACTION_REMOVE_IF_INACTIVE = 2 

2557 

2558 def __init__(self, uuid, xapi, createLock, force): 

2559 SR.__init__(self, uuid, xapi, createLock, force) 

2560 self.path = "/var/run/sr-mount/%s" % self.uuid 

2561 self.journaler = fjournaler.Journaler(self.path) 

2562 

2563 def scan(self, force=False): 

2564 if not util.pathexists(self.path): 

2565 raise util.SMException("directory %s not found!" % self.uuid) 

2566 vhds = self._scan(force) 

2567 for uuid, vhdInfo in vhds.items(): 

2568 vdi = self.getVDI(uuid) 

2569 if not vdi: 

2570 self.logFilter.logNewVDI(uuid) 

2571 vdi = FileVDI(self, uuid, False) 

2572 self.vdis[uuid] = vdi 

2573 vdi.load(vhdInfo) 

2574 uuidsPresent = list(vhds.keys()) 

2575 rawList = [x for x in os.listdir(self.path) if x.endswith(vhdutil.FILE_EXTN_RAW)] 

2576 for rawName in rawList: 

2577 uuid = FileVDI.extractUuid(rawName) 

2578 uuidsPresent.append(uuid) 

2579 vdi = self.getVDI(uuid) 

2580 if not vdi: 

2581 self.logFilter.logNewVDI(uuid) 

2582 vdi = FileVDI(self, uuid, True) 

2583 self.vdis[uuid] = vdi 

2584 self._removeStaleVDIs(uuidsPresent) 

2585 self._buildTree(force) 

2586 self.logFilter.logState() 

2587 self._handleInterruptedCoalesceLeaf() 

2588 

2589 def getFreeSpace(self): 

2590 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path) 

2591 

2592 def deleteVDIs(self, vdiList): 

2593 rootDeleted = False 

2594 for vdi in vdiList: 

2595 if not vdi.parent: 

2596 rootDeleted = True 

2597 break 

2598 SR.deleteVDIs(self, vdiList) 

2599 if self.xapi.srRecord["type"] == "nfs" and rootDeleted: 

2600 self.xapi.markCacheSRsDirty() 

2601 

2602 def cleanupCache(self, maxAge=-1): 

2603 """Clean up IntelliCache cache files. Caches for leaf nodes are 

2604 removed when the leaf node no longer exists or its allow-caching 

2605 attribute is not set. Caches for parent nodes are removed when the 

2606 parent node no longer exists or it hasn't been used in more than 

2607 <maxAge> hours. 

2608 Return number of caches removed. 

2609 """ 

2610 numRemoved = 0 

2611 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)] 

2612 Util.log("Found %d cache files" % len(cacheFiles)) 

2613 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge) 

2614 for cacheFile in cacheFiles: 

2615 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)] 

2616 action = self.CACHE_ACTION_KEEP 

2617 rec = self.xapi.getRecordVDI(uuid) 

2618 if not rec: 

2619 Util.log("Cache %s: VDI doesn't exist" % uuid) 

2620 action = self.CACHE_ACTION_REMOVE 

2621 elif rec["managed"] and not rec["allow_caching"]: 

2622 Util.log("Cache %s: caching disabled" % uuid) 

2623 action = self.CACHE_ACTION_REMOVE 

2624 elif not rec["managed"] and maxAge >= 0: 

2625 lastAccess = datetime.datetime.fromtimestamp( \ 

2626 os.path.getatime(os.path.join(self.path, cacheFile))) 

2627 if lastAccess < cutoff: 

2628 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge)) 

2629 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE 

2630 

2631 if action == self.CACHE_ACTION_KEEP: 

2632 Util.log("Keeping cache %s" % uuid) 

2633 continue 

2634 

2635 lockId = uuid 

2636 parentUuid = None 

2637 if rec and rec["managed"]: 

2638 parentUuid = rec["sm_config"].get("vhd-parent") 

2639 if parentUuid: 

2640 lockId = parentUuid 

2641 

2642 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId) 

2643 cacheLock.acquire() 

2644 try: 

2645 if self._cleanupCache(uuid, action): 

2646 numRemoved += 1 

2647 finally: 

2648 cacheLock.release() 

2649 return numRemoved 

2650 

2651 def _cleanupCache(self, uuid, action): 

2652 assert(action != self.CACHE_ACTION_KEEP) 

2653 rec = self.xapi.getRecordVDI(uuid) 

2654 if rec and rec["allow_caching"]: 

2655 Util.log("Cache %s appears to have become valid" % uuid) 

2656 return False 

2657 

2658 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT) 

2659 tapdisk = blktap2.Tapdisk.find_by_path(fullPath) 

2660 if tapdisk: 

2661 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE: 

2662 Util.log("Cache %s still in use" % uuid) 

2663 return False 

2664 Util.log("Shutting down tapdisk for %s" % fullPath) 

2665 tapdisk.shutdown() 

2666 

2667 Util.log("Deleting file %s" % fullPath) 

2668 os.unlink(fullPath) 

2669 return True 

2670 

2671 def _isCacheFileName(self, name): 

2672 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \ 

2673 name.endswith(self.CACHE_FILE_EXT) 

2674 

2675 def _scan(self, force): 

2676 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

2677 error = False 

2678 pattern = os.path.join(self.path, "*%s" % vhdutil.FILE_EXTN_VHD) 

2679 vhds = vhdutil.getAllVHDs(pattern, FileVDI.extractUuid) 

2680 for uuid, vhdInfo in vhds.items(): 

2681 if vhdInfo.error: 

2682 error = True 

2683 break 

2684 if not error: 

2685 return vhds 

2686 Util.log("Scan error on attempt %d" % i) 

2687 if force: 

2688 return vhds 

2689 raise util.SMException("Scan error") 

2690 

2691 def deleteVDI(self, vdi): 

2692 self._checkSlaves(vdi) 

2693 SR.deleteVDI(self, vdi) 

2694 

2695 def _checkSlaves(self, vdi): 

2696 onlineHosts = self.xapi.getOnlineHosts() 

2697 abortFlag = IPCFlag(self.uuid) 

2698 for pbdRecord in self.xapi.getAttachedPBDs(): 

2699 hostRef = pbdRecord["host"] 

2700 if hostRef == self.xapi._hostRef: 

2701 continue 

2702 if abortFlag.test(FLAG_TYPE_ABORT): 

2703 raise AbortException("Aborting due to signal") 

2704 try: 

2705 self._checkSlave(hostRef, vdi) 

2706 except util.CommandException: 

2707 if hostRef in onlineHosts: 

2708 raise 

2709 

2710 def _checkSlave(self, hostRef, vdi): 

2711 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path}) 

2712 Util.log("Checking with slave: %s" % repr(call)) 

2713 _host = self.xapi.session.xenapi.host 

2714 text = _host.call_plugin( * call) 

2715 

2716 def _handleInterruptedCoalesceLeaf(self): 

2717 entries = self.journaler.getAll(VDI.JRN_LEAF) 

2718 for uuid, parentUuid in entries.items(): 

2719 fileList = os.listdir(self.path) 

2720 childName = uuid + vhdutil.FILE_EXTN_VHD 

2721 tmpChildName = self.TMP_RENAME_PREFIX + uuid + vhdutil.FILE_EXTN_VHD 

2722 parentName1 = parentUuid + vhdutil.FILE_EXTN_VHD 

2723 parentName2 = parentUuid + vhdutil.FILE_EXTN_RAW 

2724 parentPresent = (parentName1 in fileList or parentName2 in fileList) 

2725 if parentPresent or tmpChildName in fileList: 

2726 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

2727 else: 

2728 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

2729 self.journaler.remove(VDI.JRN_LEAF, uuid) 

2730 vdi = self.getVDI(uuid) 

2731 if vdi: 

2732 vdi.ensureUnpaused() 

2733 

2734 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2735 Util.log("*** UNDO LEAF-COALESCE") 

2736 parent = self.getVDI(parentUuid) 

2737 if not parent: 

2738 parent = self.getVDI(childUuid) 

2739 if not parent: 

2740 raise util.SMException("Neither %s nor %s found" % \ 

2741 (parentUuid, childUuid)) 

2742 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

2743 parent.rename(parentUuid) 

2744 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

2745 

2746 child = self.getVDI(childUuid) 

2747 if not child: 

2748 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

2749 if not child: 

2750 raise util.SMException("Neither %s nor %s found" % \ 

2751 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

2752 Util.log("Renaming child back to %s" % childUuid) 

2753 child.rename(childUuid) 

2754 Util.log("Updating the VDI record") 

2755 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

2756 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

2757 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

2758 

2759 if child.hidden: 

2760 child._setHidden(False) 

2761 if not parent.hidden: 

2762 parent._setHidden(True) 

2763 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

2764 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

2765 Util.log("*** leaf-coalesce undo successful") 

2766 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

2767 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

2768 

2769 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2770 Util.log("*** FINISH LEAF-COALESCE") 

2771 vdi = self.getVDI(childUuid) 

2772 if not vdi: 

2773 raise util.SMException("VDI %s not found" % childUuid) 

2774 try: 

2775 self.forgetVDI(parentUuid) 

2776 except XenAPI.Failure: 

2777 pass 

2778 self._updateSlavesOnResize(vdi) 

2779 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

2780 Util.log("*** finished leaf-coalesce successfully") 

2781 

2782 

2783class LVHDSR(SR): 

2784 TYPE = SR.TYPE_LVHD 

2785 SUBTYPES = ["lvhdoiscsi", "lvhdohba"] 

2786 

2787 def __init__(self, uuid, xapi, createLock, force): 

2788 SR.__init__(self, uuid, xapi, createLock, force) 

2789 self.vgName = "%s%s" % (lvhdutil.VG_PREFIX, self.uuid) 

2790 self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgName) 

2791 self.lvmCache = lvmcache.LVMCache(self.vgName) 

2792 self.lvActivator = LVActivator(self.uuid, self.lvmCache) 

2793 self.journaler = journaler.Journaler(self.lvmCache) 

2794 

2795 def deleteVDI(self, vdi): 

2796 if self.lvActivator.get(vdi.uuid, False): 

2797 self.lvActivator.deactivate(vdi.uuid, False) 

2798 self._checkSlaves(vdi) 

2799 SR.deleteVDI(self, vdi) 

2800 

2801 def forgetVDI(self, vdiUuid): 

2802 SR.forgetVDI(self, vdiUuid) 

2803 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME) 

2804 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid) 

2805 

2806 def getFreeSpace(self): 

2807 stats = lvutil._getVGstats(self.vgName) 

2808 return stats['physical_size'] - stats['physical_utilisation'] 

2809 

2810 def cleanup(self): 

2811 if not self.lvActivator.deactivateAll(): 

2812 Util.log("ERROR deactivating LVs while cleaning up") 

2813 

2814 def needUpdateBlockInfo(self): 

2815 for vdi in self.vdis.values(): 

2816 if vdi.scanError or vdi.raw or len(vdi.children) == 0: 

2817 continue 

2818 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2819 return True 

2820 return False 

2821 

2822 def updateBlockInfo(self): 

2823 numUpdated = 0 

2824 for vdi in self.vdis.values(): 

2825 if vdi.scanError or vdi.raw or len(vdi.children) == 0: 

2826 continue 

2827 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2828 vdi.updateBlockInfo() 

2829 numUpdated += 1 

2830 if numUpdated: 

2831 # deactivate the LVs back sooner rather than later. If we don't 

2832 # now, by the time this thread gets to deactivations, another one 

2833 # might have leaf-coalesced a node and deleted it, making the child 

2834 # inherit the refcount value and preventing the correct decrement 

2835 self.cleanup() 

2836 

2837 def scan(self, force=False): 

2838 vdis = self._scan(force) 

2839 for uuid, vdiInfo in vdis.items(): 

2840 vdi = self.getVDI(uuid) 

2841 if not vdi: 

2842 self.logFilter.logNewVDI(uuid) 

2843 vdi = LVHDVDI(self, uuid, 

2844 vdiInfo.vdiType == vhdutil.VDI_TYPE_RAW) 

2845 self.vdis[uuid] = vdi 

2846 vdi.load(vdiInfo) 

2847 self._removeStaleVDIs(vdis.keys()) 

2848 self._buildTree(force) 

2849 self.logFilter.logState() 

2850 self._handleInterruptedCoalesceLeaf() 

2851 

2852 def _scan(self, force): 

2853 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

2854 error = False 

2855 self.lvmCache.refresh() 

2856 vdis = lvhdutil.getVDIInfo(self.lvmCache) 

2857 for uuid, vdiInfo in vdis.items(): 

2858 if vdiInfo.scanError: 

2859 error = True 

2860 break 

2861 if not error: 

2862 return vdis 

2863 Util.log("Scan error, retrying (%d)" % i) 

2864 if force: 

2865 return vdis 

2866 raise util.SMException("Scan error") 

2867 

2868 def _removeStaleVDIs(self, uuidsPresent): 

2869 for uuid in list(self.vdis.keys()): 

2870 if not uuid in uuidsPresent: 

2871 Util.log("VDI %s disappeared since last scan" % \ 

2872 self.vdis[uuid]) 

2873 del self.vdis[uuid] 

2874 if self.lvActivator.get(uuid, False): 

2875 self.lvActivator.remove(uuid, False) 

2876 

2877 def _liveLeafCoalesce(self, vdi): 

2878 """If the parent is raw and the child was resized (virt. size), then 

2879 we'll need to resize the parent, which can take a while due to zeroing 

2880 out of the extended portion of the LV. Do it before pausing the child 

2881 to avoid a protracted downtime""" 

2882 if vdi.parent.raw and vdi.sizeVirt > vdi.parent.sizeVirt: 

2883 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

2884 vdi.parent._increaseSizeVirt(vdi.sizeVirt) 

2885 

2886 return SR._liveLeafCoalesce(self, vdi) 

2887 

2888 def _prepareCoalesceLeaf(self, vdi): 

2889 vdi._activateChain() 

2890 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

2891 vdi.deflate() 

2892 vdi.inflateParentForCoalesce() 

2893 

2894 def _updateNode(self, vdi): 

2895 # fix the refcounts: the remaining node should inherit the binary 

2896 # refcount from the leaf (because if it was online, it should remain 

2897 # refcounted as such), but the normal refcount from the parent (because 

2898 # this node is really the parent node) - minus 1 if it is online (since 

2899 # non-leaf nodes increment their normal counts when they are online and 

2900 # we are now a leaf, storing that 1 in the binary refcount). 

2901 ns = lvhdutil.NS_PREFIX_LVM + self.uuid 

2902 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns) 

2903 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns) 

2904 pCnt = pCnt - cBcnt 

2905 assert(pCnt >= 0) 

2906 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns) 

2907 

2908 def _finishCoalesceLeaf(self, parent): 

2909 if not parent.isSnapshot() or parent.isAttachedRW(): 

2910 parent.inflateFully() 

2911 else: 

2912 parent.deflate() 

2913 

2914 def _calcExtraSpaceNeeded(self, child, parent): 

2915 return lvhdutil.calcSizeVHDLV(parent.sizeVirt) - parent.sizeLV 

2916 

2917 def _handleInterruptedCoalesceLeaf(self): 

2918 entries = self.journaler.getAll(VDI.JRN_LEAF) 

2919 for uuid, parentUuid in entries.items(): 

2920 childLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + uuid 

2921 tmpChildLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ 

2922 self.TMP_RENAME_PREFIX + uuid 

2923 parentLV1 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + parentUuid 

2924 parentLV2 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + parentUuid 

2925 parentPresent = (self.lvmCache.checkLV(parentLV1) or \ 

2926 self.lvmCache.checkLV(parentLV2)) 

2927 if parentPresent or self.lvmCache.checkLV(tmpChildLV): 

2928 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

2929 else: 

2930 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

2931 self.journaler.remove(VDI.JRN_LEAF, uuid) 

2932 vdi = self.getVDI(uuid) 

2933 if vdi: 

2934 vdi.ensureUnpaused() 

2935 

2936 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2937 Util.log("*** UNDO LEAF-COALESCE") 

2938 parent = self.getVDI(parentUuid) 

2939 if not parent: 

2940 parent = self.getVDI(childUuid) 

2941 if not parent: 

2942 raise util.SMException("Neither %s nor %s found" % \ 

2943 (parentUuid, childUuid)) 

2944 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

2945 parent.rename(parentUuid) 

2946 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

2947 

2948 child = self.getVDI(childUuid) 

2949 if not child: 

2950 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

2951 if not child: 

2952 raise util.SMException("Neither %s nor %s found" % \ 

2953 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

2954 Util.log("Renaming child back to %s" % childUuid) 

2955 child.rename(childUuid) 

2956 Util.log("Updating the VDI record") 

2957 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

2958 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

2959 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

2960 

2961 # refcount (best effort - assume that it had succeeded if the 

2962 # second rename succeeded; if not, this adjustment will be wrong, 

2963 # leading to a non-deactivation of the LV) 

2964 ns = lvhdutil.NS_PREFIX_LVM + self.uuid 

2965 cCnt, cBcnt = RefCounter.check(child.uuid, ns) 

2966 pCnt, pBcnt = RefCounter.check(parent.uuid, ns) 

2967 pCnt = pCnt + cBcnt 

2968 RefCounter.set(parent.uuid, pCnt, 0, ns) 

2969 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid) 

2970 

2971 parent.deflate() 

2972 child.inflateFully() 

2973 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid) 

2974 if child.hidden: 

2975 child._setHidden(False) 

2976 if not parent.hidden: 

2977 parent._setHidden(True) 

2978 if not parent.lvReadonly: 

2979 self.lvmCache.setReadonly(parent.fileName, True) 

2980 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

2981 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

2982 Util.log("*** leaf-coalesce undo successful") 

2983 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

2984 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

2985 

2986 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2987 Util.log("*** FINISH LEAF-COALESCE") 

2988 vdi = self.getVDI(childUuid) 

2989 if not vdi: 

2990 raise util.SMException("VDI %s not found" % childUuid) 

2991 vdi.inflateFully() 

2992 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid) 

2993 try: 

2994 self.forgetVDI(parentUuid) 

2995 except XenAPI.Failure: 

2996 pass 

2997 self._updateSlavesOnResize(vdi) 

2998 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

2999 Util.log("*** finished leaf-coalesce successfully") 

3000 

3001 def _checkSlaves(self, vdi): 

3002 """Confirm with all slaves in the pool that 'vdi' is not in use. We 

3003 try to check all slaves, including those that the Agent believes are 

3004 offline, but ignore failures for offline hosts. This is to avoid cases 

3005 where the Agent thinks a host is offline but the host is up.""" 

3006 args = {"vgName": self.vgName, 

3007 "action1": "deactivateNoRefcount", 

3008 "lvName1": vdi.fileName, 

3009 "action2": "cleanupLockAndRefcount", 

3010 "uuid2": vdi.uuid, 

3011 "ns2": lvhdutil.NS_PREFIX_LVM + self.uuid} 

3012 onlineHosts = self.xapi.getOnlineHosts() 

3013 abortFlag = IPCFlag(self.uuid) 

3014 for pbdRecord in self.xapi.getAttachedPBDs(): 

3015 hostRef = pbdRecord["host"] 

3016 if hostRef == self.xapi._hostRef: 

3017 continue 

3018 if abortFlag.test(FLAG_TYPE_ABORT): 

3019 raise AbortException("Aborting due to signal") 

3020 Util.log("Checking with slave %s (path %s)" % ( 

3021 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path)) 

3022 try: 

3023 self.xapi.ensureInactive(hostRef, args) 

3024 except XenAPI.Failure: 

3025 if hostRef in onlineHosts: 

3026 raise 

3027 

3028 def _updateSlavesOnUndoLeafCoalesce(self, parent, child): 

3029 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid]) 

3030 if not slaves: 

3031 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \ 

3032 child) 

3033 return 

3034 

3035 tmpName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ 

3036 self.TMP_RENAME_PREFIX + child.uuid 

3037 args = {"vgName": self.vgName, 

3038 "action1": "deactivateNoRefcount", 

3039 "lvName1": tmpName, 

3040 "action2": "deactivateNoRefcount", 

3041 "lvName2": child.fileName, 

3042 "action3": "refresh", 

3043 "lvName3": child.fileName, 

3044 "action4": "refresh", 

3045 "lvName4": parent.fileName} 

3046 for slave in slaves: 

3047 Util.log("Updating %s, %s, %s on slave %s" % \ 

3048 (tmpName, child.fileName, parent.fileName, 

3049 self.xapi.getRecordHost(slave)['hostname'])) 

3050 text = self.xapi.session.xenapi.host.call_plugin( \ 

3051 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3052 Util.log("call-plugin returned: '%s'" % text) 

3053 

3054 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid): 

3055 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid]) 

3056 if not slaves: 

3057 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi) 

3058 return 

3059 

3060 args = {"vgName": self.vgName, 

3061 "action1": "deactivateNoRefcount", 

3062 "lvName1": oldNameLV, 

3063 "action2": "refresh", 

3064 "lvName2": vdi.fileName, 

3065 "action3": "cleanupLockAndRefcount", 

3066 "uuid3": origParentUuid, 

3067 "ns3": lvhdutil.NS_PREFIX_LVM + self.uuid} 

3068 for slave in slaves: 

3069 Util.log("Updating %s to %s on slave %s" % \ 

3070 (oldNameLV, vdi.fileName, 

3071 self.xapi.getRecordHost(slave)['hostname'])) 

3072 text = self.xapi.session.xenapi.host.call_plugin( \ 

3073 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3074 Util.log("call-plugin returned: '%s'" % text) 

3075 

3076 def _updateSlavesOnResize(self, vdi): 

3077 uuids = [x.uuid for x in vdi.getAllLeaves()] 

3078 slaves = util.get_slaves_attached_on(self.xapi.session, uuids) 

3079 if not slaves: 

3080 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi) 

3081 return 

3082 lvhdutil.lvRefreshOnSlaves(self.xapi.session, self.uuid, self.vgName, 

3083 vdi.fileName, vdi.uuid, slaves) 

3084 

3085 

3086class LinstorSR(SR): 

3087 TYPE = SR.TYPE_LINSTOR 

3088 

3089 def __init__(self, uuid, xapi, createLock, force): 

3090 if not LINSTOR_AVAILABLE: 

3091 raise util.SMException( 

3092 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing' 

3093 ) 

3094 

3095 SR.__init__(self, uuid, xapi, createLock, force) 

3096 self.path = LinstorVolumeManager.DEV_ROOT_PATH 

3097 self._reloadLinstor() 

3098 

3099 def deleteVDI(self, vdi): 

3100 self._checkSlaves(vdi) 

3101 SR.deleteVDI(self, vdi) 

3102 

3103 def getFreeSpace(self): 

3104 return self._linstor.max_volume_size_allowed 

3105 

3106 def scan(self, force=False): 

3107 all_vdi_info = self._scan(force) 

3108 for uuid, vdiInfo in all_vdi_info.items(): 

3109 # When vdiInfo is None, the VDI is RAW. 

3110 vdi = self.getVDI(uuid) 

3111 if not vdi: 

3112 self.logFilter.logNewVDI(uuid) 

3113 vdi = LinstorVDI(self, uuid, not vdiInfo) 

3114 self.vdis[uuid] = vdi 

3115 if vdiInfo: 

3116 vdi.load(vdiInfo) 

3117 self._removeStaleVDIs(all_vdi_info.keys()) 

3118 self._buildTree(force) 

3119 self.logFilter.logState() 

3120 self._handleInterruptedCoalesceLeaf() 

3121 

3122 def pauseVDIs(self, vdiList): 

3123 self._linstor.ensure_volume_list_is_not_locked( 

3124 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3125 ) 

3126 return super(LinstorSR, self).pauseVDIs(vdiList) 

3127 

3128 def _reloadLinstor(self): 

3129 session = self.xapi.session 

3130 host_ref = util.get_this_host_ref(session) 

3131 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) 

3132 

3133 pbd = util.find_my_pbd(session, host_ref, sr_ref) 

3134 if pbd is None: 

3135 raise util.SMException('Failed to find PBD') 

3136 

3137 dconf = session.xenapi.PBD.get_device_config(pbd) 

3138 group_name = dconf['group-name'] 

3139 

3140 controller_uri = get_controller_uri() 

3141 self.journaler = LinstorJournaler( 

3142 controller_uri, group_name, logger=util.SMlog 

3143 ) 

3144 

3145 self._linstor = LinstorVolumeManager( 

3146 controller_uri, 

3147 group_name, 

3148 repair=True, 

3149 logger=util.SMlog 

3150 ) 

3151 self._vhdutil = LinstorVhdUtil(session, self._linstor) 

3152 

3153 def _scan(self, force): 

3154 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3155 self._reloadLinstor() 

3156 error = False 

3157 try: 

3158 all_vdi_info = self._load_vdi_info() 

3159 for uuid, vdiInfo in all_vdi_info.items(): 

3160 if vdiInfo and vdiInfo.error: 

3161 error = True 

3162 break 

3163 if not error: 

3164 return all_vdi_info 

3165 Util.log('Scan error, retrying ({})'.format(i)) 

3166 except Exception as e: 

3167 Util.log('Scan exception, retrying ({}): {}'.format(i, e)) 

3168 Util.log(traceback.format_exc()) 

3169 

3170 if force: 

3171 return all_vdi_info 

3172 raise util.SMException('Scan error') 

3173 

3174 def _load_vdi_info(self): 

3175 all_vdi_info = {} 

3176 

3177 # TODO: Ensure metadata contains the right info. 

3178 

3179 all_volume_info = self._linstor.get_volumes_with_info() 

3180 volumes_metadata = self._linstor.get_volumes_with_metadata() 

3181 for vdi_uuid, volume_info in all_volume_info.items(): 

3182 try: 

3183 volume_metadata = volumes_metadata[vdi_uuid] 

3184 if not volume_info.name and not list(volume_metadata.items()): 

3185 continue # Ignore it, probably deleted. 

3186 

3187 if vdi_uuid.startswith('DELETED_'): 

3188 # Assume it's really a RAW volume of a failed snap without VHD header/footer. 

3189 # We must remove this VDI now without adding it in the VDI list. 

3190 # Otherwise `Relinking` calls and other actions can be launched on it. 

3191 # We don't want that... 

3192 Util.log('Deleting bad VDI {}'.format(vdi_uuid)) 

3193 

3194 self.lock() 

3195 try: 

3196 self._linstor.destroy_volume(vdi_uuid) 

3197 try: 

3198 self.forgetVDI(vdi_uuid) 

3199 except: 

3200 pass 

3201 except Exception as e: 

3202 Util.log('Cannot delete bad VDI: {}'.format(e)) 

3203 finally: 

3204 self.unlock() 

3205 continue 

3206 

3207 vdi_type = volume_metadata.get(VDI_TYPE_TAG) 

3208 volume_name = self._linstor.get_volume_name(vdi_uuid) 

3209 if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX): 

3210 # Always RAW! 

3211 info = None 

3212 elif vdi_type == vhdutil.VDI_TYPE_VHD: 

3213 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3214 else: 

3215 # Ensure it's not a VHD... 

3216 try: 

3217 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3218 except: 

3219 try: 

3220 self._vhdutil.force_repair( 

3221 self._linstor.get_device_path(vdi_uuid) 

3222 ) 

3223 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3224 except: 

3225 info = None 

3226 

3227 except Exception as e: 

3228 Util.log( 

3229 ' [VDI {}: failed to load VDI info]: {}' 

3230 .format(vdi_uuid, e) 

3231 ) 

3232 info = vhdutil.VHDInfo(vdi_uuid) 

3233 info.error = 1 

3234 

3235 all_vdi_info[vdi_uuid] = info 

3236 

3237 return all_vdi_info 

3238 

3239 def _prepareCoalesceLeaf(self, vdi): 

3240 vdi._activateChain() 

3241 vdi.deflate() 

3242 vdi._inflateParentForCoalesce() 

3243 

3244 def _finishCoalesceLeaf(self, parent): 

3245 if not parent.isSnapshot() or parent.isAttachedRW(): 

3246 parent.inflateFully() 

3247 else: 

3248 parent.deflate() 

3249 

3250 def _calcExtraSpaceNeeded(self, child, parent): 

3251 return LinstorVhdUtil.compute_volume_size(parent.sizeVirt, parent.vdi_type) - parent.getDrbdSize() 

3252 

3253 def _hasValidDevicePath(self, uuid): 

3254 try: 

3255 self._linstor.get_device_path(uuid) 

3256 except Exception: 

3257 # TODO: Maybe log exception. 

3258 return False 

3259 return True 

3260 

3261 def _liveLeafCoalesce(self, vdi): 

3262 self.lock() 

3263 try: 

3264 self._linstor.ensure_volume_is_not_locked( 

3265 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3266 ) 

3267 return super(LinstorSR, self)._liveLeafCoalesce(vdi) 

3268 finally: 

3269 self.unlock() 

3270 

3271 def _handleInterruptedCoalesceLeaf(self): 

3272 entries = self.journaler.get_all(VDI.JRN_LEAF) 

3273 for uuid, parentUuid in entries.items(): 

3274 if self._hasValidDevicePath(parentUuid) or \ 

3275 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): 

3276 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3277 else: 

3278 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3279 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3280 vdi = self.getVDI(uuid) 

3281 if vdi: 

3282 vdi.ensureUnpaused() 

3283 

3284 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3285 Util.log('*** UNDO LEAF-COALESCE') 

3286 parent = self.getVDI(parentUuid) 

3287 if not parent: 

3288 parent = self.getVDI(childUuid) 

3289 if not parent: 

3290 raise util.SMException( 

3291 'Neither {} nor {} found'.format(parentUuid, childUuid) 

3292 ) 

3293 Util.log( 

3294 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) 

3295 ) 

3296 parent.rename(parentUuid) 

3297 

3298 child = self.getVDI(childUuid) 

3299 if not child: 

3300 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3301 if not child: 

3302 raise util.SMException( 

3303 'Neither {} nor {} found'.format( 

3304 childUuid, self.TMP_RENAME_PREFIX + childUuid 

3305 ) 

3306 ) 

3307 Util.log('Renaming child back to {}'.format(childUuid)) 

3308 child.rename(childUuid) 

3309 Util.log('Updating the VDI record') 

3310 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

3311 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

3312 

3313 # TODO: Maybe deflate here. 

3314 

3315 if child.hidden: 

3316 child._setHidden(False) 

3317 if not parent.hidden: 

3318 parent._setHidden(True) 

3319 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3320 Util.log('*** leaf-coalesce undo successful') 

3321 

3322 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3323 Util.log('*** FINISH LEAF-COALESCE') 

3324 vdi = self.getVDI(childUuid) 

3325 if not vdi: 

3326 raise util.SMException('VDI {} not found'.format(childUuid)) 

3327 # TODO: Maybe inflate. 

3328 try: 

3329 self.forgetVDI(parentUuid) 

3330 except XenAPI.Failure: 

3331 pass 

3332 self._updateSlavesOnResize(vdi) 

3333 Util.log('*** finished leaf-coalesce successfully') 

3334 

3335 def _checkSlaves(self, vdi): 

3336 try: 

3337 all_openers = self._linstor.get_volume_openers(vdi.uuid) 

3338 for openers in all_openers.values(): 

3339 for opener in openers.values(): 

3340 if opener['process-name'] != 'tapdisk': 

3341 raise util.SMException( 

3342 'VDI {} is in use: {}'.format(vdi.uuid, all_openers) 

3343 ) 

3344 except LinstorVolumeManagerError as e: 

3345 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: 

3346 raise 

3347 

3348 

3349################################################################################ 

3350# 

3351# Helpers 

3352# 

3353def daemonize(): 

3354 pid = os.fork() 

3355 if pid: 3355 ↛ 3359line 3355 didn't jump to line 3359, because the condition on line 3355 was never false

3356 os.waitpid(pid, 0) 

3357 Util.log("New PID [%d]" % pid) 

3358 return False 

3359 os.chdir("/") 

3360 os.setsid() 

3361 pid = os.fork() 

3362 if pid: 

3363 Util.log("Will finish as PID [%d]" % pid) 

3364 os._exit(0) 

3365 for fd in [0, 1, 2]: 

3366 try: 

3367 os.close(fd) 

3368 except OSError: 

3369 pass 

3370 # we need to fill those special fd numbers or pread won't work 

3371 sys.stdin = open("/dev/null", 'r') 

3372 sys.stderr = open("/dev/null", 'w') 

3373 sys.stdout = open("/dev/null", 'w') 

3374 # As we're a new process we need to clear the lock objects 

3375 lock.Lock.clearAll() 

3376 return True 

3377 

3378 

3379def normalizeType(type): 

3380 if type in LVHDSR.SUBTYPES: 

3381 type = SR.TYPE_LVHD 

3382 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]: 

3383 # temporary while LVHD is symlinked as LVM 

3384 type = SR.TYPE_LVHD 

3385 if type in [ 

3386 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", 

3387 "moosefs", "xfs", "zfs" 

3388 ]: 

3389 type = SR.TYPE_FILE 

3390 if type in ["linstor"]: 

3391 type = SR.TYPE_LINSTOR 

3392 if type not in SR.TYPES: 

3393 raise util.SMException("Unsupported SR type: %s" % type) 

3394 return type 

3395 

3396GCPAUSE_DEFAULT_SLEEP = 5 * 60 

3397 

3398 

3399def _gc_init_file(sr_uuid): 

3400 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init') 

3401 

3402 

3403def _create_init_file(sr_uuid): 

3404 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))) 

3405 with open(os.path.join( 

3406 NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init'), 'w+') as f: 

3407 f.write('1') 

3408 

3409 

3410def _gcLoopPause(sr, dryRun=False, immediate=False): 

3411 if immediate: 

3412 return 

3413 

3414 # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist 

3415 # point will just return. Otherwise, fall back on an abortable sleep. 

3416 

3417 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT): 

3418 

3419 util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT, 3419 ↛ exitline 3419 didn't jump to the function exit

3420 lambda *args: None) 

3421 elif os.path.exists(_gc_init_file(sr.uuid)): 

3422 def abortTest(): 

3423 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT) 

3424 

3425 # If time.sleep hangs we are in deep trouble, however for 

3426 # completeness we set the timeout of the abort thread to 

3427 # 110% of GCPAUSE_DEFAULT_SLEEP. 

3428 Util.log("GC active, about to go quiet") 

3429 Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP), 3429 ↛ exitline 3429 didn't run the lambda on line 3429

3430 None, sr.uuid, abortTest, VDI.POLL_INTERVAL, 

3431 GCPAUSE_DEFAULT_SLEEP * 1.1) 

3432 Util.log("GC active, quiet period ended") 

3433 

3434 

3435def _gcLoop(sr, dryRun=False, immediate=False): 

3436 if not lockActive.acquireNoblock(): 3436 ↛ 3437line 3436 didn't jump to line 3437, because the condition on line 3436 was never true

3437 Util.log("Another GC instance already active, exiting") 

3438 return 

3439 # Track how many we do 

3440 coalesced = 0 

3441 task_status = "success" 

3442 try: 

3443 # Check if any work needs to be done 

3444 sr.scanLocked() 

3445 if not sr.hasWork(): 

3446 Util.log("No work, exiting") 

3447 return 

3448 sr.xapi.create_task( 

3449 "Garbage Collection", 

3450 "Garbage collection for SR %s" % sr.uuid) 

3451 _gcLoopPause(sr, dryRun, immediate=immediate) 

3452 while True: 

3453 if not sr.xapi.isPluggedHere(): 3453 ↛ 3454line 3453 didn't jump to line 3454, because the condition on line 3453 was never true

3454 Util.log("SR no longer attached, exiting") 

3455 break 

3456 sr.scanLocked() 

3457 if not sr.hasWork(): 

3458 Util.log("No work, exiting") 

3459 break 

3460 

3461 if not lockRunning.acquireNoblock(): 3461 ↛ 3462line 3461 didn't jump to line 3462, because the condition on line 3461 was never true

3462 Util.log("Unable to acquire GC running lock.") 

3463 return 

3464 try: 

3465 if not sr.gcEnabled(): 3465 ↛ 3466line 3465 didn't jump to line 3466, because the condition on line 3465 was never true

3466 break 

3467 

3468 sr.xapi.update_task_progress("done", coalesced) 

3469 

3470 sr.cleanupCoalesceJournals() 

3471 # Create the init file here in case startup is waiting on it 

3472 _create_init_file(sr.uuid) 

3473 sr.scanLocked() 

3474 sr.updateBlockInfo() 

3475 

3476 howmany = len(sr.findGarbage()) 

3477 if howmany > 0: 

3478 Util.log("Found %d orphaned vdis" % howmany) 

3479 sr.lock() 

3480 try: 

3481 sr.garbageCollect(dryRun) 

3482 finally: 

3483 sr.unlock() 

3484 sr.xapi.srUpdate() 

3485 

3486 candidate = sr.findCoalesceable() 

3487 if candidate: 

3488 util.fistpoint.activate( 

3489 "LVHDRT_finding_a_suitable_pair", sr.uuid) 

3490 sr.coalesce(candidate, dryRun) 

3491 sr.xapi.srUpdate() 

3492 coalesced += 1 

3493 continue 

3494 

3495 candidate = sr.findLeafCoalesceable() 

3496 if candidate: 3496 ↛ 3503line 3496 didn't jump to line 3503, because the condition on line 3496 was never false

3497 sr.coalesceLeaf(candidate, dryRun) 

3498 sr.xapi.srUpdate() 

3499 coalesced += 1 

3500 continue 

3501 

3502 finally: 

3503 lockRunning.release() 3503 ↛ 3508line 3503 didn't jump to line 3508, because the break on line 3466 wasn't executed

3504 except: 

3505 task_status = "failure" 

3506 raise 

3507 finally: 

3508 sr.xapi.set_task_status(task_status) 

3509 Util.log("GC process exiting, no work left") 

3510 _create_init_file(sr.uuid) 

3511 lockActive.release() 

3512 

3513 

3514def _xapi_enabled(session, hostref): 

3515 host = session.xenapi.host.get_record(hostref) 

3516 return host['enabled'] 

3517 

3518 

3519def _ensure_xapi_initialised(session): 

3520 """ 

3521 Don't want to start GC until Xapi is fully initialised 

3522 """ 

3523 local_session = None 

3524 if session is None: 

3525 local_session = util.get_localAPI_session() 

3526 session = local_session 

3527 

3528 try: 

3529 hostref = session.xenapi.host.get_by_uuid(util.get_this_host()) 

3530 while not _xapi_enabled(session, hostref): 

3531 util.SMlog("Xapi not ready, GC waiting") 

3532 time.sleep(15) 

3533 finally: 

3534 if local_session is not None: 

3535 local_session.logout() 

3536 

3537def _gc(session, srUuid, dryRun=False, immediate=False): 

3538 init(srUuid) 

3539 _ensure_xapi_initialised(session) 

3540 sr = SR.getInstance(srUuid, session) 

3541 if not sr.gcEnabled(False): 3541 ↛ 3542line 3541 didn't jump to line 3542, because the condition on line 3541 was never true

3542 return 

3543 

3544 sr.cleanupCache() 

3545 try: 

3546 _gcLoop(sr, dryRun, immediate=immediate) 

3547 finally: 

3548 sr.cleanup() 

3549 sr.logFilter.logState() 

3550 del sr.xapi 

3551 

3552 

3553def _abort(srUuid, soft=False): 

3554 """Aborts an GC/coalesce. 

3555 

3556 srUuid: the UUID of the SR whose GC/coalesce must be aborted 

3557 soft: If set to True and there is a pending abort signal, the function 

3558 doesn't do anything. If set to False, a new abort signal is issued. 

3559 

3560 returns: If soft is set to False, we return True holding lockActive. If 

3561 soft is set to False and an abort signal is pending, we return False 

3562 without holding lockActive. An exception is raised in case of error.""" 

3563 Util.log("=== SR %s: abort ===" % (srUuid)) 

3564 init(srUuid) 

3565 if not lockActive.acquireNoblock(): 

3566 gotLock = False 

3567 Util.log("Aborting currently-running instance (SR %s)" % srUuid) 

3568 abortFlag = IPCFlag(srUuid) 

3569 if not abortFlag.set(FLAG_TYPE_ABORT, soft): 

3570 return False 

3571 for i in range(SR.LOCK_RETRY_ATTEMPTS): 

3572 gotLock = lockActive.acquireNoblock() 

3573 if gotLock: 

3574 break 

3575 time.sleep(SR.LOCK_RETRY_INTERVAL) 

3576 abortFlag.clear(FLAG_TYPE_ABORT) 

3577 if not gotLock: 

3578 raise util.CommandException(code=errno.ETIMEDOUT, 

3579 reason="SR %s: error aborting existing process" % srUuid) 

3580 return True 

3581 

3582 

3583def init(srUuid): 

3584 global lockRunning 

3585 if not lockRunning: 3585 ↛ 3586line 3585 didn't jump to line 3586, because the condition on line 3585 was never true

3586 lockRunning = lock.Lock(LOCK_TYPE_RUNNING, srUuid) 

3587 global lockActive 

3588 if not lockActive: 3588 ↛ 3589line 3588 didn't jump to line 3589, because the condition on line 3588 was never true

3589 lockActive = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid) 

3590 

3591 

3592def usage(): 

3593 output = """Garbage collect and/or coalesce VHDs in a VHD-based SR 

3594 

3595Parameters: 

3596 -u --uuid UUID SR UUID 

3597 and one of: 

3598 -g --gc garbage collect, coalesce, and repeat while there is work 

3599 -G --gc_force garbage collect once, aborting any current operations 

3600 -c --cache-clean <max_age> clean up IntelliCache cache files older than 

3601 max_age hours 

3602 -a --abort abort any currently running operation (GC or coalesce) 

3603 -q --query query the current state (GC'ing, coalescing or not running) 

3604 -x --disable disable GC/coalesce (will be in effect until you exit) 

3605 -t --debug see Debug below 

3606 

3607Options: 

3608 -b --background run in background (return immediately) (valid for -g only) 

3609 -f --force continue in the presence of VHDs with errors (when doing 

3610 GC, this might cause removal of any such VHDs) (only valid 

3611 for -G) (DANGEROUS) 

3612 

3613Debug: 

3614 The --debug parameter enables manipulation of LVHD VDIs for debugging 

3615 purposes. ** NEVER USE IT ON A LIVE VM ** 

3616 The following parameters are required: 

3617 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate", 

3618 "deflate". 

3619 -v --vdi_uuid VDI UUID 

3620 """ 

3621 #-d --dry-run don't actually perform any SR-modifying operations 

3622 print(output) 

3623 Util.log("(Invalid usage)") 

3624 sys.exit(1) 

3625 

3626 

3627############################################################################## 

3628# 

3629# API 

3630# 

3631def abort(srUuid, soft=False): 

3632 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair. 

3633 """ 

3634 if _abort(srUuid, soft): 

3635 Util.log("abort: releasing the process lock") 

3636 lockActive.release() 

3637 return True 

3638 else: 

3639 return False 

3640 

3641 

3642def gc(session, srUuid, inBackground, dryRun=False): 

3643 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return 

3644 immediately if inBackground=True. 

3645 

3646 The following algorithm is used: 

3647 1. If we are already GC'ing in this SR, return 

3648 2. If we are already coalescing a VDI pair: 

3649 a. Scan the SR and determine if the VDI pair is GC'able 

3650 b. If the pair is not GC'able, return 

3651 c. If the pair is GC'able, abort coalesce 

3652 3. Scan the SR 

3653 4. If there is nothing to collect, nor to coalesce, return 

3654 5. If there is something to collect, GC all, then goto 3 

3655 6. If there is something to coalesce, coalesce one pair, then goto 3 

3656 """ 

3657 Util.log("=== SR %s: gc ===" % srUuid) 

3658 if inBackground: 

3659 if daemonize(): 

3660 # we are now running in the background. Catch & log any errors 

3661 # because there is no other way to propagate them back at this 

3662 # point 

3663 

3664 try: 

3665 _gc(None, srUuid, dryRun) 

3666 except AbortException: 

3667 Util.log("Aborted") 

3668 except Exception: 

3669 Util.logException("gc") 

3670 Util.log("* * * * * SR %s: ERROR\n" % srUuid) 

3671 os._exit(0) 

3672 else: 

3673 _gc(session, srUuid, dryRun, immediate=True) 

3674 

3675 

3676def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False): 

3677 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure 

3678 the SR lock is held. 

3679 The following algorithm is used: 

3680 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce 

3681 2. Scan the SR 

3682 3. GC 

3683 4. return 

3684 """ 

3685 Util.log("=== SR %s: gc_force ===" % srUuid) 

3686 init(srUuid) 

3687 sr = SR.getInstance(srUuid, session, lockSR, True) 

3688 if not lockActive.acquireNoblock(): 

3689 abort(srUuid) 

3690 else: 

3691 Util.log("Nothing was running, clear to proceed") 

3692 

3693 if force: 

3694 Util.log("FORCED: will continue even if there are VHD errors") 

3695 sr.scanLocked(force) 

3696 sr.cleanupCoalesceJournals() 

3697 

3698 try: 

3699 sr.cleanupCache() 

3700 sr.garbageCollect(dryRun) 

3701 finally: 

3702 sr.cleanup() 

3703 sr.logFilter.logState() 

3704 lockActive.release() 

3705 

3706 

3707def get_state(srUuid): 

3708 """Return whether GC/coalesce is currently running or not. The information 

3709 is not guaranteed for any length of time if the call is not protected by 

3710 locking. 

3711 """ 

3712 init(srUuid) 

3713 if lockActive.acquireNoblock(): 

3714 lockActive.release() 

3715 return False 

3716 return True 

3717 

3718 

3719def should_preempt(session, srUuid): 

3720 sr = SR.getInstance(srUuid, session) 

3721 entries = sr.journaler.getAll(VDI.JRN_COALESCE) 

3722 if len(entries) == 0: 

3723 return False 

3724 elif len(entries) > 1: 

3725 raise util.SMException("More than one coalesce entry: " + str(entries)) 

3726 sr.scanLocked() 

3727 coalescedUuid = entries.popitem()[0] 

3728 garbage = sr.findGarbage() 

3729 for vdi in garbage: 

3730 if vdi.uuid == coalescedUuid: 

3731 return True 

3732 return False 

3733 

3734 

3735def get_coalesceable_leaves(session, srUuid, vdiUuids): 

3736 coalesceable = [] 

3737 sr = SR.getInstance(srUuid, session) 

3738 sr.scanLocked() 

3739 for uuid in vdiUuids: 

3740 vdi = sr.getVDI(uuid) 

3741 if not vdi: 

3742 raise util.SMException("VDI %s not found" % uuid) 

3743 if vdi.isLeafCoalesceable(): 

3744 coalesceable.append(uuid) 

3745 return coalesceable 

3746 

3747 

3748def cache_cleanup(session, srUuid, maxAge): 

3749 sr = SR.getInstance(srUuid, session) 

3750 return sr.cleanupCache(maxAge) 

3751 

3752 

3753def debug(sr_uuid, cmd, vdi_uuid): 

3754 Util.log("Debug command: %s" % cmd) 

3755 sr = SR.getInstance(sr_uuid, None) 

3756 if not isinstance(sr, LVHDSR): 

3757 print("Error: not an LVHD SR") 

3758 return 

3759 sr.scanLocked() 

3760 vdi = sr.getVDI(vdi_uuid) 

3761 if not vdi: 

3762 print("Error: VDI %s not found") 

3763 return 

3764 print("Running %s on SR %s" % (cmd, sr)) 

3765 print("VDI before: %s" % vdi) 

3766 if cmd == "activate": 

3767 vdi._activate() 

3768 print("VDI file: %s" % vdi.path) 

3769 if cmd == "deactivate": 

3770 ns = lvhdutil.NS_PREFIX_LVM + sr.uuid 

3771 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False) 

3772 if cmd == "inflate": 

3773 vdi.inflateFully() 

3774 sr.cleanup() 

3775 if cmd == "deflate": 

3776 vdi.deflate() 

3777 sr.cleanup() 

3778 sr.scanLocked() 

3779 print("VDI after: %s" % vdi) 

3780 

3781 

3782def abort_optional_reenable(uuid): 

3783 print("Disabling GC/coalesce for %s" % uuid) 

3784 ret = _abort(uuid) 

3785 input("Press enter to re-enable...") 

3786 print("GC/coalesce re-enabled") 

3787 lockRunning.release() 

3788 if ret: 

3789 lockActive.release() 

3790 

3791 

3792############################################################################## 

3793# 

3794# CLI 

3795# 

3796def main(): 

3797 action = "" 

3798 uuid = "" 

3799 background = False 

3800 force = False 

3801 dryRun = False 

3802 debug_cmd = "" 

3803 vdi_uuid = "" 

3804 shortArgs = "gGc:aqxu:bfdt:v:" 

3805 longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable", 

3806 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="] 

3807 

3808 try: 

3809 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs) 

3810 except getopt.GetoptError: 

3811 usage() 

3812 for o, a in opts: 

3813 if o in ("-g", "--gc"): 

3814 action = "gc" 

3815 if o in ("-G", "--gc_force"): 

3816 action = "gc_force" 

3817 if o in ("-c", "--clean_cache"): 

3818 action = "clean_cache" 

3819 maxAge = int(a) 

3820 if o in ("-a", "--abort"): 

3821 action = "abort" 

3822 if o in ("-q", "--query"): 

3823 action = "query" 

3824 if o in ("-x", "--disable"): 

3825 action = "disable" 

3826 if o in ("-u", "--uuid"): 

3827 uuid = a 

3828 if o in ("-b", "--background"): 

3829 background = True 

3830 if o in ("-f", "--force"): 

3831 force = True 

3832 if o in ("-d", "--dry-run"): 

3833 Util.log("Dry run mode") 

3834 dryRun = True 

3835 if o in ("-t", "--debug"): 

3836 action = "debug" 

3837 debug_cmd = a 

3838 if o in ("-v", "--vdi_uuid"): 

3839 vdi_uuid = a 

3840 

3841 if not action or not uuid: 

3842 usage() 

3843 if action == "debug" and not (debug_cmd and vdi_uuid) or \ 

3844 action != "debug" and (debug_cmd or vdi_uuid): 

3845 usage() 

3846 

3847 if action != "query" and action != "debug": 

3848 print("All output goes to log") 

3849 

3850 if action == "gc": 

3851 gc(None, uuid, background, dryRun) 

3852 elif action == "gc_force": 

3853 gc_force(None, uuid, force, dryRun, True) 

3854 elif action == "clean_cache": 

3855 cache_cleanup(None, uuid, maxAge) 

3856 elif action == "abort": 

3857 abort(uuid) 

3858 elif action == "query": 

3859 print("Currently running: %s" % get_state(uuid)) 

3860 elif action == "disable": 

3861 abort_optional_reenable(uuid) 

3862 elif action == "debug": 

3863 debug(uuid, debug_cmd, vdi_uuid) 

3864 

3865 

3866if __name__ == '__main__': 3866 ↛ 3867line 3866 didn't jump to line 3867, because the condition on line 3866 was never true

3867 main()