Coverage for drivers/cleanup.py: 31%

1 #!/usr/bin/python3
2 #
3 # Copyright (C) Citrix Systems Inc.
4 #
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published
7 # by the Free Software Foundation; version 2.1 only.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Lesser General Public License for more details.
13 #
14 # You should have received a copy of the GNU Lesser General Public License
15 # along with this program; if not, write to the Free Software Foundation, Inc.,
16 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 #
18 # Script to coalesce and garbage collect VHD-based SR's in the background
19 #
21 import os
22 import os.path
23 import sys
24 import time
25 import signal
26 import subprocess
27 import getopt
28 import datetime
29 import traceback
30 import base64
31 import zlib
32 import errno
33 import stat
35 import XenAPI # pylint: disable=import-error
36 import util
37 import lvutil
38 import vhdutil
39 import lvhdutil
40 import lvmcache
41 import journaler
42 import fjournaler
43 import lock
44 import blktap2
45 import xs_errors
46 from refcounter import RefCounter
47 from ipc import IPCFlag
48 from lvmanager import LVActivator
49 from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG
50 from functools import reduce
51 from time import monotonic as _time
53 try:
54 from linstorjournaler import LinstorJournaler
55 from linstorvhdutil import LinstorVhdUtil
56 from linstorvolumemanager import get_controller_uri
57 from linstorvolumemanager import LinstorVolumeManager
58 from linstorvolumemanager import LinstorVolumeManagerError
59 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX
61 LINSTOR_AVAILABLE = True
62 except ImportError:
63 LINSTOR_AVAILABLE = False
65 # Disable automatic leaf-coalescing. Online leaf-coalesce is currently not
66 # possible due to lvhd_stop_using_() not working correctly. However, we leave
67 # this option available through the explicit LEAFCLSC_FORCE flag in the VDI
68 # record for use by the offline tool (which makes the operation safe by pausing
69 # the VM first)
70 AUTO_ONLINE_LEAF_COALESCE_ENABLED = True
72 FLAG_TYPE_ABORT = "abort" # flag to request aborting of GC/coalesce
74 # process "lock", used simply as an indicator that a process already exists
75 # that is doing GC/coalesce on this SR (such a process holds the lock, and we
76 # check for the fact by trying the lock).
77 lockGCRunning = None
79 # process "lock" to indicate that the GC process has been activated but may not
80 # yet be running, stops a second process from being started.
81 LOCK_TYPE_GC_ACTIVE = "gc_active"
82 lockGCActive = None
84 # Default coalesce error rate limit, in messages per minute. A zero value
85 # disables throttling, and a negative value disables error reporting.
86 DEFAULT_COALESCE_ERR_RATE = 1.0 / 60
88 COALESCE_LAST_ERR_TAG = 'last-coalesce-error'
89 COALESCE_ERR_RATE_TAG = 'coalesce-error-rate'
90 VAR_RUN = "/var/run/"
91 SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log"
93 N_RUNNING_AVERAGE = 10
95 NON_PERSISTENT_DIR = '/run/nonpersistent/sm'
98 class AbortException(util.SMException):
99 pass
102 ################################################################################
103 #
104 # Util
105 #
106 class Util:
107 RET_RC = 1
108 RET_STDOUT = 2
109 RET_STDERR = 4
111 UUID_LEN = 36
113 PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024}
115 def log(text):
116 util.SMlog(text, ident="SMGC")
117 log = staticmethod(log)
119 def logException(tag):
120 info = sys.exc_info()
121 if info[0] == SystemExit:  # 121 ↛ 123: condition never true
122 # this should not be happening when catching "Exception", but it is
123 sys.exit(0)
124 tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2]))
125 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
126 Util.log(" ***********************")
127 Util.log(" * E X C E P T I O N *")
128 Util.log(" ***********************")
129 Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1]))
130 Util.log(tb)
131 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
132 logException = staticmethod(logException)
134 def doexec(args, expectedRC, inputtext=None, ret=None, log=True):
135 "Execute a subprocess, then return its return code, stdout, stderr"
136 proc = subprocess.Popen(args,
137 stdin=subprocess.PIPE, \
138 stdout=subprocess.PIPE, \
139 stderr=subprocess.PIPE, \
140 shell=True, \
141 close_fds=True)
142 (stdout, stderr) = proc.communicate(inputtext)
143 stdout = str(stdout)
144 stderr = str(stderr)
145 rc = proc.returncode
146 if log:
147 Util.log("`%s`: %s" % (args, rc))
148 if type(expectedRC) != type([]):
149 expectedRC = [expectedRC]
150 if not rc in expectedRC:
151 reason = stderr.strip()
152 if stdout.strip():
153 reason = "%s (stdout: %s)" % (reason, stdout.strip())
154 Util.log("Failed: %s" % reason)
155 raise util.CommandException(rc, args, reason)
157 if ret == Util.RET_RC:
158 return rc
159 if ret == Util.RET_STDERR:
160 return stderr
161 return stdout
162 doexec = staticmethod(doexec)
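# A rough usage sketch of Util.doexec, with hypothetical shell commands. Since
# shell=True, the command is passed as one string; expectedRC may be an int or
# a list of acceptable return codes, and ret selects what is returned (note
# that str() over the captured bytes yields a "b'...'" string on Python 3):
#   >>> from cleanup import Util
#   >>> Util.doexec("true", 0, ret=Util.RET_RC, log=False)
#   0
#   >>> out = Util.doexec("echo hi", [0], ret=Util.RET_STDOUT, log=False)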
164 def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut):
165 """execute func in a separate thread and kill it if abortTest signals
166 so"""
167 abortSignaled = abortTest() # check now before we clear resultFlag
168 resultFlag = IPCFlag(ns)
169 resultFlag.clearAll()
170 pid = os.fork()
171 if pid:
172 startTime = _time()
173 try:
174 while True:
175 if resultFlag.test("success"):
176 Util.log(" Child process completed successfully")
177 resultFlag.clear("success")
178 return
179 if resultFlag.test("failure"):
180 resultFlag.clear("failure")
181 raise util.SMException("Child process exited with error")
182 if abortTest() or abortSignaled:
183 os.killpg(pid, signal.SIGKILL)
184 raise AbortException("Aborting due to signal")
185 if timeOut and _time() - startTime > timeOut:
186 os.killpg(pid, signal.SIGKILL)
187 resultFlag.clearAll()
188 raise util.SMException("Timed out")
189 time.sleep(pollInterval)
190 finally:
191 wait_pid = 0
192 rc = -1
193 count = 0
194 while wait_pid == 0 and count < 10:
195 wait_pid, rc = os.waitpid(pid, os.WNOHANG)
196 if wait_pid == 0:
197 time.sleep(2)
198 count += 1
200 if wait_pid == 0:
201 Util.log("runAbortable: wait for process completion timed out")
202 else:
203 os.setpgrp()
204 try:
205 if func() == ret:
206 resultFlag.set("success")
207 else:
208 resultFlag.set("failure")
209 except Exception as e:
210 Util.log("Child process failed with : (%s)" % e)
211 resultFlag.set("failure")
212 Util.logException("This exception has occured")
213 os._exit(0)
214 runAbortable = staticmethod(runAbortable)
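# A rough usage sketch of Util.runAbortable, assuming `sr_uuid` names an SR and
# `do_work` is some callable returning True on success (both hypothetical). The
# callable runs in a forked child; the parent polls an IPCFlag in the given
# namespace and kills the child's process group on abort or timeout (a timeOut
# of 0 disables the timeout check):
#   >>> from cleanup import Util, IPCFlag, FLAG_TYPE_ABORT
#   >>> abortTest = lambda: IPCFlag(sr_uuid).test(FLAG_TYPE_ABORT)
#   >>> Util.runAbortable(do_work, True, sr_uuid, abortTest, 1, 0)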
216 def num2str(number):
217 for prefix in ("G", "M", "K"):
218 if number >= Util.PREFIX[prefix]:
219 return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix)
220 return "%s" % number
221 num2str = staticmethod(num2str)
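# Example outputs of Util.num2str, using the binary prefixes from Util.PREFIX:
#   >>> Util.num2str(512)
#   '512'
#   >>> Util.num2str(3 * 1024 * 1024)
#   '3.000M'
#   >>> Util.num2str(20 * 1024 * 1024 * 1024)
#   '20.000G'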
223 def numBits(val):
224 count = 0
225 while val:
226 count += val & 1
227 val = val >> 1
228 return count
229 numBits = staticmethod(numBits)
231 def countBits(bitmap1, bitmap2):
232 """return bit count in the bitmap produced by ORing the two bitmaps"""
233 len1 = len(bitmap1)
234 len2 = len(bitmap2)
235 lenLong = len1
236 lenShort = len2
237 bitmapLong = bitmap1
238 if len2 > len1:
239 lenLong = len2
240 lenShort = len1
241 bitmapLong = bitmap2
243 count = 0
244 for i in range(lenShort):
245 val = bitmap1[i] | bitmap2[i]
246 count += Util.numBits(val)
248 for i in range(i + 1, lenLong):
249 val = bitmapLong[i]
250 count += Util.numBits(val)
251 return count
252 countBits = staticmethod(countBits)
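# Worked example for Util.countBits: the arguments are bytes objects (one bit
# per VHD block), so indexing yields ints. Note that the second loop reuses the
# first loop's index variable, effectively continuing from position lenShort
# (it assumes the shorter bitmap is non-empty):
#   >>> Util.countBits(b'\x0f', b'\xf0\x01')
#   9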
254 def getThisScript():
255 thisScript = util.get_real_path(__file__)
256 if thisScript.endswith(".pyc"):
257 thisScript = thisScript[:-1]
258 return thisScript
259 getThisScript = staticmethod(getThisScript)
262 ################################################################################
263 #
264 # XAPI
265 #
266 class XAPI:
267 USER = "root"
268 PLUGIN_ON_SLAVE = "on-slave"
270 CONFIG_SM = 0
271 CONFIG_OTHER = 1
272 CONFIG_ON_BOOT = 2
273 CONFIG_ALLOW_CACHING = 3
275 CONFIG_NAME = {
276 CONFIG_SM: "sm-config",
277 CONFIG_OTHER: "other-config",
278 CONFIG_ON_BOOT: "on-boot",
279 CONFIG_ALLOW_CACHING: "allow_caching"
280 }
282 class LookupError(util.SMException):
283 pass
285 def getSession():
286 session = XenAPI.xapi_local()
287 session.xenapi.login_with_password(XAPI.USER, '', '', 'SM')
288 return session
289 getSession = staticmethod(getSession)
291 def __init__(self, session, srUuid):
292 self.sessionPrivate = False
293 self.session = session
294 if self.session is None:
295 self.session = self.getSession()
296 self.sessionPrivate = True
297 self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid)
298 self.srRecord = self.session.xenapi.SR.get_record(self._srRef)
299 self.hostUuid = util.get_this_host()
300 self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid)
301 self.task = None
302 self.task_progress = {"coalescable": 0, "done": 0}
304 def __del__(self):
305 if self.sessionPrivate:
306 self.session.xenapi.session.logout()
308 def isPluggedHere(self):
309 pbds = self.getAttachedPBDs()
310 for pbdRec in pbds:
311 if pbdRec["host"] == self._hostRef:
312 return True
313 return False
315 def poolOK(self):
316 host_recs = self.session.xenapi.host.get_all_records()
317 for host_ref, host_rec in host_recs.items():
318 if not host_rec["enabled"]:
319 Util.log("Host %s not enabled" % host_rec["uuid"])
320 return False
321 return True
323 def isMaster(self):
324 if self.srRecord["shared"]:
325 pool = list(self.session.xenapi.pool.get_all_records().values())[0]
326 return pool["master"] == self._hostRef
327 else:
328 pbds = self.getAttachedPBDs()
329 if len(pbds) < 1:
330 raise util.SMException("Local SR not attached")
331 elif len(pbds) > 1:
332 raise util.SMException("Local SR multiply attached")
333 return pbds[0]["host"] == self._hostRef
335 def getAttachedPBDs(self):
336 """Return PBD records for all PBDs of this SR that are currently
337 attached"""
338 attachedPBDs = []
339 pbds = self.session.xenapi.PBD.get_all_records()
340 for pbdRec in pbds.values():
341 if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]:
342 attachedPBDs.append(pbdRec)
343 return attachedPBDs
345 def getOnlineHosts(self):
346 return util.get_online_hosts(self.session)
348 def ensureInactive(self, hostRef, args):
349 text = self.session.xenapi.host.call_plugin( \
350 hostRef, self.PLUGIN_ON_SLAVE, "multi", args)
351 Util.log("call-plugin returned: '%s'" % text)
353 def getRecordHost(self, hostRef):
354 return self.session.xenapi.host.get_record(hostRef)
356 def _getRefVDI(self, uuid):
357 return self.session.xenapi.VDI.get_by_uuid(uuid)
359 def getRefVDI(self, vdi):
360 return self._getRefVDI(vdi.uuid)
362 def getRecordVDI(self, uuid):
363 try:
364 ref = self._getRefVDI(uuid)
365 return self.session.xenapi.VDI.get_record(ref)
366 except XenAPI.Failure:
367 return None
369 def singleSnapshotVDI(self, vdi):
370 return self.session.xenapi.VDI.snapshot(vdi.getRef(),
371 {"type": "internal"})
373 def forgetVDI(self, srUuid, vdiUuid):
374 """Forget the VDI, but handle the case where the VDI has already been
375 forgotten (i.e. ignore errors)"""
376 try:
377 vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid)
378 self.session.xenapi.VDI.forget(vdiRef)
379 except XenAPI.Failure:
380 pass
382 def getConfigVDI(self, vdi, key):
383 kind = vdi.CONFIG_TYPE[key]
384 if kind == self.CONFIG_SM:
385 cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef())
386 elif kind == self.CONFIG_OTHER:
387 cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef())
388 elif kind == self.CONFIG_ON_BOOT:
389 cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef())
390 elif kind == self.CONFIG_ALLOW_CACHING:
391 cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef())
392 else:
393 assert(False)
394 Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg)))
395 return cfg
397 def removeFromConfigVDI(self, vdi, key):
398 kind = vdi.CONFIG_TYPE[key]
399 if kind == self.CONFIG_SM:
400 self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key)
401 elif kind == self.CONFIG_OTHER:
402 self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key)
403 else:
404 assert(False)
406 def addToConfigVDI(self, vdi, key, val):
407 kind = vdi.CONFIG_TYPE[key]
408 if kind == self.CONFIG_SM:
409 self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val)
410 elif kind == self.CONFIG_OTHER:
411 self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val)
412 else:
413 assert(False)
415 def isSnapshot(self, vdi):
416 return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef())
418 def markCacheSRsDirty(self):
419 sr_refs = self.session.xenapi.SR.get_all_records_where( \
420 'field "local_cache_enabled" = "true"')
421 for sr_ref in sr_refs:
422 Util.log("Marking SR %s dirty" % sr_ref)
423 util.set_dirty(self.session, sr_ref)
425 def srUpdate(self):
426 Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"])
427 abortFlag = IPCFlag(self.srRecord["uuid"])
428 task = self.session.xenapi.Async.SR.update(self._srRef)
429 cancelTask = True
430 try:
431 for i in range(60):
432 status = self.session.xenapi.task.get_status(task)
433 if not status == "pending":
434 Util.log("SR.update_asynch status changed to [%s]" % status)
435 cancelTask = False
436 return
437 if abortFlag.test(FLAG_TYPE_ABORT):
438 Util.log("Abort signalled during srUpdate, cancelling task...")
439 try:
440 self.session.xenapi.task.cancel(task)
441 cancelTask = False
442 Util.log("Task cancelled")
443 except:
444 pass
445 return
446 time.sleep(1)
447 finally:
448 if cancelTask:
449 self.session.xenapi.task.cancel(task)
450 self.session.xenapi.task.destroy(task)
451 Util.log("Asynch srUpdate still running, but timeout exceeded.")
453 def update_task(self):
454 self.session.xenapi.task.set_other_config(
455 self.task,
456 {
457 "applies_to": self._srRef
458 })
459 total = self.task_progress['coalescable'] + self.task_progress['done']
460 if (total > 0):
461 self.session.xenapi.task.set_progress(
462 self.task, float(self.task_progress['done']) / total)
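# Worked example: with task_progress = {"coalescable": 3, "done": 1} the XAPI
# task progress is set to 1 / (3 + 1) = 0.25; while both counters are zero the
# progress is left untouched.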
464 def create_task(self, label, description):
465 self.task = self.session.xenapi.task.create(label, description)
466 self.update_task()
468 def update_task_progress(self, key, value):
469 self.task_progress[key] = value
470 if self.task:
471 self.update_task()
473 def set_task_status(self, status):
474 if self.task:
475 self.session.xenapi.task.set_status(self.task, status)
478 ################################################################################
479 #
480 # VDI
481 #
482 class VDI(object):
483 """Object representing a VDI of a VHD-based SR"""
485 POLL_INTERVAL = 1
486 POLL_TIMEOUT = 30
487 DEVICE_MAJOR = 202
488 DRIVER_NAME_VHD = "vhd"
490 # config keys & values
491 DB_VHD_PARENT = "vhd-parent"
492 DB_VDI_TYPE = "vdi_type"
493 DB_VHD_BLOCKS = "vhd-blocks"
494 DB_VDI_PAUSED = "paused"
495 DB_VDI_RELINKING = "relinking"
496 DB_VDI_ACTIVATING = "activating"
497 DB_GC = "gc"
498 DB_COALESCE = "coalesce"
499 DB_LEAFCLSC = "leaf-coalesce" # config key
500 LEAFCLSC_DISABLED = "false" # set by user; means do not leaf-coalesce
501 LEAFCLSC_FORCE = "force" # set by user; means skip snap-coalesce
502 LEAFCLSC_OFFLINE = "offline" # set here for informational purposes: means
503 # no space to snap-coalesce or unable to keep
504 # up with VDI. This is not used by the SM, it
505 # might be used by external components.
506 DB_ONBOOT = "on-boot"
507 ONBOOT_RESET = "reset"
508 DB_ALLOW_CACHING = "allow_caching"
510 CONFIG_TYPE = {
511 DB_VHD_PARENT: XAPI.CONFIG_SM,
512 DB_VDI_TYPE: XAPI.CONFIG_SM,
513 DB_VHD_BLOCKS: XAPI.CONFIG_SM,
514 DB_VDI_PAUSED: XAPI.CONFIG_SM,
515 DB_VDI_RELINKING: XAPI.CONFIG_SM,
516 DB_VDI_ACTIVATING: XAPI.CONFIG_SM,
517 DB_GC: XAPI.CONFIG_OTHER,
518 DB_COALESCE: XAPI.CONFIG_OTHER,
519 DB_LEAFCLSC: XAPI.CONFIG_OTHER,
520 DB_ONBOOT: XAPI.CONFIG_ON_BOOT,
521 DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING,
522 }
524 LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024 # bytes
525 LIVE_LEAF_COALESCE_TIMEOUT = 10 # seconds
526 TIMEOUT_SAFETY_MARGIN = 0.5 # extra margin when calculating
527 # feasibility of leaf coalesce
529 JRN_RELINK = "relink" # journal entry type for relinking children
530 JRN_COALESCE = "coalesce" # to communicate which VDI is being coalesced
531 JRN_LEAF = "leaf" # used in coalesce-leaf
533 STR_TREE_INDENT = 4
535 def __init__(self, sr, uuid, raw):
536 self.sr = sr
537 self.scanError = True
538 self.uuid = uuid
539 self.raw = raw
540 self.fileName = ""
541 self.parentUuid = ""
542 self.sizeVirt = -1
543 self._sizeVHD = -1
544 self._sizeAllocated = -1
545 self.hidden = False
546 self.parent = None
547 self.children = []
548 self._vdiRef = None
549 self._clearRef()
551 @staticmethod
552 def extractUuid(path):
553 raise NotImplementedError("Implement in sub class")
555 def load(self, info=None):
556 """Load VDI info"""
557 pass # abstract
559 def getDriverName(self):
560 return self.DRIVER_NAME_VHD
562 def getRef(self):
563 if self._vdiRef is None:
564 self._vdiRef = self.sr.xapi.getRefVDI(self)
565 return self._vdiRef
567 def getConfig(self, key, default=None):
568 config = self.sr.xapi.getConfigVDI(self, key)
569 if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING:  # 569 ↛ 570: condition never true
570 val = config
571 else:
572 val = config.get(key)
573 if val:
574 return val
575 return default
577 def setConfig(self, key, val):
578 self.sr.xapi.removeFromConfigVDI(self, key)
579 self.sr.xapi.addToConfigVDI(self, key, val)
580 Util.log("Set %s = %s for %s" % (key, val, self))
582 def delConfig(self, key):
583 self.sr.xapi.removeFromConfigVDI(self, key)
584 Util.log("Removed %s from %s" % (key, self))
586 def ensureUnpaused(self):
587 if self.getConfig(self.DB_VDI_PAUSED) == "true":
588 Util.log("Unpausing VDI %s" % self)
589 self.unpause()
591 def pause(self, failfast=False):
592 if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid,
593 self.uuid, failfast):
594 raise util.SMException("Failed to pause VDI %s" % self)
596 def _report_tapdisk_unpause_error(self):
597 try:
598 xapi = self.sr.xapi.session.xenapi
599 sr_ref = xapi.SR.get_by_uuid(self.sr.uuid)
600 msg_name = "failed to unpause tapdisk"
601 msg_body = "Failed to unpause tapdisk for VDI %s, " \
602 "VMs using this tapdisk have lost access " \
603 "to the corresponding disk(s)" % self.uuid
604 xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body)
605 except Exception as e:
606 util.SMlog("failed to generate message: %s" % e)
608 def unpause(self):
609 if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid,
610 self.uuid):
611 self._report_tapdisk_unpause_error()
612 raise util.SMException("Failed to unpause VDI %s" % self)
614 def refresh(self, ignoreNonexistent=True):
615 """Pause-unpause in one step"""
616 self.sr.lock()
617 try:
618 try:
619 if not blktap2.VDI.tap_refresh(self.sr.xapi.session,  # 619 ↛ 621: condition never true
620 self.sr.uuid, self.uuid):
621 self._report_tapdisk_unpause_error()
622 raise util.SMException("Failed to refresh %s" % self)
623 except XenAPI.Failure as e:
624 if util.isInvalidVDI(e) and ignoreNonexistent:
625 Util.log("VDI %s not found, ignoring" % self)
626 return
627 raise
628 finally:
629 self.sr.unlock()
631 def isSnapshot(self):
632 return self.sr.xapi.isSnapshot(self)
634 def isAttachedRW(self):
635 return util.is_attached_rw(
636 self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef()))
638 def getVHDBlocks(self):
639 val = self.updateBlockInfo()
640 bitmap = zlib.decompress(base64.b64decode(val))
641 return bitmap
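# getVHDBlocks undoes the sm-config encoding: base64-decode, then zlib-
# decompress (updateBlockInfo below only base64-encodes, so the bitmap coming
# from vhdutil is evidently already compressed). Illustrative round trip with
# made-up data:
#   >>> import base64, zlib
#   >>> raw = b'\xff' * 16
#   >>> stored = base64.b64encode(zlib.compress(raw)).decode()
#   >>> zlib.decompress(base64.b64decode(stored)) == raw
#   True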
643 def isCoalesceable(self):
644 """A VDI is coalesceable if it has no siblings and is not a leaf"""
645 return not self.scanError and \
646 self.parent and \
647 len(self.parent.children) == 1 and \
648 self.hidden and \
649 len(self.children) > 0
651 def isLeafCoalesceable(self):
652 """A VDI is leaf-coalesceable if it has no siblings and is a leaf"""
653 return not self.scanError and \
654 self.parent and \
655 len(self.parent.children) == 1 and \
656 not self.hidden and \
657 len(self.children) == 0
659 def canLiveCoalesce(self, speed):
660 """Can we stop-and-leaf-coalesce this VDI? The VDI must be
661 isLeafCoalesceable() already"""
662 feasibleSize = False
663 allowedDownTime = \
664 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT
665 vhd_size = self.getAllocatedSize()
666 if speed:
667 feasibleSize = \
668 vhd_size // speed < allowedDownTime
669 else:
670 feasibleSize = \
671 vhd_size < self.LIVE_LEAF_COALESCE_MAX_SIZE
673 return (feasibleSize or
674 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE)
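# Worked example: allowedDownTime = 0.5 * 10 = 5 seconds. Assuming the recorded
# speed is in bytes per second (as recordStorageSpeed suggests), a measured
# 100 MiB/s means a leaf with less than ~500 MiB allocated passes the check;
# with no speed history the static 20 MiB LIVE_LEAF_COALESCE_MAX_SIZE limit
# applies instead, and LEAFCLSC_FORCE bypasses both.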
676 def getAllPrunable(self):
677 if len(self.children) == 0: # base case
678 # it is possible to have a hidden leaf that was recently coalesced
679 # onto its parent, its children already relinked but not yet
680 # reloaded - in which case it may not be garbage collected yet:
681 # some tapdisks could still be using the file.
682 if self.sr.journaler.get(self.JRN_RELINK, self.uuid):
683 return []
684 if not self.scanError and self.hidden:
685 return [self]
686 return []
688 thisPrunable = True
689 vdiList = []
690 for child in self.children:
691 childList = child.getAllPrunable()
692 vdiList.extend(childList)
693 if child not in childList:
694 thisPrunable = False
696 # We can destroy the current VDI if all children are hidden BUT the
697 # current VDI must be hidden too to do that!
698 # Example in this case (after a failed live leaf coalesce):
699 #
700 # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees):
701 # SMGC: [32436] b5458d61(1.000G/4.127M)
702 # SMGC: [32436] *OLD_b545(1.000G/4.129M)
703 #
704 # OLD_b545 is hidden and must be removed, but b5458d61 not.
705 # Normally we are not in this function when the delete action is
706 # executed but in `_liveLeafCoalesce`.
708 if not self.scanError and not self.hidden and thisPrunable:
709 vdiList.append(self)
710 return vdiList
712 def getSizeVHD(self):
713 return self._sizeVHD
715 def getAllocatedSize(self):
716 return self._sizeAllocated
718 def getTreeRoot(self):
719 "Get the root of the tree that self belongs to"
720 root = self
721 while root.parent:
722 root = root.parent
723 return root
725 def getTreeHeight(self):
726 "Get the height of the subtree rooted at self"
727 if len(self.children) == 0:
728 return 1
730 maxChildHeight = 0
731 for child in self.children:
732 childHeight = child.getTreeHeight()
733 if childHeight > maxChildHeight:
734 maxChildHeight = childHeight
736 return maxChildHeight + 1
738 def getAllLeaves(self):
739 "Get all leaf nodes in the subtree rooted at self"
740 if len(self.children) == 0:
741 return [self]
743 leaves = []
744 for child in self.children:
745 leaves.extend(child.getAllLeaves())
746 return leaves
748 def updateBlockInfo(self):
749 val = base64.b64encode(self._queryVHDBlocks()).decode()
750 self.setConfig(VDI.DB_VHD_BLOCKS, val)
751 return val
753 def rename(self, uuid):
754 "Rename the VDI file"
755 assert(not self.sr.vdis.get(uuid))
756 self._clearRef()
757 oldUuid = self.uuid
758 self.uuid = uuid
759 self.children = []
760 # updating the children themselves is the responsibility of the caller
761 del self.sr.vdis[oldUuid]
762 self.sr.vdis[self.uuid] = self
764 def delete(self):
765 "Physically delete the VDI"
766 lock.Lock.cleanup(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid)
767 lock.Lock.cleanupAll(self.uuid)
768 self._clear()
770 def getParent(self):
771 return vhdutil.getParent(self.path, lambda x: x.strip())  # 771 ↛ exit: lambda never run
773 def repair(self, parent):
774 vhdutil.repair(parent)
776 def __str__(self):
777 strHidden = ""
778 if self.hidden:  # 778 ↛ 779: condition never true
779 strHidden = "*"
780 strSizeVirt = "?"
781 if self.sizeVirt > 0:  # 781 ↛ 782: condition never true
782 strSizeVirt = Util.num2str(self.sizeVirt)
783 strSizeVHD = "?"
784 if self._sizeVHD > 0:  # 784 ↛ 785: condition never true
785 strSizeVHD = "/%s" % Util.num2str(self._sizeVHD)
786 strSizeAllocated = "?"
787 if self._sizeAllocated >= 0:  # 787 ↛ 788: condition never true
788 strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated)
789 strType = ""
790 if self.raw:
791 strType = "[RAW]"
792 strSizeVHD = ""
794 return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt,
795 strSizeVHD, strSizeAllocated, strType)
797 def validate(self, fast=False):
798 if not vhdutil.check(self.path, fast=fast):  # 798 ↛ 799: condition never true
799 raise util.SMException("VHD %s corrupted" % self)
801 def _clear(self):
802 self.uuid = ""
803 self.path = ""
804 self.parentUuid = ""
805 self.parent = None
806 self._clearRef()
808 def _clearRef(self):
809 self._vdiRef = None
811 def _doCoalesce(self):
812 """Coalesce self onto parent. Only perform the actual coalescing of
813 VHD, but not the subsequent relinking. We'll do that as the next step,
814 after reloading the entire SR in case things have changed while we
815 were coalescing"""
816 self.validate()
817 self.parent.validate(True)
818 self.parent._increaseSizeVirt(self.sizeVirt)
819 self.sr._updateSlavesOnResize(self.parent)
820 self._coalesceVHD(0)
821 self.parent.validate(True)
822 #self._verifyContents(0)
823 self.parent.updateBlockInfo()
825 def _verifyContents(self, timeOut):
826 Util.log(" Coalesce verification on %s" % self)
827 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
828 Util.runAbortable(lambda: self._runTapdiskDiff(), True,
829 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
830 Util.log(" Coalesce verification succeeded")
832 def _runTapdiskDiff(self):
833 cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \
834 (self.getDriverName(), self.path, \
835 self.parent.getDriverName(), self.parent.path)
836 Util.doexec(cmd, 0)
837 return True
839 def _reportCoalesceError(vdi, ce):
840 """Reports a coalesce error to XenCenter.
842 vdi: the VDI object on which the coalesce error occurred
843 ce: the CommandException that was raised"""
845 msg_name = os.strerror(ce.code)
846 if ce.code == errno.ENOSPC:
847 # TODO We could add more information here, e.g. exactly how much
848 # space is required for the particular coalesce, as well as actions
849 # to be taken by the user and consequences of not taking these
850 # actions.
851 msg_body = 'Run out of space while coalescing.'
852 elif ce.code == errno.EIO:
853 msg_body = 'I/O error while coalescing.'
854 else:
855 msg_body = ''
856 util.SMlog('Coalesce failed on SR %s: %s (%s)'
857 % (vdi.sr.uuid, msg_name, msg_body))
859 # Create a XenCenter message, but don't spam.
860 xapi = vdi.sr.xapi.session.xenapi
861 sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid)
862 oth_cfg = xapi.SR.get_other_config(sr_ref)
863 if COALESCE_ERR_RATE_TAG in oth_cfg:
864 coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG])
865 else:
866 coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE
868 xcmsg = False
869 if coalesce_err_rate == 0:
870 xcmsg = True
871 elif coalesce_err_rate > 0:
872 now = datetime.datetime.now()
873 sm_cfg = xapi.SR.get_sm_config(sr_ref)
874 if COALESCE_LAST_ERR_TAG in sm_cfg:
875 # seconds per message (minimum distance in time between two
876 # messages in seconds)
877 spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60)
878 last = datetime.datetime.fromtimestamp(
879 float(sm_cfg[COALESCE_LAST_ERR_TAG]))
880 if now - last >= spm:
881 xapi.SR.remove_from_sm_config(sr_ref,
882 COALESCE_LAST_ERR_TAG)
883 xcmsg = True
884 else:
885 xcmsg = True
886 if xcmsg:
887 xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG,
888 str(now.strftime('%s')))
889 if xcmsg:
890 xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body)
891 _reportCoalesceError = staticmethod(_reportCoalesceError)
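# Throttling arithmetic: the rate is expressed in messages per minute, so the
# minimum spacing between XenCenter messages is (1 / rate) * 60 seconds. With
# DEFAULT_COALESCE_ERR_RATE = 1/60 that is 3600 s, i.e. at most one message per
# hour per SR; a rate of 0 reports every failure, and a negative rate
# suppresses the messages entirely.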
893 def coalesce(self):
894 # size is returned in sectors
895 return vhdutil.coalesce(self.path) * 512
897 def _doCoalesceVHD(vdi):
898 try:
899 startTime = time.time()
900 vhdSize = vdi.getAllocatedSize()
901 coalesced_size = vdi.coalesce()
902 endTime = time.time()
903 vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size)
904 except util.CommandException as ce:
905 # We use try/except for the following piece of code because it runs
906 # in a separate process context and errors will not be caught and
907 # reported by anyone.
908 try:
909 # Report coalesce errors back to user via XC
910 VDI._reportCoalesceError(vdi, ce)
911 except Exception as e:
912 util.SMlog('failed to create XenCenter message: %s' % e)
913 raise ce
914 except:
915 raise
916 _doCoalesceVHD = staticmethod(_doCoalesceVHD)
918 def _vdi_is_raw(self, vdi_path):
919 """
920 Given path to vdi determine if it is raw
921 """
922 uuid = self.extractUuid(vdi_path)
923 return self.sr.vdis[uuid].raw
925 def _coalesceVHD(self, timeOut):
926 Util.log(" Running VHD coalesce on %s" % self)
927 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)  # 927 ↛ exit: lambda never run
928 try:
929 util.fistpoint.activate_custom_fn(
930 "cleanup_coalesceVHD_inject_failure",
931 util.inject_failure)
932 Util.runAbortable(lambda: VDI._doCoalesceVHD(self), None,
933 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
934 except:
935 #exception at this phase could indicate a failure in vhd coalesce
936 # or a kill of vhd coalesce by runAbortable due to timeOut
937 # Try a repair and reraise the exception
938 parent = ""
939 try:
940 parent = self.getParent()
941 if not self._vdi_is_raw(parent):
942 # Repair error is logged and ignored. Error reraised later
943 util.SMlog('Coalesce failed on %s, attempting repair on ' \
944 'parent %s' % (self.uuid, parent))
945 self.repair(parent)
946 except Exception as e:
947 util.SMlog('(error ignored) Failed to repair parent %s ' \
948 'after failed coalesce on %s, err: %s' %
949 (parent, self.path, e))
950 raise
952 util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid)
954 def _relinkSkip(self):
955 """Relink children of this VDI to point to the parent of this VDI"""
956 abortFlag = IPCFlag(self.sr.uuid)
957 for child in self.children:
958 if abortFlag.test(FLAG_TYPE_ABORT):  # 958 ↛ 959: condition never true
959 raise AbortException("Aborting due to signal")
960 Util.log(" Relinking %s from %s to %s" % \
961 (child, self, self.parent))
962 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid)
963 child._setParent(self.parent)
964 self.children = []
966 def _reloadChildren(self, vdiSkip):
967 """Pause & unpause all VDIs in the subtree to cause blktap to reload
968 the VHD metadata for this file in any online VDI"""
969 abortFlag = IPCFlag(self.sr.uuid)
970 for child in self.children:
971 if child == vdiSkip:
972 continue
973 if abortFlag.test(FLAG_TYPE_ABORT):  # 973 ↛ 974: condition never true
974 raise AbortException("Aborting due to signal")
975 Util.log(" Reloading VDI %s" % child)
976 child._reload()
978 def _reload(self):
979 """Pause & unpause to cause blktap to reload the VHD metadata"""
980 for child in self.children:  # 980 ↛ 981: loop never started
981 child._reload()
983 # only leaves can be attached
984 if len(self.children) == 0:  # 984 ↛ exit: condition never false
985 try:
986 self.delConfig(VDI.DB_VDI_RELINKING)
987 except XenAPI.Failure as e:
988 if not util.isInvalidVDI(e):
989 raise
990 self.refresh()
992 def _tagChildrenForRelink(self):
993 if len(self.children) == 0:
994 retries = 0
995 try:
996 while retries < 15:
997 retries += 1
998 if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None:
999 Util.log("VDI %s is activating, wait to relink" %
1000 self.uuid)
1001 else:
1002 self.setConfig(VDI.DB_VDI_RELINKING, "True")
1004 if self.getConfig(VDI.DB_VDI_ACTIVATING):
1005 self.delConfig(VDI.DB_VDI_RELINKING)
1006 Util.log("VDI %s started activating while tagging" %
1007 self.uuid)
1008 else:
1009 return
1010 time.sleep(2)
1012 raise util.SMException("Failed to tag vdi %s for relink" % self)
1013 except XenAPI.Failure as e:
1014 if not util.isInvalidVDI(e):
1015 raise
1017 for child in self.children:
1018 child._tagChildrenForRelink()
1020 def _loadInfoParent(self):
1021 ret = vhdutil.getParent(self.path, lvhdutil.extractUuid)
1022 if ret:
1023 self.parentUuid = ret
1025 def _setParent(self, parent):
1026 vhdutil.setParent(self.path, parent.path, False)
1027 self.parent = parent
1028 self.parentUuid = parent.uuid
1029 parent.children.append(self)
1030 try:
1031 self.setConfig(self.DB_VHD_PARENT, self.parentUuid)
1032 Util.log("Updated the vhd-parent field for child %s with %s" % \
1033 (self.uuid, self.parentUuid))
1034 except:
1035 Util.log("Failed to update %s with vhd-parent field %s" % \
1036 (self.uuid, self.parentUuid))
1038 def _loadInfoHidden(self):
1039 hidden = vhdutil.getHidden(self.path)
1040 self.hidden = (hidden != 0)
1042 def _setHidden(self, hidden=True):
1043 vhdutil.setHidden(self.path, hidden)
1044 self.hidden = hidden
1046 def _increaseSizeVirt(self, size, atomic=True):
1047 """ensure the virtual size of 'self' is at least 'size'. Note that
1048 resizing a VHD must always be offline and atomically: the file must
1049 not be open by anyone and no concurrent operations may take place.
1050 Thus we use the Agent API call for performing paused atomic
1051 operations. If the caller is already in the atomic context, it must
1052 call with atomic = False"""
1053 if self.sizeVirt >= size:  # 1053 ↛ 1055: condition never false
1054 return
1055 Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \
1056 (self, Util.num2str(self.sizeVirt), Util.num2str(size)))
1058 msize = vhdutil.getMaxResizeSize(self.path) * 1024 * 1024
1059 if (size <= msize):
1060 vhdutil.setSizeVirtFast(self.path, size)
1061 else:
1062 if atomic:
1063 vdiList = self._getAllSubtree()
1064 self.sr.lock()
1065 try:
1066 self.sr.pauseVDIs(vdiList)
1067 try:
1068 self._setSizeVirt(size)
1069 finally:
1070 self.sr.unpauseVDIs(vdiList)
1071 finally:
1072 self.sr.unlock()
1073 else:
1074 self._setSizeVirt(size)
1076 self.sizeVirt = vhdutil.getSizeVirt(self.path)
1078 def _setSizeVirt(self, size):
1079 """WARNING: do not call this method directly unless all VDIs in the
1080 subtree are guaranteed to be unplugged (and remain so for the duration
1081 of the operation): this operation is only safe for offline VHDs"""
1082 jFile = os.path.join(self.sr.path, self.uuid)
1083 vhdutil.setSizeVirt(self.path, size, jFile)
1085 def _queryVHDBlocks(self):
1086 return vhdutil.getBlockBitmap(self.path)
1088 def _getCoalescedSizeData(self):
1089 """Get the data size of the resulting VHD if we coalesce self onto
1090 parent. We calculate the actual size by using the VHD block allocation
1091 information (as opposed to just adding up the two VHD sizes to get an
1092 upper bound)"""
1093 # make sure we don't use stale BAT info from vdi_rec since the child
1094 # was writable all this time
1095 self.delConfig(VDI.DB_VHD_BLOCKS)
1096 blocksChild = self.getVHDBlocks()
1097 blocksParent = self.parent.getVHDBlocks()
1098 numBlocks = Util.countBits(blocksChild, blocksParent)
1099 Util.log("Num combined blocks = %d" % numBlocks)
1100 sizeData = numBlocks * vhdutil.VHD_BLOCK_SIZE
1101 assert(sizeData <= self.sizeVirt)
1102 return sizeData
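# Worked example: with 100 bits set in the ORed bitmaps and the usual 2 MiB VHD
# block size, sizeData = 100 * 2 MiB = 200 MiB; _calcExtraSpaceForCoalescing
# below then adds the VHD bitmap/empty-file overheads and subtracts the
# parent's current physical size to estimate the extra SR space required.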
1104 def _calcExtraSpaceForCoalescing(self):
1105 sizeData = self._getCoalescedSizeData()
1106 sizeCoalesced = sizeData + vhdutil.calcOverheadBitmap(sizeData) + \
1107 vhdutil.calcOverheadEmpty(self.sizeVirt)
1108 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
1109 return sizeCoalesced - self.parent.getSizeVHD()
1111 def _calcExtraSpaceForLeafCoalescing(self):
1112 """How much extra space in the SR will be required to
1113 [live-]leaf-coalesce this VDI"""
1114 # the space requirements are the same as for inline coalesce
1115 return self._calcExtraSpaceForCoalescing()
1117 def _calcExtraSpaceForSnapshotCoalescing(self):
1118 """How much extra space in the SR will be required to
1119 snapshot-coalesce this VDI"""
1120 return self._calcExtraSpaceForCoalescing() + \
1121 vhdutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf
1123 def _getAllSubtree(self):
1124 """Get self and all VDIs in the subtree of self as a flat list"""
1125 vdiList = [self]
1126 for child in self.children:
1127 vdiList.extend(child._getAllSubtree())
1128 return vdiList
1131 class FileVDI(VDI):
1132 """Object representing a VDI in a file-based SR (EXT or NFS)"""
1134 @staticmethod
1135 def extractUuid(path):
1136 path = os.path.basename(path.strip())
1137 if not (path.endswith(vhdutil.FILE_EXTN_VHD) or \  # 1137 ↛ 1139: condition never true
1138 path.endswith(vhdutil.FILE_EXTN_RAW)):
1139 return None
1140 uuid = path.replace(vhdutil.FILE_EXTN_VHD, "").replace( \
1141 vhdutil.FILE_EXTN_RAW, "")
1142 # TODO: validate UUID format
1143 return uuid
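# Example: extractUuid only strips the directory and the extension, so a path
# like '/run/sr-mount/<sr-uuid>/2fd21fbe-7a65-4c2f-9e0b-0c13a0d6e2a1.vhd'
# (hypothetical) yields '2fd21fbe-7a65-4c2f-9e0b-0c13a0d6e2a1'.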
1145 def __init__(self, sr, uuid, raw):
1146 VDI.__init__(self, sr, uuid, raw)
1147 if self.raw:  # 1147 ↛ 1148: condition never true
1148 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_RAW)
1149 else:
1150 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD)
1152 def load(self, info=None):
1153 if not info:
1154 if not util.pathexists(self.path):
1155 raise util.SMException("%s not found" % self.path)
1156 try:
1157 info = vhdutil.getVHDInfo(self.path, self.extractUuid)
1158 except util.SMException:
1159 Util.log(" [VDI %s: failed to read VHD metadata]" % self.uuid)
1160 return
1161 self.parent = None
1162 self.children = []
1163 self.parentUuid = info.parentUuid
1164 self.sizeVirt = info.sizeVirt
1165 self._sizeVHD = info.sizePhys
1166 self._sizeAllocated = info.sizeAllocated
1167 self.hidden = info.hidden
1168 self.scanError = False
1169 self.path = os.path.join(self.sr.path, "%s%s" % \
1170 (self.uuid, vhdutil.FILE_EXTN_VHD))
1172 def rename(self, uuid):
1173 oldPath = self.path
1174 VDI.rename(self, uuid)
1175 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD)
1176 self.path = os.path.join(self.sr.path, self.fileName)
1177 assert(not util.pathexists(self.path))
1178 Util.log("Renaming %s -> %s" % (oldPath, self.path))
1179 os.rename(oldPath, self.path)
1181 def delete(self):
1182 if len(self.children) > 0:  # 1182 ↛ 1183: condition never true
1183 raise util.SMException("VDI %s has children, can't delete" % \
1184 self.uuid)
1185 try:
1186 self.sr.lock()
1187 try:
1188 os.unlink(self.path)
1189 self.sr.forgetVDI(self.uuid)
1190 finally:
1191 self.sr.unlock()
1192 except OSError:
1193 raise util.SMException("os.unlink(%s) failed" % self.path)
1194 VDI.delete(self)
1197 class LVHDVDI(VDI):
1198 """Object representing a VDI in an LVHD SR"""
1200 JRN_ZERO = "zero" # journal entry type for zeroing out end of parent
1201 DRIVER_NAME_RAW = "aio"
1203 def load(self, vdiInfo):
1204 self.parent = None
1205 self.children = []
1206 self._sizeVHD = -1
1207 self._sizeAllocated = -1
1208 self.scanError = vdiInfo.scanError
1209 self.sizeLV = vdiInfo.sizeLV
1210 self.sizeVirt = vdiInfo.sizeVirt
1211 self.fileName = vdiInfo.lvName
1212 self.lvActive = vdiInfo.lvActive
1213 self.lvOpen = vdiInfo.lvOpen
1214 self.lvReadonly = vdiInfo.lvReadonly
1215 self.hidden = vdiInfo.hidden
1216 self.parentUuid = vdiInfo.parentUuid
1217 self.path = os.path.join(self.sr.path, self.fileName)
1219 @staticmethod
1220 def extractUuid(path):
1221 return lvhdutil.extractUuid(path)
1223 def getDriverName(self):
1224 if self.raw:
1225 return self.DRIVER_NAME_RAW
1226 return self.DRIVER_NAME_VHD
1228 def inflate(self, size):
1229 """inflate the LV containing the VHD to 'size'"""
1230 if self.raw:
1231 return
1232 self._activate()
1233 self.sr.lock()
1234 try:
1235 lvhdutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, size)
1236 util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid)
1237 finally:
1238 self.sr.unlock()
1239 self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
1240 self._sizeVHD = -1
1241 self._sizeAllocated = -1
1243 def deflate(self):
1244 """deflate the LV containing the VHD to minimum"""
1245 if self.raw:
1246 return
1247 self._activate()
1248 self.sr.lock()
1249 try:
1250 lvhdutil.deflate(self.sr.lvmCache, self.fileName, self.getSizeVHD())
1251 finally:
1252 self.sr.unlock()
1253 self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
1254 self._sizeVHD = -1
1255 self._sizeAllocated = -1
1257 def inflateFully(self):
1258 self.inflate(lvhdutil.calcSizeVHDLV(self.sizeVirt))
1260 def inflateParentForCoalesce(self):
1261 """Inflate the parent only as much as needed for the purposes of
1262 coalescing"""
1263 if self.parent.raw:
1264 return
1265 inc = self._calcExtraSpaceForCoalescing()
1266 if inc > 0:
1267 util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid)
1268 self.parent.inflate(self.parent.sizeLV + inc)
1270 def updateBlockInfo(self):
1271 if not self.raw:
1272 return VDI.updateBlockInfo(self)
1274 def rename(self, uuid):
1275 oldUuid = self.uuid
1276 oldLVName = self.fileName
1277 VDI.rename(self, uuid)
1278 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + self.uuid
1279 if self.raw:
1280 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + self.uuid
1281 self.path = os.path.join(self.sr.path, self.fileName)
1282 assert(not self.sr.lvmCache.checkLV(self.fileName))
1284 self.sr.lvmCache.rename(oldLVName, self.fileName)
1285 if self.sr.lvActivator.get(oldUuid, False):
1286 self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False)
1288 ns = lvhdutil.NS_PREFIX_LVM + self.sr.uuid
1289 (cnt, bcnt) = RefCounter.check(oldUuid, ns)
1290 RefCounter.set(self.uuid, cnt, bcnt, ns)
1291 RefCounter.reset(oldUuid, ns)
1293 def delete(self):
1294 if len(self.children) > 0:
1295 raise util.SMException("VDI %s has children, can't delete" % \
1296 self.uuid)
1297 self.sr.lock()
1298 try:
1299 self.sr.lvmCache.remove(self.fileName)
1300 self.sr.forgetVDI(self.uuid)
1301 finally:
1302 self.sr.unlock()
1303 RefCounter.reset(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid)
1304 VDI.delete(self)
1306 def getSizeVHD(self):
1307 if self._sizeVHD == -1:
1308 self._loadInfoSizeVHD()
1309 return self._sizeVHD
1311 def _loadInfoSizeVHD(self):
1312 """Get the physical utilization of the VHD file. We do it individually
1313 (and not using the VHD batch scanner) as an optimization: this info is
1314 relatively expensive and we need it only for VDI's involved in
1315 coalescing."""
1316 if self.raw:
1317 return
1318 self._activate()
1319 self._sizeVHD = vhdutil.getSizePhys(self.path)
1320 if self._sizeVHD <= 0:
1321 raise util.SMException("phys size of %s = %d" % \
1322 (self, self._sizeVHD))
1324 def getAllocatedSize(self):
1325 if self._sizeAllocated == -1:
1326 self._loadInfoSizeAllocated()
1327 return self._sizeAllocated
1329 def _loadInfoSizeAllocated(self):
1330 """
1331 Get the allocated size of the VHD volume.
1332 """
1333 if self.raw:
1334 return
1335 self._activate()
1336 self._sizeAllocated = vhdutil.getAllocatedSize(self.path)
1338 def _loadInfoHidden(self):
1339 if self.raw:
1340 self.hidden = self.sr.lvmCache.getHidden(self.fileName)
1341 else:
1342 VDI._loadInfoHidden(self)
1344 def _setHidden(self, hidden=True):
1345 if self.raw:
1346 self.sr.lvmCache.setHidden(self.fileName, hidden)
1347 self.hidden = hidden
1348 else:
1349 VDI._setHidden(self, hidden)
1351 def __str__(self):
1352 strType = "VHD"
1353 if self.raw:
1354 strType = "RAW"
1355 strHidden = ""
1356 if self.hidden:
1357 strHidden = "*"
1358 strSizeVHD = ""
1359 if self._sizeVHD > 0:
1360 strSizeVHD = Util.num2str(self._sizeVHD)
1361 strSizeAllocated = ""
1362 if self._sizeAllocated >= 0:
1363 strSizeAllocated = Util.num2str(self._sizeAllocated)
1364 strActive = "n"
1365 if self.lvActive:
1366 strActive = "a"
1367 if self.lvOpen:
1368 strActive += "o"
1369 return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType,
1370 Util.num2str(self.sizeVirt), strSizeVHD, strSizeAllocated,
1371 Util.num2str(self.sizeLV), strActive)
1373 def validate(self, fast=False):
1374 if not self.raw:
1375 VDI.validate(self, fast)
1377 def _doCoalesce(self):
1378 """LVHD parents must first be activated, inflated, and made writable"""
1379 try:
1380 self._activateChain()
1381 self.sr.lvmCache.setReadonly(self.parent.fileName, False)
1382 self.parent.validate()
1383 self.inflateParentForCoalesce()
1384 VDI._doCoalesce(self)
1385 finally:
1386 self.parent._loadInfoSizeVHD()
1387 self.parent.deflate()
1388 self.sr.lvmCache.setReadonly(self.parent.fileName, True)
1390 def _setParent(self, parent):
1391 self._activate()
1392 if self.lvReadonly:
1393 self.sr.lvmCache.setReadonly(self.fileName, False)
1395 try:
1396 vhdutil.setParent(self.path, parent.path, parent.raw)
1397 finally:
1398 if self.lvReadonly:
1399 self.sr.lvmCache.setReadonly(self.fileName, True)
1400 self._deactivate()
1401 self.parent = parent
1402 self.parentUuid = parent.uuid
1403 parent.children.append(self)
1404 try:
1405 self.setConfig(self.DB_VHD_PARENT, self.parentUuid)
1406 Util.log("Updated the vhd-parent field for child %s with %s" % \
1407 (self.uuid, self.parentUuid))
1408 except:
1409 Util.log("Failed to update the vhd-parent with %s for child %s" % \
1410 (self.parentUuid, self.uuid))
1412 def _activate(self):
1413 self.sr.lvActivator.activate(self.uuid, self.fileName, False)
1415 def _activateChain(self):
1416 vdi = self
1417 while vdi:
1418 vdi._activate()
1419 vdi = vdi.parent
1421 def _deactivate(self):
1422 self.sr.lvActivator.deactivate(self.uuid, False)
1424 def _increaseSizeVirt(self, size, atomic=True):
1425 "ensure the virtual size of 'self' is at least 'size'"
1426 self._activate()
1427 if not self.raw:
1428 VDI._increaseSizeVirt(self, size, atomic)
1429 return
1431 # raw VDI case
1432 offset = self.sizeLV
1433 if self.sizeVirt < size:
1434 oldSize = self.sizeLV
1435 self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size)
1436 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV))
1437 self.sr.lvmCache.setSize(self.fileName, self.sizeLV)
1438 offset = oldSize
1439 unfinishedZero = False
1440 jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid)
1441 if jval:
1442 unfinishedZero = True
1443 offset = int(jval)
1444 length = self.sizeLV - offset
1445 if not length:
1446 return
1448 if unfinishedZero:
1449 Util.log(" ==> Redoing unfinished zeroing out")
1450 else:
1451 self.sr.journaler.create(self.JRN_ZERO, self.uuid, \
1452 str(offset))
1453 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
1454 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
1455 func = lambda: util.zeroOut(self.path, offset, length)
1456 Util.runAbortable(func, True, self.sr.uuid, abortTest,
1457 VDI.POLL_INTERVAL, 0)
1458 self.sr.journaler.remove(self.JRN_ZERO, self.uuid)
1460 def _setSizeVirt(self, size):
1461 """WARNING: do not call this method directly unless all VDIs in the
1462 subtree are guaranteed to be unplugged (and remain so for the duration
1463 of the operation): this operation is only safe for offline VHDs"""
1464 self._activate()
1465 jFile = lvhdutil.createVHDJournalLV(self.sr.lvmCache, self.uuid,
1466 vhdutil.MAX_VHD_JOURNAL_SIZE)
1467 try:
1468 lvhdutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid,
1469 size, jFile)
1470 finally:
1471 lvhdutil.deleteVHDJournalLV(self.sr.lvmCache, self.uuid)
1473 def _queryVHDBlocks(self):
1474 self._activate()
1475 return VDI._queryVHDBlocks(self)
1477 def _calcExtraSpaceForCoalescing(self):
1478 if self.parent.raw:
1479 return 0 # raw parents are never deflated in the first place
1480 sizeCoalesced = lvhdutil.calcSizeVHDLV(self._getCoalescedSizeData())
1481 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
1482 return sizeCoalesced - self.parent.sizeLV
1484 def _calcExtraSpaceForLeafCoalescing(self):
1485 """How much extra space in the SR will be required to
1486 [live-]leaf-coalesce this VDI"""
1487 # we can deflate the leaf to minimize the space requirements
1488 deflateDiff = self.sizeLV - lvhdutil.calcSizeLV(self.getSizeVHD())
1489 return self._calcExtraSpaceForCoalescing() - deflateDiff
1491 def _calcExtraSpaceForSnapshotCoalescing(self):
1492 return self._calcExtraSpaceForCoalescing() + \
1493 lvhdutil.calcSizeLV(self.getSizeVHD())
1496 class LinstorVDI(VDI):
1497 """Object representing a VDI in a LINSTOR SR"""
1499 VOLUME_LOCK_TIMEOUT = 30
1501 def load(self, info=None):
1502 self.parentUuid = info.parentUuid
1503 self.scanError = True
1504 self.parent = None
1505 self.children = []
1507 self.fileName = self.sr._linstor.get_volume_name(self.uuid)
1508 self.path = self.sr._linstor.build_device_path(self.fileName)
1510 if not info:
1511 try:
1512 info = self.sr._vhdutil.get_vhd_info(self.uuid)
1513 except util.SMException:
1514 Util.log(
1515 ' [VDI {}: failed to read VHD metadata]'.format(self.uuid)
1516 )
1517 return
1519 self.parentUuid = info.parentUuid
1520 self.sizeVirt = info.sizeVirt
1521 self._sizeVHD = -1
1522 self._sizeAllocated = -1
1523 self.drbd_size = -1
1524 self.hidden = info.hidden
1525 self.scanError = False
1526 self.vdi_type = vhdutil.VDI_TYPE_VHD
1528 def getSizeVHD(self, fetch=False):
1529 if self._sizeVHD < 0 or fetch:
1530 self._sizeVHD = self.sr._vhdutil.get_size_phys(self.uuid)
1531 return self._sizeVHD
1533 def getDrbdSize(self, fetch=False):
1534 if self.drbd_size < 0 or fetch:
1535 self.drbd_size = self.sr._vhdutil.get_drbd_size(self.uuid)
1536 return self.drbd_size
1538 def getAllocatedSize(self):
1539 if self._sizeAllocated == -1:
1540 if not self.raw:
1541 self._sizeAllocated = self.sr._vhdutil.get_allocated_size(self.uuid)
1542 return self._sizeAllocated
1544 def inflate(self, size):
1545 if self.raw:
1546 return
1547 self.sr.lock()
1548 try:
1549 # Ensure we use the real DRBD size and not the cached one.
1550 # Why? Because this attribute can be changed if volume is resized by user.
1551 self.drbd_size = self.getDrbdSize(fetch=True)
1552 self.sr._vhdutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size)
1553 finally:
1554 self.sr.unlock()
1555 self.drbd_size = -1
1556 self._sizeVHD = -1
1557 self._sizeAllocated = -1
1559 def deflate(self):
1560 if self.raw:
1561 return
1562 self.sr.lock()
1563 try:
1564 # Ensure we use the real sizes and not the cached info.
1565 self.drbd_size = self.getDrbdSize(fetch=True)
1566 self._sizeVHD = self.getSizeVHD(fetch=True)
1567 self.sr._vhdutil.force_deflate(self.path, self._sizeVHD, self.drbd_size, zeroize=False)
1568 finally:
1569 self.sr.unlock()
1570 self.drbd_size = -1
1571 self._sizeVHD = -1
1572 self._sizeAllocated = -1
1574 def inflateFully(self):
1575 if not self.raw:
1576 self.inflate(LinstorVhdUtil.compute_volume_size(self.sizeVirt, self.vdi_type))
1578 def rename(self, uuid):
1579 Util.log('Renaming {} -> {} (path={})'.format(
1580 self.uuid, uuid, self.path
1581 ))
1582 self.sr._linstor.update_volume_uuid(self.uuid, uuid)
1583 VDI.rename(self, uuid)
1585 def delete(self):
1586 if len(self.children) > 0:
1587 raise util.SMException(
1588 'VDI {} has children, can\'t delete'.format(self.uuid)
1589 )
1590 self.sr.lock()
1591 try:
1592 self.sr._linstor.destroy_volume(self.uuid)
1593 self.sr.forgetVDI(self.uuid)
1594 finally:
1595 self.sr.unlock()
1596 VDI.delete(self)
1598 def validate(self, fast=False):
1599 if not self.raw and not self.sr._vhdutil.check(self.uuid, fast=fast):
1600 raise util.SMException('VHD {} corrupted'.format(self))
1602 def pause(self, failfast=False):
1603 self.sr._linstor.ensure_volume_is_not_locked(
1604 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT
1605 )
1606 return super(LinstorVDI, self).pause(failfast)
1608 def coalesce(self):
1609 # Note: We raise `SMException` here to skip the current coalesce in case of failure.
1610 # With any other exception type the remaining coalesce calls would not be executed.
1611 return self.sr._vhdutil.force_coalesce(self.path) * 512
1613 def getParent(self):
1614 return self.sr._vhdutil.get_parent(
1615 self.sr._linstor.get_volume_uuid_from_device_path(self.path)
1616 )
1618 def repair(self, parent_uuid):
1619 self.sr._vhdutil.force_repair(
1620 self.sr._linstor.get_device_path(parent_uuid)
1621 )
1623 def _relinkSkip(self):
1624 abortFlag = IPCFlag(self.sr.uuid)
1625 for child in self.children:
1626 if abortFlag.test(FLAG_TYPE_ABORT):
1627 raise AbortException('Aborting due to signal')
1628 Util.log(
1629 ' Relinking {} from {} to {}'.format(
1630 child, self, self.parent
1631 )
1632 )
1634 session = child.sr.xapi.session
1635 sr_uuid = child.sr.uuid
1636 vdi_uuid = child.uuid
1637 try:
1638 self.sr._linstor.ensure_volume_is_not_locked(
1639 vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT
1640 )
1641 blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid)
1642 child._setParent(self.parent)
1643 finally:
1644 blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid)
1645 self.children = []
1647 def _setParent(self, parent):
1648 self.sr._linstor.get_device_path(self.uuid)
1649 self.sr._vhdutil.force_parent(self.path, parent.path)
1650 self.parent = parent
1651 self.parentUuid = parent.uuid
1652 parent.children.append(self)
1653 try:
1654 self.setConfig(self.DB_VHD_PARENT, self.parentUuid)
1655 Util.log("Updated the vhd-parent field for child %s with %s" % \
1656 (self.uuid, self.parentUuid))
1657 except:
1658 Util.log("Failed to update %s with vhd-parent field %s" % \
1659 (self.uuid, self.parentUuid))
1661 def _doCoalesce(self):
1662 try:
1663 self._activateChain()
1664 self.parent.validate()
1665 self._inflateParentForCoalesce()
1666 VDI._doCoalesce(self)
1667 finally:
1668 self.parent.deflate()
1670 def _activateChain(self):
1671 vdi = self
1672 while vdi:
1673 try:
1674 p = self.sr._linstor.get_device_path(vdi.uuid)
1675 except Exception as e:
1676 # Use SMException to skip coalesce.
1677 # Otherwise the GC is stopped...
1678 raise util.SMException(str(e))
1679 vdi = vdi.parent
1681 def _setHidden(self, hidden=True):
1682 HIDDEN_TAG = 'hidden'
1684 if self.raw:
1685 self.sr._linstor.update_volume_metadata(self.uuid, {
1686 HIDDEN_TAG: hidden
1687 })
1688 self.hidden = hidden
1689 else:
1690 VDI._setHidden(self, hidden)
1692 def _setSizeVirt(self, size):
1693 jfile = self.uuid + '-jvhd'
1694 self.sr._linstor.create_volume(
1695 jfile, vhdutil.MAX_VHD_JOURNAL_SIZE, persistent=False, volume_name=jfile
1696 )
1697 try:
1698 self.inflate(LinstorVhdUtil.compute_volume_size(size, self.vdi_type))
1699 self.sr._vhdutil.set_size_virt(size, jfile)
1700 finally:
1701 try:
1702 self.sr._linstor.destroy_volume(jfile)
1703 except Exception:
1704 # We can ignore it, in any case this volume is not persistent.
1705 pass
1707 def _queryVHDBlocks(self):
1708 return self.sr._vhdutil.get_block_bitmap(self.uuid)
1710 def _inflateParentForCoalesce(self):
1711 if self.parent.raw:
1712 return
1713 inc = self._calcExtraSpaceForCoalescing()
1714 if inc > 0:
1715 self.parent.inflate(self.parent.getDrbdSize() + inc)
1717 def _calcExtraSpaceForCoalescing(self):
1718 if self.parent.raw:
1719 return 0
1720 size_coalesced = LinstorVhdUtil.compute_volume_size(
1721 self._getCoalescedSizeData(), self.vdi_type
1722 )
1723 Util.log("Coalesced size = %s" % Util.num2str(size_coalesced))
1724 return size_coalesced - self.parent.getDrbdSize()
1726 def _calcExtraSpaceForLeafCoalescing(self):
1727 assert self.getDrbdSize() > 0
1728 assert self.getSizeVHD() > 0
1729 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizeVHD())
1730 assert deflate_diff >= 0
1731 return self._calcExtraSpaceForCoalescing() - deflate_diff
1733 def _calcExtraSpaceForSnapshotCoalescing(self):
1734 assert self.getSizeVHD() > 0
1735 return self._calcExtraSpaceForCoalescing() + \
1736 LinstorVolumeManager.round_up_volume_size(self.getSizeVHD())
1738 ################################################################################
1739 #
1740 # SR
1741 #
1742 class SR(object):
1743 class LogFilter:
1744 def __init__(self, sr):
1745 self.sr = sr
1746 self.stateLogged = False
1747 self.prevState = {}
1748 self.currState = {}
1750 def logState(self):
1751 changes = ""
1752 self.currState.clear()
1753 for vdi in self.sr.vdiTrees:
1754 self.currState[vdi.uuid] = self._getTreeStr(vdi)
1755 if not self.prevState.get(vdi.uuid) or \
1756 self.prevState[vdi.uuid] != self.currState[vdi.uuid]:
1757 changes += self.currState[vdi.uuid]
1759 for uuid in self.prevState:
1760 if not self.currState.get(uuid):
1761 changes += "Tree %s gone\n" % uuid
1763 result = "SR %s (%d VDIs in %d VHD trees): " % \
1764 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees))
1766 if len(changes) > 0:
1767 if self.stateLogged:
1768 result += "showing only VHD trees that changed:"
1769 result += "\n%s" % changes
1770 else:
1771 result += "no changes"
1773 for line in result.split("\n"):
1774 Util.log("%s" % line)
1775 self.prevState.clear()
1776 for key, val in self.currState.items():
1777 self.prevState[key] = val
1778 self.stateLogged = True
1780 def logNewVDI(self, uuid):
1781 if self.stateLogged:
1782 Util.log("Found new VDI when scanning: %s" % uuid)
1784 def _getTreeStr(self, vdi, indent=8):
1785 treeStr = "%s%s\n" % (" " * indent, vdi)
1786 for child in vdi.children:
1787 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT)
1788 return treeStr
1790 TYPE_FILE = "file"
1791 TYPE_LVHD = "lvhd"
1792 TYPE_LINSTOR = "linstor"
1793 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR]
1795 LOCK_RETRY_INTERVAL = 3
1796 LOCK_RETRY_ATTEMPTS = 20
1797 LOCK_RETRY_ATTEMPTS_LOCK = 100
1799 SCAN_RETRY_ATTEMPTS = 3
1801 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM)
1802 TMP_RENAME_PREFIX = "OLD_"
1804 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline"
1805 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override"
1807 def getInstance(uuid, xapiSession, createLock=True, force=False):
1808 xapi = XAPI(xapiSession, uuid)
1809 type = normalizeType(xapi.srRecord["type"])
1810 if type == SR.TYPE_FILE:
1811 return FileSR(uuid, xapi, createLock, force)
1812 elif type == SR.TYPE_LVHD:
1813 return LVHDSR(uuid, xapi, createLock, force)
1814 elif type == SR.TYPE_LINSTOR:
1815 return LinstorSR(uuid, xapi, createLock, force)
1816 raise util.SMException("SR type %s not recognized" % type)
1817 getInstance = staticmethod(getInstance)
1819 def __init__(self, uuid, xapi, createLock, force):
1820 self.logFilter = self.LogFilter(self)
1821 self.uuid = uuid
1822 self.path = ""
1823 self.name = ""
1824 self.vdis = {}
1825 self.vdiTrees = []
1826 self.journaler = None
1827 self.xapi = xapi
1828 self._locked = 0
1829 self._srLock = None
1830 if createLock: 1830 ↛ 1831
1831 self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, self.uuid)
1832 else:
1833 Util.log("Requested no SR locking")
1834 self.name = self.xapi.srRecord["name_label"]
1835 self._failedCoalesceTargets = []
1837 if not self.xapi.isPluggedHere():
1838 if force: 1838 ↛ 1839
1839 Util.log("SR %s not attached on this host, ignoring" % uuid)
1840 else:
1841 if not self.wait_for_plug():
1842 raise util.SMException("SR %s not attached on this host" % uuid)
1844 if force: 1844 ↛ 1845
1845 Util.log("Not checking if we are Master (SR %s)" % uuid)
1846 elif not self.xapi.isMaster(): 1846 ↛ 1847
1847 raise util.SMException("This host is NOT master, will not run")
1849 def wait_for_plug(self):
1850 for _ in range(1, 10):
1851 time.sleep(2)
1852 if self.xapi.isPluggedHere():
1853 return True
1854 return False
1856 def gcEnabled(self, refresh=True):
1857 if refresh:
1858 self.xapi.srRecord = \
1859 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef)
1860 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false":
1861 Util.log("GC is disabled for this SR, abort")
1862 return False
1863 return True
1865 def scan(self, force=False):
1866 """Scan the SR and load VDI info for each VDI. If called repeatedly,
1867 update VDI objects if they already exist"""
1868 pass # abstract
1870 def scanLocked(self, force=False):
1871 self.lock()
1872 try:
1873 self.scan(force)
1874 finally:
1875 self.unlock()
1877 def getVDI(self, uuid):
1878 return self.vdis.get(uuid)
1880 def hasWork(self):
1881 if len(self.findGarbage()) > 0:
1882 return True
1883 if self.findCoalesceable():
1884 return True
1885 if self.findLeafCoalesceable():
1886 return True
1887 if self.needUpdateBlockInfo():
1888 return True
1889 return False
1891 def findCoalesceable(self):
1892 """Find a coalesceable VDI. Return a vdi that should be coalesced
1893 (choosing one among all coalesceable candidates according to some
1894 criteria) or None if there is no VDI that could be coalesced"""
1896 candidates = []
1898 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE)
1899 if srSwitch == "false":
1900 Util.log("Coalesce disabled for this SR")
1901 return candidates
1903 # finish any VDI for which a relink journal entry exists first
1904 journals = self.journaler.getAll(VDI.JRN_RELINK)
1905 for uuid in journals:
1906 vdi = self.getVDI(uuid)
1907 if vdi and vdi not in self._failedCoalesceTargets:
1908 return vdi
1910 for vdi in self.vdis.values():
1911 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets:
1912 candidates.append(vdi)
1913 Util.log("%s is coalescable" % vdi.uuid)
1915 self.xapi.update_task_progress("coalescable", len(candidates))
1917 # pick one in the tallest tree
1918 treeHeight = dict()
1919 for c in candidates:
1920 height = c.getTreeRoot().getTreeHeight()
1921 if treeHeight.get(height):
1922 treeHeight[height].append(c)
1923 else:
1924 treeHeight[height] = [c]
1926 freeSpace = self.getFreeSpace()
1927 heights = list(treeHeight.keys())
1928 heights.sort(reverse=True)
1929 for h in heights:
1930 for c in treeHeight[h]:
1931 spaceNeeded = c._calcExtraSpaceForCoalescing()
1932 if spaceNeeded <= freeSpace:
1933 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h))
1934 return c
1935 else:
1936 Util.log("No space to coalesce %s (free space: %d)" % \
1937 (c, freeSpace))
1938 return None
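# Illustration of the choice above: with coalesceable VDIs sitting in trees
# of height 4 and height 2, the height-4 candidates are tried first, and the
# first one whose _calcExtraSpaceForCoalescing() estimate fits into
# getFreeSpace() is returned; candidates that do not fit are only logged.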
1940 def getSwitch(self, key):
1941 return self.xapi.srRecord["other_config"].get(key)
1943 def forbiddenBySwitch(self, switch, condition, fail_msg):
1944 srSwitch = self.getSwitch(switch)
1945 ret = False
1946 if srSwitch:
1947 ret = srSwitch == condition
1949 if ret:
1950 Util.log(fail_msg)
1952 return ret
1954 def leafCoalesceForbidden(self):
1955 return (self.forbiddenBySwitch(VDI.DB_COALESCE,
1956 "false",
1957 "Coalesce disabled for this SR") or
1958 self.forbiddenBySwitch(VDI.DB_LEAFCLSC,
1959 VDI.LEAFCLSC_DISABLED,
1960 "Leaf-coalesce disabled for this SR"))
1962 def findLeafCoalesceable(self):
1963 """Find leaf-coalesceable VDIs in each VHD tree"""
1965 candidates = []
1966 if self.leafCoalesceForbidden():
1967 return candidates
1969 self.gatherLeafCoalesceable(candidates)
1971 self.xapi.update_task_progress("coalescable", len(candidates))
1973 freeSpace = self.getFreeSpace()
1974 for candidate in candidates:
1975 # check the space constraints to see if leaf-coalesce is actually
1976 # feasible for this candidate
1977 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing()
1978 spaceNeededLive = spaceNeeded
1979 if spaceNeeded > freeSpace:
1980 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing()
1981 if candidate.canLiveCoalesce(self.getStorageSpeed()):
1982 spaceNeeded = spaceNeededLive
1984 if spaceNeeded <= freeSpace:
1985 Util.log("Leaf-coalesce candidate: %s" % candidate)
1986 return candidate
1987 else:
1988 Util.log("No space to leaf-coalesce %s (free space: %d)" % \
1989 (candidate, freeSpace))
1990 if spaceNeededLive <= freeSpace:
1991 Util.log("...but enough space if skip snap-coalesce")
1992 candidate.setConfig(VDI.DB_LEAFCLSC,
1993 VDI.LEAFCLSC_OFFLINE)
1995 return None
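# Fallback illustrated: the space check starts from the snapshot-coalesce
# estimate; if that does not fit and the candidate passes canLiveCoalesce(),
# the smaller live leaf-coalesce estimate is used instead. When neither path
# is taken but the live estimate alone would fit, the VDI is tagged
# LEAFCLSC_OFFLINE, presumably so the offline leaf-coalesce path can handle
# it later.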
1997 def gatherLeafCoalesceable(self, candidates):
1998 for vdi in self.vdis.values():
1999 if not vdi.isLeafCoalesceable():
2000 continue
2001 if vdi in self._failedCoalesceTargets:
2002 continue
2003 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET:
2004 Util.log("Skipping reset-on-boot %s" % vdi)
2005 continue
2006 if vdi.getConfig(vdi.DB_ALLOW_CACHING):
2007 Util.log("Skipping allow_caching=true %s" % vdi)
2008 continue
2009 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED:
2010 Util.log("Leaf-coalesce disabled for %s" % vdi)
2011 continue
2012 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or
2013 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE):
2014 continue
2015 candidates.append(vdi)
2017 def coalesce(self, vdi, dryRun=False):
2018 """Coalesce vdi onto parent"""
2019 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent))
2020 if dryRun: 2020 ↛ 2021
2021 return
2023 try:
2024 self._coalesce(vdi)
2025 except util.SMException as e:
2026 if isinstance(e, AbortException): 2026 ↛ 2027
2027 self.cleanup()
2028 raise
2029 else:
2030 self._failedCoalesceTargets.append(vdi)
2031 Util.logException("coalesce")
2032 Util.log("Coalesce failed, skipping")
2033 self.cleanup()
2035 def coalesceLeaf(self, vdi, dryRun=False):
2036 """Leaf-coalesce vdi onto parent"""
2037 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent))
2038 if dryRun:
2039 return
2041 try:
2042 uuid = vdi.uuid
2043 try:
2044 # "vdi" object will no longer be valid after this call
2045 self._coalesceLeaf(vdi)
2046 finally:
2047 vdi = self.getVDI(uuid)
2048 if vdi:
2049 vdi.delConfig(vdi.DB_LEAFCLSC)
2050 except AbortException:
2051 self.cleanup()
2052 raise
2053 except (util.SMException, XenAPI.Failure) as e:
2054 self._failedCoalesceTargets.append(vdi)
2055 Util.logException("leaf-coalesce")
2056 Util.log("Leaf-coalesce failed on %s, skipping" % vdi)
2057 self.cleanup()
2059 def garbageCollect(self, dryRun=False):
2060 vdiList = self.findGarbage()
2061 Util.log("Found %d VDIs for deletion:" % len(vdiList))
2062 for vdi in vdiList:
2063 Util.log(" %s" % vdi)
2064 if not dryRun:
2065 self.deleteVDIs(vdiList)
2066 self.cleanupJournals(dryRun)
2068 def findGarbage(self):
2069 vdiList = []
2070 for vdi in self.vdiTrees:
2071 vdiList.extend(vdi.getAllPrunable())
2072 return vdiList
2074 def deleteVDIs(self, vdiList):
2075 for vdi in vdiList:
2076 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT):
2077 raise AbortException("Aborting due to signal")
2078 Util.log("Deleting unlinked VDI %s" % vdi)
2079 self.deleteVDI(vdi)
2081 def deleteVDI(self, vdi):
2082 assert(len(vdi.children) == 0)
2083 del self.vdis[vdi.uuid]
2084 if vdi.parent: 2084 ↛ 2086
2085 vdi.parent.children.remove(vdi)
2086 if vdi in self.vdiTrees: 2086 ↛ 2087
2087 self.vdiTrees.remove(vdi)
2088 vdi.delete()
2090 def forgetVDI(self, vdiUuid):
2091 self.xapi.forgetVDI(self.uuid, vdiUuid)
2093 def pauseVDIs(self, vdiList):
2094 paused = []
2095 failed = False
2096 for vdi in vdiList:
2097 try:
2098 vdi.pause()
2099 paused.append(vdi)
2100 except:
2101 Util.logException("pauseVDIs")
2102 failed = True
2103 break
2105 if failed:
2106 self.unpauseVDIs(paused)
2107 raise util.SMException("Failed to pause VDIs")
2109 def unpauseVDIs(self, vdiList):
2110 failed = False
2111 for vdi in vdiList:
2112 try:
2113 vdi.unpause()
2114 except:
2115 Util.log("ERROR: Failed to unpause VDI %s" % vdi)
2116 failed = True
2117 if failed:
2118 raise util.SMException("Failed to unpause VDIs")
2120 def getFreeSpace(self):
2121 return 0
2123 def cleanup(self):
2124 Util.log("In cleanup")
2125 return
2127 def __str__(self):
2128 if self.name:
2129 ret = "%s ('%s')" % (self.uuid[0:4], self.name)
2130 else:
2131 ret = "%s" % self.uuid
2132 return ret
2134 def lock(self):
2135 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort
2136 signal to avoid deadlocking (trying to acquire the SR lock while the
2137 lock is held by a process that is trying to abort us)"""
2138 if not self._srLock:
2139 return
2141 if self._locked == 0:
2142 abortFlag = IPCFlag(self.uuid)
2143 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK):
2144 if self._srLock.acquireNoblock():
2145 self._locked += 1
2146 return
2147 if abortFlag.test(FLAG_TYPE_ABORT):
2148 raise AbortException("Abort requested")
2149 time.sleep(SR.LOCK_RETRY_INTERVAL)
2150 raise util.SMException("Unable to acquire the SR lock")
2152 self._locked += 1
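# With LOCK_RETRY_ATTEMPTS_LOCK = 100 and LOCK_RETRY_INTERVAL = 3, the retry
# loop above waits roughly 100 * 3 = 300 seconds for the SR lock before
# giving up, unless the abort flag is raised first.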
2154 def unlock(self):
2155 if not self._srLock: 2155 ↛ 2157
2156 return
2157 assert(self._locked > 0)
2158 self._locked -= 1
2159 if self._locked == 0:
2160 self._srLock.release()
2162 def needUpdateBlockInfo(self):
2163 for vdi in self.vdis.values():
2164 if vdi.scanError or len(vdi.children) == 0:
2165 continue
2166 if not vdi.getConfig(vdi.DB_VHD_BLOCKS):
2167 return True
2168 return False
2170 def updateBlockInfo(self):
2171 for vdi in self.vdis.values():
2172 if vdi.scanError or len(vdi.children) == 0:
2173 continue
2174 if not vdi.getConfig(vdi.DB_VHD_BLOCKS):
2175 vdi.updateBlockInfo()
2177 def cleanupCoalesceJournals(self):
2178 """Remove stale coalesce VDI indicators"""
2179 entries = self.journaler.getAll(VDI.JRN_COALESCE)
2180 for uuid, jval in entries.items():
2181 self.journaler.remove(VDI.JRN_COALESCE, uuid)
2183 def cleanupJournals(self, dryRun=False):
2184 """delete journal entries for non-existing VDIs"""
2185 for t in [LVHDVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]:
2186 entries = self.journaler.getAll(t)
2187 for uuid, jval in entries.items():
2188 if self.getVDI(uuid):
2189 continue
2190 if t == SR.JRN_CLONE:
2191 baseUuid, clonUuid = jval.split("_")
2192 if self.getVDI(baseUuid):
2193 continue
2194 Util.log(" Deleting stale '%s' journal entry for %s "
2195 "(%s)" % (t, uuid, jval))
2196 if not dryRun:
2197 self.journaler.remove(t, uuid)
2199 def cleanupCache(self, maxAge=-1):
2200 return 0
2202 def _coalesce(self, vdi):
2203 if self.journaler.get(vdi.JRN_RELINK, vdi.uuid): 2203 ↛ 2206
2204 # this means we had done the actual coalescing already and just
2205 # need to finish relinking and/or refreshing the children
2206 Util.log("==> Coalesce apparently already done: skipping")
2207 else:
2208 # JRN_COALESCE is used to check which VDI is being coalesced in
2209 # order to decide whether to abort the coalesce. We remove the
2210 # journal as soon as the VHD coalesce step is done, because we
2211 # don't expect the rest of the process to take long
2212 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1")
2213 vdi._doCoalesce()
2214 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid)
2216 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid)
2218 # we now need to relink the children: lock the SR to prevent ops
2219 # like SM.clone from manipulating the VDIs we'll be relinking and
2220 # rescan the SR first in case the children changed since the last
2221 # scan
2222 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1")
2224 self.lock()
2225 try:
2226 vdi.parent._tagChildrenForRelink()
2227 self.scan()
2228 vdi._relinkSkip()
2229 finally:
2230 self.unlock()
2231 # Reload the children to leave things consistent
2232 vdi.parent._reloadChildren(vdi)
2234 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid)
2235 self.deleteVDI(vdi)
2237 class CoalesceTracker:
2238 GRACE_ITERATIONS = 1
2239 MAX_ITERATIONS_NO_PROGRESS = 3
2240 MAX_ITERATIONS = 10
2241 MAX_INCREASE_FROM_MINIMUM = 1.2
2242 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \
2243 " --> Final size {finSize}"
2245 def __init__(self, sr):
2246 self.itsNoProgress = 0
2247 self.its = 0
2248 self.minSize = float("inf")
2249 self.history = []
2250 self.reason = ""
2251 self.startSize = None
2252 self.finishSize = None
2253 self.sr = sr
2255 def abortCoalesce(self, prevSize, curSize):
2256 res = False
2258 self.its += 1
2259 self.history.append(self.HISTORY_STRING.format(its=self.its,
2260 initSize=prevSize,
2261 finSize=curSize))
2263 self.finishSize = curSize
2265 if self.startSize is None:
2266 self.startSize = prevSize
2268 if curSize < self.minSize:
2269 self.minSize = curSize
2271 if prevSize < self.minSize:
2272 self.minSize = prevSize
2274 if prevSize < curSize:
2275 self.itsNoProgress += 1
2276 Util.log("No progress, attempt:"
2277 " {attempt}".format(attempt=self.itsNoProgress))
2278 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid)
2280 if (not res) and (self.its > self.MAX_ITERATIONS):
2281 max = self.MAX_ITERATIONS
2282 self.reason = \
2283 "Max iterations ({max}) exceeded".format(max=max)
2284 res = True
2286 if (not res) and (self.itsNoProgress >
2287 self.MAX_ITERATIONS_NO_PROGRESS):
2288 max = self.MAX_ITERATIONS_NO_PROGRESS
2289 self.reason = \
2290 "No progress made for {max} iterations".format(max=max)
2291 res = True
2293 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize
2294 if (self.its > self.GRACE_ITERATIONS and
2295 (not res) and (curSize > maxSizeFromMin)):
2296 self.reason = "Unexpected bump in size," \
2297 " compared to minimum acheived"
2298 res = True
2300 return res
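# Example of the abort rules (illustrative sizes): an iteration that goes
# from 10 GiB to 12 GiB counts as "no progress" since the size grew, and
# the fourth such iteration (MAX_ITERATIONS_NO_PROGRESS = 3) aborts the
# coalesce. Independently, once past GRACE_ITERATIONS a current size above
# 1.2x the minimum seen so far (MAX_INCREASE_FROM_MINIMUM) aborts, as does
# going beyond MAX_ITERATIONS = 10 iterations in total.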
2302 def printReasoning(self):
2303 Util.log("Aborted coalesce")
2304 for hist in self.history:
2305 Util.log(hist)
2306 Util.log(self.reason)
2307 Util.log("Starting size was {size}"
2308 .format(size=self.startSize))
2309 Util.log("Final size was {size}"
2310 .format(size=self.finishSize))
2311 Util.log("Minimum size acheived was {size}"
2312 .format(size=self.minSize))
2314 def _coalesceLeaf(self, vdi):
2315 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot
2316 complete due to external changes, namely vdi_delete and vdi_snapshot
2317 that alter the leaf-coalesceability of vdi"""
2318 tracker = self.CoalesceTracker(self)
2319 while not vdi.canLiveCoalesce(self.getStorageSpeed()):
2320 prevSizeVHD = vdi.getSizeVHD()
2321 if not self._snapshotCoalesce(vdi): 2321 ↛ 2322
2322 return False
2323 if tracker.abortCoalesce(prevSizeVHD, vdi.getSizeVHD()):
2324 tracker.printReasoning()
2325 raise util.SMException("VDI {uuid} could not be coalesced"
2326 .format(uuid=vdi.uuid))
2327 return self._liveLeafCoalesce(vdi)
2329 def calcStorageSpeed(self, startTime, endTime, vhdSize):
2330 speed = None
2331 total_time = endTime - startTime
2332 if total_time > 0:
2333 speed = float(vhdSize) / float(total_time)
2334 return speed
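# Illustrative numbers: coalescing 2 GiB of VHD data in 100 seconds gives
# 2 * 1024**3 / 100, roughly 21.5 MB/s; the result is in bytes per second
# and feeds the running average kept in the speed log below.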
2336 def writeSpeedToFile(self, speed):
2337 content = []
2338 speedFile = None
2339 path = SPEED_LOG_ROOT.format(uuid=self.uuid)
2340 self.lock()
2341 try:
2342 Util.log("Writing to file: {myfile}".format(myfile=path))
2343 lines = ""
2344 if not os.path.isfile(path):
2345 lines = str(speed) + "\n"
2346 else:
2347 speedFile = open(path, "r+")
2348 content = speedFile.readlines()
2349 content.append(str(speed) + "\n")
2350 if len(content) > N_RUNNING_AVERAGE:
2351 del content[0]
2352 lines = "".join(content)
2354 util.atomicFileWrite(path, VAR_RUN, lines)
2355 finally:
2356 if speedFile is not None:
2357 speedFile.close()
2358 Util.log("Closing file: {myfile}".format(myfile=path))
2359 self.unlock()
2361 def recordStorageSpeed(self, startTime, endTime, vhdSize):
2362 speed = self.calcStorageSpeed(startTime, endTime, vhdSize)
2363 if speed is None:
2364 return
2366 self.writeSpeedToFile(speed)
2368 def getStorageSpeed(self):
2369 speedFile = None
2370 path = SPEED_LOG_ROOT.format(uuid=self.uuid)
2371 self.lock()
2372 try:
2373 speed = None
2374 if os.path.isfile(path):
2375 speedFile = open(path)
2376 content = speedFile.readlines()
2377 try:
2378 content = [float(i) for i in content]
2379 except ValueError:
2380 Util.log("Something bad in the speed log:{log}".
2381 format(log=speedFile.readlines()))
2382 return speed
2384 if len(content):
2385 speed = sum(content) / float(len(content))
2386 if speed <= 0: 2386 ↛ 2388
2387 # Defensive, should be impossible.
2388 Util.log("Bad speed: {speed} calculated for SR: {uuid}".
2389 format(speed=speed, uuid=self.uuid))
2390 speed = None
2391 else:
2392 Util.log("Speed file empty for SR: {uuid}".
2393 format(uuid=self.uuid))
2394 else:
2395 Util.log("Speed log missing for SR: {uuid}".
2396 format(uuid=self.uuid))
2397 return speed
2398 finally:
2399 if not (speedFile is None):
2400 speedFile.close()
2401 self.unlock()
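# The returned speed is the mean of the last N_RUNNING_AVERAGE (10) samples
# stored in /var/run/<sr-uuid>.speed_log; for example, samples of 100e6,
# 120e6 and 110e6 bytes/s average to 110e6 bytes/s, the value later passed
# to canLiveCoalesce().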
2403 def _snapshotCoalesce(self, vdi):
2404 # Note that because we are not holding any locks here, concurrent SM
2405 # operations may change this tree under our feet. In particular, vdi
2406 # can be deleted, or it can be snapshotted.
2407 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED)
2408 Util.log("Single-snapshotting %s" % vdi)
2409 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid)
2410 try:
2411 ret = self.xapi.singleSnapshotVDI(vdi)
2412 Util.log("Single-snapshot returned: %s" % ret)
2413 except XenAPI.Failure as e:
2414 if util.isInvalidVDI(e):
2415 Util.log("The VDI appears to have been concurrently deleted")
2416 return False
2417 raise
2418 self.scanLocked()
2419 tempSnap = vdi.parent
2420 if not tempSnap.isCoalesceable():
2421 Util.log("The VDI appears to have been concurrently snapshotted")
2422 return False
2423 Util.log("Coalescing parent %s" % tempSnap)
2424 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid)
2425 vhdSize = vdi.getSizeVHD()
2426 self._coalesce(tempSnap)
2427 if not vdi.isLeafCoalesceable():
2428 Util.log("The VDI tree appears to have been altered since")
2429 return False
2430 return True
2432 def _liveLeafCoalesce(self, vdi):
2433 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid)
2434 self.lock()
2435 try:
2436 self.scan()
2437 if not self.getVDI(vdi.uuid):
2438 Util.log("The VDI appears to have been deleted meanwhile")
2439 return False
2440 if not vdi.isLeafCoalesceable():
2441 Util.log("The VDI is no longer leaf-coalesceable")
2442 return False
2444 uuid = vdi.uuid
2445 vdi.pause(failfast=True)
2446 try:
2447 try:
2448 # "vdi" object will no longer be valid after this call
2449 self._doCoalesceLeaf(vdi)
2450 except:
2451 Util.logException("_doCoalesceLeaf")
2452 self._handleInterruptedCoalesceLeaf()
2453 raise
2454 finally:
2455 vdi = self.getVDI(uuid)
2456 if vdi:
2457 vdi.ensureUnpaused()
2458 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid)
2459 if vdiOld:
2460 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
2461 self.deleteVDI(vdiOld)
2462 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)
2463 finally:
2464 self.cleanup()
2465 self.unlock()
2466 self.logFilter.logState()
2467 return True
2469 def _doCoalesceLeaf(self, vdi):
2470 """Actual coalescing of a leaf VDI onto parent. Must be called in an
2471 offline/atomic context"""
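# Outline of the steps below: journal the operation, coalesce the leaf into
# its parent, swap identities by renaming the leaf to OLD_<uuid> and the
# parent to the leaf's uuid, fix up the VDI record and refcounts, delete the
# obsolete leaf right away only if free space demands it, and finally drop
# the journal entry and forget the original parent.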
2472 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid)
2473 self._prepareCoalesceLeaf(vdi)
2474 vdi.parent._setHidden(False)
2475 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False)
2476 vdi.validate(True)
2477 vdi.parent.validate(True)
2478 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid)
2479 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT
2480 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE:
2481 Util.log("Leaf-coalesce forced, will not use timeout")
2482 timeout = 0
2483 vdi._coalesceVHD(timeout)
2484 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid)
2485 vdi.parent.validate(True)
2486 #vdi._verifyContents(timeout / 2)
2488 # rename
2489 vdiUuid = vdi.uuid
2490 oldName = vdi.fileName
2491 origParentUuid = vdi.parent.uuid
2492 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid)
2493 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid)
2494 vdi.parent.rename(vdiUuid)
2495 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid)
2496 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid)
2498 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is
2499 # garbage
2501 # update the VDI record
2502 vdi.parent.delConfig(VDI.DB_VHD_PARENT)
2503 if vdi.parent.raw:
2504 vdi.parent.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_RAW)
2505 vdi.parent.delConfig(VDI.DB_VHD_BLOCKS)
2506 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid)
2508 self._updateNode(vdi)
2510 # delete the obsolete leaf & inflate the parent (in that order, to
2511 # minimize free space requirements)
2512 parent = vdi.parent
2513 vdi._setHidden(True)
2514 vdi.parent.children = []
2515 vdi.parent = None
2517 extraSpace = self._calcExtraSpaceNeeded(vdi, parent)
2518 freeSpace = self.getFreeSpace()
2519 if freeSpace < extraSpace:
2520 # don't delete unless we need the space: deletion is time-consuming
2521 # because it requires contacting the slaves, and we're paused here
2522 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
2523 self.deleteVDI(vdi)
2524 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)
2526 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid)
2527 self.journaler.remove(VDI.JRN_LEAF, vdiUuid)
2529 self.forgetVDI(origParentUuid)
2530 self._finishCoalesceLeaf(parent)
2531 self._updateSlavesOnResize(parent)
2533 def _calcExtraSpaceNeeded(self, child, parent):
2534 assert(not parent.raw) # raw parents not supported
2535 extra = child.getSizeVHD() - parent.getSizeVHD()
2536 if extra < 0:
2537 extra = 0
2538 return extra
2540 def _prepareCoalesceLeaf(self, vdi):
2541 pass
2543 def _updateNode(self, vdi):
2544 pass
2546 def _finishCoalesceLeaf(self, parent):
2547 pass
2549 def _updateSlavesOnUndoLeafCoalesce(self, parent, child):
2550 pass
2552 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid):
2553 pass
2555 def _updateSlavesOnResize(self, vdi):
2556 pass
2558 def _removeStaleVDIs(self, uuidsPresent):
2559 for uuid in list(self.vdis.keys()):
2560 if not uuid in uuidsPresent:
2561 Util.log("VDI %s disappeared since last scan" % \
2562 self.vdis[uuid])
2563 del self.vdis[uuid]
2565 def _handleInterruptedCoalesceLeaf(self):
2566 """An interrupted leaf-coalesce operation may leave the VHD tree in an
2567 inconsistent state. If the old-leaf VDI is still present, we revert the
2568 operation (in case the original error is persistent); otherwise we must
2569 finish the operation"""
2570 # abstract
2571 pass
2573 def _buildTree(self, force):
2574 self.vdiTrees = []
2575 for vdi in self.vdis.values():
2576 if vdi.parentUuid:
2577 parent = self.getVDI(vdi.parentUuid)
2578 if not parent:
2579 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX):
2580 self.vdiTrees.append(vdi)
2581 continue
2582 if force:
2583 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \
2584 (vdi.parentUuid, vdi.uuid))
2585 self.vdiTrees.append(vdi)
2586 continue
2587 else:
2588 raise util.SMException("Parent VDI %s of %s not " \
2589 "found" % (vdi.parentUuid, vdi.uuid))
2590 vdi.parent = parent
2591 parent.children.append(vdi)
2592 else:
2593 self.vdiTrees.append(vdi)
2596class FileSR(SR):
2597 TYPE = SR.TYPE_FILE
2598 CACHE_FILE_EXT = ".vhdcache"
2599 # cache cleanup actions
2600 CACHE_ACTION_KEEP = 0
2601 CACHE_ACTION_REMOVE = 1
2602 CACHE_ACTION_REMOVE_IF_INACTIVE = 2
2604 def __init__(self, uuid, xapi, createLock, force):
2605 SR.__init__(self, uuid, xapi, createLock, force)
2606 self.path = "/var/run/sr-mount/%s" % self.uuid
2607 self.journaler = fjournaler.Journaler(self.path)
2609 def scan(self, force=False):
2610 if not util.pathexists(self.path):
2611 raise util.SMException("directory %s not found!" % self.uuid)
2612 vhds = self._scan(force)
2613 for uuid, vhdInfo in vhds.items():
2614 vdi = self.getVDI(uuid)
2615 if not vdi:
2616 self.logFilter.logNewVDI(uuid)
2617 vdi = FileVDI(self, uuid, False)
2618 self.vdis[uuid] = vdi
2619 vdi.load(vhdInfo)
2620 uuidsPresent = list(vhds.keys())
2621 rawList = [x for x in os.listdir(self.path) if x.endswith(vhdutil.FILE_EXTN_RAW)]
2622 for rawName in rawList:
2623 uuid = FileVDI.extractUuid(rawName)
2624 uuidsPresent.append(uuid)
2625 vdi = self.getVDI(uuid)
2626 if not vdi:
2627 self.logFilter.logNewVDI(uuid)
2628 vdi = FileVDI(self, uuid, True)
2629 self.vdis[uuid] = vdi
2630 self._removeStaleVDIs(uuidsPresent)
2631 self._buildTree(force)
2632 self.logFilter.logState()
2633 self._handleInterruptedCoalesceLeaf()
2635 def getFreeSpace(self):
2636 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path)
2638 def deleteVDIs(self, vdiList):
2639 rootDeleted = False
2640 for vdi in vdiList:
2641 if not vdi.parent:
2642 rootDeleted = True
2643 break
2644 SR.deleteVDIs(self, vdiList)
2645 if self.xapi.srRecord["type"] == "nfs" and rootDeleted:
2646 self.xapi.markCacheSRsDirty()
2648 def cleanupCache(self, maxAge=-1):
2649 """Clean up IntelliCache cache files. Caches for leaf nodes are
2650 removed when the leaf node no longer exists or its allow-caching
2651 attribute is not set. Caches for parent nodes are removed when the
2652 parent node no longer exists or it hasn't been used in more than
2653 <maxAge> hours.
2654 Return number of caches removed.
2655 """
2656 numRemoved = 0
2657 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)]
2658 Util.log("Found %d cache files" % len(cacheFiles))
2659 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge)
2660 for cacheFile in cacheFiles:
2661 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)]
2662 action = self.CACHE_ACTION_KEEP
2663 rec = self.xapi.getRecordVDI(uuid)
2664 if not rec:
2665 Util.log("Cache %s: VDI doesn't exist" % uuid)
2666 action = self.CACHE_ACTION_REMOVE
2667 elif rec["managed"] and not rec["allow_caching"]:
2668 Util.log("Cache %s: caching disabled" % uuid)
2669 action = self.CACHE_ACTION_REMOVE
2670 elif not rec["managed"] and maxAge >= 0:
2671 lastAccess = datetime.datetime.fromtimestamp( \
2672 os.path.getatime(os.path.join(self.path, cacheFile)))
2673 if lastAccess < cutoff:
2674 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge))
2675 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE
2677 if action == self.CACHE_ACTION_KEEP:
2678 Util.log("Keeping cache %s" % uuid)
2679 continue
2681 lockId = uuid
2682 parentUuid = None
2683 if rec and rec["managed"]:
2684 parentUuid = rec["sm_config"].get("vhd-parent")
2685 if parentUuid:
2686 lockId = parentUuid
2688 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId)
2689 cacheLock.acquire()
2690 try:
2691 if self._cleanupCache(uuid, action):
2692 numRemoved += 1
2693 finally:
2694 cacheLock.release()
2695 return numRemoved
2697 def _cleanupCache(self, uuid, action):
2698 assert(action != self.CACHE_ACTION_KEEP)
2699 rec = self.xapi.getRecordVDI(uuid)
2700 if rec and rec["allow_caching"]:
2701 Util.log("Cache %s appears to have become valid" % uuid)
2702 return False
2704 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT)
2705 tapdisk = blktap2.Tapdisk.find_by_path(fullPath)
2706 if tapdisk:
2707 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE:
2708 Util.log("Cache %s still in use" % uuid)
2709 return False
2710 Util.log("Shutting down tapdisk for %s" % fullPath)
2711 tapdisk.shutdown()
2713 Util.log("Deleting file %s" % fullPath)
2714 os.unlink(fullPath)
2715 return True
2717 def _isCacheFileName(self, name):
2718 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \
2719 name.endswith(self.CACHE_FILE_EXT)
2721 def _scan(self, force):
2722 for i in range(SR.SCAN_RETRY_ATTEMPTS):
2723 error = False
2724 pattern = os.path.join(self.path, "*%s" % vhdutil.FILE_EXTN_VHD)
2725 vhds = vhdutil.getAllVHDs(pattern, FileVDI.extractUuid)
2726 for uuid, vhdInfo in vhds.items():
2727 if vhdInfo.error:
2728 error = True
2729 break
2730 if not error:
2731 return vhds
2732 Util.log("Scan error on attempt %d" % i)
2733 if force:
2734 return vhds
2735 raise util.SMException("Scan error")
2737 def deleteVDI(self, vdi):
2738 self._checkSlaves(vdi)
2739 SR.deleteVDI(self, vdi)
2741 def _checkSlaves(self, vdi):
2742 onlineHosts = self.xapi.getOnlineHosts()
2743 abortFlag = IPCFlag(self.uuid)
2744 for pbdRecord in self.xapi.getAttachedPBDs():
2745 hostRef = pbdRecord["host"]
2746 if hostRef == self.xapi._hostRef:
2747 continue
2748 if abortFlag.test(FLAG_TYPE_ABORT):
2749 raise AbortException("Aborting due to signal")
2750 try:
2751 self._checkSlave(hostRef, vdi)
2752 except util.CommandException:
2753 if hostRef in onlineHosts:
2754 raise
2756 def _checkSlave(self, hostRef, vdi):
2757 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path})
2758 Util.log("Checking with slave: %s" % repr(call))
2759 _host = self.xapi.session.xenapi.host
2760 text = _host.call_plugin(*call)
2762 def _handleInterruptedCoalesceLeaf(self):
2763 entries = self.journaler.getAll(VDI.JRN_LEAF)
2764 for uuid, parentUuid in entries.items():
2765 fileList = os.listdir(self.path)
2766 childName = uuid + vhdutil.FILE_EXTN_VHD
2767 tmpChildName = self.TMP_RENAME_PREFIX + uuid + vhdutil.FILE_EXTN_VHD
2768 parentName1 = parentUuid + vhdutil.FILE_EXTN_VHD
2769 parentName2 = parentUuid + vhdutil.FILE_EXTN_RAW
2770 parentPresent = (parentName1 in fileList or parentName2 in fileList)
2771 if parentPresent or tmpChildName in fileList:
2772 self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
2773 else:
2774 self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
2775 self.journaler.remove(VDI.JRN_LEAF, uuid)
2776 vdi = self.getVDI(uuid)
2777 if vdi:
2778 vdi.ensureUnpaused()
2780 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
2781 Util.log("*** UNDO LEAF-COALESCE")
2782 parent = self.getVDI(parentUuid)
2783 if not parent:
2784 parent = self.getVDI(childUuid)
2785 if not parent:
2786 raise util.SMException("Neither %s nor %s found" % \
2787 (parentUuid, childUuid))
2788 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
2789 parent.rename(parentUuid)
2790 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)
2792 child = self.getVDI(childUuid)
2793 if not child:
2794 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
2795 if not child:
2796 raise util.SMException("Neither %s nor %s found" % \
2797 (childUuid, self.TMP_RENAME_PREFIX + childUuid))
2798 Util.log("Renaming child back to %s" % childUuid)
2799 child.rename(childUuid)
2800 Util.log("Updating the VDI record")
2801 child.setConfig(VDI.DB_VHD_PARENT, parentUuid)
2802 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD)
2803 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)
2805 if child.hidden:
2806 child._setHidden(False)
2807 if not parent.hidden:
2808 parent._setHidden(True)
2809 self._updateSlavesOnUndoLeafCoalesce(parent, child)
2810 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
2811 Util.log("*** leaf-coalesce undo successful")
2812 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
2813 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)
2815 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
2816 Util.log("*** FINISH LEAF-COALESCE")
2817 vdi = self.getVDI(childUuid)
2818 if not vdi:
2819 raise util.SMException("VDI %s not found" % childUuid)
2820 try:
2821 self.forgetVDI(parentUuid)
2822 except XenAPI.Failure:
2823 pass
2824 self._updateSlavesOnResize(vdi)
2825 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
2826 Util.log("*** finished leaf-coalesce successfully")
2829class LVHDSR(SR):
2830 TYPE = SR.TYPE_LVHD
2831 SUBTYPES = ["lvhdoiscsi", "lvhdohba"]
2833 def __init__(self, uuid, xapi, createLock, force):
2834 SR.__init__(self, uuid, xapi, createLock, force)
2835 self.vgName = "%s%s" % (lvhdutil.VG_PREFIX, self.uuid)
2836 self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgName)
2838 sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid)
2839 other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref)
2840 lvm_conf = other_conf.get('lvm-conf') if other_conf else None
2841 self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf)
2843 self.lvActivator = LVActivator(self.uuid, self.lvmCache)
2844 self.journaler = journaler.Journaler(self.lvmCache)
2846 def deleteVDI(self, vdi):
2847 if self.lvActivator.get(vdi.uuid, False):
2848 self.lvActivator.deactivate(vdi.uuid, False)
2849 self._checkSlaves(vdi)
2850 SR.deleteVDI(self, vdi)
2852 def forgetVDI(self, vdiUuid):
2853 SR.forgetVDI(self, vdiUuid)
2854 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME)
2855 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid)
2857 def getFreeSpace(self):
2858 stats = lvutil._getVGstats(self.vgName)
2859 return stats['physical_size'] - stats['physical_utilisation']
2861 def cleanup(self):
2862 if not self.lvActivator.deactivateAll():
2863 Util.log("ERROR deactivating LVs while cleaning up")
2865 def needUpdateBlockInfo(self):
2866 for vdi in self.vdis.values():
2867 if vdi.scanError or vdi.raw or len(vdi.children) == 0:
2868 continue
2869 if not vdi.getConfig(vdi.DB_VHD_BLOCKS):
2870 return True
2871 return False
2873 def updateBlockInfo(self):
2874 numUpdated = 0
2875 for vdi in self.vdis.values():
2876 if vdi.scanError or vdi.raw or len(vdi.children) == 0:
2877 continue
2878 if not vdi.getConfig(vdi.DB_VHD_BLOCKS):
2879 vdi.updateBlockInfo()
2880 numUpdated += 1
2881 if numUpdated:
2882 # deactivate the LVs again sooner rather than later. If we don't
2883 # now, by the time this thread gets to deactivations, another one
2884 # might have leaf-coalesced a node and deleted it, making the child
2885 # inherit the refcount value and preventing the correct decrement
2886 self.cleanup()
2888 def scan(self, force=False):
2889 vdis = self._scan(force)
2890 for uuid, vdiInfo in vdis.items():
2891 vdi = self.getVDI(uuid)
2892 if not vdi:
2893 self.logFilter.logNewVDI(uuid)
2894 vdi = LVHDVDI(self, uuid,
2895 vdiInfo.vdiType == vhdutil.VDI_TYPE_RAW)
2896 self.vdis[uuid] = vdi
2897 vdi.load(vdiInfo)
2898 self._removeStaleVDIs(vdis.keys())
2899 self._buildTree(force)
2900 self.logFilter.logState()
2901 self._handleInterruptedCoalesceLeaf()
2903 def _scan(self, force):
2904 for i in range(SR.SCAN_RETRY_ATTEMPTS):
2905 error = False
2906 self.lvmCache.refresh()
2907 vdis = lvhdutil.getVDIInfo(self.lvmCache)
2908 for uuid, vdiInfo in vdis.items():
2909 if vdiInfo.scanError:
2910 error = True
2911 break
2912 if not error:
2913 return vdis
2914 Util.log("Scan error, retrying (%d)" % i)
2915 if force:
2916 return vdis
2917 raise util.SMException("Scan error")
2919 def _removeStaleVDIs(self, uuidsPresent):
2920 for uuid in list(self.vdis.keys()):
2921 if not uuid in uuidsPresent:
2922 Util.log("VDI %s disappeared since last scan" % \
2923 self.vdis[uuid])
2924 del self.vdis[uuid]
2925 if self.lvActivator.get(uuid, False):
2926 self.lvActivator.remove(uuid, False)
2928 def _liveLeafCoalesce(self, vdi):
2929 """If the parent is raw and the child was resized (virt. size), then
2930 we'll need to resize the parent, which can take a while due to zeroing
2931 out of the extended portion of the LV. Do it before pausing the child
2932 to avoid a protracted downtime"""
2933 if vdi.parent.raw and vdi.sizeVirt > vdi.parent.sizeVirt:
2934 self.lvmCache.setReadonly(vdi.parent.fileName, False)
2935 vdi.parent._increaseSizeVirt(vdi.sizeVirt)
2937 return SR._liveLeafCoalesce(self, vdi)
2939 def _prepareCoalesceLeaf(self, vdi):
2940 vdi._activateChain()
2941 self.lvmCache.setReadonly(vdi.parent.fileName, False)
2942 vdi.deflate()
2943 vdi.inflateParentForCoalesce()
2945 def _updateNode(self, vdi):
2946 # fix the refcounts: the remaining node should inherit the binary
2947 # refcount from the leaf (because if it was online, it should remain
2948 # refcounted as such), but the normal refcount from the parent (because
2949 # this node is really the parent node) - minus 1 if it is online (since
2950 # non-leaf nodes increment their normal counts when they are online and
2951 # we are now a leaf, storing that 1 in the binary refcount).
2952 ns = lvhdutil.NS_PREFIX_LVM + self.uuid
2953 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns)
2954 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns)
2955 pCnt = pCnt - cBcnt
2956 assert(pCnt >= 0)
2957 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns)
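# Worked example of the refcount fix-up: if the leaf was online, its binary
# refcount cBcnt is 1 and the parent's normal count pCnt carries the +1 it
# held as an online non-leaf; setting the survivor to (pCnt - cBcnt, cBcnt),
# e.g. (1 - 1, 1), leaves it with the plain online-leaf state of normal 0 /
# binary 1.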
2959 def _finishCoalesceLeaf(self, parent):
2960 if not parent.isSnapshot() or parent.isAttachedRW():
2961 parent.inflateFully()
2962 else:
2963 parent.deflate()
2965 def _calcExtraSpaceNeeded(self, child, parent):
2966 return lvhdutil.calcSizeVHDLV(parent.sizeVirt) - parent.sizeLV
2968 def _handleInterruptedCoalesceLeaf(self):
2969 entries = self.journaler.getAll(VDI.JRN_LEAF)
2970 for uuid, parentUuid in entries.items():
2971 childLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + uuid
2972 tmpChildLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \
2973 self.TMP_RENAME_PREFIX + uuid
2974 parentLV1 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + parentUuid
2975 parentLV2 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + parentUuid
2976 parentPresent = (self.lvmCache.checkLV(parentLV1) or \
2977 self.lvmCache.checkLV(parentLV2))
2978 if parentPresent or self.lvmCache.checkLV(tmpChildLV):
2979 self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
2980 else:
2981 self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
2982 self.journaler.remove(VDI.JRN_LEAF, uuid)
2983 vdi = self.getVDI(uuid)
2984 if vdi:
2985 vdi.ensureUnpaused()
2987 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
2988 Util.log("*** UNDO LEAF-COALESCE")
2989 parent = self.getVDI(parentUuid)
2990 if not parent:
2991 parent = self.getVDI(childUuid)
2992 if not parent:
2993 raise util.SMException("Neither %s nor %s found" % \
2994 (parentUuid, childUuid))
2995 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
2996 parent.rename(parentUuid)
2997 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)
2999 child = self.getVDI(childUuid)
3000 if not child:
3001 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
3002 if not child:
3003 raise util.SMException("Neither %s nor %s found" % \
3004 (childUuid, self.TMP_RENAME_PREFIX + childUuid))
3005 Util.log("Renaming child back to %s" % childUuid)
3006 child.rename(childUuid)
3007 Util.log("Updating the VDI record")
3008 child.setConfig(VDI.DB_VHD_PARENT, parentUuid)
3009 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD)
3010 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)
3012 # refcount (best effort - assume that it had succeeded if the
3013 # second rename succeeded; if not, this adjustment will be wrong,
3014 # leading to a non-deactivation of the LV)
3015 ns = lvhdutil.NS_PREFIX_LVM + self.uuid
3016 cCnt, cBcnt = RefCounter.check(child.uuid, ns)
3017 pCnt, pBcnt = RefCounter.check(parent.uuid, ns)
3018 pCnt = pCnt + cBcnt
3019 RefCounter.set(parent.uuid, pCnt, 0, ns)
3020 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid)
3022 parent.deflate()
3023 child.inflateFully()
3024 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid)
3025 if child.hidden:
3026 child._setHidden(False)
3027 if not parent.hidden:
3028 parent._setHidden(True)
3029 if not parent.lvReadonly:
3030 self.lvmCache.setReadonly(parent.fileName, True)
3031 self._updateSlavesOnUndoLeafCoalesce(parent, child)
3032 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
3033 Util.log("*** leaf-coalesce undo successful")
3034 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
3035 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)
3037 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3038 Util.log("*** FINISH LEAF-COALESCE")
3039 vdi = self.getVDI(childUuid)
3040 if not vdi:
3041 raise util.SMException("VDI %s not found" % childUuid)
3042 vdi.inflateFully()
3043 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid)
3044 try:
3045 self.forgetVDI(parentUuid)
3046 except XenAPI.Failure:
3047 pass
3048 self._updateSlavesOnResize(vdi)
3049 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
3050 Util.log("*** finished leaf-coalesce successfully")
3052 def _checkSlaves(self, vdi):
3053 """Confirm with all slaves in the pool that 'vdi' is not in use. We
3054 try to check all slaves, including those that the Agent believes are
3055 offline, but ignore failures for offline hosts. This is to avoid cases
3056 where the Agent thinks a host is offline but the host is up."""
3057 args = {"vgName": self.vgName,
3058 "action1": "deactivateNoRefcount",
3059 "lvName1": vdi.fileName,
3060 "action2": "cleanupLockAndRefcount",
3061 "uuid2": vdi.uuid,
3062 "ns2": lvhdutil.NS_PREFIX_LVM + self.uuid}
3063 onlineHosts = self.xapi.getOnlineHosts()
3064 abortFlag = IPCFlag(self.uuid)
3065 for pbdRecord in self.xapi.getAttachedPBDs():
3066 hostRef = pbdRecord["host"]
3067 if hostRef == self.xapi._hostRef:
3068 continue
3069 if abortFlag.test(FLAG_TYPE_ABORT):
3070 raise AbortException("Aborting due to signal")
3071 Util.log("Checking with slave %s (path %s)" % (
3072 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path))
3073 try:
3074 self.xapi.ensureInactive(hostRef, args)
3075 except XenAPI.Failure:
3076 if hostRef in onlineHosts:
3077 raise
3079 def _updateSlavesOnUndoLeafCoalesce(self, parent, child):
3080 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid])
3081 if not slaves:
3082 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \
3083 child)
3084 return
3086 tmpName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \
3087 self.TMP_RENAME_PREFIX + child.uuid
3088 args = {"vgName": self.vgName,
3089 "action1": "deactivateNoRefcount",
3090 "lvName1": tmpName,
3091 "action2": "deactivateNoRefcount",
3092 "lvName2": child.fileName,
3093 "action3": "refresh",
3094 "lvName3": child.fileName,
3095 "action4": "refresh",
3096 "lvName4": parent.fileName}
3097 for slave in slaves:
3098 Util.log("Updating %s, %s, %s on slave %s" % \
3099 (tmpName, child.fileName, parent.fileName,
3100 self.xapi.getRecordHost(slave)['hostname']))
3101 text = self.xapi.session.xenapi.host.call_plugin( \
3102 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
3103 Util.log("call-plugin returned: '%s'" % text)
3105 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid):
3106 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid])
3107 if not slaves:
3108 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi)
3109 return
3111 args = {"vgName": self.vgName,
3112 "action1": "deactivateNoRefcount",
3113 "lvName1": oldNameLV,
3114 "action2": "refresh",
3115 "lvName2": vdi.fileName,
3116 "action3": "cleanupLockAndRefcount",
3117 "uuid3": origParentUuid,
3118 "ns3": lvhdutil.NS_PREFIX_LVM + self.uuid}
3119 for slave in slaves:
3120 Util.log("Updating %s to %s on slave %s" % \
3121 (oldNameLV, vdi.fileName,
3122 self.xapi.getRecordHost(slave)['hostname']))
3123 text = self.xapi.session.xenapi.host.call_plugin( \
3124 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
3125 Util.log("call-plugin returned: '%s'" % text)
3127 def _updateSlavesOnResize(self, vdi):
3128 uuids = [x.uuid for x in vdi.getAllLeaves()]
3129 slaves = util.get_slaves_attached_on(self.xapi.session, uuids)
3130 if not slaves:
3131 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi)
3132 return
3133 lvhdutil.lvRefreshOnSlaves(self.xapi.session, self.uuid, self.vgName,
3134 vdi.fileName, vdi.uuid, slaves)
3137class LinstorSR(SR):
3138 TYPE = SR.TYPE_LINSTOR
3140 def __init__(self, uuid, xapi, createLock, force):
3141 if not LINSTOR_AVAILABLE:
3142 raise util.SMException(
3143 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing'
3144 )
3146 SR.__init__(self, uuid, xapi, createLock, force)
3147 self.path = LinstorVolumeManager.DEV_ROOT_PATH
3148 self._reloadLinstor()
3150 def deleteVDI(self, vdi):
3151 self._checkSlaves(vdi)
3152 SR.deleteVDI(self, vdi)
3154 def getFreeSpace(self):
3155 return self._linstor.max_volume_size_allowed
3157 def scan(self, force=False):
3158 all_vdi_info = self._scan(force)
3159 for uuid, vdiInfo in all_vdi_info.items():
3160 # When vdiInfo is None, the VDI is RAW.
3161 vdi = self.getVDI(uuid)
3162 if not vdi:
3163 self.logFilter.logNewVDI(uuid)
3164 vdi = LinstorVDI(self, uuid, not vdiInfo)
3165 self.vdis[uuid] = vdi
3166 if vdiInfo:
3167 vdi.load(vdiInfo)
3168 self._removeStaleVDIs(all_vdi_info.keys())
3169 self._buildTree(force)
3170 self.logFilter.logState()
3171 self._handleInterruptedCoalesceLeaf()
3173 def pauseVDIs(self, vdiList):
3174 self._linstor.ensure_volume_list_is_not_locked(
3175 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
3176 )
3177 return super(LinstorSR, self).pauseVDIs(vdiList)
3179 def _reloadLinstor(self):
3180 session = self.xapi.session
3181 host_ref = util.get_this_host_ref(session)
3182 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid)
3184 pbd = util.find_my_pbd(session, host_ref, sr_ref)
3185 if pbd is None:
3186 raise util.SMException('Failed to find PBD')
3188 dconf = session.xenapi.PBD.get_device_config(pbd)
3189 group_name = dconf['group-name']
3191 controller_uri = get_controller_uri()
3192 self.journaler = LinstorJournaler(
3193 controller_uri, group_name, logger=util.SMlog
3194 )
3196 self._linstor = LinstorVolumeManager(
3197 controller_uri,
3198 group_name,
3199 repair=True,
3200 logger=util.SMlog
3201 )
3202 self._vhdutil = LinstorVhdUtil(session, self._linstor)
3204 def _scan(self, force):
3205 for i in range(SR.SCAN_RETRY_ATTEMPTS):
3206 self._reloadLinstor()
3207 error = False
3208 try:
3209 all_vdi_info = self._load_vdi_info()
3210 for uuid, vdiInfo in all_vdi_info.items():
3211 if vdiInfo and vdiInfo.error:
3212 error = True
3213 break
3214 if not error:
3215 return all_vdi_info
3216 Util.log('Scan error, retrying ({})'.format(i))
3217 except Exception as e:
3218 Util.log('Scan exception, retrying ({}): {}'.format(i, e))
3219 Util.log(traceback.format_exc())
3221 if force:
3222 return all_vdi_info
3223 raise util.SMException('Scan error')
3225 def _load_vdi_info(self):
3226 all_vdi_info = {}
3228 # TODO: Ensure metadata contains the right info.
3230 all_volume_info = self._linstor.get_volumes_with_info()
3231 volumes_metadata = self._linstor.get_volumes_with_metadata()
3232 for vdi_uuid, volume_info in all_volume_info.items():
3233 try:
3234 volume_metadata = volumes_metadata[vdi_uuid]
3235 if not volume_info.name and not list(volume_metadata.items()):
3236 continue # Ignore it, probably deleted.
3238 if vdi_uuid.startswith('DELETED_'):
3239 # Assume it's really a RAW volume of a failed snap without VHD header/footer.
3240 # We must remove this VDI now without adding it to the VDI list.
3241 # Otherwise `Relinking` calls and other actions can be launched on it.
3242 # We don't want that...
3243 Util.log('Deleting bad VDI {}'.format(vdi_uuid))
3245 self.lock()
3246 try:
3247 self._linstor.destroy_volume(vdi_uuid)
3248 try:
3249 self.forgetVDI(vdi_uuid)
3250 except:
3251 pass
3252 except Exception as e:
3253 Util.log('Cannot delete bad VDI: {}'.format(e))
3254 finally:
3255 self.unlock()
3256 continue
3258 vdi_type = volume_metadata.get(VDI_TYPE_TAG)
3259 volume_name = self._linstor.get_volume_name(vdi_uuid)
3260 if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX):
3261 # Always RAW!
3262 info = None
3263 elif vdi_type == vhdutil.VDI_TYPE_VHD:
3264 info = self._vhdutil.get_vhd_info(vdi_uuid)
3265 else:
3266 # Ensure it's not a VHD...
3267 try:
3268 info = self._vhdutil.get_vhd_info(vdi_uuid)
3269 except:
3270 try:
3271 self._vhdutil.force_repair(
3272 self._linstor.get_device_path(vdi_uuid)
3273 )
3274 info = self._vhdutil.get_vhd_info(vdi_uuid)
3275 except:
3276 info = None
3278 except Exception as e:
3279 Util.log(
3280 ' [VDI {}: failed to load VDI info]: {}'
3281 .format(vdi_uuid, e)
3282 )
3283 info = vhdutil.VHDInfo(vdi_uuid)
3284 info.error = 1
3286 all_vdi_info[vdi_uuid] = info
3288 return all_vdi_info
3290 def _prepareCoalesceLeaf(self, vdi):
3291 vdi._activateChain()
3292 vdi.deflate()
3293 vdi._inflateParentForCoalesce()
3295 def _finishCoalesceLeaf(self, parent):
3296 if not parent.isSnapshot() or parent.isAttachedRW():
3297 parent.inflateFully()
3298 else:
3299 parent.deflate()
3301 def _calcExtraSpaceNeeded(self, child, parent):
3302 return LinstorVhdUtil.compute_volume_size(parent.sizeVirt, parent.vdi_type) - parent.getDrbdSize()
3304 def _hasValidDevicePath(self, uuid):
3305 try:
3306 self._linstor.get_device_path(uuid)
3307 except Exception:
3308 # TODO: Maybe log exception.
3309 return False
3310 return True
3312 def _liveLeafCoalesce(self, vdi):
3313 self.lock()
3314 try:
3315 self._linstor.ensure_volume_is_not_locked(
3316 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
3317 )
3318 return super(LinstorSR, self)._liveLeafCoalesce(vdi)
3319 finally:
3320 self.unlock()
3322 def _handleInterruptedCoalesceLeaf(self):
3323 entries = self.journaler.get_all(VDI.JRN_LEAF)
3324 for uuid, parentUuid in entries.items():
3325 if self._hasValidDevicePath(parentUuid) or \
3326 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid):
3327 self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
3328 else:
3329 self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
3330 self.journaler.remove(VDI.JRN_LEAF, uuid)
3331 vdi = self.getVDI(uuid)
3332 if vdi:
3333 vdi.ensureUnpaused()
3335 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3336 Util.log('*** UNDO LEAF-COALESCE')
3337 parent = self.getVDI(parentUuid)
3338 if not parent:
3339 parent = self.getVDI(childUuid)
3340 if not parent:
3341 raise util.SMException(
3342 'Neither {} nor {} found'.format(parentUuid, childUuid)
3343 )
3344 Util.log(
3345 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid)
3346 )
3347 parent.rename(parentUuid)
3349 child = self.getVDI(childUuid)
3350 if not child:
3351 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
3352 if not child:
3353 raise util.SMException(
3354 'Neither {} nor {} found'.format(
3355 childUuid, self.TMP_RENAME_PREFIX + childUuid
3356 )
3357 )
3358 Util.log('Renaming child back to {}'.format(childUuid))
3359 child.rename(childUuid)
3360 Util.log('Updating the VDI record')
3361 child.setConfig(VDI.DB_VHD_PARENT, parentUuid)
3362 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD)
3364 # TODO: Maybe deflate here.
3366 if child.hidden:
3367 child._setHidden(False)
3368 if not parent.hidden:
3369 parent._setHidden(True)
3370 self._updateSlavesOnUndoLeafCoalesce(parent, child)
3371 Util.log('*** leaf-coalesce undo successful')
3373 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3374 Util.log('*** FINISH LEAF-COALESCE')
3375 vdi = self.getVDI(childUuid)
3376 if not vdi:
3377 raise util.SMException('VDI {} not found'.format(childUuid))
3378 # TODO: Maybe inflate.
3379 try:
3380 self.forgetVDI(parentUuid)
3381 except XenAPI.Failure:
3382 pass
3383 self._updateSlavesOnResize(vdi)
3384 Util.log('*** finished leaf-coalesce successfully')
3386 def _checkSlaves(self, vdi):
3387 try:
3388 all_openers = self._linstor.get_volume_openers(vdi.uuid)
3389 for openers in all_openers.values():
3390 for opener in openers.values():
3391 if opener['process-name'] != 'tapdisk':
3392 raise util.SMException(
3393 'VDI {} is in use: {}'.format(vdi.uuid, all_openers)
3394 )
3395 except LinstorVolumeManagerError as e:
3396 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS:
3397 raise
3400################################################################################
3401#
3402# Helpers
3403#
3404def daemonize():
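# Standard double-fork daemonization: the parent reaps the first child and
# returns False; the first child calls setsid() to detach from the
# controlling terminal and forks again, exiting so the grandchild is
# re-parented to init; the grandchild points fds 0-2 at /dev/null, clears
# stale lock objects and returns True to carry on as the daemon.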
3405 pid = os.fork()
3406 if pid:
3407 os.waitpid(pid, 0)
3408 Util.log("New PID [%d]" % pid)
3409 return False
3410 os.chdir("/")
3411 os.setsid()
3412 pid = os.fork()
3413 if pid:
3414 Util.log("Will finish as PID [%d]" % pid)
3415 os._exit(0)
3416 for fd in [0, 1, 2]:
3417 try:
3418 os.close(fd)
3419 except OSError:
3420 pass
3421 # we need to fill those special fd numbers or pread won't work
3422 sys.stdin = open("/dev/null", 'r')
3423 sys.stderr = open("/dev/null", 'w')
3424 sys.stdout = open("/dev/null", 'w')
3425 # As we're a new process we need to clear the lock objects
3426 lock.Lock.clearAll()
3427 return True
3430def normalizeType(type):
3431 if type in LVHDSR.SUBTYPES:
3432 type = SR.TYPE_LVHD
3433 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]:
3434 # temporary while LVHD is symlinked as LVM
3435 type = SR.TYPE_LVHD
3436 if type in [
3437 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs",
3438 "moosefs", "xfs", "zfs", "largeblock"
3439 ]:
3440 type = SR.TYPE_FILE
3441 if type in ["linstor"]:
3442 type = SR.TYPE_LINSTOR
3443 if type not in SR.TYPES:
3444 raise util.SMException("Unsupported SR type: %s" % type)
3445 return type
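# For example, normalizeType("lvmoiscsi") and normalizeType("lvhdohba") both
# map to SR.TYPE_LVHD, normalizeType("nfs") maps to SR.TYPE_FILE, and an
# unrecognized string raises util.SMException.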
3447GCPAUSE_DEFAULT_SLEEP = 5 * 60
3450def _gc_init_file(sr_uuid):
3451 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init')
3454def _create_init_file(sr_uuid):
3455 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid)))
3456 with open(os.path.join(
3457 NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init'), 'w+') as f:
3458 f.write('1')
3461def _gcLoopPause(sr, dryRun=False, immediate=False):
3462 if immediate:
3463 return
3465    # Check whether the GCPAUSE fist point is active. If so, the fist
3466    # point will just return. Otherwise, fall back on an abortable sleep.
3468 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT):
3470        util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT,
3471 lambda *args: None)
3472 elif os.path.exists(_gc_init_file(sr.uuid)):
3473 def abortTest():
3474 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT)
3476        # If time.sleep hangs we are in deep trouble; however, for
3477        # completeness we set the timeout of the abort thread to
3478        # 110% of GCPAUSE_DEFAULT_SLEEP.
3479 Util.log("GC active, about to go quiet")
3480        Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP),
3481 None, sr.uuid, abortTest, VDI.POLL_INTERVAL,
3482 GCPAUSE_DEFAULT_SLEEP * 1.1)
3483 Util.log("GC active, quiet period ended")
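# Sketch of how the quiet period above gets interrupted: Util.runAbortable polls
# abortTest() every VDI.POLL_INTERVAL seconds, and abortTest() reads the same
# FLAG_TYPE_ABORT IPC flag that _abort() further below raises, roughly:
#
#     IPCFlag(sr_uuid).set(FLAG_TYPE_ABORT, soft)   # raised by the aborting process
#     IPCFlag(sr_uuid).test(FLAG_TYPE_ABORT)        # polled by the sleeping GC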
3486def _gcLoop(sr, dryRun=False, immediate=False):
3487    if not lockGCActive.acquireNoblock():
3488 Util.log("Another GC instance already active, exiting")
3489 return
3491 # Check we're still attached after acquiring locks
3492 if not sr.xapi.isPluggedHere():
3493 Util.log("SR no longer attached, exiting")
3494 return
3496    # Clean up IntelliCache files
3497 sr.cleanupCache()
3499 # Track how many we do
3500 coalesced = 0
3501 task_status = "success"
3502 try:
3503 # Check if any work needs to be done
3504        if not sr.xapi.isPluggedHere():
3505 Util.log("SR no longer attached, exiting")
3506 return
3507 sr.scanLocked()
3508 if not sr.hasWork():
3509 Util.log("No work, exiting")
3510 return
3511 sr.xapi.create_task(
3512 "Garbage Collection",
3513 "Garbage collection for SR %s" % sr.uuid)
3514 _gcLoopPause(sr, dryRun, immediate=immediate)
3515 while True:
3516            if not sr.xapi.isPluggedHere():
3517 Util.log("SR no longer attached, exiting")
3518 break
3519 sr.scanLocked()
3520 if not sr.hasWork():
3521 Util.log("No work, exiting")
3522 break
3524            if not lockGCRunning.acquireNoblock():
3525 Util.log("Unable to acquire GC running lock.")
3526 return
3527 try:
3528                if not sr.gcEnabled():
3529 break
3531 sr.xapi.update_task_progress("done", coalesced)
3533 sr.cleanupCoalesceJournals()
3534 # Create the init file here in case startup is waiting on it
3535 _create_init_file(sr.uuid)
3536 sr.scanLocked()
3537 sr.updateBlockInfo()
3539 howmany = len(sr.findGarbage())
3540 if howmany > 0:
3541 Util.log("Found %d orphaned vdis" % howmany)
3542 sr.lock()
3543 try:
3544 sr.garbageCollect(dryRun)
3545 finally:
3546 sr.unlock()
3547 sr.xapi.srUpdate()
3549 candidate = sr.findCoalesceable()
3550 if candidate:
3551 util.fistpoint.activate(
3552 "LVHDRT_finding_a_suitable_pair", sr.uuid)
3553 sr.coalesce(candidate, dryRun)
3554 sr.xapi.srUpdate()
3555 coalesced += 1
3556 continue
3558 candidate = sr.findLeafCoalesceable()
3559                if candidate:
3560 sr.coalesceLeaf(candidate, dryRun)
3561 sr.xapi.srUpdate()
3562 coalesced += 1
3563 continue
3565 finally:
3566                lockGCRunning.release()
3567 except:
3568 task_status = "failure"
3569 raise
3570 finally:
3571 sr.xapi.set_task_status(task_status)
3572 Util.log("GC process exiting, no work left")
3573 _create_init_file(sr.uuid)
3574 lockGCActive.release()
3577def _xapi_enabled(session, hostref):
3578 host = session.xenapi.host.get_record(hostref)
3579 return host['enabled']
3582def _ensure_xapi_initialised(session):
3583 """
3584 Don't want to start GC until Xapi is fully initialised
3585 """
3586 local_session = None
3587 if session is None:
3588 local_session = util.get_localAPI_session()
3589 session = local_session
3591 try:
3592 hostref = session.xenapi.host.get_by_uuid(util.get_this_host())
3593 while not _xapi_enabled(session, hostref):
3594 util.SMlog("Xapi not ready, GC waiting")
3595 time.sleep(15)
3596 finally:
3597 if local_session is not None:
3598 local_session.xenapi.session.logout()
3600def _gc(session, srUuid, dryRun=False, immediate=False):
3601 init(srUuid)
3602 _ensure_xapi_initialised(session)
3603 sr = SR.getInstance(srUuid, session)
3604    if not sr.gcEnabled(False):
3605 return
3607 try:
3608 _gcLoop(sr, dryRun, immediate=immediate)
3609 finally:
3610 sr.cleanup()
3611 sr.logFilter.logState()
3612 del sr.xapi
3615def _abort(srUuid, soft=False):
3616    """Aborts a GC/coalesce.
3618    srUuid: the UUID of the SR whose GC/coalesce must be aborted
3619    soft: if set to True and an abort signal is already pending, the function
3620    does nothing. If set to False, a new abort signal is always issued.
3622    returns: True if the abort succeeded (we return holding lockGCActive). If
3623    soft is set to True and an abort signal is already pending, we return False
3624    without holding lockGCActive. An exception is raised in case of error."""
3625 Util.log("=== SR %s: abort ===" % (srUuid))
3626 init(srUuid)
3627 if not lockGCActive.acquireNoblock():
3628 gotLock = False
3629 Util.log("Aborting currently-running instance (SR %s)" % srUuid)
3630 abortFlag = IPCFlag(srUuid)
3631 if not abortFlag.set(FLAG_TYPE_ABORT, soft):
3632 return False
3633 for i in range(SR.LOCK_RETRY_ATTEMPTS):
3634 gotLock = lockGCActive.acquireNoblock()
3635 if gotLock:
3636 break
3637 time.sleep(SR.LOCK_RETRY_INTERVAL)
3638 abortFlag.clear(FLAG_TYPE_ABORT)
3639 if not gotLock:
3640 raise util.CommandException(code=errno.ETIMEDOUT,
3641 reason="SR %s: error aborting existing process" % srUuid)
3642 return True
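# Sketch of the lock handover described in the docstring above: a successful
# _abort() returns while holding lockGCActive, so the caller owns the release
# (this mirrors abort() in the API section below):
#
#     if _abort(srUuid):
#         try:
#             pass   # GC/coalesce is stopped; do the work that needed exclusivity
#         finally:
#             lockGCActive.release()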
3645def init(srUuid):
3646 global lockGCRunning
3647    if not lockGCRunning:
3648 lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid)
3649 global lockGCActive
3650    if not lockGCActive:
3651 lockGCActive = LockActive(srUuid)
3654class LockActive:
3655 """
3656 Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired
3657 if another process holds the SR lock.
3658 """
3659 def __init__(self, srUuid):
3660 self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid)
3661 self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, srUuid)
3663 def acquireNoblock(self):
3664 self._srLock.acquire()
3666 try:
3667 return self._lock.acquireNoblock()
3668 finally:
3669 self._srLock.release()
3671 def release(self):
3672 self._lock.release()
3675def usage():
3676 output = """Garbage collect and/or coalesce VHDs in a VHD-based SR
3678Parameters:
3679 -u --uuid UUID SR UUID
3680 and one of:
3681 -g --gc garbage collect, coalesce, and repeat while there is work
3682 -G --gc_force garbage collect once, aborting any current operations
3683    -c --clean_cache <max_age> clean up IntelliCache cache files older than
3684 max_age hours
3685 -a --abort abort any currently running operation (GC or coalesce)
3686 -q --query query the current state (GC'ing, coalescing or not running)
3687 -x --disable disable GC/coalesce (will be in effect until you exit)
3688 -t --debug see Debug below
3690Options:
3691 -b --background run in background (return immediately) (valid for -g only)
3692 -f --force continue in the presence of VHDs with errors (when doing
3693 GC, this might cause removal of any such VHDs) (only valid
3694 for -G) (DANGEROUS)
3696Debug:
3697 The --debug parameter enables manipulation of LVHD VDIs for debugging
3698 purposes. ** NEVER USE IT ON A LIVE VM **
3699 The following parameters are required:
3700 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate",
3701 "deflate".
3702 -v --vdi_uuid VDI UUID
3703 """
3704 #-d --dry-run don't actually perform any SR-modifying operations
3705 print(output)
3706 Util.log("(Invalid usage)")
3707 sys.exit(1)
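# Example invocations (sketch, matching the help text above; <SR_UUID> and
# <VDI_UUID> are placeholders):
#
#     cleanup.py -u <SR_UUID> -g -b                        # kick a background GC/coalesce
#     cleanup.py -u <SR_UUID> -q                           # query whether GC is running
#     cleanup.py -u <SR_UUID> -a                           # abort a running GC/coalesce
#     cleanup.py -u <SR_UUID> -t deflate -v <VDI_UUID>     # debug helper (never on a live VM)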
3710##############################################################################
3711#
3712# API
3713#
3714def abort(srUuid, soft=False):
3715 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair.
3716 """
3717 if _abort(srUuid, soft):
3718 Util.log("abort: releasing the process lock")
3719 lockGCActive.release()
3720 return True
3721 else:
3722 return False
3725def gc(session, srUuid, inBackground, dryRun=False):
3726 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return
3727 immediately if inBackground=True.
3729 The following algorithm is used:
3730 1. If we are already GC'ing in this SR, return
3731 2. If we are already coalescing a VDI pair:
3732 a. Scan the SR and determine if the VDI pair is GC'able
3733 b. If the pair is not GC'able, return
3734 c. If the pair is GC'able, abort coalesce
3735 3. Scan the SR
3736 4. If there is nothing to collect, nor to coalesce, return
3737 5. If there is something to collect, GC all, then goto 3
3738 6. If there is something to coalesce, coalesce one pair, then goto 3
3739 """
3740 Util.log("=== SR %s: gc ===" % srUuid)
3741 if inBackground:
3742        if daemonize():
3743 # we are now running in the background. Catch & log any errors
3744 # because there is no other way to propagate them back at this
3745 # point
3747 try:
3748 _gc(None, srUuid, dryRun)
3749 except AbortException:
3750 Util.log("Aborted")
3751 except Exception:
3752 Util.logException("gc")
3753 Util.log("* * * * * SR %s: ERROR\n" % srUuid)
3754 os._exit(0)
3755 else:
3756 _gc(session, srUuid, dryRun, immediate=True)
3759def start_gc(session, sr_uuid):
3760 """
3761 This function is used to try to start a backgrounded GC session by forking
3762 the current process. If using the systemd version, call start_gc_service() instead.
3763 """
3764    # Don't bother if an instance is already running (this is just an
3765    # optimization to reduce the overhead of forking a new process if we
3766    # don't have to; the process will check the lock anyway)
3767 lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid)
3768 if not lockRunning.acquireNoblock():
3769 if should_preempt(session, sr_uuid):
3770 util.SMlog("Aborting currently-running coalesce of garbage VDI")
3771 try:
3772 if not abort(sr_uuid, soft=True):
3773 util.SMlog("The GC has already been scheduled to re-start")
3774 except util.CommandException as e:
3775 if e.code != errno.ETIMEDOUT:
3776 raise
3777 util.SMlog('failed to abort the GC')
3778 else:
3779 util.SMlog("A GC instance already running, not kicking")
3780 return
3781 else:
3782 lockRunning.release()
3784 util.SMlog(f"Starting GC file is {__file__}")
3785 subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'],
3786 stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
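# Note (sketch): the subprocess call above re-invokes this script as
# "cleanup.py -b -u <sr_uuid> -g", i.e. the background GC path handled by
# main() and gc() below; the child daemonizes itself, so start_gc() returns
# almost immediately.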
3788def start_gc_service(sr_uuid, wait=False):
3789 """
3790 This starts the templated systemd service which runs GC on the given SR UUID.
3791 If the service was already started, this is a no-op.
3793 Because the service is a one-shot with RemainAfterExit=no, when called with
3794 wait=True this will run the service synchronously and will not return until the
3795 run has finished. This is used to force a run of the GC instead of just kicking it
3796 in the background.
3797 """
3798 sr_uuid_esc = sr_uuid.replace("-", "\\x2d")
3799 util.SMlog(f"Kicking SMGC@{sr_uuid}...")
3800    cmd = ["/usr/bin/systemctl", "--quiet"]
3801    if not wait:
3802 cmd.append("--no-block")
3803 cmd += ["start", f"SMGC@{sr_uuid_esc}"]
3804 subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
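# Example (sketch): for a hypothetical SR UUID "aaaa-bbbb" the replace() above
# yields the instance name "SMGC@aaaa\x2dbbbb" (matching systemd's escaping of
# "-" in template instance names), and the commands run are roughly:
#
#     systemctl --quiet --no-block start SMGC@aaaa\x2dbbbb    # wait=False (kick and return)
#     systemctl --quiet start SMGC@aaaa\x2dbbbb               # wait=True  (synchronous run)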
3807def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False):
3808 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure
3809 the SR lock is held.
3810 The following algorithm is used:
3811 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce
3812 2. Scan the SR
3813 3. GC
3814 4. return
3815 """
3816 Util.log("=== SR %s: gc_force ===" % srUuid)
3817 init(srUuid)
3818 sr = SR.getInstance(srUuid, session, lockSR, True)
3819 if not lockGCActive.acquireNoblock():
3820 abort(srUuid)
3821 else:
3822 Util.log("Nothing was running, clear to proceed")
3824 if force:
3825 Util.log("FORCED: will continue even if there are VHD errors")
3826 sr.scanLocked(force)
3827 sr.cleanupCoalesceJournals()
3829 try:
3830 sr.cleanupCache()
3831 sr.garbageCollect(dryRun)
3832 finally:
3833 sr.cleanup()
3834 sr.logFilter.logState()
3835 lockGCActive.release()
3838def get_state(srUuid):
3839 """Return whether GC/coalesce is currently running or not. This asks systemd for
3840 the state of the templated SMGC service and will return True if it is "activating"
3841    or "running" (for completeness, as in practice it will never achieve the latter state).
3842    """
3842 """
3843 sr_uuid_esc = srUuid.replace("-", "\\x2d")
3844    cmd = ["/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"]
3845 result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
3846 state = result.stdout.decode('utf-8').rstrip()
3847 if state == "activating" or state == "running":
3848 return True
3849 return False
3852def should_preempt(session, srUuid):
3853 sr = SR.getInstance(srUuid, session)
3854 entries = sr.journaler.getAll(VDI.JRN_COALESCE)
3855 if len(entries) == 0:
3856 return False
3857 elif len(entries) > 1:
3858 raise util.SMException("More than one coalesce entry: " + str(entries))
3859 sr.scanLocked()
3860 coalescedUuid = entries.popitem()[0]
3861 garbage = sr.findGarbage()
3862 for vdi in garbage:
3863 if vdi.uuid == coalescedUuid:
3864 return True
3865 return False
3868def get_coalesceable_leaves(session, srUuid, vdiUuids):
3869 coalesceable = []
3870 sr = SR.getInstance(srUuid, session)
3871 sr.scanLocked()
3872 for uuid in vdiUuids:
3873 vdi = sr.getVDI(uuid)
3874 if not vdi:
3875 raise util.SMException("VDI %s not found" % uuid)
3876 if vdi.isLeafCoalesceable():
3877 coalesceable.append(uuid)
3878 return coalesceable
3881def cache_cleanup(session, srUuid, maxAge):
3882 sr = SR.getInstance(srUuid, session)
3883 return sr.cleanupCache(maxAge)
3886def debug(sr_uuid, cmd, vdi_uuid):
3887 Util.log("Debug command: %s" % cmd)
3888 sr = SR.getInstance(sr_uuid, None)
3889 if not isinstance(sr, LVHDSR):
3890 print("Error: not an LVHD SR")
3891 return
3892 sr.scanLocked()
3893 vdi = sr.getVDI(vdi_uuid)
3894 if not vdi:
3895        print("Error: VDI %s not found" % vdi_uuid)
3896 return
3897 print("Running %s on SR %s" % (cmd, sr))
3898 print("VDI before: %s" % vdi)
3899 if cmd == "activate":
3900 vdi._activate()
3901 print("VDI file: %s" % vdi.path)
3902 if cmd == "deactivate":
3903 ns = lvhdutil.NS_PREFIX_LVM + sr.uuid
3904 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False)
3905 if cmd == "inflate":
3906 vdi.inflateFully()
3907 sr.cleanup()
3908 if cmd == "deflate":
3909 vdi.deflate()
3910 sr.cleanup()
3911 sr.scanLocked()
3912 print("VDI after: %s" % vdi)
3915def abort_optional_reenable(uuid):
3916 print("Disabling GC/coalesce for %s" % uuid)
3917 ret = _abort(uuid)
3918 input("Press enter to re-enable...")
3919 print("GC/coalesce re-enabled")
3920 lockGCRunning.release()
3921 if ret:
3922 lockGCActive.release()
3925##############################################################################
3926#
3927# CLI
3928#
3929def main():
3930 action = ""
3931 uuid = ""
3932 background = False
3933 force = False
3934 dryRun = False
3935 debug_cmd = ""
3936 vdi_uuid = ""
3937 shortArgs = "gGc:aqxu:bfdt:v:"
3938 longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable",
3939 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="]
3941 try:
3942 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs)
3943 except getopt.GetoptError:
3944 usage()
3945 for o, a in opts:
3946 if o in ("-g", "--gc"):
3947 action = "gc"
3948 if o in ("-G", "--gc_force"):
3949 action = "gc_force"
3950 if o in ("-c", "--clean_cache"):
3951 action = "clean_cache"
3952 maxAge = int(a)
3953 if o in ("-a", "--abort"):
3954 action = "abort"
3955 if o in ("-q", "--query"):
3956 action = "query"
3957 if o in ("-x", "--disable"):
3958 action = "disable"
3959 if o in ("-u", "--uuid"):
3960 uuid = a
3961 if o in ("-b", "--background"):
3962 background = True
3963 if o in ("-f", "--force"):
3964 force = True
3965 if o in ("-d", "--dry-run"):
3966 Util.log("Dry run mode")
3967 dryRun = True
3968 if o in ("-t", "--debug"):
3969 action = "debug"
3970 debug_cmd = a
3971 if o in ("-v", "--vdi_uuid"):
3972 vdi_uuid = a
3974 if not action or not uuid:
3975 usage()
3976 if action == "debug" and not (debug_cmd and vdi_uuid) or \
3977 action != "debug" and (debug_cmd or vdi_uuid):
3978 usage()
3980 if action != "query" and action != "debug":
3981 print("All output goes to log")
3983 if action == "gc":
3984 gc(None, uuid, background, dryRun)
3985 elif action == "gc_force":
3986 gc_force(None, uuid, force, dryRun, True)
3987 elif action == "clean_cache":
3988 cache_cleanup(None, uuid, maxAge)
3989 elif action == "abort":
3990 abort(uuid)
3991 elif action == "query":
3992 print("Currently running: %s" % get_state(uuid))
3993 elif action == "disable":
3994 abort_optional_reenable(uuid)
3995 elif action == "debug":
3996 debug(uuid, debug_cmd, vdi_uuid)
3999if __name__ == '__main__':
4000 main()