Coverage for drivers/LinstorSR.py : 0%

1#!/usr/bin/env python3
2#
3# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr
4#
5# This program is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation, either version 3 of the License, or
8# (at your option) any later version.
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
17from constants import CBTLOG_TAG
19try:
20 from linstorjournaler import LinstorJournaler
21 from linstorvhdutil import LinstorVhdUtil
22 from linstorvolumemanager import get_controller_uri
23 from linstorvolumemanager import get_controller_node_name
24 from linstorvolumemanager import LinstorVolumeManager
25 from linstorvolumemanager import LinstorVolumeManagerError
27 LINSTOR_AVAILABLE = True
28except ImportError:
29 LINSTOR_AVAILABLE = False
31from lock import Lock
32import blktap2
33import cleanup
34import distutils.util
35import errno
36import functools
37import lvutil
38import os
39import re
40import scsiutil
41import signal
42import socket
43import SR
44import SRCommand
45import subprocess
46import time
47import traceback
48import util
49import VDI
50import vhdutil
51import xml.etree.ElementTree as xml_parser
52import xmlrpc.client
53import xs_errors
55from srmetadata import \
56 NAME_LABEL_TAG, NAME_DESCRIPTION_TAG, IS_A_SNAPSHOT_TAG, SNAPSHOT_OF_TAG, \
57 TYPE_TAG, VDI_TYPE_TAG, READ_ONLY_TAG, SNAPSHOT_TIME_TAG, \
58 METADATA_OF_POOL_TAG
60HIDDEN_TAG = 'hidden'
62XHA_CONFIG_PATH = '/etc/xensource/xhad.conf'
64FORK_LOG_DAEMON = '/opt/xensource/libexec/fork-log-daemon'
66# This flag can be disabled to debug the DRBD layer.
67# When this config var is False, the HA can only be used under
68# specific conditions:
69# - Only one heartbeat diskless VDI is present in the pool.
70# - The other heartbeat volumes must be diskful and limited to a maximum of 3.
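# When this flag is True, VDIs attached from config with a '/dev/http-nbd/'
# path (typically the HA statefile and redo-log volumes) are handled by
# _attach_using_http_nbd instead of a plain DRBD device; see the checks
# further down in this file.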
71USE_HTTP_NBD_SERVERS = True
73# Useful flag to trace calls using cProfile.
74TRACE_PERFS = False
76# Enable/Disable VHD key hash support.
77USE_KEY_HASH = False
79# ==============================================================================
81# TODO: Support 'VDI_INTRODUCE', 'VDI_RESET_ON_BOOT/2', 'SR_TRIM',
82# 'VDI_CONFIG_CBT', 'SR_PROBE'
84CAPABILITIES = [
85 'ATOMIC_PAUSE',
86 'SR_UPDATE',
87 'VDI_CREATE',
88 'VDI_DELETE',
89 'VDI_UPDATE',
90 'VDI_ATTACH',
91 'VDI_DETACH',
92 'VDI_ACTIVATE',
93 'VDI_DEACTIVATE',
94 'VDI_CLONE',
95 'VDI_MIRROR',
96 'VDI_RESIZE',
97 'VDI_SNAPSHOT',
98 'VDI_GENERATE_CONFIG'
99]
101CONFIGURATION = [
102 ['group-name', 'LVM group name'],
103 ['redundancy', 'replication count'],
104 ['provisioning', '"thin" or "thick" are accepted (optional, defaults to thin)'],
105 ['monitor-db-quorum', 'disable controller when only one host is online (optional, defaults to true)']
106]
108DRIVER_INFO = {
109 'name': 'LINSTOR resources on XCP-ng',
110 'description': 'SR plugin which uses Linstor to manage VDIs',
111 'vendor': 'Vates',
112 'copyright': '(C) 2020 Vates',
113 'driver_version': '1.0',
114 'required_api_version': '1.0',
115 'capabilities': CAPABILITIES,
116 'configuration': CONFIGURATION
117}
119DRIVER_CONFIG = {'ATTACH_FROM_CONFIG_WITH_TAPDISK': False}
121OPS_EXCLUSIVE = [
122 'sr_create', 'sr_delete', 'sr_attach', 'sr_detach', 'sr_scan',
123 'sr_update', 'sr_probe', 'vdi_init', 'vdi_create', 'vdi_delete',
124 'vdi_attach', 'vdi_detach', 'vdi_clone', 'vdi_snapshot',
125]
127# ==============================================================================
128# Misc helpers used by LinstorSR and linstor-thin plugin.
129# ==============================================================================
132def compute_volume_size(virtual_size, image_type):
133 if image_type == vhdutil.VDI_TYPE_VHD:
134 # All LINSTOR VDIs have the metadata area preallocated for
135 # the maximum possible virtual size (for fast online VDI.resize).
136 meta_overhead = vhdutil.calcOverheadEmpty(LinstorVDI.MAX_SIZE)
137 bitmap_overhead = vhdutil.calcOverheadBitmap(virtual_size)
138 virtual_size += meta_overhead + bitmap_overhead
139 elif image_type != vhdutil.VDI_TYPE_RAW:
140 raise Exception('Invalid image type: {}'.format(image_type))
142 return LinstorVolumeManager.round_up_volume_size(virtual_size)
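# Note: for VHD VDIs, compute_volume_size() adds the BAT/metadata overhead
# (preallocated for MAX_SIZE) and the block bitmaps to the requested virtual
# size, then rounds the total up with round_up_volume_size(); RAW VDIs are
# only rounded up.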
145def attach_thin(session, journaler, linstor, sr_uuid, vdi_uuid):
146 volume_metadata = linstor.get_volume_metadata(vdi_uuid)
147 image_type = volume_metadata.get(VDI_TYPE_TAG)
148 if image_type == vhdutil.VDI_TYPE_RAW:
149 return
151 lock = Lock(vhdutil.LOCK_TYPE_SR, sr_uuid)
152 try:
153 lock.acquire()
155 device_path = linstor.get_device_path(vdi_uuid)
157 # If the virtual VHD size is lower than the LINSTOR volume size,
158 # there is nothing to do.
159 vhd_size = compute_volume_size(
160 # TODO: Replace pylint comment with this feature when possible:
161 # https://github.com/PyCQA/pylint/pull/2926
162 LinstorVhdUtil(session, linstor).get_size_virt(vdi_uuid), # pylint: disable = E1120
163 image_type
164 )
166 volume_info = linstor.get_volume_info(vdi_uuid)
167 volume_size = volume_info.virtual_size
169 if vhd_size > volume_size:
170 inflate(
171 journaler, linstor, vdi_uuid, device_path,
172 vhd_size, volume_size
173 )
174 finally:
175 lock.release()
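# attach_thin (above) and detach_thin (below) are the helpers shared with the
# linstor-thin plugin (see the section comment above): the volume is inflated
# up to its full VHD size on attach and deflated back to its physical usage
# on detach.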
178def detach_thin_impl(session, linstor, sr_uuid, vdi_uuid):
179 volume_metadata = linstor.get_volume_metadata(vdi_uuid)
180 image_type = volume_metadata.get(VDI_TYPE_TAG)
181 if image_type == vhdutil.VDI_TYPE_RAW:
182 return
184 lock = Lock(vhdutil.LOCK_TYPE_SR, sr_uuid)
185 try:
186 lock.acquire()
188 def check_vbd_count():
189 vdi_ref = session.xenapi.VDI.get_by_uuid(vdi_uuid)
190 vbds = session.xenapi.VBD.get_all_records_where(
191 'field "VDI" = "{}"'.format(vdi_ref)
192 )
194 num_plugged = 0
195 for vbd_rec in vbds.values():
196 if vbd_rec['currently_attached']:
197 num_plugged += 1
198 if num_plugged > 1:
199 raise xs_errors.XenError(
200 'VDIUnavailable',
201 opterr='Cannot deflate VDI {}, already used by '
202 'at least 2 VBDs'.format(vdi_uuid)
203 )
205 # We can have multiple VBDs attached to a VDI during a VM-template clone.
206 # So we use a timeout to ensure that we can detach the volume properly.
207 util.retry(check_vbd_count, maxretry=10, period=1)
209 device_path = linstor.get_device_path(vdi_uuid)
210 new_volume_size = LinstorVolumeManager.round_up_volume_size(
211 # TODO: Replace pylint comment with this feature when possible:
212 # https://github.com/PyCQA/pylint/pull/2926
213 LinstorVhdUtil(session, linstor).get_size_phys(vdi_uuid) # pylint: disable = E1120
214 )
216 volume_info = linstor.get_volume_info(vdi_uuid)
217 old_volume_size = volume_info.virtual_size
218 deflate(
219 linstor, vdi_uuid, device_path, new_volume_size, old_volume_size
220 )
221 finally:
222 lock.release()
225def detach_thin(session, linstor, sr_uuid, vdi_uuid):
226 # This function must always return without errors.
227 # Otherwise it could cause errors in the XAPI regarding the state of the VDI.
228 # That's why we use this `try` block.
229 try:
230 detach_thin_impl(session, linstor, sr_uuid, vdi_uuid)
231 except Exception as e:
232 util.SMlog('Failed to properly detach VDI {}: {}'.format(vdi_uuid, e))
235def inflate(journaler, linstor, vdi_uuid, vdi_path, new_size, old_size):
236 # Only inflate if the LINSTOR volume capacity is not enough.
237 new_size = LinstorVolumeManager.round_up_volume_size(new_size)
238 if new_size <= old_size:
239 return
241 util.SMlog(
242 'Inflate {} (size={}, previous={})'
243 .format(vdi_uuid, new_size, old_size)
244 )
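# Record the old size in an INFLATE journal entry: if the inflate is
# interrupted, _handle_interrupted_inflate() uses this value to roll the
# volume back to its previous size.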
246 journaler.create(
247 LinstorJournaler.INFLATE, vdi_uuid, old_size
248 )
249 linstor.resize_volume(vdi_uuid, new_size)
251 result_size = linstor.get_volume_size(vdi_uuid)
252 if result_size < new_size:
253 util.SMlog(
254 'WARNING: Cannot inflate volume to {}B, result size: {}B'
255 .format(new_size, result_size)
256 )
258 if not util.zeroOut(
259 vdi_path, result_size - vhdutil.VHD_FOOTER_SIZE,
260 vhdutil.VHD_FOOTER_SIZE
261 ):
262 raise xs_errors.XenError(
263 'EIO',
264 opterr='Failed to zero out VHD footer {}'.format(vdi_path)
265 )
267 LinstorVhdUtil(None, linstor).set_size_phys(vdi_path, result_size, False)
268 journaler.remove(LinstorJournaler.INFLATE, vdi_uuid)
271def deflate(linstor, vdi_uuid, vdi_path, new_size, old_size):
272 new_size = LinstorVolumeManager.round_up_volume_size(new_size)
273 if new_size >= old_size:
274 return
276 util.SMlog(
277 'Deflate {} (new size={}, previous={})'
278 .format(vdi_uuid, new_size, old_size)
279 )
281 LinstorVhdUtil(None, linstor).set_size_phys(vdi_path, new_size)
282 # TODO: Change the LINSTOR volume size using linstor.resize_volume.
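# Until then, only the VHD physical size is reduced here; the underlying
# LINSTOR volume keeps its current size.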
285IPS_XHA_CACHE = None
288def get_ips_from_xha_config_file():
289 if IPS_XHA_CACHE:
290 return IPS_XHA_CACHE
292 ips = dict()
293 host_id = None
294 try:
295 # Ensure there is no dirty read problem.
296 # For example, if the HA configuration is reloaded.
297 tree = util.retry(
298 lambda: xml_parser.parse(XHA_CONFIG_PATH),
299 maxretry=10,
300 period=1
301 )
302 except:
303 return (None, ips)
305 def parse_host_nodes(ips, node):
306 current_id = None
307 current_ip = None
309 for sub_node in node:
310 if sub_node.tag == 'IPaddress':
311 current_ip = sub_node.text
312 elif sub_node.tag == 'HostID':
313 current_id = sub_node.text
314 else:
315 continue
317 if current_id and current_ip:
318 ips[current_id] = current_ip
319 return
320 util.SMlog('Ill-formed XHA file, missing IPaddress and/or HostID')
322 def parse_common_config(ips, node):
323 for sub_node in node:
324 if sub_node.tag == 'host':
325 parse_host_nodes(ips, sub_node)
327 def parse_local_config(ips, node):
328 for sub_node in node:
329 if sub_node.tag == 'localhost':
330 for host_node in sub_node:
331 if host_node.tag == 'HostID':
332 return host_node.text
334 for node in tree.getroot():
335 if node.tag == 'common-config':
336 parse_common_config(ips, node)
337 elif node.tag == 'local-config':
338 host_id = parse_local_config(ips, node)
339 else:
340 continue
342 if ips and host_id:
343 break
345 return (host_id and ips.get(host_id), ips)
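# The parser above expects roughly the following xhad.conf layout (sketch
# inferred from the parsing code; additional elements are simply ignored):
#   <common-config>
#     <host> <HostID>...</HostID> <IPaddress>...</IPaddress> </host>
#     ...
#   </common-config>
#   <local-config>
#     <localhost> <HostID>...</HostID> </localhost>
#   </local-config>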
348def activate_lvm_group(group_name):
349 path = group_name.split('/')
350 assert path and len(path) <= 2
351 try:
352 lvutil.setActiveVG(path[0], True)
353 except Exception as e:
354 util.SMlog('Cannot activate VG `{}`: {}'.format(path[0], e))
356# ==============================================================================
358# Usage example:
359# xe sr-create type=linstor name-label=linstor-sr
360# host-uuid=d2deba7a-c5ad-4de1-9a20-5c8df3343e93
361# device-config:group-name=vg_loop device-config:redundancy=2
364class LinstorSR(SR.SR):
365 DRIVER_TYPE = 'linstor'
367 PROVISIONING_TYPES = ['thin', 'thick']
368 PROVISIONING_DEFAULT = 'thin'
370 MANAGER_PLUGIN = 'linstor-manager'
372 INIT_STATUS_NOT_SET = 0
373 INIT_STATUS_IN_PROGRESS = 1
374 INIT_STATUS_OK = 2
375 INIT_STATUS_FAIL = 3
377 # --------------------------------------------------------------------------
378 # SR methods.
379 # --------------------------------------------------------------------------
381 @staticmethod
382 def handles(type):
383 return type == LinstorSR.DRIVER_TYPE
385 def load(self, sr_uuid):
386 if not LINSTOR_AVAILABLE:
387 raise util.SMException(
388 'Can\'t load LinstorSR: LINSTOR libraries are missing'
389 )
391 # Check parameters.
392 if 'group-name' not in self.dconf or not self.dconf['group-name']:
393 raise xs_errors.XenError('LinstorConfigGroupNameMissing')
394 if 'redundancy' not in self.dconf or not self.dconf['redundancy']:
395 raise xs_errors.XenError('LinstorConfigRedundancyMissing')
397 self.driver_config = DRIVER_CONFIG
399 # Check provisioning config.
400 provisioning = self.dconf.get('provisioning')
401 if provisioning:
402 if provisioning in self.PROVISIONING_TYPES:
403 self._provisioning = provisioning
404 else:
405 raise xs_errors.XenError(
406 'InvalidArg',
407 opterr='Provisioning parameter must be one of {}'.format(
408 self.PROVISIONING_TYPES
409 )
410 )
411 else:
412 self._provisioning = self.PROVISIONING_DEFAULT
414 monitor_db_quorum = self.dconf.get('monitor-db-quorum')
415 self._monitor_db_quorum = (monitor_db_quorum is None) or \
416 distutils.util.strtobool(monitor_db_quorum)
418 # Note: We don't have access to the session field if the
419 # 'vdi_attach_from_config' command is executed.
420 self._has_session = self.sr_ref and self.session is not None
421 if self._has_session:
422 self.sm_config = self.session.xenapi.SR.get_sm_config(self.sr_ref)
423 else:
424 self.sm_config = self.srcmd.params.get('sr_sm_config') or {}
426 provisioning = self.sm_config.get('provisioning')
427 if provisioning in self.PROVISIONING_TYPES:
428 self._provisioning = provisioning
430 # Define properties for SR parent class.
431 self.ops_exclusive = OPS_EXCLUSIVE
432 self.path = LinstorVolumeManager.DEV_ROOT_PATH
433 self.lock = Lock(vhdutil.LOCK_TYPE_SR, self.uuid)
434 self.sr_vditype = SR.DEFAULT_TAP
436 if self.cmd == 'sr_create':
437 self._redundancy = int(self.dconf['redundancy']) or 1
438 self._linstor = None # Ensure that LINSTOR attribute exists.
439 self._journaler = None
441 self._is_master = False
442 if 'SRmaster' in self.dconf and self.dconf['SRmaster'] == 'true':
443 self._is_master = True
444 self._group_name = self.dconf['group-name']
446 self._vdi_shared_time = 0
448 self._init_status = self.INIT_STATUS_NOT_SET
450 self._vdis_loaded = False
451 self._all_volume_info_cache = None
452 self._all_volume_metadata_cache = None
454 def _locked_load(method):
455 def wrapped_method(self, *args, **kwargs):
456 self._init_status = self.INIT_STATUS_OK
457 return method(self, *args, **kwargs)
459 def load(self, *args, **kwargs):
460 # Activate all LVMs to make drbd-reactor happy.
461 if self.srcmd.cmd == 'sr_attach':
462 activate_lvm_group(self._group_name)
464 if not self._has_session:
465 if self.srcmd.cmd in (
466 'vdi_attach_from_config',
467 'vdi_detach_from_config',
468 # When on-slave (is_open) is executed we have an
469 # empty command.
470 None
471 ):
472 def create_linstor(uri, attempt_count=30):
473 self._linstor = LinstorVolumeManager(
474 uri,
475 self._group_name,
476 logger=util.SMlog,
477 attempt_count=attempt_count
478 )
479 # Only required if we are attaching from config using a non-special VDI.
480 # I.e. not an HA volume.
481 self._vhdutil = LinstorVhdUtil(self.session, self._linstor)
483 controller_uri = get_controller_uri()
484 if controller_uri:
485 create_linstor(controller_uri)
486 else:
487 def connect():
488 # We must have a valid LINSTOR instance here without using
489 # the XAPI. Fall back to the HA config file.
490 for ip in get_ips_from_xha_config_file()[1].values():
491 controller_uri = 'linstor://' + ip
492 try:
493 util.SMlog('Connecting from config to LINSTOR controller using: {}'.format(ip))
494 create_linstor(controller_uri, attempt_count=0)
495 return controller_uri
496 except:
497 pass
499 controller_uri = util.retry(connect, maxretry=30, period=1)
500 if not controller_uri:
501 raise xs_errors.XenError(
502 'SRUnavailable',
503 opterr='No valid controller URI to attach/detach from config'
504 )
506 self._journaler = LinstorJournaler(
507 controller_uri, self._group_name, logger=util.SMlog
508 )
510 if self.srcmd.cmd is None:
511 # Only useful for the on-slave plugin (is_open).
512 self._vhdutil = LinstorVhdUtil(self.session, self._linstor)
514 return wrapped_method(self, *args, **kwargs)
516 if not self._is_master:
517 if self.cmd in [
518 'sr_create', 'sr_delete', 'sr_update', 'sr_probe',
519 'sr_scan', 'vdi_create', 'vdi_delete', 'vdi_resize',
520 'vdi_snapshot', 'vdi_clone'
521 ]:
522 util.SMlog('{} blocked for non-master'.format(self.cmd))
523 raise xs_errors.XenError('LinstorMaster')
525 # Because the LINSTOR KV objects cache all values, we must lock
526 # the VDI before the LinstorJournaler/LinstorVolumeManager
527 # instantiation and before any action on the master to avoid a
528 # bad read. The lock is also necessary to avoid strange
529 # behaviors if the GC is executed during an action on a slave.
530 if self.cmd.startswith('vdi_'):
531 self._shared_lock_vdi(self.srcmd.params['vdi_uuid'])
532 self._vdi_shared_time = time.time()
534 if self.srcmd.cmd != 'sr_create' and self.srcmd.cmd != 'sr_detach':
535 try:
536 self._reconnect()
537 except Exception as e:
538 raise xs_errors.XenError('SRUnavailable', opterr=str(e))
540 if self._linstor:
541 try:
542 hosts = self._linstor.disconnected_hosts
543 except Exception as e:
544 raise xs_errors.XenError('SRUnavailable', opterr=str(e))
546 if hosts:
547 util.SMlog('Failed to join node(s): {}'.format(hosts))
549 # Ensure we use a non-locked volume when vhdutil is called.
550 if (
551 self._is_master and self.cmd.startswith('vdi_') and
552 self.cmd != 'vdi_create'
553 ):
554 self._linstor.ensure_volume_is_not_locked(
555 self.srcmd.params['vdi_uuid']
556 )
558 try:
559 # If the command is an SR scan command on the master,
560 # we must load all VDIs and clean journal transactions.
561 # In the snapshot case, we must also load the VDIs, but only
562 # if there is at least one entry in the journal.
563 #
564 # If the command is an SR command, we want at least to remove
565 # resourceless volumes.
566 if self._is_master and self.cmd not in [
567 'vdi_attach', 'vdi_detach',
568 'vdi_activate', 'vdi_deactivate',
569 'vdi_epoch_begin', 'vdi_epoch_end',
570 'vdi_update', 'vdi_destroy'
571 ]:
572 load_vdis = (
573 self.cmd == 'sr_scan' or
574 self.cmd == 'sr_attach'
575 ) or len(
576 self._journaler.get_all(LinstorJournaler.INFLATE)
577 ) or len(
578 self._journaler.get_all(LinstorJournaler.CLONE)
579 )
581 if load_vdis:
582 self._load_vdis()
584 self._linstor.remove_resourceless_volumes()
586 self._synchronize_metadata()
587 except Exception as e:
588 if self.cmd == 'sr_scan' or self.cmd == 'sr_attach':
589 # Always raise, we don't want to remove VDIs
590 # from the XAPI database otherwise.
591 raise e
592 util.SMlog(
593 'Ignoring exception in LinstorSR.load: {}'.format(e)
594 )
595 util.SMlog(traceback.format_exc())
597 return wrapped_method(self, *args, **kwargs)
599 @functools.wraps(wrapped_method)
600 def wrap(self, *args, **kwargs):
601 if self._init_status in \
602 (self.INIT_STATUS_OK, self.INIT_STATUS_IN_PROGRESS):
603 return wrapped_method(self, *args, **kwargs)
604 if self._init_status == self.INIT_STATUS_FAIL:
605 util.SMlog(
606 'Can\'t call method {} because initialization failed'
607 .format(method)
608 )
609 else:
610 try:
611 self._init_status = self.INIT_STATUS_IN_PROGRESS
612 return load(self, *args, **kwargs)
613 except Exception:
614 if self._init_status != self.INIT_STATUS_OK:
615 self._init_status = self.INIT_STATUS_FAIL
616 raise
618 return wrap
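# In short, _locked_load turns an SR method into a lazily-initialized one:
# the first decorated call executes `load` (LINSTOR connection, shared VDI
# lock, journal handling), later calls reuse that state, and once the
# initialization has failed the method is skipped with a log message.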
620 def cleanup(self):
621 if self._vdi_shared_time:
622 self._shared_lock_vdi(self.srcmd.params['vdi_uuid'], locked=False)
624 @_locked_load
625 def create(self, uuid, size):
626 util.SMlog('LinstorSR.create for {}'.format(self.uuid))
628 host_addresses = util.get_host_addresses(self.session)
629 if self._redundancy > len(host_addresses):
630 raise xs_errors.XenError(
631 'LinstorSRCreate',
632 opterr='Redundancy greater than host count'
633 )
635 xenapi = self.session.xenapi
636 srs = xenapi.SR.get_all_records_where(
637 'field "type" = "{}"'.format(self.DRIVER_TYPE)
638 )
639 srs = dict([e for e in srs.items() if e[1]['uuid'] != self.uuid])
641 for sr in srs.values():
642 for pbd in sr['PBDs']:
643 device_config = xenapi.PBD.get_device_config(pbd)
644 group_name = device_config.get('group-name')
645 if group_name and group_name == self._group_name:
646 raise xs_errors.XenError(
647 'LinstorSRCreate',
648 opterr='group name must be unique'
649 )
651 if srs:
652 raise xs_errors.XenError(
653 'LinstorSRCreate',
654 opterr='LINSTOR SR must be unique in a pool'
655 )
657 online_hosts = util.get_online_hosts(self.session)
658 if len(online_hosts) < len(host_addresses):
659 raise xs_errors.XenError(
660 'LinstorSRCreate',
661 opterr='Not enough online hosts'
662 )
664 ips = {}
665 for host_ref in online_hosts:
666 record = self.session.xenapi.host.get_record(host_ref)
667 hostname = record['hostname']
668 ips[hostname] = record['address']
670 if len(ips) != len(online_hosts):
671 raise xs_errors.XenError(
672 'LinstorSRCreate',
673 opterr='Multiple hosts with same hostname'
674 )
676 # Ensure ports are opened and LINSTOR satellites
677 # are activated. At the same time, the drbd-reactor instances
678 # must be stopped.
679 self._prepare_sr_on_all_hosts(self._group_name, enabled=True)
681 # Create SR.
682 # Throw if the SR already exists.
683 try:
684 self._linstor = LinstorVolumeManager.create_sr(
685 self._group_name,
686 ips,
687 self._redundancy,
688 thin_provisioning=self._provisioning == 'thin',
689 auto_quorum=self._monitor_db_quorum,
690 logger=util.SMlog
691 )
692 self._vhdutil = LinstorVhdUtil(self.session, self._linstor)
693 except Exception as e:
694 util.SMlog('Failed to create LINSTOR SR: {}'.format(e))
695 raise xs_errors.XenError('LinstorSRCreate', opterr=str(e))
697 try:
698 util.SMlog(
699 "Finishing SR creation, enable drbd-reactor on all hosts..."
700 )
701 self._update_drbd_reactor_on_all_hosts(enabled=True)
702 except Exception as e:
703 try:
704 self._linstor.destroy()
705 except Exception as e2:
706 util.SMlog(
707 'Failed to destroy LINSTOR SR after creation fail: {}'
708 .format(e2)
709 )
710 raise e
712 @_locked_load
713 def delete(self, uuid):
714 util.SMlog('LinstorSR.delete for {}'.format(self.uuid))
715 cleanup.gc_force(self.session, self.uuid)
717 if self.vdis or self._linstor._volumes:
718 raise xs_errors.XenError('SRNotEmpty')
720 node_name = get_controller_node_name()
721 if not node_name:
722 raise xs_errors.XenError(
723 'LinstorSRDelete',
724 opterr='Cannot get controller node name'
725 )
727 host = None
728 if node_name == 'localhost':
729 host = util.get_this_host_ref(self.session)
730 else:
731 for slave in util.get_all_slaves(self.session):
732 r_name = self.session.xenapi.host.get_record(slave)['hostname']
733 if r_name == node_name:
734 host = slave
735 break
737 if not host:
738 raise xs_errors.XenError(
739 'LinstorSRDelete',
740 opterr='Failed to find host with hostname: {}'.format(
741 node_name
742 )
743 )
745 try:
746 self._update_drbd_reactor_on_all_hosts(
747 controller_node_name=node_name, enabled=False
748 )
750 args = {
751 'groupName': self._group_name,
752 }
753 self._exec_manager_command(
754 host, 'destroy', args, 'LinstorSRDelete'
755 )
756 except Exception as e:
757 try:
758 self._update_drbd_reactor_on_all_hosts(
759 controller_node_name=node_name, enabled=True
760 )
761 except Exception as e2:
762 util.SMlog(
763 'Failed to restart drbd-reactor after destroy fail: {}'
764 .format(e2)
765 )
766 util.SMlog('Failed to delete LINSTOR SR: {}'.format(e))
767 raise xs_errors.XenError(
768 'LinstorSRDelete',
769 opterr=str(e)
770 )
772 Lock.cleanupAll(self.uuid)
774 @_locked_load
775 def update(self, uuid):
776 util.SMlog('LinstorSR.update for {}'.format(self.uuid))
778 # Well, how can we update a SR if it doesn't exist? :thinking:
779 if not self._linstor:
780 raise xs_errors.XenError(
781 'SRUnavailable',
782 opterr='no such volume group: {}'.format(self._group_name)
783 )
785 self._update_stats(0)
787 # Update the SR name and description only in LINSTOR metadata.
788 xenapi = self.session.xenapi
789 self._linstor.metadata = {
790 NAME_LABEL_TAG: util.to_plain_string(
791 xenapi.SR.get_name_label(self.sr_ref)
792 ),
793 NAME_DESCRIPTION_TAG: util.to_plain_string(
794 xenapi.SR.get_name_description(self.sr_ref)
795 )
796 }
798 @_locked_load
799 def attach(self, uuid):
800 util.SMlog('LinstorSR.attach for {}'.format(self.uuid))
802 if not self._linstor:
803 raise xs_errors.XenError(
804 'SRUnavailable',
805 opterr='no such group: {}'.format(self._group_name)
806 )
808 @_locked_load
809 def detach(self, uuid):
810 util.SMlog('LinstorSR.detach for {}'.format(self.uuid))
811 cleanup.abort(self.uuid)
813 @_locked_load
814 def probe(self):
815 util.SMlog('LinstorSR.probe for {}'.format(self.uuid))
816 # TODO
818 @_locked_load
819 def scan(self, uuid):
820 if self._init_status == self.INIT_STATUS_FAIL:
821 return
823 util.SMlog('LinstorSR.scan for {}'.format(self.uuid))
824 if not self._linstor:
825 raise xs_errors.XenError(
826 'SRUnavailable',
827 opterr='no such volume group: {}'.format(self._group_name)
828 )
830 # Note: `scan` can be called outside this module, so ensure the VDIs
831 # are loaded.
832 self._load_vdis()
833 self._update_physical_size()
835 for vdi_uuid in list(self.vdis.keys()):
836 if self.vdis[vdi_uuid].deleted:
837 del self.vdis[vdi_uuid]
839 # Update the database before restarting the GC to avoid
840 # a bad sync in the GC process if new VDIs have been introduced.
841 ret = super(LinstorSR, self).scan(self.uuid)
842 self._kick_gc()
843 return ret
845 @_locked_load
846 def vdi(self, uuid):
847 return LinstorVDI(self, uuid)
849 _locked_load = staticmethod(_locked_load)
851 # --------------------------------------------------------------------------
852 # Lock.
853 # --------------------------------------------------------------------------
855 def _shared_lock_vdi(self, vdi_uuid, locked=True):
856 master = util.get_master_ref(self.session)
858 command = 'lockVdi'
859 args = {
860 'groupName': self._group_name,
861 'srUuid': self.uuid,
862 'vdiUuid': vdi_uuid,
863 'locked': str(locked)
864 }
866 # Note: We must avoid unlocking the volume if the timeout is reached,
867 # because the SR lock is not held during volume unlock. Otherwise
868 # we could destroy a valid lock acquired from another host...
869 #
870 # This code is not very clean; the ideal solution would be to acquire
871 # the SR lock during volume unlock (as for lock), but it's not easy
872 # to implement without impacting performance.
873 if not locked:
874 elapsed_time = time.time() - self._vdi_shared_time
875 timeout = LinstorVolumeManager.LOCKED_EXPIRATION_DELAY * 0.7
876 if elapsed_time >= timeout:
877 util.SMlog(
878 'Avoid unlock call of {} because timeout has been reached'
879 .format(vdi_uuid)
880 )
881 return
883 self._exec_manager_command(master, command, args, 'VDIUnavailable')
885 # --------------------------------------------------------------------------
886 # Network.
887 # --------------------------------------------------------------------------
889 def _exec_manager_command(self, host_ref, command, args, error):
890 host_rec = self.session.xenapi.host.get_record(host_ref)
891 host_uuid = host_rec['uuid']
893 try:
894 ret = self.session.xenapi.host.call_plugin(
895 host_ref, self.MANAGER_PLUGIN, command, args
896 )
897 except Exception as e:
898 util.SMlog(
899 'call-plugin on {} ({}:{} with {}) raised'.format(
900 host_uuid, self.MANAGER_PLUGIN, command, args
901 )
902 )
903 raise e
905 util.SMlog(
906 'call-plugin on {} ({}:{} with {}) returned: {}'.format(
907 host_uuid, self.MANAGER_PLUGIN, command, args, ret
908 )
909 )
910 if ret == 'False':
911 raise xs_errors.XenError(
912 error,
913 opterr='Plugin {} failed'.format(self.MANAGER_PLUGIN)
914 )
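# Typical call, as used by _shared_lock_vdi above and _prepare_sr below:
#   self._exec_manager_command(
#       host_ref, 'prepareSr', {'groupName': group_name}, 'SRUnavailable'
#   )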
916 def _prepare_sr(self, host, group_name, enabled):
917 self._exec_manager_command(
918 host,
919 'prepareSr' if enabled else 'releaseSr',
920 {'groupName': group_name},
921 'SRUnavailable'
922 )
924 def _prepare_sr_on_all_hosts(self, group_name, enabled):
925 master = util.get_master_ref(self.session)
926 self._prepare_sr(master, group_name, enabled)
928 for slave in util.get_all_slaves(self.session):
929 self._prepare_sr(slave, group_name, enabled)
931 def _update_drbd_reactor(self, host, enabled):
932 self._exec_manager_command(
933 host,
934 'updateDrbdReactor',
935 {'enabled': str(enabled)},
936 'SRUnavailable'
937 )
939 def _update_drbd_reactor_on_all_hosts(
940 self, enabled, controller_node_name=None
941 ):
942 if controller_node_name == 'localhost':
943 controller_node_name = self.session.xenapi.host.get_record(
944 util.get_this_host_ref(self.session)
945 )['hostname']
946 assert controller_node_name
947 assert controller_node_name != 'localhost'
949 controller_host = None
950 secondary_hosts = []
952 hosts = self.session.xenapi.host.get_all_records()
953 for host_ref, host_rec in hosts.items():
954 hostname = host_rec['hostname']
955 if controller_node_name == hostname:
956 controller_host = host_ref
957 else:
958 secondary_hosts.append((host_ref, hostname))
960 action_name = 'Starting' if enabled else 'Stopping'
961 if controller_node_name and not controller_host:
962 util.SMlog('Failed to find controller host: `{}`'.format(
963 controller_node_name
964 ))
966 if enabled and controller_host:
967 util.SMlog('{} drbd-reactor on controller host `{}`...'.format(
968 action_name, controller_node_name
969 ))
970 # If enabled is true, we try to start the controller on the desired
971 # node name first.
972 self._update_drbd_reactor(controller_host, enabled)
974 for host_ref, hostname in secondary_hosts:
975 util.SMlog('{} drbd-reactor on host {}...'.format(
976 action_name, hostname
977 ))
978 self._update_drbd_reactor(host_ref, enabled)
980 if not enabled and controller_host:
981 util.SMlog('{} drbd-reactor on controller host `{}`...'.format(
982 action_name, controller_node_name
983 ))
984 # If enabled is false, we disable the drbd-reactor service of
985 # the controller host last. Why? Otherwise the linstor-controller
986 # of other nodes can be started, and we don't want that.
987 self._update_drbd_reactor(controller_host, enabled)
989 # --------------------------------------------------------------------------
990 # Metadata.
991 # --------------------------------------------------------------------------
993 def _synchronize_metadata_and_xapi(self):
994 try:
995 # First synch SR parameters.
996 self.update(self.uuid)
998 # Now update the VDI information in the metadata if required.
999 xenapi = self.session.xenapi
1000 volumes_metadata = self._linstor.get_volumes_with_metadata()
1001 for vdi_uuid, volume_metadata in volumes_metadata.items():
1002 try:
1003 vdi_ref = xenapi.VDI.get_by_uuid(vdi_uuid)
1004 except Exception:
1005 # Maybe the VDI is not in XAPI yet, don't bother.
1006 continue
1008 label = util.to_plain_string(
1009 xenapi.VDI.get_name_label(vdi_ref)
1010 )
1011 description = util.to_plain_string(
1012 xenapi.VDI.get_name_description(vdi_ref)
1013 )
1015 if (
1016 volume_metadata.get(NAME_LABEL_TAG) != label or
1017 volume_metadata.get(NAME_DESCRIPTION_TAG) != description
1018 ):
1019 self._linstor.update_volume_metadata(vdi_uuid, {
1020 NAME_LABEL_TAG: label,
1021 NAME_DESCRIPTION_TAG: description
1022 })
1023 except Exception as e:
1024 raise xs_errors.XenError(
1025 'MetadataError',
1026 opterr='Error syncing SR metadata and XAPI: {}'.format(e)
1027 )
1029 def _synchronize_metadata(self):
1030 if not self._is_master:
1031 return
1033 util.SMlog('Synchronize metadata...')
1034 if self.cmd == 'sr_attach':
1035 try:
1036 util.SMlog(
1037 'Synchronize SR metadata and the state on the storage.'
1038 )
1039 self._synchronize_metadata_and_xapi()
1040 except Exception as e:
1041 util.SMlog('Failed to synchronize metadata: {}'.format(e))
1043 # --------------------------------------------------------------------------
1044 # Stats.
1045 # --------------------------------------------------------------------------
1047 def _update_stats(self, virt_alloc_delta):
1048 valloc = int(self.session.xenapi.SR.get_virtual_allocation(
1049 self.sr_ref
1050 ))
1052 # Update size attributes of the SR parent class.
1053 self.virtual_allocation = valloc + virt_alloc_delta
1055 self._update_physical_size()
1057 # Notify SR parent class.
1058 self._db_update()
1060 def _update_physical_size(self):
1061 # We use the size of the smallest disk; this is an approximation that
1062 # ensures the displayed physical size is reachable by the user.
1063 (min_physical_size, pool_count) = self._linstor.get_min_physical_size()
1064 self.physical_size = min_physical_size * pool_count // \
1065 self._linstor.redundancy
1067 self.physical_utilisation = self._linstor.allocated_volume_size
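# Example: with 3 storage pools whose smallest disk is 1 TiB and a redundancy
# of 2, the reported physical size is 1 TiB * 3 // 2 = 1.5 TiB.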
1069 # --------------------------------------------------------------------------
1070 # VDIs.
1071 # --------------------------------------------------------------------------
1073 def _load_vdis(self):
1074 if self._vdis_loaded:
1075 return
1077 assert self._is_master
1079 # We use a cache to avoid repeated JSON parsing.
1080 # The performance gain is not big, but it comes
1081 # almost for free with a few lines of code.
1082 self._create_linstor_cache()
1083 self._load_vdis_ex()
1084 self._destroy_linstor_cache()
1086 # We must mark VDIs as loaded only if the load is a success.
1087 self._vdis_loaded = True
1089 self._undo_all_journal_transactions()
1091 def _load_vdis_ex(self):
1092 # 1. Get existing VDIs in XAPI.
1093 xenapi = self.session.xenapi
1094 xapi_vdi_uuids = set()
1095 for vdi in xenapi.SR.get_VDIs(self.sr_ref):
1096 xapi_vdi_uuids.add(xenapi.VDI.get_uuid(vdi))
1098 # 2. Get volumes info.
1099 all_volume_info = self._all_volume_info_cache
1100 volumes_metadata = self._all_volume_metadata_cache
1102 # 3. Get CBT vdis.
1103 # See: https://support.citrix.com/article/CTX230619
1104 cbt_vdis = set()
1105 for volume_metadata in volumes_metadata.values():
1106 cbt_uuid = volume_metadata.get(CBTLOG_TAG)
1107 if cbt_uuid:
1108 cbt_vdis.add(cbt_uuid)
1110 introduce = False
1112 # Try to introduce VDIs only during scan/attach.
1113 if self.cmd == 'sr_scan' or self.cmd == 'sr_attach':
1114 has_clone_entries = list(self._journaler.get_all(
1115 LinstorJournaler.CLONE
1116 ).items())
1118 if has_clone_entries:
1119 util.SMlog(
1120 'Cannot introduce VDIs during scan because CLONE entries '
1121 'exist in the journaler on SR {}'.format(self.uuid)
1122 )
1123 else:
1124 introduce = True
1126 # 4. Now check all volume info.
1127 vdi_to_snaps = {}
1128 for vdi_uuid, volume_info in all_volume_info.items():
1129 if vdi_uuid.startswith(cleanup.SR.TMP_RENAME_PREFIX):
1130 continue
1132 # 4.a. Check if the VDI in LINSTOR is in XAPI VDIs.
1133 if vdi_uuid not in xapi_vdi_uuids:
1134 if not introduce:
1135 continue
1137 if vdi_uuid.startswith('DELETED_'):
1138 continue
1140 volume_metadata = volumes_metadata.get(vdi_uuid)
1141 if not volume_metadata:
1142 util.SMlog(
1143 'Skipping volume {} because no metadata could be found'
1144 .format(vdi_uuid)
1145 )
1146 continue
1148 util.SMlog(
1149 'Trying to introduce VDI {} as it is present in '
1150 'LINSTOR and not in XAPI...'
1151 .format(vdi_uuid)
1152 )
1154 try:
1155 self._linstor.get_device_path(vdi_uuid)
1156 except Exception as e:
1157 util.SMlog(
1158 'Cannot introduce {}, unable to get path: {}'
1159 .format(vdi_uuid, e)
1160 )
1161 continue
1163 name_label = volume_metadata.get(NAME_LABEL_TAG) or ''
1164 type = volume_metadata.get(TYPE_TAG) or 'user'
1165 vdi_type = volume_metadata.get(VDI_TYPE_TAG)
1167 if not vdi_type:
1168 util.SMlog(
1169 'Cannot introduce {} '.format(vdi_uuid) +
1170 'without vdi_type'
1171 )
1172 continue
1174 sm_config = {
1175 'vdi_type': vdi_type
1176 }
1178 if vdi_type == vhdutil.VDI_TYPE_RAW:
1179 managed = not volume_metadata.get(HIDDEN_TAG)
1180 elif vdi_type == vhdutil.VDI_TYPE_VHD:
1181 vhd_info = self._vhdutil.get_vhd_info(vdi_uuid)
1182 managed = not vhd_info.hidden
1183 if vhd_info.parentUuid:
1184 sm_config['vhd-parent'] = vhd_info.parentUuid
1185 else:
1186 util.SMlog(
1187 'Cannot introduce {} with invalid VDI type {}'
1188 .format(vdi_uuid, vdi_type)
1189 )
1190 continue
1192 util.SMlog(
1193 'Introducing VDI {} '.format(vdi_uuid) +
1194 ' (name={}, virtual_size={}, allocated_size={})'.format(
1195 name_label,
1196 volume_info.virtual_size,
1197 volume_info.allocated_size
1198 )
1199 )
1201 vdi_ref = xenapi.VDI.db_introduce(
1202 vdi_uuid,
1203 name_label,
1204 volume_metadata.get(NAME_DESCRIPTION_TAG) or '',
1205 self.sr_ref,
1206 type,
1207 False, # sharable
1208 bool(volume_metadata.get(READ_ONLY_TAG)),
1209 {}, # other_config
1210 vdi_uuid, # location
1211 {}, # xenstore_data
1212 sm_config,
1213 managed,
1214 str(volume_info.virtual_size),
1215 str(volume_info.allocated_size)
1216 )
1218 is_a_snapshot = volume_metadata.get(IS_A_SNAPSHOT_TAG)
1219 xenapi.VDI.set_is_a_snapshot(vdi_ref, bool(is_a_snapshot))
1220 if is_a_snapshot:
1221 xenapi.VDI.set_snapshot_time(
1222 vdi_ref,
1223 xmlrpc.client.DateTime(
1224 volume_metadata[SNAPSHOT_TIME_TAG] or
1225 '19700101T00:00:00Z'
1226 )
1227 )
1229 snap_uuid = volume_metadata[SNAPSHOT_OF_TAG]
1230 if snap_uuid in vdi_to_snaps:
1231 vdi_to_snaps[snap_uuid].append(vdi_uuid)
1232 else:
1233 vdi_to_snaps[snap_uuid] = [vdi_uuid]
1235 # 4.b. Add the VDI in the list.
1236 vdi = self.vdi(vdi_uuid)
1237 self.vdis[vdi_uuid] = vdi
1239 if USE_KEY_HASH and vdi.vdi_type == vhdutil.VDI_TYPE_VHD:
1240 # TODO: Replace pylint comment with this feature when possible:
1241 # https://github.com/PyCQA/pylint/pull/2926
1242 vdi.sm_config_override['key_hash'] = \
1243 self._vhdutil.get_key_hash(vdi_uuid) # pylint: disable = E1120
1245 # 4.c. Update CBT status of disks either just added
1246 # or already in XAPI.
1247 cbt_uuid = volume_metadata.get(CBTLOG_TAG)
1248 if cbt_uuid in cbt_vdis:
1249 vdi_ref = xenapi.VDI.get_by_uuid(vdi_uuid)
1250 xenapi.VDI.set_cbt_enabled(vdi_ref, True)
1251 # For existing VDIs, update local state too.
1252 # Scan in base class SR updates existing VDIs
1253 # again based on local states.
1254 self.vdis[vdi_uuid].cbt_enabled = True
1255 cbt_vdis.remove(cbt_uuid)
1257 # 5. Now set the snapshot statuses correctly in XAPI.
1258 for src_uuid in vdi_to_snaps:
1259 try:
1260 src_ref = xenapi.VDI.get_by_uuid(src_uuid)
1261 except Exception:
1262 # The source VDI no longer exists, continue.
1263 continue
1265 for snap_uuid in vdi_to_snaps[src_uuid]:
1266 try:
1267 # This might fail in cases where it's already set.
1268 snap_ref = xenapi.VDI.get_by_uuid(snap_uuid)
1269 xenapi.VDI.set_snapshot_of(snap_ref, src_ref)
1270 except Exception as e:
1271 util.SMlog('Setting snapshot failed: {}'.format(e))
1273 # TODO: Check correctly how to use CBT.
1274 # Update cbt_enabled on the right VDI, check LVM/FileSR code.
1276 # 6. If we have items remaining in this list,
1277 # they are cbt_metadata VDI that XAPI doesn't know about.
1278 # Add them to self.vdis and they'll get added to the DB.
1279 for cbt_uuid in cbt_vdis:
1280 new_vdi = self.vdi(cbt_uuid)
1281 new_vdi.ty = 'cbt_metadata'
1282 new_vdi.cbt_enabled = True
1283 self.vdis[cbt_uuid] = new_vdi
1285 # 7. Update virtual allocation, build geneology and remove useless VDIs
1286 self.virtual_allocation = 0
1288 # 8. Build geneology.
1289 geneology = {}
1291 for vdi_uuid, vdi in self.vdis.items():
1292 if vdi.parent:
1293 if vdi.parent in self.vdis:
1294 self.vdis[vdi.parent].read_only = True
1295 if vdi.parent in geneology:
1296 geneology[vdi.parent].append(vdi_uuid)
1297 else:
1298 geneology[vdi.parent] = [vdi_uuid]
1299 if not vdi.hidden:
1300 self.virtual_allocation += vdi.size
1302 # 9. Remove all hidden leaf nodes to avoid introducing records that
1303 # will be GC'ed.
1304 for vdi_uuid in list(self.vdis.keys()):
1305 if vdi_uuid not in geneology and self.vdis[vdi_uuid].hidden:
1306 util.SMlog(
1307 'Scan found hidden leaf ({}), ignoring'.format(vdi_uuid)
1308 )
1309 del self.vdis[vdi_uuid]
1311 # --------------------------------------------------------------------------
1312 # Journals.
1313 # --------------------------------------------------------------------------
1315 def _get_vdi_path_and_parent(self, vdi_uuid, volume_name):
1316 try:
1317 device_path = self._linstor.build_device_path(volume_name)
1318 if not util.pathexists(device_path):
1319 return (None, None)
1321 # If it's a RAW VDI, there is no parent.
1322 volume_metadata = self._linstor.get_volume_metadata(vdi_uuid)
1323 vdi_type = volume_metadata[VDI_TYPE_TAG]
1324 if vdi_type == vhdutil.VDI_TYPE_RAW:
1325 return (device_path, None)
1327 # Otherwise it's a VHD and a parent can exist.
1328 if not self._vhdutil.check(vdi_uuid):
1329 return (None, None)
1331 vhd_info = self._vhdutil.get_vhd_info(vdi_uuid)
1332 if vhd_info:
1333 return (device_path, vhd_info.parentUuid)
1334 except Exception as e:
1335 util.SMlog(
1336 'Failed to get VDI path and parent, ignoring: {}'
1337 .format(e)
1338 )
1339 return (None, None)
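# _get_vdi_path_and_parent() returns (None, None) when the device is missing
# or the VHD is unreadable, (path, None) for RAW volumes, and
# (path, parent_uuid) for readable VHD volumes.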
1341 def _undo_all_journal_transactions(self):
1342 util.SMlog('Undoing all journal transactions...')
1343 self.lock.acquire()
1344 try:
1345 self._handle_interrupted_inflate_ops()
1346 self._handle_interrupted_clone_ops()
1347 pass
1348 finally:
1349 self.lock.release()
1351 def _handle_interrupted_inflate_ops(self):
1352 transactions = self._journaler.get_all(LinstorJournaler.INFLATE)
1353 for vdi_uuid, old_size in transactions.items():
1354 self._handle_interrupted_inflate(vdi_uuid, old_size)
1355 self._journaler.remove(LinstorJournaler.INFLATE, vdi_uuid)
1357 def _handle_interrupted_clone_ops(self):
1358 transactions = self._journaler.get_all(LinstorJournaler.CLONE)
1359 for vdi_uuid, old_size in transactions.items():
1360 self._handle_interrupted_clone(vdi_uuid, old_size)
1361 self._journaler.remove(LinstorJournaler.CLONE, vdi_uuid)
1363 def _handle_interrupted_inflate(self, vdi_uuid, old_size):
1364 util.SMlog(
1365 '*** INTERRUPTED INFLATE OP: for {} ({})'
1366 .format(vdi_uuid, old_size)
1367 )
1369 vdi = self.vdis.get(vdi_uuid)
1370 if not vdi:
1371 util.SMlog('Cannot deflate missing VDI {}'.format(vdi_uuid))
1372 return
1374 assert not self._all_volume_info_cache
1375 volume_info = self._linstor.get_volume_info(vdi_uuid)
1377 current_size = volume_info.virtual_size
1378 assert current_size > 0
1380 util.zeroOut(
1381 vdi.path,
1382 current_size - vhdutil.VHD_FOOTER_SIZE,
1383 vhdutil.VHD_FOOTER_SIZE
1384 )
1385 deflate(self._linstor, vdi_uuid, vdi.path, old_size, current_size)
1387 def _handle_interrupted_clone(
1388 self, vdi_uuid, clone_info, force_undo=False
1389 ):
1390 util.SMlog(
1391 '*** INTERRUPTED CLONE OP: for {} ({})'
1392 .format(vdi_uuid, clone_info)
1393 )
1395 base_uuid, snap_uuid = clone_info.split('_')
1397 # Use LINSTOR data because new VDIs may not be in the XAPI.
1398 volume_names = self._linstor.get_volumes_with_name()
1400 # Check whether the base VDI is missing (i.e. the clone failed right at the start).
1401 if base_uuid not in volume_names:
1402 if vdi_uuid in volume_names:
1403 util.SMlog('*** INTERRUPTED CLONE OP: nothing to do')
1404 return
1405 raise util.SMException(
1406 'Base copy {} not present, but no original {} found'
1407 .format(base_uuid, vdi_uuid)
1408 )
1410 if force_undo:
1411 util.SMlog('Explicit revert')
1412 self._undo_clone(
1413 volume_names, vdi_uuid, base_uuid, snap_uuid
1414 )
1415 return
1417 # If VDI or snap uuid is missing...
1418 if vdi_uuid not in volume_names or \
1419 (snap_uuid and snap_uuid not in volume_names):
1420 util.SMlog('One or both leaves missing => revert')
1421 self._undo_clone(volume_names, vdi_uuid, base_uuid, snap_uuid)
1422 return
1424 vdi_path, vdi_parent_uuid = self._get_vdi_path_and_parent(
1425 vdi_uuid, volume_names[vdi_uuid]
1426 )
1427 snap_path, snap_parent_uuid = self._get_vdi_path_and_parent(
1428 snap_uuid, volume_names[snap_uuid]
1429 )
1431 if not vdi_path or (snap_uuid and not snap_path):
1432 util.SMlog('One or both leaves invalid (and path(s)) => revert')
1433 self._undo_clone(volume_names, vdi_uuid, base_uuid, snap_uuid)
1434 return
1436 util.SMlog('Leaves valid but => revert')
1437 self._undo_clone(volume_names, vdi_uuid, base_uuid, snap_uuid)
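# Note: even when both leaves look valid, the remaining CLONE journal entry
# means the operation never completed, hence the unconditional revert above.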
1439 def _undo_clone(self, volume_names, vdi_uuid, base_uuid, snap_uuid):
1440 base_path = self._linstor.build_device_path(volume_names[base_uuid])
1441 base_metadata = self._linstor.get_volume_metadata(base_uuid)
1442 base_type = base_metadata[VDI_TYPE_TAG]
1444 if not util.pathexists(base_path):
1445 util.SMlog('Base not found! Exit...')
1446 util.SMlog('*** INTERRUPTED CLONE OP: rollback fail')
1447 return
1449 # Un-hide the parent.
1450 self._linstor.update_volume_metadata(base_uuid, {READ_ONLY_TAG: False})
1451 if base_type == vhdutil.VDI_TYPE_VHD:
1452 vhd_info = self._vhdutil.get_vhd_info(base_uuid, False)
1453 if vhd_info.hidden:
1454 self._vhdutil.set_hidden(base_path, False)
1455 elif base_type == vhdutil.VDI_TYPE_RAW and \
1456 base_metadata.get(HIDDEN_TAG):
1457 self._linstor.update_volume_metadata(
1458 base_uuid, {HIDDEN_TAG: False}
1459 )
1461 # Remove the child nodes.
1462 if snap_uuid and snap_uuid in volume_names:
1463 util.SMlog('Destroying snap {}...'.format(snap_uuid))
1465 try:
1466 self._linstor.destroy_volume(snap_uuid)
1467 except Exception as e:
1468 util.SMlog(
1469 'Cannot destroy snap {} during undo clone: {}'
1470 .format(snap_uuid, e)
1471 )
1473 if vdi_uuid in volume_names:
1474 try:
1475 util.SMlog('Destroying {}...'.format(vdi_uuid))
1476 self._linstor.destroy_volume(vdi_uuid)
1477 except Exception as e:
1478 util.SMlog(
1479 'Cannot destroy VDI {} during undo clone: {}'
1480 .format(vdi_uuid, e)
1481 )
1482 # We can get an exception like this:
1483 # "Shutdown of the DRBD resource 'XXX' failed", so the
1484 # volume info remains... The problem is that we can't properly
1485 # rename the base VDI below this line, so we must change the
1486 # UUID of this bad VDI first.
1487 self._linstor.update_volume_uuid(
1488 vdi_uuid, 'DELETED_' + vdi_uuid, force=True
1489 )
1491 # Rename!
1492 self._linstor.update_volume_uuid(base_uuid, vdi_uuid)
1494 # Inflate to the right size.
1495 if base_type == vhdutil.VDI_TYPE_VHD:
1496 vdi = self.vdi(vdi_uuid)
1497 volume_size = compute_volume_size(vdi.size, vdi.vdi_type)
1498 inflate(
1499 self._journaler, self._linstor, vdi_uuid, vdi.path,
1500 volume_size, vdi.capacity
1501 )
1502 self.vdis[vdi_uuid] = vdi
1504 # At this stage, tapdisk and SM vdi will be in paused state. Remove
1505 # flag to facilitate vm deactivate.
1506 vdi_ref = self.session.xenapi.VDI.get_by_uuid(vdi_uuid)
1507 self.session.xenapi.VDI.remove_from_sm_config(vdi_ref, 'paused')
1509 util.SMlog('*** INTERRUPTED CLONE OP: rollback success')
1511 # --------------------------------------------------------------------------
1512 # Cache.
1513 # --------------------------------------------------------------------------
1515 def _create_linstor_cache(self):
1516 reconnect = False
1518 def create_cache():
1519 nonlocal reconnect
1520 try:
1521 if reconnect:
1522 self._reconnect()
1523 return self._linstor.get_volumes_with_info()
1524 except Exception as e:
1525 reconnect = True
1526 raise e
1528 self._all_volume_metadata_cache = \
1529 self._linstor.get_volumes_with_metadata()
1530 self._all_volume_info_cache = util.retry(
1531 create_cache,
1532 maxretry=10,
1533 period=3
1534 )
1536 def _destroy_linstor_cache(self):
1537 self._all_volume_info_cache = None
1538 self._all_volume_metadata_cache = None
1540 # --------------------------------------------------------------------------
1541 # Misc.
1542 # --------------------------------------------------------------------------
1544 def _reconnect(self):
1545 controller_uri = get_controller_uri()
1547 self._journaler = LinstorJournaler(
1548 controller_uri, self._group_name, logger=util.SMlog
1549 )
1551 # Try to open the SR if it exists.
1552 # We can repair only if we are on the master AND if
1553 # we are trying to execute an exclusive operation.
1554 # Otherwise we could try to delete a VDI being created or
1555 # during a snapshot. An exclusive op is the guarantee that
1556 # the SR is locked.
1557 self._linstor = LinstorVolumeManager(
1558 controller_uri,
1559 self._group_name,
1560 repair=(
1561 self._is_master and
1562 self.srcmd.cmd in self.ops_exclusive
1563 ),
1564 logger=util.SMlog
1565 )
1566 self._vhdutil = LinstorVhdUtil(self.session, self._linstor)
1568 def _ensure_space_available(self, amount_needed):
1569 space_available = self._linstor.max_volume_size_allowed
1570 if (space_available < amount_needed):
1571 util.SMlog(
1572 'Not enough space! Free space: {}, need: {}'.format(
1573 space_available, amount_needed
1574 )
1575 )
1576 raise xs_errors.XenError('SRNoSpace')
1578 def _kick_gc(self):
1579 # Don't bother if an instance is already running. This is just an
1580 # optimization to reduce the overhead of forking a new process if we
1581 # don't have to, but the process will check the lock anyway.
1582 lock = Lock(cleanup.LOCK_TYPE_RUNNING, self.uuid)
1583 if not lock.acquireNoblock():
1584 if not cleanup.should_preempt(self.session, self.uuid):
1585 util.SMlog('A GC instance already running, not kicking')
1586 return
1588 util.SMlog('Aborting currently-running coalesce of garbage VDI')
1589 try:
1590 if not cleanup.abort(self.uuid, soft=True):
1591 util.SMlog('The GC has already been scheduled to re-start')
1592 except util.CommandException as e:
1593 if e.code != errno.ETIMEDOUT:
1594 raise
1595 util.SMlog('Failed to abort the GC')
1596 else:
1597 lock.release()
1599 util.SMlog('Kicking GC')
1600 cleanup.gc(self.session, self.uuid, True)
1602# ==============================================================================
1603# LinstorSr VDI
1604# ==============================================================================
1607class LinstorVDI(VDI.VDI):
1608 # Warning: Not the same values as vhdutil.VDI_TYPE_*.
1609 # These values represent the types given on the command line.
1610 TYPE_RAW = 'raw'
1611 TYPE_VHD = 'vhd'
1613 MAX_SIZE = 2 * 1024 * 1024 * 1024 * 1024 # Max VHD size.
1615 # Metadata size given to the "S" param of vhd-util create.
1616 # "-S size (MB) for metadata preallocation".
1617 # Increases performance when resize is called.
1618 MAX_METADATA_VIRT_SIZE = 2 * 1024 * 1024
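# Presumably expressed in the '-S' MB unit above: 2 * 1024 * 1024 MB ~=
# MAX_SIZE (2 TiB), i.e. the metadata area is preallocated for the maximum
# virtual size so that an online resize never has to grow it.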
1620 # --------------------------------------------------------------------------
1621 # VDI methods.
1622 # --------------------------------------------------------------------------
1624 def load(self, vdi_uuid):
1625 self._lock = self.sr.lock
1626 self._exists = True
1627 self._linstor = self.sr._linstor
1629 # Update hidden parent property.
1630 self.hidden = False
1632 def raise_bad_load(e):
1633 util.SMlog(
1634 'Got exception in LinstorVDI.load: {}'.format(e)
1635 )
1636 util.SMlog(traceback.format_exc())
1637 raise xs_errors.XenError(
1638 'VDIUnavailable',
1639 opterr='Could not load {} because: {}'.format(self.uuid, e)
1640 )
1642 # Try to load VDI.
1643 try:
1644 if (
1645 self.sr.srcmd.cmd == 'vdi_attach_from_config' or
1646 self.sr.srcmd.cmd == 'vdi_detach_from_config'
1647 ):
1648 self.vdi_type = vhdutil.VDI_TYPE_RAW
1649 self.path = self.sr.srcmd.params['vdi_path']
1650 else:
1651 self._determine_type_and_path()
1652 self._load_this()
1654 util.SMlog('VDI {} loaded! (path={}, hidden={})'.format(
1655 self.uuid, self.path, self.hidden
1656 ))
1657 except LinstorVolumeManagerError as e:
1658 # 1. It may be a VDI deletion.
1659 if e.code == LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS:
1660 if self.sr.srcmd.cmd == 'vdi_delete':
1661 self.deleted = True
1662 return
1664 # 2. Or maybe a creation.
1665 if self.sr.srcmd.cmd == 'vdi_create':
1666 # Set type attribute of VDI parent class.
1667 # We use VHD by default.
1668 self.vdi_type = vhdutil.VDI_TYPE_VHD
1669 self._key_hash = None # Only used in create.
1671 self._exists = False
1672 vdi_sm_config = self.sr.srcmd.params.get('vdi_sm_config')
1673 if vdi_sm_config is not None:
1674 type = vdi_sm_config.get('type')
1675 if type is not None:
1676 if type == self.TYPE_RAW:
1677 self.vdi_type = vhdutil.VDI_TYPE_RAW
1678 elif type == self.TYPE_VHD:
1679 self.vdi_type = vhdutil.VDI_TYPE_VHD
1680 else:
1681 raise xs_errors.XenError(
1682 'VDICreate',
1683 opterr='Invalid VDI type {}'.format(type)
1684 )
1685 if self.vdi_type == vhdutil.VDI_TYPE_VHD:
1686 self._key_hash = vdi_sm_config.get('key_hash')
1688 # For the moment we don't have a path.
1689 self._update_device_name(None)
1690 return
1691 raise_bad_load(e)
1692 except Exception as e:
1693 raise_bad_load(e)
1695 def create(self, sr_uuid, vdi_uuid, size):
1696 # Usage example:
1697 # xe vdi-create sr-uuid=39a5826b-5a90-73eb-dd09-51e3a116f937
1698 # name-label="linstor-vdi-1" virtual-size=4096MiB sm-config:type=vhd
1700 # 1. Check if we are on the master and if the VDI doesn't exist.
1701 util.SMlog('LinstorVDI.create for {}'.format(self.uuid))
1702 if self._exists:
1703 raise xs_errors.XenError('VDIExists')
1705 assert self.uuid
1706 assert self.ty
1707 assert self.vdi_type
1709 # 2. Compute size and check space available.
1710 size = vhdutil.validate_and_round_vhd_size(int(size))
1711 volume_size = compute_volume_size(size, self.vdi_type)
1712 util.SMlog(
1713 'LinstorVDI.create: type={}, vhd-size={}, volume-size={}'
1714 .format(self.vdi_type, size, volume_size)
1715 )
1716 self.sr._ensure_space_available(volume_size)
1718 # 3. Set sm_config attribute of VDI parent class.
1719 self.sm_config = self.sr.srcmd.params['vdi_sm_config']
1721 # 4. Create!
1722 failed = False
1723 try:
1724 volume_name = None
1725 if self.ty == 'ha_statefile':
1726 volume_name = 'xcp-persistent-ha-statefile'
1727 elif self.ty == 'redo_log':
1728 volume_name = 'xcp-persistent-redo-log'
1730 self._linstor.create_volume(
1731 self.uuid, volume_size, persistent=False,
1732 volume_name=volume_name
1733 )
1734 volume_info = self._linstor.get_volume_info(self.uuid)
1736 self._update_device_name(volume_info.name)
1738 if self.vdi_type == vhdutil.VDI_TYPE_RAW:
1739 self.size = volume_info.virtual_size
1740 else:
1741 self.sr._vhdutil.create(
1742 self.path, size, False, self.MAX_METADATA_VIRT_SIZE
1743 )
1744 self.size = self.sr._vhdutil.get_size_virt(self.uuid)
1746 if self._key_hash:
1747 self.sr._vhdutil.set_key(self.path, self._key_hash)
1749 # Because vhdutil commands modify the volume data,
1750 # we must fetch the utilization size again.
1751 volume_info = self._linstor.get_volume_info(self.uuid)
1753 volume_metadata = {
1754 NAME_LABEL_TAG: util.to_plain_string(self.label),
1755 NAME_DESCRIPTION_TAG: util.to_plain_string(self.description),
1756 IS_A_SNAPSHOT_TAG: False,
1757 SNAPSHOT_OF_TAG: '',
1758 SNAPSHOT_TIME_TAG: '',
1759 TYPE_TAG: self.ty,
1760 VDI_TYPE_TAG: self.vdi_type,
1761 READ_ONLY_TAG: bool(self.read_only),
1762 METADATA_OF_POOL_TAG: ''
1763 }
1764 self._linstor.set_volume_metadata(self.uuid, volume_metadata)
1766 # Set the open timeout to 1min to reduce CPU usage
1767 # in http-disk-server when a secondary server tries to open
1768 # an already opened volume.
1769 if self.ty == 'ha_statefile' or self.ty == 'redo_log':
1770 self._linstor.set_auto_promote_timeout(self.uuid, 600)
1772 self._linstor.mark_volume_as_persistent(self.uuid)
1773 except util.CommandException as e:
1774 failed = True
1775 raise xs_errors.XenError(
1776 'VDICreate', opterr='error {}'.format(e.code)
1777 )
1778 except Exception as e:
1779 failed = True
1780 raise xs_errors.XenError('VDICreate', opterr='error {}'.format(e))
1781 finally:
1782 if failed:
1783 util.SMlog('Unable to create VDI {}'.format(self.uuid))
1784 try:
1785 self._linstor.destroy_volume(self.uuid)
1786 except Exception as e:
1787 util.SMlog(
1788 'Ignoring exception after fail in LinstorVDI.create: '
1789 '{}'.format(e)
1790 )
1792 self.utilisation = volume_info.allocated_size
1793 self.sm_config['vdi_type'] = self.vdi_type
1795 self.ref = self._db_introduce()
1796 self.sr._update_stats(self.size)
1798 return VDI.VDI.get_params(self)
1800 def delete(self, sr_uuid, vdi_uuid, data_only=False):
1801 util.SMlog('LinstorVDI.delete for {}'.format(self.uuid))
1802 if self.attached:
1803 raise xs_errors.XenError('VDIInUse')
1805 if self.deleted:
1806 return super(LinstorVDI, self).delete(
1807 sr_uuid, vdi_uuid, data_only
1808 )
1810 vdi_ref = self.sr.srcmd.params['vdi_ref']
1811 if not self.session.xenapi.VDI.get_managed(vdi_ref):
1812 raise xs_errors.XenError(
1813 'VDIDelete',
1814 opterr='Deleting non-leaf node not permitted'
1815 )
1817 try:
1818 # Remove from XAPI and delete from LINSTOR.
1819 self._linstor.destroy_volume(self.uuid)
1820 if not data_only:
1821 self._db_forget()
1823 self.sr.lock.cleanupAll(vdi_uuid)
1824 except Exception as e:
1825 util.SMlog(
1826 'Failed to remove the volume (maybe it is leaf coalescing) '
1827 'for {} err: {}'.format(self.uuid, e)
1828 )
1829 raise xs_errors.XenError('VDIDelete', opterr=str(e))
1831 if self.uuid in self.sr.vdis:
1832 del self.sr.vdis[self.uuid]
1834 # TODO: Check size after delete.
1835 self.sr._update_stats(-self.size)
1836 self.sr._kick_gc()
1837 return super(LinstorVDI, self).delete(sr_uuid, vdi_uuid, data_only)
1839 def attach(self, sr_uuid, vdi_uuid):
1840 util.SMlog('LinstorVDI.attach for {}'.format(self.uuid))
1841 attach_from_config = self.sr.srcmd.cmd == 'vdi_attach_from_config'
1842 if (
1843 not attach_from_config or
1844 self.sr.srcmd.params['vdi_uuid'] != self.uuid
1845 ) and self.sr._journaler.has_entries(self.uuid):
1846 raise xs_errors.XenError(
1847 'VDIUnavailable',
1848 opterr='Interrupted operation detected on this VDI, '
1849 'scan SR first to trigger auto-repair'
1850 )
1852 if not attach_from_config or self.sr._is_master:
1853 writable = 'args' not in self.sr.srcmd.params or \
1854 self.sr.srcmd.params['args'][0] == 'true'
1856 # We need to inflate the volume if we don't have enough space
1857 # to mount the VHD image, i.e. the volume capacity must be at
1858 # least the VHD size plus the bitmap/metadata size.
1859 need_inflate = True
1860 if (
1861 self.vdi_type == vhdutil.VDI_TYPE_RAW or
1862 not writable or
1863 self.capacity >= compute_volume_size(self.size, self.vdi_type)
1864 ):
1865 need_inflate = False
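# In short: RAW volumes and read-only attaches are never inflated; a
# writable VHD attach inflates only when the DRBD volume is smaller than
# the fully allocated VHD size (virtual size plus metadata, as computed
# by compute_volume_size).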
1867 if need_inflate:
1868 try:
1869 self._prepare_thin(True)
1870 except Exception as e:
1871 raise xs_errors.XenError(
1872 'VDIUnavailable',
1873 opterr='Failed to attach VDI during "prepare thin": {}'
1874 .format(e)
1875 )
1877 if not hasattr(self, 'xenstore_data'):
1878 self.xenstore_data = {}
1879 self.xenstore_data['storage-type'] = LinstorSR.DRIVER_TYPE
1881 if (
1882 USE_HTTP_NBD_SERVERS and
1883 attach_from_config and
1884 self.path.startswith('/dev/http-nbd/')
1885 ):
1886 return self._attach_using_http_nbd()
1888 # Ensure we have a path...
1889 while vdi_uuid:
1890 path = self._linstor.get_device_path(vdi_uuid)
1891 if not util.pathexists(path):
1892 raise xs_errors.XenError(
1893 'VDIUnavailable', opterr='Could not find: {}'.format(path)
1894 )
1895 vdi_uuid = self.sr._vhdutil.get_vhd_info(vdi_uuid).parentUuid
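# Note: the loop above walks up the VHD parent chain, so every ancestor
# must expose a reachable DRBD device path before the leaf is attached.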
1897 self.attached = True
1898 return VDI.VDI.attach(self, self.sr.uuid, self.uuid)
1900 def detach(self, sr_uuid, vdi_uuid):
1901 util.SMlog('LinstorVDI.detach for {}'.format(self.uuid))
1902 detach_from_config = self.sr.srcmd.cmd == 'vdi_detach_from_config'
1903 self.attached = False
1905 if detach_from_config and self.path.startswith('/dev/http-nbd/'):
1906 return self._detach_using_http_nbd()
1908 if self.vdi_type == vhdutil.VDI_TYPE_RAW:
1909 return
1911 # The VDI is already deflated if the VHD image size + metadata is
1912 # equal to or larger than the LINSTOR volume size.
1913 volume_size = compute_volume_size(self.size, self.vdi_type)
1914 already_deflated = self.capacity <= volume_size
1916 if already_deflated:
1917 util.SMlog(
1918 'VDI {} already deflated (old volume size={}, volume size={})'
1919 .format(self.uuid, self.capacity, volume_size)
1920 )
1922 need_deflate = True
1923 if already_deflated:
1924 need_deflate = False
1925 elif self.sr._provisioning == 'thick':
1926 need_deflate = False
1928 vdi_ref = self.sr.srcmd.params['vdi_ref']
1929 if self.session.xenapi.VDI.get_is_a_snapshot(vdi_ref):
1930 need_deflate = True
1932 if need_deflate:
1933 try:
1934 self._prepare_thin(False)
1935 except Exception as e:
1936 raise xs_errors.XenError(
1937 'VDIUnavailable',
1938 opterr='Failed to detach VDI during "prepare thin": {}'
1939 .format(e)
1940 )
1942 def resize(self, sr_uuid, vdi_uuid, size):
1943 util.SMlog('LinstorVDI.resize for {}'.format(self.uuid))
1944 if not self.sr._is_master:
1945 raise xs_errors.XenError(
1946 'VDISize',
1947 opterr='resize on slave not allowed'
1948 )
1950 if self.hidden:
1951 raise xs_errors.XenError('VDIUnavailable', opterr='hidden VDI')
1953 # Compute the virtual VHD and DRBD volume size.
1954 size = vhdutil.validate_and_round_vhd_size(int(size))
1955 volume_size = compute_volume_size(size, self.vdi_type)
1956 util.SMlog(
1957 'LinstorVDI.resize: type={}, vhd-size={}, volume-size={}'
1958 .format(self.vdi_type, size, volume_size)
1959 )
1961 if size < self.size:
1962 util.SMlog(
1963 'vdi_resize: shrinking not supported: '
1964 '(current size: {}, new size: {})'.format(self.size, size)
1965 )
1966 raise xs_errors.XenError('VDISize', opterr='shrinking not allowed')
1968 if size == self.size:
1969 return VDI.VDI.get_params(self)
1971 if self.vdi_type == vhdutil.VDI_TYPE_RAW:
1972 old_volume_size = self.size
new_volume_size = volume_size
1973 else:
1974 old_volume_size = self.utilisation
1975 if self.sr._provisioning == 'thin':
1976 # VDI is currently deflated, so keep it deflated.
1977 new_volume_size = old_volume_size
else:
new_volume_size = volume_size
1978 assert new_volume_size >= old_volume_size
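# Note: with thin provisioning the DRBD volume is left at its current
# (deflated) size here; only the VHD virtual size is bumped below, and
# the volume is inflated lazily on attach (see attach/_prepare_thin).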
1980 space_needed = new_volume_size - old_volume_size
1981 self.sr._ensure_space_available(space_needed)
1983 old_size = self.size
1984 if self.vdi_type == vhdutil.VDI_TYPE_RAW:
1985 self._linstor.resize(self.uuid, new_volume_size)
1986 else:
1987 if new_volume_size != old_volume_size:
1988 inflate(
1989 self.sr._journaler, self._linstor, self.uuid, self.path,
1990 new_volume_size, old_volume_size
1991 )
1992 self.sr._vhdutil.set_size_virt_fast(self.path, size)
1994 # Reload size attributes.
1995 self._load_this()
1997 vdi_ref = self.sr.srcmd.params['vdi_ref']
1998 self.session.xenapi.VDI.set_virtual_size(vdi_ref, str(self.size))
1999 self.session.xenapi.VDI.set_physical_utilisation(
2000 vdi_ref, str(self.utilisation)
2001 )
2002 self.sr._update_stats(self.size - old_size)
2003 return VDI.VDI.get_params(self)
2005 def clone(self, sr_uuid, vdi_uuid):
2006 return self._do_snapshot(sr_uuid, vdi_uuid, VDI.SNAPSHOT_DOUBLE)
2008 def compose(self, sr_uuid, vdi1, vdi2):
2009 util.SMlog('VDI.compose for {} -> {}'.format(vdi2, vdi1))
2010 if self.vdi_type != vhdutil.VDI_TYPE_VHD:
2011 raise xs_errors.XenError('Unimplemented')
2013 parent_uuid = vdi1
2014 parent_path = self._linstor.get_device_path(parent_uuid)
2016 # We must pause tapdisk to correctly change the parent. Otherwise we
2017 # get a read-only error.
2018 # See: https://github.com/xapi-project/xen-api/blob/b3169a16d36dae0654881b336801910811a399d9/ocaml/xapi/storage_migrate.ml#L928-L929
2019 # and: https://github.com/xapi-project/xen-api/blob/b3169a16d36dae0654881b336801910811a399d9/ocaml/xapi/storage_migrate.ml#L775
2021 if not blktap2.VDI.tap_pause(self.session, self.sr.uuid, self.uuid):
2022 raise util.SMException('Failed to pause VDI {}'.format(self.uuid))
2023 try:
2024 self.sr._vhdutil.set_parent(self.path, parent_path, False)
2025 self.sr._vhdutil.set_hidden(parent_path)
2026 self.sr.session.xenapi.VDI.set_managed(
2027 self.sr.srcmd.params['args'][0], False
2028 )
2029 finally:
2030 blktap2.VDI.tap_unpause(self.session, self.sr.uuid, self.uuid)
2032 if not blktap2.VDI.tap_refresh(self.session, self.sr.uuid, self.uuid):
2033 raise util.SMException(
2034 'Failed to refresh VDI {}'.format(self.uuid)
2035 )
2037 util.SMlog('Compose done')
2039 def generate_config(self, sr_uuid, vdi_uuid):
2040 """
2041 Generate the XML config required to attach and activate
2042 a VDI for use when XAPI is not running. Attach and
2043 activation are handled by vdi_attach_from_config below.
2044 """
2046 util.SMlog('LinstorVDI.generate_config for {}'.format(self.uuid))
2048 resp = {}
2049 resp['device_config'] = self.sr.dconf
2050 resp['sr_uuid'] = sr_uuid
2051 resp['vdi_uuid'] = self.uuid
2052 resp['sr_sm_config'] = self.sr.sm_config
2053 resp['command'] = 'vdi_attach_from_config'
2055 # By default, we generate a normal config.
2056 # But if the disk is persistent, we must use an HTTP/NBD
2057 # server to ensure we can always read and write data.
2058 # Why? DRBD is unsafe when used with more than 4 hosts:
2059 # we are limited to 1 diskless and 3 diskful replicas.
2060 # We can't lift this limitation, so we use an NBD/HTTP device
2061 # instead.
2062 volume_name = self._linstor.get_volume_name(self.uuid)
2063 if not USE_HTTP_NBD_SERVERS or volume_name not in [
2064 'xcp-persistent-ha-statefile', 'xcp-persistent-redo-log'
2065 ]:
2066 if not self.path or not util.pathexists(self.path):
2067 available = False
2068 # Try to refresh symlink path...
2069 try:
2070 self.path = self._linstor.get_device_path(vdi_uuid)
2071 available = util.pathexists(self.path)
2072 except Exception:
2073 pass
2074 if not available:
2075 raise xs_errors.XenError('VDIUnavailable')
2077 resp['vdi_path'] = self.path
2078 else:
2079 # Axiom: DRBD device is present on at least one host.
2080 resp['vdi_path'] = '/dev/http-nbd/' + volume_name
2082 config = xmlrpc.client.dumps(tuple([resp]), 'vdi_attach_from_config')
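# The config is double-encoded: 'config' is an XML-RPC method call for
# vdi_attach_from_config, which is then wrapped below in an XML-RPC
# response (methodresponse=True) that XAPI hands to the attaching host.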
2083 return xmlrpc.client.dumps((config,), "", True)
2085 def attach_from_config(self, sr_uuid, vdi_uuid):
2086 """
2087 Attach and activate a VDI using config generated by
2088 vdi_generate_config above. This is used for cases such as
2089 the HA state-file and the redo-log.
2090 """
2092 util.SMlog('LinstorVDI.attach_from_config for {}'.format(vdi_uuid))
2094 try:
2095 if not util.pathexists(self.sr.path):
2096 self.sr.attach(sr_uuid)
2098 if not DRIVER_CONFIG['ATTACH_FROM_CONFIG_WITH_TAPDISK']:
2099 return self.attach(sr_uuid, vdi_uuid)
2100 except Exception:
2101 util.logException('LinstorVDI.attach_from_config')
2102 raise xs_errors.XenError(
2103 'SRUnavailable',
2104 opterr='Unable to attach from config'
2105 )
2107 def reset_leaf(self, sr_uuid, vdi_uuid):
2108 if self.vdi_type != vhdutil.VDI_TYPE_VHD:
2109 raise xs_errors.XenError('Unimplemented')
2111 if not self.sr._vhdutil.has_parent(self.uuid):
2112 raise util.SMException(
2113 'ERROR: VDI {} has no parent, will not reset contents'
2114 .format(self.uuid)
2115 )
2117 self.sr._vhdutil.kill_data(self.path)
2119 def _load_this(self):
2120 volume_metadata = None
2121 if self.sr._all_volume_metadata_cache:
2122 volume_metadata = self.sr._all_volume_metadata_cache.get(self.uuid)
2123 if volume_metadata is None:
2124 volume_metadata = self._linstor.get_volume_metadata(self.uuid)
2126 volume_info = None
2127 if self.sr._all_volume_info_cache:
2128 volume_info = self.sr._all_volume_info_cache.get(self.uuid)
2129 if volume_info is None:
2130 volume_info = self._linstor.get_volume_info(self.uuid)
2132 # Contains the maximum physical size used on a disk.
2133 # When the LINSTOR LVM driver is used, this size should be close to
2134 # the virtual size (i.e. the LINSTOR max volume size).
2135 # When the LINSTOR thin LVM driver is used, the physical size is
2136 # lower than the virtual size at creation time and grows each time
2137 # a new block is written.
2138 self.utilisation = volume_info.allocated_size
2139 self.capacity = volume_info.virtual_size
2141 if self.vdi_type == vhdutil.VDI_TYPE_RAW:
2142 self.hidden = int(volume_metadata.get(HIDDEN_TAG) or 0)
2143 self.size = volume_info.virtual_size
2144 self.parent = ''
2145 else:
2146 vhd_info = self.sr._vhdutil.get_vhd_info(self.uuid)
2147 self.hidden = vhd_info.hidden
2148 self.size = vhd_info.sizeVirt
2149 self.parent = vhd_info.parentUuid
2151 if self.hidden:
2152 self.managed = False
2154 self.label = volume_metadata.get(NAME_LABEL_TAG) or ''
2155 self.description = volume_metadata.get(NAME_DESCRIPTION_TAG) or ''
2157 # Update sm_config_override of VDI parent class.
2158 self.sm_config_override = {'vhd-parent': self.parent or None}
2160 def _mark_hidden(self, hidden=True):
2161 if self.hidden == hidden:
2162 return
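# VHD images carry the hidden flag in their header; RAW volumes have no
# header, so the flag is stored in the LINSTOR volume metadata instead.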
2164 if self.vdi_type == vhdutil.VDI_TYPE_VHD:
2165 self.sr._vhdutil.set_hidden(self.path, hidden)
2166 else:
2167 self._linstor.update_volume_metadata(self.uuid, {
2168 HIDDEN_TAG: hidden
2169 })
2170 self.hidden = hidden
2172 def update(self, sr_uuid, vdi_uuid):
2173 xenapi = self.session.xenapi
2174 vdi_ref = xenapi.VDI.get_by_uuid(self.uuid)
2176 volume_metadata = {
2177 NAME_LABEL_TAG: util.to_plain_string(
2178 xenapi.VDI.get_name_label(vdi_ref)
2179 ),
2180 NAME_DESCRIPTION_TAG: util.to_plain_string(
2181 xenapi.VDI.get_name_description(vdi_ref)
2182 )
2183 }
2185 try:
2186 self._linstor.update_volume_metadata(self.uuid, volume_metadata)
2187 except LinstorVolumeManagerError as e:
2188 if e.code == LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS:
2189 raise xs_errors.XenError(
2190 'VDIUnavailable',
2191 opterr='LINSTOR volume {} not found'.format(self.uuid)
2192 )
2193 raise xs_errors.XenError('VDIUnavailable', opterr=str(e))
2195 # --------------------------------------------------------------------------
2196 # Thin provisioning.
2197 # --------------------------------------------------------------------------
2199 def _prepare_thin(self, attach):
2200 if self.sr._is_master:
2201 if attach:
2202 attach_thin(
2203 self.session, self.sr._journaler, self._linstor,
2204 self.sr.uuid, self.uuid
2205 )
2206 else:
2207 detach_thin(
2208 self.session, self._linstor, self.sr.uuid, self.uuid
2209 )
2210 else:
2211 fn = 'attach' if attach else 'detach'
2213 master = util.get_master_ref(self.session)
2215 args = {
2216 'groupName': self.sr._group_name,
2217 'srUuid': self.sr.uuid,
2218 'vdiUuid': self.uuid
2219 }
2220 self.sr._exec_manager_command(master, fn, args, 'VDIUnavailable')
2222 # Reload size attrs after inflate or deflate!
2223 self._load_this()
2224 self.sr._update_physical_size()
2226 vdi_ref = self.sr.srcmd.params['vdi_ref']
2227 self.session.xenapi.VDI.set_physical_utilisation(
2228 vdi_ref, str(self.utilisation)
2229 )
2231 self.session.xenapi.SR.set_physical_utilisation(
2232 self.sr.sr_ref, str(self.sr.physical_utilisation)
2233 )
2235 # --------------------------------------------------------------------------
2236 # Generic helpers.
2237 # --------------------------------------------------------------------------
2239 def _determine_type_and_path(self):
2240 """
2241 Determine whether this is a RAW or a VHD VDI.
2242 """
2244 # 1. Check vdi_ref and vdi_type in config.
2245 try:
2246 vdi_ref = self.session.xenapi.VDI.get_by_uuid(self.uuid)
2247 if vdi_ref:
2248 sm_config = self.session.xenapi.VDI.get_sm_config(vdi_ref)
2249 vdi_type = sm_config.get('vdi_type')
2250 if vdi_type:
2251 # Update parent fields.
2252 self.vdi_type = vdi_type
2253 self.sm_config_override = sm_config
2254 self._update_device_name(
2255 self._linstor.get_volume_name(self.uuid)
2256 )
2257 return
2258 except Exception:
2259 pass
2261 # 2. Otherwise use the LINSTOR volume manager directly.
2262 # It's probably a new VDI created via snapshot.
2263 volume_metadata = self._linstor.get_volume_metadata(self.uuid)
2264 self.vdi_type = volume_metadata.get(VDI_TYPE_TAG)
2265 if not self.vdi_type:
2266 raise xs_errors.XenError(
2267 'VDIUnavailable',
2268 opterr='failed to get vdi_type in metadata'
2269 )
2270 self._update_device_name(self._linstor.get_volume_name(self.uuid))
2272 def _update_device_name(self, device_name):
2273 self._device_name = device_name
2275 # Mark path of VDI parent class.
2276 if device_name:
2277 self.path = self._linstor.build_device_path(self._device_name)
2278 else:
2279 self.path = None
2281 def _create_snapshot(self, snap_uuid, snap_of_uuid=None):
2282 """
2283 Snapshot self and return the snapshot VDI object.
2284 """
2286 # 1. Create a new LINSTOR volume with the same size as self.
2287 snap_path = self._linstor.shallow_clone_volume(
2288 self.uuid, snap_uuid, persistent=False
2289 )
2291 # 2. Write the snapshot content.
2292 is_raw = (self.vdi_type == vhdutil.VDI_TYPE_RAW)
2293 self.sr._vhdutil.snapshot(
2294 snap_path, self.path, is_raw, self.MAX_METADATA_VIRT_SIZE
2295 )
2297 # 3. Get snapshot parent.
2298 snap_parent = self.sr._vhdutil.get_parent(snap_uuid)
2300 # 4. Update metadata.
2301 util.SMlog('Set VDI {} metadata of snapshot'.format(snap_uuid))
2302 volume_metadata = {
2303 NAME_LABEL_TAG: util.to_plain_string(self.label),
2304 NAME_DESCRIPTION_TAG: util.to_plain_string(self.description),
2305 IS_A_SNAPSHOT_TAG: bool(snap_of_uuid),
2306 SNAPSHOT_OF_TAG: snap_of_uuid,
2307 SNAPSHOT_TIME_TAG: '',
2308 TYPE_TAG: self.ty,
2309 VDI_TYPE_TAG: vhdutil.VDI_TYPE_VHD,
2310 READ_ONLY_TAG: False,
2311 METADATA_OF_POOL_TAG: ''
2312 }
2313 self._linstor.set_volume_metadata(snap_uuid, volume_metadata)
2315 # 5. Set size.
2316 snap_vdi = LinstorVDI(self.sr, snap_uuid)
2317 if not snap_vdi._exists:
2318 raise xs_errors.XenError('VDISnapshot')
2320 volume_info = self._linstor.get_volume_info(snap_uuid)
2322 snap_vdi.size = self.sr._vhdutil.get_size_virt(snap_uuid)
2323 snap_vdi.utilisation = volume_info.allocated_size
2325 # 6. Update sm config.
2326 snap_vdi.sm_config = {}
2327 snap_vdi.sm_config['vdi_type'] = snap_vdi.vdi_type
2328 if snap_parent:
2329 snap_vdi.sm_config['vhd-parent'] = snap_parent
2330 snap_vdi.parent = snap_parent
2332 snap_vdi.label = self.label
2333 snap_vdi.description = self.description
2335 self._linstor.mark_volume_as_persistent(snap_uuid)
2337 return snap_vdi
2339 # --------------------------------------------------------------------------
2340 # Implement specific SR methods.
2341 # --------------------------------------------------------------------------
2343 def _rename(self, oldpath, newpath):
2344 # TODO: I'm not sure... Used by CBT.
2345 volume_uuid = self._linstor.get_volume_uuid_from_device_path(oldpath)
2346 self._linstor.update_volume_name(volume_uuid, newpath)
2348 def _do_snapshot(
2349 self, sr_uuid, vdi_uuid, snap_type, secondary=None, cbtlog=None
2350 ):
2351 # If cbt enabled, save file consistency state.
2352 if cbtlog is not None:
2353 if blktap2.VDI.tap_status(self.session, vdi_uuid):
2354 consistency_state = False
2355 else:
2356 consistency_state = True
2357 util.SMlog(
2358 'Saving log consistency state of {} for vdi: {}'
2359 .format(consistency_state, vdi_uuid)
2360 )
2361 else:
2362 consistency_state = None
2364 if self.vdi_type != vhdutil.VDI_TYPE_VHD:
2365 raise xs_errors.XenError('Unimplemented')
2367 if not blktap2.VDI.tap_pause(self.session, sr_uuid, vdi_uuid):
2368 raise util.SMException('Failed to pause VDI {}'.format(vdi_uuid))
2369 try:
2370 return self._snapshot(snap_type, cbtlog, consistency_state)
2371 finally:
2372 blktap2.VDI.tap_unpause(self.session, sr_uuid, vdi_uuid, secondary)
2374 def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None):
2375 util.SMlog(
2376 'LinstorVDI._snapshot for {} (type {})'
2377 .format(self.uuid, snap_type)
2378 )
2380 # 1. Checks...
2381 if self.hidden:
2382 raise xs_errors.XenError('VDIClone', opterr='hidden VDI')
2384 depth = self.sr._vhdutil.get_depth(self.uuid)
2385 if depth == -1:
2386 raise xs_errors.XenError(
2387 'VDIUnavailable',
2388 opterr='failed to get VHD depth'
2389 )
2390 elif depth >= vhdutil.MAX_CHAIN_SIZE:
2391 raise xs_errors.XenError('SnapshotChainTooLong')
2393 # Ensure we have a valid path if we don't have a local diskful.
2394 self.sr._linstor.get_device_path(self.uuid)
2396 volume_path = self.path
2397 if not util.pathexists(volume_path):
2398 raise xs_errors.XenError(
2399 'EIO',
2400 opterr='IO error checking path {}'.format(volume_path)
2401 )
2403 # 2. Create base and snap uuid (if required) and a journal entry.
2404 base_uuid = util.gen_uuid()
2405 snap_uuid = None
2407 if snap_type == VDI.SNAPSHOT_DOUBLE:
2408 snap_uuid = util.gen_uuid()
2410 clone_info = '{}_{}'.format(base_uuid, snap_uuid)
2412 active_uuid = self.uuid
2413 self.sr._journaler.create(
2414 LinstorJournaler.CLONE, active_uuid, clone_info
2415 )
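# The journal entry allows an interrupted clone to be detected and undone:
# see _handle_interrupted_clone in the error path below and the
# "scan SR first to trigger auto-repair" check in attach().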
2417 try:
2418 # 3. Self becomes the new base.
2419 # The device path remains the same.
2420 self._linstor.update_volume_uuid(self.uuid, base_uuid)
2421 self.uuid = base_uuid
2422 self.location = self.uuid
2423 self.read_only = True
2424 self.managed = False
2426 # 4. Create snapshots (new active and snap).
2427 active_vdi = self._create_snapshot(active_uuid)
2429 snap_vdi = None
2430 if snap_type == VDI.SNAPSHOT_DOUBLE:
2431 snap_vdi = self._create_snapshot(snap_uuid, active_uuid)
2433 self.label = 'base copy'
2434 self.description = ''
2436 # 5. Mark the base VDI as hidden so that it does not show up
2437 # in subsequent scans.
2438 self._mark_hidden()
2439 self._linstor.update_volume_metadata(
2440 self.uuid, {READ_ONLY_TAG: True}
2441 )
2443 # 6. We must update the new active VDI with the "paused" and
2444 # "host_" properties. Why? Because the original VDI has been
2445 # paused and we must unpause it after the snapshot.
2446 # See: `tap_unpause` in `blktap2.py`.
2447 vdi_ref = self.session.xenapi.VDI.get_by_uuid(active_uuid)
2448 sm_config = self.session.xenapi.VDI.get_sm_config(vdi_ref)
2449 for key in [x for x in sm_config.keys() if x == 'paused' or x.startswith('host_')]:
2450 active_vdi.sm_config[key] = sm_config[key]
2452 # 7. Verify parent locator field of both children and
2453 # delete base if unused.
2454 introduce_parent = True
2455 try:
2456 snap_parent = None
2457 if snap_vdi:
2458 snap_parent = snap_vdi.parent
2460 if active_vdi.parent != self.uuid and (
2461 snap_type == VDI.SNAPSHOT_SINGLE or
2462 snap_type == VDI.SNAPSHOT_INTERNAL or
2463 snap_parent != self.uuid
2464 ):
2465 util.SMlog(
2466 'Destroy unused base volume: {} (path={})'
2467 .format(self.uuid, self.path)
2468 )
2469 introduce_parent = False
2470 self._linstor.destroy_volume(self.uuid)
2471 except Exception as e:
2472 util.SMlog('Ignoring exception: {}'.format(e))
2473 pass
2475 # 8. Introduce the new VDI records.
2476 if snap_vdi:
2477 # If the parent is encrypted set the key_hash for the
2478 # new snapshot disk.
2479 vdi_ref = self.sr.srcmd.params['vdi_ref']
2480 sm_config = self.session.xenapi.VDI.get_sm_config(vdi_ref)
2481 # TODO: Maybe remove key_hash support.
2482 if 'key_hash' in sm_config:
2483 snap_vdi.sm_config['key_hash'] = sm_config['key_hash']
2484 # If we have CBT enabled on the VDI,
2485 # set CBT status for the new snapshot disk.
2486 if cbtlog:
2487 snap_vdi.cbt_enabled = True
2489 if snap_vdi:
2490 snap_vdi_ref = snap_vdi._db_introduce()
2491 util.SMlog(
2492 'vdi_clone: introduced VDI: {} ({})'
2493 .format(snap_vdi_ref, snap_vdi.uuid)
2494 )
2495 if introduce_parent:
2496 base_vdi_ref = self._db_introduce()
2497 self.session.xenapi.VDI.set_managed(base_vdi_ref, False)
2498 util.SMlog(
2499 'vdi_clone: introduced VDI: {} ({})'
2500 .format(base_vdi_ref, self.uuid)
2501 )
2502 self._linstor.update_volume_metadata(self.uuid, {
2503 NAME_LABEL_TAG: util.to_plain_string(self.label),
2504 NAME_DESCRIPTION_TAG: util.to_plain_string(
2505 self.description
2506 ),
2507 READ_ONLY_TAG: True,
2508 METADATA_OF_POOL_TAG: ''
2509 })
2511 # 9. Update cbt files if user created snapshot (SNAPSHOT_DOUBLE)
2512 if snap_type == VDI.SNAPSHOT_DOUBLE and cbtlog:
2513 try:
2514 self._cbt_snapshot(snap_uuid, cbt_consistency)
2515 except Exception:
2516 # CBT operation failed.
2517 # TODO: Implement me.
2518 raise
2520 if snap_type != VDI.SNAPSHOT_INTERNAL:
2521 self.sr._update_stats(self.size)
2523 # 10. Return info on the new user-visible leaf VDI.
2524 ret_vdi = snap_vdi
2525 if not ret_vdi:
2526 ret_vdi = self
2527 if not ret_vdi:
2528 ret_vdi = active_vdi
2530 vdi_ref = self.sr.srcmd.params['vdi_ref']
2531 self.session.xenapi.VDI.set_sm_config(
2532 vdi_ref, active_vdi.sm_config
2533 )
2534 except Exception as e:
2535 util.logException('Failed to snapshot!')
2536 try:
2537 self.sr._handle_interrupted_clone(
2538 active_uuid, clone_info, force_undo=True
2539 )
2540 self.sr._journaler.remove(LinstorJournaler.CLONE, active_uuid)
# Use a distinct variable name here: reusing 'e' would unbind the
# original exception and break the str(e) in the XenError raised below.
2541 except Exception as clean_err:
2542 util.SMlog(
2543 'WARNING: Failed to clean up failed snapshot: {}'
2544 .format(clean_err)
2545 )
2546 raise xs_errors.XenError('VDIClone', opterr=str(e))
2548 self.sr._journaler.remove(LinstorJournaler.CLONE, active_uuid)
2550 return ret_vdi.get_params()
2552 @staticmethod
2553 def _start_persistent_http_server(volume_name):
2554 pid_path = None
2555 http_server = None
2557 try:
2558 if volume_name == 'xcp-persistent-ha-statefile':
2559 port = '8076'
2560 else:
2561 port = '8077'
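# These ports must stay in sync with the URLs built in
# _start_persistent_nbd_server: 8076 for the HA statefile,
# 8077 for the redo log.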
2563 try:
2564 # Use a timeout call because XAPI may be unusable on startup
2565 # or if the host has been ejected; in these cases the call
2566 # could block indefinitely.
2567 session = util.timeout_call(5, util.get_localAPI_session)
2568 host_ip = util.get_this_host_address(session)
2569 except:
2570 # Fallback using the XHA file if session not available.
2571 host_ip, _ = get_ips_from_xha_config_file()
2572 if not host_ip:
2573 raise Exception(
2574 'Cannot start persistent HTTP server: no XAPI session, nor XHA config file'
2575 )
2577 arguments = [
2578 'http-disk-server',
2579 '--disk',
2580 '/dev/drbd/by-res/{}/0'.format(volume_name),
2581 '--ip',
2582 host_ip,
2583 '--port',
2584 port
2585 ]
2587 util.SMlog('Starting {} on port {}...'.format(arguments[0], port))
2588 http_server = subprocess.Popen(
2589 [FORK_LOG_DAEMON] + arguments,
2590 stdout=subprocess.PIPE,
2591 stderr=subprocess.STDOUT,
universal_newlines=True,  # decode output as text for the readiness regex below
2592 # Ensure we use another process group ID so we can kill this
2593 # process without touching the current one.
2594 preexec_fn=os.setsid
2595 )
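# The PID file name must follow the '/run/{type}-server-{volume}.pid'
# pattern expected by _kill_persistent_server (same for the NBD server
# below).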
2597 pid_path = '/run/http-server-{}.pid'.format(volume_name)
2598 with open(pid_path, 'w') as pid_file:
2599 pid_file.write(str(http_server.pid))
2601 reg_server_ready = re.compile("Server ready!$")
2602 def is_ready():
2603 while http_server.poll() is None:
2604 line = http_server.stdout.readline()
2605 if reg_server_ready.search(line):
2606 return True
2607 return False
2608 try:
2609 if not util.timeout_call(10, is_ready):
2610 raise Exception('Failed to wait for HTTP server startup: unexpected output')
2611 except util.TimeoutException:
2612 raise Exception('Failed to wait for HTTP server startup within the given delay')
2613 except Exception as e:
2614 if pid_path:
2615 try:
2616 os.remove(pid_path)
2617 except Exception:
2618 pass
2620 if http_server:
2621 # Kill process and children in this case...
2622 try:
2623 os.killpg(os.getpgid(http_server.pid), signal.SIGTERM)
2624 except:
2625 pass
2627 raise xs_errors.XenError(
2628 'VDIUnavailable',
2629 opterr='Failed to start http-server: {}'.format(e)
2630 )
2632 def _start_persistent_nbd_server(self, volume_name):
2633 pid_path = None
2634 nbd_path = None
2635 nbd_server = None
2637 try:
2638 # We use a precomputed device size.
2639 # So if XAPI is modified, we must update these values!
2640 if volume_name == 'xcp-persistent-ha-statefile':
2641 # See: https://github.com/xapi-project/xen-api/blob/703479fa448a8d7141954bb6e8964d8e25c4ac2e/ocaml/xapi/xha_statefile.ml#L32-L37
2642 port = '8076'
2643 device_size = 4 * 1024 * 1024
2644 else:
2645 # See: https://github.com/xapi-project/xen-api/blob/703479fa448a8d7141954bb6e8964d8e25c4ac2e/ocaml/database/redo_log.ml#L41-L44
2646 port = '8077'
2647 device_size = 256 * 1024 * 1024
2649 try:
2650 session = util.timeout_call(5, util.get_localAPI_session)
2651 ips = util.get_host_addresses(session)
2652 except Exception as e:
2653 _, ips = get_ips_from_xha_config_file()
2654 if not ips:
2655 raise Exception(
2656 'Cannot start persistent NBD server: no XAPI session, nor XHA config file ({})'.format(e)
2657 )
2658 ips = ips.values()
2660 arguments = [
2661 'nbd-http-server',
2662 '--socket-path',
2663 '/run/{}.socket'.format(volume_name),
2664 '--nbd-name',
2665 volume_name,
2666 '--urls',
2667 ','.join(['http://' + ip + ':' + port for ip in ips]),
2668 '--device-size',
2669 str(device_size)
2670 ]
2672 util.SMlog('Starting {} using port {}...'.format(arguments[0], port))
2673 nbd_server = subprocess.Popen(
2674 [FORK_LOG_DAEMON] + arguments,
2675 stdout=subprocess.PIPE,
2676 stderr=subprocess.STDOUT,
universal_newlines=True,  # decode output as text for the NBD path regex below
2677 # Ensure we use another process group ID so we can kill this
2678 # process without touching the current one.
2679 preexec_fn=os.setsid
2680 )
2682 pid_path = '/run/nbd-server-{}.pid'.format(volume_name)
2683 with open(pid_path, 'w') as pid_file:
2684 pid_file.write(str(nbd_server.pid))
2686 reg_nbd_path = re.compile("NBD `(/dev/nbd[0-9]+)` is now attached.$")
2687 def get_nbd_path():
2688 while nbd_server.poll() is None:
2689 line = nbd_server.stdout.readline()
2690 match = reg_nbd_path.search(line)
2691 if match:
2692 return match.group(1)
2693 # Use a timeout so we never block the SMAPI if there is a problem.
2694 try:
2695 nbd_path = util.timeout_call(10, get_nbd_path)
2696 if nbd_path is None:
2697 raise Exception('Empty NBD path (NBD server is probably dead)')
2698 except util.TimeoutException:
2699 raise Exception('Unable to read NBD path')
2701 util.SMlog('Create symlink: {} -> {}'.format(self.path, nbd_path))
2702 os.symlink(nbd_path, self.path)
2703 except Exception as e:
2704 if pid_path:
2705 try:
2706 os.remove(pid_path)
2707 except Exception:
2708 pass
2710 if nbd_path:
2711 try:
2712 os.remove(nbd_path)
2713 except Exception:
2714 pass
2716 if nbd_server:
2717 # Kill process and children in this case...
2718 try:
2719 os.killpg(os.getpgid(nbd_server.pid), signal.SIGTERM)
2720 except:
2721 pass
2723 raise xs_errors.XenError(
2724 'VDIUnavailable',
2725 opterr='Failed to start nbd-server: {}'.format(e)
2726 )
2728 @classmethod
2729 def _kill_persistent_server(self, type, volume_name, sig):
2730 try:
2731 path = '/run/{}-server-{}.pid'.format(type, volume_name)
2732 if not os.path.exists(path):
2733 return
2735 pid = None
2736 with open(path, 'r') as pid_file:
2737 try:
2738 pid = int(pid_file.read())
2739 except Exception:
2740 pass
2742 if pid is not None and util.check_pid_exists(pid):
2743 util.SMlog('Kill {} server {} (pid={})'.format(type, path, pid))
2744 try:
2745 os.killpg(os.getpgid(pid), sig)
2746 except Exception as e:
2747 util.SMlog('Failed to kill {} server: {}'.format(type, e))
2749 os.remove(path)
2750 except:
2751 pass
2753 @classmethod
2754 def _kill_persistent_http_server(self, volume_name, sig=signal.SIGTERM):
2755 return self._kill_persistent_server('http', volume_name, sig)
2757 @classmethod
2758 def _kill_persistent_nbd_server(self, volume_name, sig=signal.SIGTERM):
2759 return self._kill_persistent_server('nbd', volume_name, sig)
2761 def _check_http_nbd_volume_name(self):
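# self.path has the form '/dev/http-nbd/<volume-name>': strip the
# 14-character '/dev/http-nbd/' prefix to get the volume name.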
2762 volume_name = self.path[14:]
2763 if volume_name not in [
2764 'xcp-persistent-ha-statefile', 'xcp-persistent-redo-log'
2765 ]:
2766 raise xs_errors.XenError(
2767 'VDIUnavailable',
2768 opterr='Unsupported path: {}'.format(self.path)
2769 )
2770 return volume_name
2772 def _attach_using_http_nbd(self):
2773 volume_name = self._check_http_nbd_volume_name()
2775 # Ensure there is no NBD and HTTP server running.
2776 self._kill_persistent_nbd_server(volume_name)
2777 self._kill_persistent_http_server(volume_name)
2779 # 0. Fetch drbd path.
2780 must_get_device_path = True
2781 if not self.sr._is_master:
2782 # We are on a slave: check whether a diskful replica exists locally.
2783 try:
2784 volume_info = self._linstor.get_volume_info(self.uuid)
2785 except Exception as e:
2786 raise xs_errors.XenError(
2787 'VDIUnavailable',
2788 opterr='Cannot get volume info of {}: {}'
2789 .format(self.uuid, e)
2790 )
2792 hostname = socket.gethostname()
2793 must_get_device_path = hostname in volume_info.diskful
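# A local DRBD device is only required when this slave holds a diskful
# replica; a purely diskless slave only starts the NBD proxy backed by
# a remote HTTP disk server.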
2795 drbd_path = None
2796 if must_get_device_path or self.sr._is_master:
2797 # If we are master, we must ensure we have a diskless
2798 # or diskful available to init HA.
2799 # It also avoids this error in xensource.log
2800 # (/usr/libexec/xapi/cluster-stack/xhad/ha_set_pool_state):
2801 # init exited with code 8 [stdout = ''; stderr = 'SF: failed to write in State-File \x10 (fd 4208696). (sys 28)\x0A']
2802 # init returned MTC_EXIT_CAN_NOT_ACCESS_STATEFILE (State-File is inaccessible)
2803 available = False
2804 try:
2805 drbd_path = self._linstor.get_device_path(self.uuid)
2806 available = util.pathexists(drbd_path)
2807 except Exception:
2808 pass
2810 if not available:
2811 raise xs_errors.XenError(
2812 'VDIUnavailable',
2813 opterr='Cannot get device path of {}'.format(self.uuid)
2814 )
2816 # 1. Prepare http-nbd folder.
2817 try:
2818 if not os.path.exists('/dev/http-nbd/'):
2819 os.makedirs('/dev/http-nbd/')
2820 elif os.path.islink(self.path):
2821 os.remove(self.path)
2822 except OSError as e:
2823 if e.errno != errno.EEXIST:
2824 raise xs_errors.XenError(
2825 'VDIUnavailable',
2826 opterr='Cannot prepare http-nbd: {}'.format(e)
2827 )
2829 # 2. Start HTTP service if we have a diskful or if we are master.
2830 http_service = None
2831 if drbd_path:
2832 assert(drbd_path in (
2833 '/dev/drbd/by-res/xcp-persistent-ha-statefile/0',
2834 '/dev/drbd/by-res/xcp-persistent-redo-log/0'
2835 ))
2836 self._start_persistent_http_server(volume_name)
2838 # 3. Start NBD server in all cases.
2839 try:
2840 self._start_persistent_nbd_server(volume_name)
2841 except Exception as e:
2842 if drbd_path:
2843 self._kill_persistent_http_server(volume_name)
2844 raise
2846 self.attached = True
2847 return VDI.VDI.attach(self, self.sr.uuid, self.uuid)
2849 def _detach_using_http_nbd(self):
2850 volume_name = self._check_http_nbd_volume_name()
2851 self._kill_persistent_nbd_server(volume_name)
2852 self._kill_persistent_http_server(volume_name)
2854# ------------------------------------------------------------------------------
2857if __name__ == '__main__':
2858 def run():
2859 SRCommand.run(LinstorSR, DRIVER_INFO)
2861 if not TRACE_PERFS:
2862 run()
2863 else:
2864 util.make_profile('LinstorSR', run)
2865else:
2866 SR.registerSR(LinstorSR)