From 62132d8cf0f1af26526d38f7a719e6f620104a60 Mon Sep 17 00:00:00 2001 From: Nic Anderson Date: Mon, 3 Apr 2023 22:09:41 -0400 Subject: [PATCH] Attempt image with ceph/ceph#49954 patch --- Dockerfile | 6 + src/ceph-volume/ceph_volume/util/device.py | 702 +++++++++++++++ src/ceph-volume/ceph_volume/util/disk.py | 937 +++++++++++++++++++++ 3 files changed, 1645 insertions(+) create mode 100644 Dockerfile create mode 100644 src/ceph-volume/ceph_volume/util/device.py create mode 100644 src/ceph-volume/ceph_volume/util/disk.py diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..3d3d6b5 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,6 @@ +FROM quay.io/ceph/ceph:v17.2.5 + +LABEL org.opencontainers.image.source https://github.com/nanderson94/ceph-patch + +COPY src/ceph-volume/ceph_volume/util/disk.py /usr/lib/python3.6/site-packages/ceph_volume/util/disk.py +COPY src/ceph-volume/ceph_volume/util/device.py /usr/lib/python3.6/site-packages/ceph_volume/util/device.py diff --git a/src/ceph-volume/ceph_volume/util/device.py b/src/ceph-volume/ceph_volume/util/device.py new file mode 100644 index 0000000..e5ebb7d --- /dev/null +++ b/src/ceph-volume/ceph_volume/util/device.py @@ -0,0 +1,702 @@ +# -*- coding: utf-8 -*- + +import logging +import os +from functools import total_ordering +from ceph_volume import sys_info +from ceph_volume.api import lvm +from ceph_volume.util import disk, system +from ceph_volume.util.lsmdisk import LSMDisk +from ceph_volume.util.constants import ceph_disk_guids +from ceph_volume.util.disk import allow_loop_devices + + +logger = logging.getLogger(__name__) + + +report_template = """ +{dev:<25} {size:<12} {device_nodes:<15} {rot!s:<7} {available!s:<9} {model}""" + + +def encryption_status(abspath): + """ + Helper function to run ``encryption.status()``. It is done here to avoid + a circular import issue (encryption module imports from this module) and to + ease testing by allowing monkeypatching of this function. 
+ """ + from ceph_volume.util import encryption + return encryption.status(abspath) + + +class Devices(object): + """ + A container for Device instances with reporting + """ + + def __init__(self, filter_for_batch=False, with_lsm=False): + lvs = lvm.get_lvs() + lsblk_all = disk.lsblk_all() + all_devices_vgs = lvm.get_all_devices_vgs() + if not sys_info.devices: + sys_info.devices = disk.get_devices() + self.devices = [Device(k, + with_lsm, + lvs=lvs, + lsblk_all=lsblk_all, + all_devices_vgs=all_devices_vgs) for k in + sys_info.devices.keys()] + if filter_for_batch: + self.devices = [d for d in self.devices if d.available_lvm_batch] + + def pretty_report(self): + output = [ + report_template.format( + dev='Device Path', + size='Size', + rot='rotates', + model='Model name', + available='available', + device_nodes='Device nodes', + + )] + for device in sorted(self.devices): + output.append(device.report()) + return ''.join(output) + + def json_report(self): + output = [] + for device in sorted(self.devices): + output.append(device.json_report()) + return output + +@total_ordering +class Device(object): + + pretty_template = """ + {attr:<25} {value}""" + + report_fields = [ + 'ceph_device', + 'rejected_reasons', + 'available', + 'path', + 'sys_api', + 'device_id', + 'lsm_data', + ] + pretty_report_sys_fields = [ + 'actuators', + 'human_readable_size', + 'model', + 'removable', + 'ro', + 'rotational', + 'sas_address', + 'scheduler_mode', + 'vendor', + ] + + # define some class variables; mostly to enable the use of autospec in + # unittests + lvs = [] + + def __init__(self, path, with_lsm=False, lvs=None, lsblk_all=None, all_devices_vgs=None): + self.path = path + # LVs can have a vg/lv path, while disks will have /dev/sda + self.symlink = None + # check if we are a symlink + if os.path.islink(self.path): + self.symlink = self.path + real_path = os.path.realpath(self.path) + # check if we are not a device mapper + if "dm-" not in real_path: + self.path = real_path + if 
not sys_info.devices: + if self.path: + sys_info.devices = disk.get_devices(device=self.path) + else: + sys_info.devices = disk.get_devices() + if sys_info.devices.get(self.path, {}): + self.device_nodes = sys_info.devices[self.path]['device_nodes'] + self.sys_api = sys_info.devices.get(self.path, {}) + self.partitions = self._get_partitions() + self.lv_api = None + self.lvs = [] if not lvs else lvs + self.lsblk_all = lsblk_all + self.all_devices_vgs = all_devices_vgs + self.vgs = [] + self.vg_name = None + self.lv_name = None + self.disk_api = {} + self.blkid_api = None + self._exists = None + self._is_lvm_member = None + self.ceph_device = False + self._parse() + self.lsm_data = self.fetch_lsm(with_lsm) + + self.available_lvm, self.rejected_reasons_lvm = self._check_lvm_reject_reasons() + self.available_raw, self.rejected_reasons_raw = self._check_raw_reject_reasons() + self.available = self.available_lvm and self.available_raw + self.rejected_reasons = list(set(self.rejected_reasons_lvm + + self.rejected_reasons_raw)) + + self.device_id = self._get_device_id() + + def fetch_lsm(self, with_lsm): + ''' + Attempt to fetch libstoragemgmt (LSM) metadata, and return to the caller + as a dict. An empty dict is passed back to the caller if the target path + is not a block device, or lsm is unavailable on the host. Otherwise the + json returned will provide LSM attributes, and any associated errors that + lsm encountered when probing the device. + ''' + if not with_lsm or not self.exists or not self.is_device: + return {} + + lsm_disk = LSMDisk(self.path) + + return lsm_disk.json_report() + + def __lt__(self, other): + ''' + Implementing this method and __eq__ allows the @total_ordering + decorator to turn the Device class into a totally ordered type. + This can slower then implementing all comparison operations. + This sorting should put available devices before unavailable devices + and sort on the path otherwise (str sorting). 
+ ''' + if self.available == other.available: + return self.path < other.path + return self.available and not other.available + + def __eq__(self, other): + return self.path == other.path + + def __hash__(self): + return hash(self.path) + + def load_blkid_api(self): + if self.blkid_api is None: + self.blkid_api = disk.blkid(self.path) + + def _parse(self): + lv = None + if not self.sys_api: + # if no device was found check if we are a partition + partname = self.path.split('/')[-1] + for device, info in sys_info.devices.items(): + part = info['partitions'].get(partname, {}) + if part: + self.sys_api = part + break + + if self.lvs: + for _lv in self.lvs: + # if the path is not absolute, we have 'vg/lv', let's use LV name + # to get the LV. + if self.path[0] == '/': + if _lv.lv_path == self.path: + lv = _lv + break + else: + vgname, lvname = self.path.split('/') + if _lv.lv_name == lvname and _lv.vg_name == vgname: + lv = _lv + break + else: + if self.path[0] == '/': + lv = lvm.get_single_lv(filters={'lv_path': self.path}) + else: + vgname, lvname = self.path.split('/') + lv = lvm.get_single_lv(filters={'lv_name': lvname, + 'vg_name': vgname}) + + if lv: + self.lv_api = lv + self.lvs = [lv] + self.path = lv.lv_path + self.vg_name = lv.vg_name + self.lv_name = lv.name + self.ceph_device = lvm.is_ceph_device(lv) + else: + self.lvs = [] + if self.lsblk_all: + for dev in self.lsblk_all: + if dev['NAME'] == os.path.basename(self.path): + break + else: + dev = disk.lsblk(self.path) + self.disk_api = dev + device_type = dev.get('TYPE', '') + # always check is this is an lvm member + valid_types = ['part', 'disk'] + if allow_loop_devices(): + valid_types.append('loop') + if device_type in valid_types: + self._set_lvm_membership() + + self.ceph_disk = CephDiskDevice(self) + + def __repr__(self): + prefix = 'Unknown' + if self.is_lv: + prefix = 'LV' + elif self.is_partition: + prefix = 'Partition' + elif self.is_device: + prefix = 'Raw Device' + return '<%s: %s>' % (prefix, 
self.path) + + def pretty_report(self): + def format_value(v): + if isinstance(v, list): + return ', '.join(v) + else: + return v + def format_key(k): + return k.strip('_').replace('_', ' ') + output = ['\n====== Device report {} ======\n'.format(self.path)] + output.extend( + [self.pretty_template.format( + attr=format_key(k), + value=format_value(v)) for k, v in vars(self).items() if k in + self.report_fields and k != 'disk_api' and k != 'sys_api'] ) + output.extend( + [self.pretty_template.format( + attr=format_key(k), + value=format_value(v)) for k, v in self.sys_api.items() if k in + self.pretty_report_sys_fields]) + for lv in self.lvs: + output.append(""" + --- Logical Volume ---""") + output.extend( + [self.pretty_template.format( + attr=format_key(k), + value=format_value(v)) for k, v in lv.report().items()]) + return ''.join(output) + + def report(self): + return report_template.format( + dev=self.path, + size=self.size_human, + rot=self.rotational, + available=self.available, + model=self.model, + device_nodes=self.device_nodes + ) + + def json_report(self): + output = {k.strip('_'): v for k, v in vars(self).items() if k in + self.report_fields} + output['lvs'] = [lv.report() for lv in self.lvs] + return output + + def _get_device_id(self): + """ + Please keep this implementation in sync with get_device_id() in + src/common/blkdev.cc + """ + props = ['ID_VENDOR', 'ID_MODEL', 'ID_MODEL_ENC', 'ID_SERIAL_SHORT', 'ID_SERIAL', + 'ID_SCSI_SERIAL'] + p = disk.udevadm_property(self.path, props) + if p.get('ID_MODEL','').startswith('LVM PV '): + p['ID_MODEL'] = p.get('ID_MODEL_ENC', '').replace('\\x20', ' ').strip() + if 'ID_VENDOR' in p and 'ID_MODEL' in p and 'ID_SCSI_SERIAL' in p: + dev_id = '_'.join([p['ID_VENDOR'], p['ID_MODEL'], + p['ID_SCSI_SERIAL']]) + elif 'ID_MODEL' in p and 'ID_SERIAL_SHORT' in p: + dev_id = '_'.join([p['ID_MODEL'], p['ID_SERIAL_SHORT']]) + elif 'ID_SERIAL' in p: + dev_id = p['ID_SERIAL'] + if dev_id.startswith('MTFD'): + # Micron NVMes 
hide the vendor + dev_id = 'Micron_' + dev_id + else: + # the else branch should fallback to using sysfs and ioctl to + # retrieve device_id on FreeBSD. Still figuring out if/how the + # python ioctl implementation does that on FreeBSD + dev_id = '' + dev_id = dev_id.replace(' ', '_') + while '__' in dev_id: + dev_id = dev_id.replace('__', '_') + return dev_id + + def _set_lvm_membership(self): + if self._is_lvm_member is None: + # this is contentious, if a PV is recognized by LVM but has no + # VGs, should we consider it as part of LVM? We choose not to + # here, because most likely, we need to use VGs from this PV. + self._is_lvm_member = False + device_to_check = [self.path] + device_to_check.extend(self.partitions) + + # a pv can only be in one vg, so this should be safe + # FIXME: While the above assumption holds, sda1 and sda2 + # can each host a PV and VG. I think the vg_name property is + # actually unused (not 100% sure) and can simply be removed + vgs = None + if not self.all_devices_vgs: + self.all_devices_vgs = lvm.get_all_devices_vgs() + for path in device_to_check: + for dev_vg in self.all_devices_vgs: + if dev_vg.pv_name == path: + vgs = [dev_vg] + if vgs: + self.vgs.extend(vgs) + self.vg_name = vgs[0] + self._is_lvm_member = True + self.lvs.extend(lvm.get_device_lvs(path)) + if self.lvs: + self.ceph_device = any([True if lv.tags.get('ceph.osd_id') else False for lv in self.lvs]) + + def _get_partitions(self): + """ + For block devices LVM can reside on the raw block device or on a + partition. Return a list of paths to be checked for a pv. 
+ """ + partitions = [] + path_dir = os.path.dirname(self.path) + for partition in self.sys_api.get('partitions', {}).keys(): + partitions.append(os.path.join(path_dir, partition)) + return partitions + + @property + def exists(self): + return os.path.exists(self.path) + + @property + def has_fs(self): + self.load_blkid_api() + return 'TYPE' in self.blkid_api + + @property + def has_gpt_headers(self): + self.load_blkid_api() + return self.blkid_api.get("PTTYPE") == "gpt" + + @property + def rotational(self): + rotational = self.sys_api.get('rotational') + if rotational is None: + # fall back to lsblk if not found in sys_api + # default to '1' if no value is found with lsblk either + rotational = self.disk_api.get('ROTA', '1') + return rotational == '1' + + @property + def model(self): + return self.sys_api['model'] + + @property + def size_human(self): + return self.sys_api['human_readable_size'] + + @property + def size(self): + return self.sys_api['size'] + + @property + def parent_device(self): + if 'PKNAME' in self.disk_api: + return '/dev/%s' % self.disk_api['PKNAME'] + return None + + @property + def lvm_size(self): + """ + If this device was made into a PV it would lose 1GB in total size + due to the 1GB physical extent size we set when creating volume groups + """ + size = disk.Size(b=self.size) + lvm_size = disk.Size(gb=size.gb.as_int()) - disk.Size(gb=1) + return lvm_size + + @property + def is_lvm_member(self): + if self._is_lvm_member is None: + self._set_lvm_membership() + return self._is_lvm_member + + @property + def is_ceph_disk_member(self): + def is_member(device): + return 'ceph' in device.get('PARTLABEL', '') or \ + device.get('PARTTYPE', '') in ceph_disk_guids.keys() + # If we come from Devices(), self.lsblk_all is set already. + # Otherwise, we have to grab the data. 
+ details = self.lsblk_all or disk.lsblk_all() + _is_member = False + if self.sys_api.get("partitions"): + for part in self.sys_api.get("partitions").keys(): + for dev in details: + if part.startswith(dev['NAME']): + if is_member(dev): + _is_member = True + return _is_member + else: + return is_member(self.disk_api) + raise RuntimeError(f"Couln't check if device {self.path} is a ceph-disk member.") + + @property + def has_bluestore_label(self): + return disk.has_bluestore_label(self.path) + + @property + def is_mapper(self): + return self.path.startswith(('/dev/mapper', '/dev/dm-')) + + @property + def device_type(self): + self.load_blkid_api() + if 'type' in self.sys_api: + return self.sys_api['type'] + elif self.disk_api: + return self.disk_api['TYPE'] + elif self.blkid_api: + return self.blkid_api['TYPE'] + + @property + def is_mpath(self): + return self.device_type == 'mpath' + + @property + def is_lv(self): + return self.lv_api is not None + + @property + def is_partition(self): + self.load_blkid_api() + if self.disk_api: + return self.disk_api['TYPE'] == 'part' + elif self.blkid_api: + return self.blkid_api['TYPE'] == 'part' + return False + + @property + def is_device(self): + self.load_blkid_api() + api = None + if self.disk_api: + api = self.disk_api + elif self.blkid_api: + api = self.blkid_api + if api: + valid_types = ['disk', 'device', 'mpath'] + if allow_loop_devices(): + valid_types.append('loop') + return self.device_type in valid_types + return False + + @property + def is_acceptable_device(self): + return self.is_device or self.is_partition + + @property + def is_encrypted(self): + """ + Only correct for LVs, device mappers, and partitions. Will report a ``None`` + for raw devices. 
+ """ + self.load_blkid_api() + crypt_reports = [self.blkid_api.get('TYPE', ''), self.disk_api.get('FSTYPE', '')] + if self.is_lv: + # if disk APIs are reporting this is encrypted use that: + if 'crypto_LUKS' in crypt_reports: + return True + # if ceph-volume created this, then a tag would let us know + elif self.lv_api.encrypted: + return True + return False + elif self.is_partition: + return 'crypto_LUKS' in crypt_reports + elif self.is_mapper: + active_mapper = encryption_status(self.path) + if active_mapper: + # normalize a bit to ensure same values regardless of source + encryption_type = active_mapper['type'].lower().strip('12') # turn LUKS1 or LUKS2 into luks + return True if encryption_type in ['plain', 'luks'] else False + else: + return False + else: + return None + + @property + def used_by_ceph(self): + # only filter out data devices as journals could potentially be reused + osd_ids = [lv.tags.get("ceph.osd_id") is not None for lv in self.lvs + if lv.tags.get("ceph.type") in ["data", "block"]] + return any(osd_ids) + + @property + def vg_free_percent(self): + if self.vgs: + return [vg.free_percent for vg in self.vgs] + else: + return [1] + + @property + def vg_size(self): + if self.vgs: + return [vg.size for vg in self.vgs] + else: + # TODO fix this...we can probably get rid of vg_free + return self.vg_free + + @property + def vg_free(self): + ''' + Returns the free space in all VGs on this device. If no VGs are + present, returns the disk size. 
+ ''' + if self.vgs: + return [vg.free for vg in self.vgs] + else: + # We could also query 'lvmconfig + # --typeconfig full' and use allocations -> physical_extent_size + # value to project the space for a vg + # assuming 4M extents here + extent_size = 4194304 + vg_free = int(self.size / extent_size) * extent_size + if self.size % extent_size == 0: + # If the extent size divides size exactly, deduct on extent for + # LVM metadata + vg_free -= extent_size + return [vg_free] + + @property + def has_partitions(self): + ''' + Boolean to determine if a given device has partitions. + ''' + if self.sys_api.get('partitions'): + return True + return False + + def _check_generic_reject_reasons(self): + reasons = [ + ('id_bus', 'usb', 'id_bus'), + ('ro', '1', 'read-only'), + ('locked', 1, 'locked'), + ] + rejected = [reason for (k, v, reason) in reasons if + self.sys_api.get(k, '') == v] + if self.is_acceptable_device: + # reject disks smaller than 5GB + if int(self.sys_api.get('size', 0)) < 5368709120: + rejected.append('Insufficient space (<5GB)') + else: + rejected.append("Device type is not acceptable. It should be raw device or partition") + if self.is_ceph_disk_member: + rejected.append("Used by ceph-disk") + + try: + if self.has_bluestore_label: + rejected.append('Has BlueStore device label') + except OSError as e: + # likely failed to open the device. assuming it is BlueStore is the safest option + # so that a possibly-already-existing OSD doesn't get overwritten + logger.error('failed to determine if device {} is BlueStore. device should not be used to avoid false negatives. err: {}'.format(self.path, e)) + rejected.append('Failed to determine if device is BlueStore') + + if self.is_partition: + try: + if disk.has_bluestore_label(self.parent_device): + rejected.append('Parent has BlueStore device label') + except OSError as e: + # likely failed to open the device. 
assuming the parent is BlueStore is the safest + # option so that a possibly-already-existing OSD doesn't get overwritten + logger.error('failed to determine if partition {} (parent: {}) has a BlueStore parent. partition should not be used to avoid false negatives. err: {}'.format(self.path, self.parent_device, e)) + rejected.append('Failed to determine if parent device is BlueStore') + + if self.has_gpt_headers: + rejected.append('Has GPT headers') + if self.has_partitions: + rejected.append('Has partitions') + return rejected + + def _check_lvm_reject_reasons(self): + rejected = [] + if self.vgs: + available_vgs = [vg for vg in self.vgs if int(vg.vg_free_count) > 10] + if not available_vgs: + rejected.append('Insufficient space (<10 extents) on vgs') + else: + # only check generic if no vgs are present. Vgs might hold lvs and + # that might cause 'locked' to trigger + rejected.extend(self._check_generic_reject_reasons()) + + return len(rejected) == 0, rejected + + def _check_raw_reject_reasons(self): + rejected = self._check_generic_reject_reasons() + if len(self.vgs) > 0: + rejected.append('LVM detected') + + return len(rejected) == 0, rejected + + @property + def available_lvm_batch(self): + if self.sys_api.get("partitions"): + return False + if system.device_is_mounted(self.path): + return False + return self.is_device or self.is_lv + + +class CephDiskDevice(object): + """ + Detect devices that have been created by ceph-disk, report their type + (journal, data, etc..). Requires a ``Device`` object as input. + """ + + def __init__(self, device): + self.device = device + self._is_ceph_disk_member = None + + @property + def partlabel(self): + """ + In containers, the 'PARTLABEL' attribute might not be detected + correctly via ``lsblk``, so we poke at the value with ``lsblk`` first, + falling back to ``blkid`` (which works correclty in containers). 
+ """ + lsblk_partlabel = self.device.disk_api.get('PARTLABEL') + if lsblk_partlabel: + return lsblk_partlabel + return self.device.blkid_api.get('PARTLABEL', '') + + @property + def parttype(self): + """ + Seems like older version do not detect PARTTYPE correctly (assuming the + info in util/disk.py#lsblk is still valid). + SImply resolve to using blkid since lsblk will throw an error if asked + for an unknown columns + """ + return self.device.blkid_api.get('PARTTYPE', '') + + @property + def is_member(self): + if self._is_ceph_disk_member is None: + if 'ceph' in self.partlabel: + self._is_ceph_disk_member = True + return True + elif self.parttype in ceph_disk_guids.keys(): + return True + return False + return self._is_ceph_disk_member + + @property + def type(self): + types = [ + 'data', 'wal', 'db', 'lockbox', 'journal', + # ceph-disk uses 'ceph block' when placing data in bluestore, but + # keeps the regular OSD files in 'ceph data' :( :( :( :( + 'block', + ] + for t in types: + if t in self.partlabel: + return t + label = ceph_disk_guids.get(self.parttype, {}) + return label.get('type', 'unknown').split('.')[-1] diff --git a/src/ceph-volume/ceph_volume/util/disk.py b/src/ceph-volume/ceph_volume/util/disk.py new file mode 100644 index 0000000..c55645a --- /dev/null +++ b/src/ceph-volume/ceph_volume/util/disk.py @@ -0,0 +1,937 @@ +import logging +import os +import re +import stat +import time +from ceph_volume import process +from ceph_volume.api import lvm +from ceph_volume.util.system import get_file_contents + + +logger = logging.getLogger(__name__) + + +# The blkid CLI tool has some oddities which prevents having one common call +# to extract the information instead of having separate utilities. The `udev` +# type of output is needed in older versions of blkid (v 2.23) that will not +# work correctly with just the ``-p`` flag to bypass the cache for example. +# Xenial doesn't have this problem as it uses a newer blkid version. 
+ + +def get_partuuid(device): + """ + If a device is a partition, it will probably have a PARTUUID on it that + will persist and can be queried against `blkid` later to detect the actual + device + """ + out, err, rc = process.call( + ['blkid', '-c', '/dev/null', '-s', 'PARTUUID', '-o', 'value', device] + ) + return ' '.join(out).strip() + + +def _blkid_parser(output): + """ + Parses the output from a system ``blkid`` call, requires output to be + produced using the ``-p`` flag which bypasses the cache, mangling the + names. These names are corrected to what it would look like without the + ``-p`` flag. + + Normal output:: + + /dev/sdb1: UUID="62416664-cbaf-40bd-9689-10bd337379c3" TYPE="xfs" [...] + """ + # first spaced separated item is garbage, gets tossed: + output = ' '.join(output.split()[1:]) + # split again, respecting possible whitespace in quoted values + pairs = output.split('" ') + raw = {} + processed = {} + mapping = { + 'UUID': 'UUID', + 'TYPE': 'TYPE', + 'PART_ENTRY_NAME': 'PARTLABEL', + 'PART_ENTRY_UUID': 'PARTUUID', + 'PART_ENTRY_TYPE': 'PARTTYPE', + 'PTTYPE': 'PTTYPE', + } + + for pair in pairs: + try: + column, value = pair.split('=') + except ValueError: + continue + raw[column] = value.strip().strip().strip('"') + + for key, value in raw.items(): + new_key = mapping.get(key) + if not new_key: + continue + processed[new_key] = value + + return processed + + +def blkid(device): + """ + The blkid interface to its CLI, creating an output similar to what is + expected from ``lsblk``. In most cases, ``lsblk()`` should be the preferred + method for extracting information about a device. There are some corner + cases where it might provide information that is otherwise unavailable. + + The system call uses the ``-p`` flag which bypasses the cache, the caveat + being that the keys produced are named completely different to expected + names. + + For example, instead of ``PARTLABEL`` it provides a ``PART_ENTRY_NAME``. 
+ A bit of translation between these known keys is done, which is why + ``lsblk`` should always be preferred: the output provided here is not as + rich, given that a translation of keys is required for a uniform interface + with the ``-p`` flag. + + Label name to expected output chart: + + cache bypass name expected name + + UUID UUID + TYPE TYPE + PART_ENTRY_NAME PARTLABEL + PART_ENTRY_UUID PARTUUID + """ + out, err, rc = process.call( + ['blkid', '-c', '/dev/null', '-p', device] + ) + return _blkid_parser(' '.join(out)) + + +def get_part_entry_type(device): + """ + Parses the ``ID_PART_ENTRY_TYPE`` from the "low level" (bypasses the cache) + output that uses the ``udev`` type of output. This output is intended to be + used for udev rules, but it is useful in this case as it is the only + consistent way to retrieve the GUID used by ceph-disk to identify devices. + """ + out, err, rc = process.call(['blkid', '-c', '/dev/null', '-p', '-o', 'udev', device]) + for line in out: + if 'ID_PART_ENTRY_TYPE=' in line: + return line.split('=')[-1].strip() + return '' + + +def get_device_from_partuuid(partuuid): + """ + If a device has a partuuid, query blkid so that it can tell us what that + device is + """ + out, err, rc = process.call( + ['blkid', '-c', '/dev/null', '-t', 'PARTUUID="%s"' % partuuid, '-o', 'device'] + ) + return ' '.join(out).strip() + + +def remove_partition(device): + """ + Removes a partition using parted + + :param device: A ``Device()`` object + """ + # Sometimes there's a race condition that makes 'ID_PART_ENTRY_NUMBER' be not present + # in the output of `udevadm info --query=property`. + # Probably not ideal and not the best fix but this allows to get around that issue. + # The idea is to make it retry multiple times before actually failing. 
+ for i in range(10): + udev_info = udevadm_property(device.path) + partition_number = udev_info.get('ID_PART_ENTRY_NUMBER') + if partition_number: + break + time.sleep(0.2) + if not partition_number: + raise RuntimeError('Unable to detect the partition number for device: %s' % device.path) + + process.run( + ['parted', device.parent_device, '--script', '--', 'rm', partition_number] + ) + + +def _stat_is_device(stat_obj): + """ + Helper function that will interpret ``os.stat`` output directly, so that other + functions can call ``os.stat`` once and interpret that result several times + """ + return stat.S_ISBLK(stat_obj) + + +def _lsblk_parser(line): + """ + Parses lines in lsblk output. Requires output to be in pair mode (``-P`` flag). Lines + need to be whole strings, the line gets split when processed. + + :param line: A string, with the full line from lsblk output + """ + # parse the COLUMN="value" output to construct the dictionary + pairs = line.split('" ') + parsed = {} + for pair in pairs: + try: + column, value = pair.split('=') + except ValueError: + continue + parsed[column] = value.strip().strip().strip('"') + return parsed + + +def device_family(device): + """ + Returns a list of associated devices. It assumes that ``device`` is + a parent device. It is up to the caller to ensure that the device being + used is a parent, not a partition. + """ + labels = ['NAME', 'PARTLABEL', 'TYPE'] + command = ['lsblk', '-P', '-p', '-o', ','.join(labels), device] + out, err, rc = process.call(command) + devices = [] + for line in out: + devices.append(_lsblk_parser(line)) + + return devices + + +def udevadm_property(device, properties=[]): + """ + Query udevadm for information about device properties. + Optionally pass a list of properties to return. A requested property might + not be returned if not present. 
+ + Expected output format:: + # udevadm info --query=property --name=/dev/sda :( + DEVNAME=/dev/sda + DEVTYPE=disk + ID_ATA=1 + ID_BUS=ata + ID_MODEL=SK_hynix_SC311_SATA_512GB + ID_PART_TABLE_TYPE=gpt + ID_PART_TABLE_UUID=c8f91d57-b26c-4de1-8884-0c9541da288c + ID_PATH=pci-0000:00:17.0-ata-3 + ID_PATH_TAG=pci-0000_00_17_0-ata-3 + ID_REVISION=70000P10 + ID_SERIAL=SK_hynix_SC311_SATA_512GB_MS83N71801150416A + TAGS=:systemd: + USEC_INITIALIZED=16117769 + ... + """ + out = _udevadm_info(device) + ret = {} + for line in out: + p, v = line.split('=', 1) + if not properties or p in properties: + ret[p] = v + return ret + + +def _udevadm_info(device): + """ + Call udevadm and return the output + """ + cmd = ['udevadm', 'info', '--query=property', device] + out, _err, _rc = process.call(cmd) + return out + + +def lsblk(device, columns=None, abspath=False): + result = lsblk_all(device=device, + columns=columns, + abspath=abspath) + if not result: + raise RuntimeError(f"{device} not found is lsblk report") + + return result[0] + +def lsblk_all(device='', columns=None, abspath=False): + """ + Create a dictionary of identifying values for a device using ``lsblk``. + Each supported column is a key, in its *raw* format (all uppercase + usually). ``lsblk`` has support for certain "columns" (in blkid these + would be labels), and these columns vary between distributions and + ``lsblk`` versions. The newer versions support a richer set of columns, + while older ones were a bit limited. 
+ + These are a subset of lsblk columns which are known to work on both CentOS 7 and Xenial: + + NAME device name + KNAME internal kernel device name + PKNAME internal kernel parent device name + MAJ:MIN major:minor device number + FSTYPE filesystem type + MOUNTPOINT where the device is mounted + LABEL filesystem LABEL + UUID filesystem UUID + RO read-only device + RM removable device + MODEL device identifier + SIZE size of the device + STATE state of the device + OWNER user name + GROUP group name + MODE device node permissions + ALIGNMENT alignment offset + MIN-IO minimum I/O size + OPT-IO optimal I/O size + PHY-SEC physical sector size + LOG-SEC logical sector size + ROTA rotational device + SCHED I/O scheduler name + RQ-SIZE request queue size + TYPE device type + PKNAME internal parent kernel device name + DISC-ALN discard alignment offset + DISC-GRAN discard granularity + DISC-MAX discard max bytes + DISC-ZERO discard zeroes data + + There is a bug in ``lsblk`` where using all the available (supported) + columns will result in no output (!), in order to workaround this the + following columns have been removed from the default reporting columns: + + * RQ-SIZE (request queue size) + * MIN-IO minimum I/O size + * OPT-IO optimal I/O size + + These should be available however when using `columns`. For example:: + + >>> lsblk('/dev/sda1', columns=['OPT-IO']) + {'OPT-IO': '0'} + + Normal CLI output, as filtered by the flags in this function will look like :: + + $ lsblk -P -o NAME,KNAME,PKNAME,MAJ:MIN,FSTYPE,MOUNTPOINT + NAME="sda1" KNAME="sda1" MAJ:MIN="8:1" FSTYPE="ext4" MOUNTPOINT="/" + + :param columns: A list of columns to report as keys in its original form. 
+ :param abspath: Set the flag for absolute paths on the report + """ + default_columns = [ + 'NAME', 'KNAME', 'PKNAME', 'MAJ:MIN', 'FSTYPE', 'MOUNTPOINT', 'LABEL', + 'UUID', 'RO', 'RM', 'MODEL', 'SIZE', 'STATE', 'OWNER', 'GROUP', 'MODE', + 'ALIGNMENT', 'PHY-SEC', 'LOG-SEC', 'ROTA', 'SCHED', 'TYPE', 'DISC-ALN', + 'DISC-GRAN', 'DISC-MAX', 'DISC-ZERO', 'PKNAME', 'PARTLABEL' + ] + columns = columns or default_columns + # -P -> Produce pairs of COLUMN="value" + # -p -> Return full paths to devices, not just the names, when ``abspath`` is set + # -o -> Use the columns specified or default ones provided by this function + base_command = ['lsblk', '-P'] + if abspath: + base_command.append('-p') + base_command.append('-o') + base_command.append(','.join(columns)) + if device: + base_command.append('--nodeps') + base_command.append(device) + + out, err, rc = process.call(base_command) + + if rc != 0: + raise RuntimeError(f"Error: {err}") + + result = [] + + for line in out: + result.append(_lsblk_parser(line)) + + return result + + +def is_device(dev): + """ + Boolean to determine if a given device is a block device (**not** + a partition!) 
def is_partition(dev):
    """
    Boolean to determine if a given device is a partition, like /dev/sda1

    ``lsblk`` is consulted first; only when it reports no ``TYPE`` does the
    function fall back to ``stat`` and the sysfs ``partition`` marker.
    """
    if not os.path.exists(dev):
        return False
    # use lsblk first, fall back to using stat
    lsblk_type = lsblk(dev).get('TYPE')
    if lsblk_type:
        return lsblk_type == 'part'

    # fallback to stat
    stat_obj = os.stat(dev)
    # NOTE(review): anything ``_stat_is_device`` accepts is reported as
    # "not a partition" without consulting sysfs - confirm this is intended.
    if _stat_is_device(stat_obj.st_mode):
        return False

    major = os.major(stat_obj.st_rdev)
    minor = os.minor(stat_obj.st_rdev)
    return os.path.exists('/sys/dev/block/%d:%d/partition' % (major, minor))


def is_ceph_rbd(dev):
    """
    Boolean to determine if a given device is a ceph RBD device, like /dev/rbd0
    """
    return dev.startswith('/dev/rbd')


class BaseFloatUnit(float):
    """
    Base class to support float representations of size values. Suffix is
    computed on child classes by inspecting the class name
    """

    def __repr__(self):
        return "<%s(%s)>" % (self.__class__.__name__, self.__float__())

    def __str__(self):
        # ``FloatGB`` -> ``GB``: the unit is whatever follows "Float"
        return "{size:.2f} {suffix}".format(
            size=self.__float__(),
            suffix=self.__class__.__name__.split('Float')[-1]
        )

    def as_int(self):
        return int(self.real)

    def as_float(self):
        return self.real


class FloatB(BaseFloatUnit):
    pass


class FloatMB(BaseFloatUnit):
    pass


class FloatGB(BaseFloatUnit):
    pass


class FloatKB(BaseFloatUnit):
    pass


class FloatTB(BaseFloatUnit):
    pass


class FloatPB(BaseFloatUnit):
    pass


class Size(object):
    """
    Helper to provide an interface for different sizes given a single initial
    input. Allows for comparison between different size objects, which avoids
    the need to convert sizes before comparison (e.g. comparing megabytes
    against gigabytes).

    Common comparison operators are supported::

        >>> hd1 = Size(gb=400)
        >>> hd2 = Size(gb=500)
        >>> hd1 > hd2
        False
        >>> hd1 < hd2
        True
        >>> hd1 == hd2
        False
        >>> hd1 == Size(gb=400)
        True

    The Size object can also be multiplied or divided::

        >>> hd1
        <Size(400.00 GB)>
        >>> hd1 * 2
        <Size(800.00 GB)>
        >>> hd1 / 2
        <Size(200.00 GB)>

    Additions and subtractions are only supported between Size objects::

        >>> Size(gb=224) - Size(gb=100)
        <Size(124.00 GB)>
        >>> Size(gb=1) + Size(mb=300)
        <Size(1.29 GB)>

    Can also display a human-readable representation, with automatic detection
    on best suited unit, or alternatively, specific unit representation::

        >>> s = Size(mb=2211)
        >>> s
        <Size(2.16 GB)>
        >>> s.mb
        <FloatMB(2211.0)>
        >>> print("Total size: %s" % s.mb)
        Total size: 2211.00 MB
        >>> print("Total size: %s" % s)
        Total size: 2.16 GB
    """

    @classmethod
    def parse(cls, size):
        """
        Build a ``Size`` from a string such as ``"10GB"``, ``"10g"`` or
        ``"10"``. A trailing two-letter (``kb`` .. ``pb``) or one-letter
        (``b``, ``k`` .. ``p``) suffix selects the unit, case-insensitively;
        anything else is parsed as a plain number of bytes.
        """
        if (len(size) > 2 and
                size[-2].lower() in ['k', 'm', 'g', 't', 'p'] and
                size[-1].lower() == 'b'):
            return cls(**{size[-2:].lower(): float(size[0:-2])})
        elif size[-1].lower() in ['b', 'k', 'm', 'g', 't', 'p']:
            return cls(**{size[-1].lower(): float(size[0:-1])})
        else:
            return cls(b=float(size))

    def __init__(self, multiplier=1024, **kw):
        """
        :param multiplier: step between two consecutive units (1024 by default)
        :param kw: exactly one ``unit=value`` pair is honoured (``b``, ``kb``,
                   ``gigabytes``, ...); any further pairs are ignored
        """
        self._multiplier = multiplier
        # create a mapping of units-to-multiplier, skip bytes as that is
        # calculated initially always and does not need to convert
        aliases = [
            [('k', 'kb', 'kilobytes'), self._multiplier],
            [('m', 'mb', 'megabytes'), self._multiplier ** 2],
            [('g', 'gb', 'gigabytes'), self._multiplier ** 3],
            [('t', 'tb', 'terabytes'), self._multiplier ** 4],
            [('p', 'pb', 'petabytes'), self._multiplier ** 5]
        ]
        # and mappings for units-to-formatters, including bytes and aliases for
        # each
        format_aliases = [
            [('b', 'bytes'), FloatB],
            [('kb', 'kilobytes'), FloatKB],
            [('mb', 'megabytes'), FloatMB],
            [('gb', 'gigabytes'), FloatGB],
            [('tb', 'terabytes'), FloatTB],
            [('pb', 'petabytes'), FloatPB],
        ]
        self._formatters = {
            alias: formatter
            for names, formatter in format_aliases
            for alias in names
        }
        self._factors = {
            alias: factor
            for names, factor in aliases
            for alias in names
        }

        for k, v in kw.items():
            self._convert(v, k)
            # only pursue the first occurrence
            break

    def _convert(self, size, unit):
        """
        Convert any size down to bytes so that other methods can rely on bytes
        being available always, regardless of what they pass in, avoiding the
        need for a mapping of every permutation.
        """
        if unit in ['b', 'bytes']:
            self._b = size
            return
        factor = self._factors[unit]
        self._b = float(size * factor)

    def _get_best_format(self):
        """
        Go through all the supported units, and use the first one that is not
        greater than 1024. This allows to represent size in the most readable
        format available. Sizes beyond 1024 PB are reported in petabytes, the
        largest unit known here, instead of falling through to ``None``.
        """
        for unit in ['b', 'kb', 'mb', 'gb', 'tb', 'pb']:
            value = getattr(self, unit)
            if value <= 1024:
                return value
        return value

    def __repr__(self):
        return "<Size(%s)>" % self._get_best_format()

    def __str__(self):
        return "%s" % self._get_best_format()

    def __format__(self, spec):
        return str(self._get_best_format()).__format__(spec)

    def __int__(self):
        return int(self._b)

    def __float__(self):
        # ``_b`` is stored as given for byte inputs and may be an int;
        # ``__float__`` is required to return a real float
        return float(self._b)

    def __lt__(self, other):
        if isinstance(other, Size):
            return self._b < other._b
        else:
            return self.b < other

    def __le__(self, other):
        if isinstance(other, Size):
            return self._b <= other._b
        else:
            return self.b <= other

    def __eq__(self, other):
        if isinstance(other, Size):
            return self._b == other._b
        else:
            return self.b == other

    def __ne__(self, other):
        if isinstance(other, Size):
            return self._b != other._b
        else:
            return self.b != other

    def __ge__(self, other):
        if isinstance(other, Size):
            return self._b >= other._b
        else:
            return self.b >= other

    def __gt__(self, other):
        if isinstance(other, Size):
            return self._b > other._b
        else:
            return self.b > other

    def __add__(self, other):
        if isinstance(other, Size):
            _b = self._b + other._b
            return Size(b=_b)
        raise TypeError('Cannot add "Size" object with int')

    def __sub__(self, other):
        if isinstance(other, Size):
            _b = self._b - other._b
            return Size(b=_b)
        raise TypeError('Cannot subtract "Size" object from int')

    def __mul__(self, other):
        if isinstance(other, Size):
            raise TypeError('Cannot multiply with "Size" object')
        _b = self._b * other
        return Size(b=_b)

    def __truediv__(self, other):
        # Size / Size yields a plain ratio, Size / number yields a Size
        if isinstance(other, Size):
            return self._b / other._b
        _b = self._b / other
        return Size(b=_b)

    def __div__(self, other):
        if isinstance(other, Size):
            return self._b / other._b
        _b = self._b / other
        return Size(b=_b)

    def __bool__(self):
        return self.b != 0

    def __nonzero__(self):
        return self.__bool__()

    def __getattr__(self, unit):
        """
        Calculate units on the fly, relies on the fact that ``bytes`` has been
        converted at instantiation. Units that don't exist will trigger an
        ``AttributeError``
        """
        # Private and dunder names are never units. Answering them here,
        # before touching ``self._formatters``, keeps a bare instance (as
        # created by ``copy``/``pickle``) from recursing into this method
        # forever while its ``__dict__`` is still empty.
        if unit.startswith('_'):
            raise AttributeError('Size object has not attribute "%s"' % unit)
        try:
            formatter = self._formatters[unit]
        except KeyError:
            raise AttributeError('Size object has not attribute "%s"' % unit)
        if unit in ['b', 'bytes']:
            return formatter(self._b)
        try:
            factor = self._factors[unit]
        except KeyError:
            raise AttributeError('Size object has not attribute "%s"' % unit)
        return formatter(float(self._b) / factor)


def human_readable_size(size):
    """
    Take a size in bytes, and transform it into a human readable size with up
    to two decimals of precision.

    Values of 1024 PB and more stay expressed in PB, as there is no larger
    suffix to move on to.
    """
    suffixes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
    # Only divide while there is a bigger suffix left to switch to
    for suffix in suffixes[:-1]:
        if size < 1024:
            break
        size = size / 1024
    else:
        suffix = suffixes[-1]
    return "{size:.2f} {suffix}".format(
        size=size,
        suffix=suffix)


def size_from_human_readable(s):
    """
    Takes a human readable string and converts into a Size. If no unit is
    passed, bytes is assumed.

    Accepts one-letter (``10G``) and two-letter (``10GB``) suffixes as well as
    an explicit ``B``, all case-insensitive and with optional spaces. Returns
    ``None`` when the suffix is not a known unit.
    """
    s = s.replace(' ', '')
    if s[-1].isdigit():
        return Size(b=float(s))
    unit = s[-1].lower()
    number = s[:-1]
    if unit == 'b' and number and number[-1].isalpha():
        # two-letter suffix such as "GB": the unit is the letter before "B"
        unit = number[-1].lower()
        number = number[:-1]
    n = float(number)
    if unit == 'b':
        return Size(b=n)
    if unit in ('p', 't', 'g', 'm', 'k'):
        return Size(**{unit + 'b': n})
    return None
def get_partitions_facts(sys_block_path):
    """
    Describe every partition found directly below ``sys_block_path``.

    A sub-directory counts as a partition when it holds a non-empty
    ``partition`` file. The result maps the sub-directory name to a dict with
    the keys ``start``, ``sectors``, ``sectorsize``, ``size``,
    ``human_readable_size`` and ``holders``.
    """
    facts = {}
    for entry in os.listdir(sys_block_path):
        entry_dir = os.path.join(sys_block_path, entry)
        marker = os.path.join(entry_dir, 'partition')
        if not os.path.exists(marker):
            continue
        if not get_file_contents(marker):
            continue

        start = get_file_contents(entry_dir + "/start", 0)
        sectors = get_file_contents(entry_dir + "/size", 0)

        # prefer the logical block size, fall back to the hardware sector size
        sectorsize = get_file_contents(entry_dir + "/queue/logical_block_size")
        if not sectorsize:
            sectorsize = get_file_contents(entry_dir + "/queue/hw_sector_size", 512)

        size_in_bytes = float(sectors) * 512
        facts[entry] = {
            'start': start,
            'sectors': sectors,
            'sectorsize': sectorsize,
            'size': size_in_bytes,
            'human_readable_size': human_readable_size(size_in_bytes),
            'holders': list(os.listdir(entry_dir + '/holders')),
        }
    return facts


def is_mapper_device(device_name):
    """
    Boolean to determine if a path points at a device mapper node, i.e. it
    starts with ``/dev/mapper`` or ``/dev/dm-``
    """
    mapper_prefixes = ('/dev/mapper', '/dev/dm-')
    return device_name.startswith(mapper_prefixes)


def is_locked_raw_device(disk_path):
    """
    A device can be locked by a third party software like a database.
    To detect that case, the device is opened in Read/Write and exclusive mode

    Returns 1 when opening (or closing) the device fails with ``OSError``,
    0 otherwise.
    """
    try:
        descriptor = os.open(disk_path, os.O_RDWR | os.O_EXCL, 0)
        os.close(descriptor)
    except OSError:
        return 1
    return 0
class AllowLoopDevices(object):
    """
    Callable switch driven by the ``CEPH_VOLUME_ALLOW_LOOP_DEVICES``
    environment variable. Any value other than ``false``, ``no`` or ``0``
    (case-insensitive) turns the class-level ``allow`` flag on; a warning is
    logged the first time that happens.
    """
    allow = False
    warned = False

    @classmethod
    def __call__(cls):
        setting = os.environ.get("CEPH_VOLUME_ALLOW_LOOP_DEVICES", "false")
        if setting.lower() in ("false", 'no', '0'):
            return cls.allow

        cls.allow = True
        if cls.warned:
            return cls.allow

        logger.warning(
            "CEPH_VOLUME_ALLOW_LOOP_DEVICES is set in your "
            "environment, so we will allow the use of unattached loop"
            " devices as disks. This feature is intended for "
            "development purposes only and will never be supported in"
            " production. Issues filed based on this behavior will "
            "likely be ignored."
        )
        cls.warned = True
        return cls.allow


allow_loop_devices = AllowLoopDevices()


def get_block_devs_sysfs(_sys_block_path='/sys/block', _sys_dev_block_path='/sys/dev/block', device=''):
    """
    List block devices and partitions by walking sysfs.

    Returns a list of ``[kname, name, type]`` entries sorted by their first
    element. Whole devices come from ``_sys_block_path`` (or only ``device``
    when given) and are typed ``disk``, ``loop`` or by the prefix of their
    device mapper uuid; partitions come from ``_sys_dev_block_path`` and are
    typed ``part``. Devices held by an ``mpath`` mapper device are left out.
    """
    def dm_type_of(uuid_path):
        # the type is the part of the dm uuid before the first dash
        return get_file_contents(uuid_path).split('-')[0].lower()

    candidates = [device] if device else os.listdir(_sys_block_path)
    found = []

    # First, get devices that are _not_ partitions
    for dev in candidates:
        kname = os.path.join("/dev", dev)
        if not os.path.exists(kname):
            continue
        name, type_ = kname, 'disk'
        sys_dev_dir = os.path.join(_sys_block_path, dev)

        # /sys/block/sdy/holders/dm-8/dm/uuid
        holders = os.listdir(os.path.join(sys_dev_dir, 'holders'))
        if any(dm_type_of(os.path.join(sys_dev_dir, f'holders/{holder}/dm/uuid')) == 'mpath'
               for holder in holders):
            continue

        dm_dir = os.path.join(sys_dev_dir, 'dm')
        if os.path.isdir(dm_dir):
            type_ = dm_type_of(os.path.join(dm_dir, 'uuid'))
            name = os.path.join("/dev/mapper",
                                get_file_contents(os.path.join(dm_dir, 'name')))

        if dev.startswith('loop'):
            if not allow_loop_devices():
                continue
            # Skip loop devices that are not attached
            if not os.path.exists(os.path.join(sys_dev_dir, 'loop')):
                continue
            type_ = 'loop'

        found.append([kname, name, type_])

    # Next, look for devices that _are_ partitions
    for item in os.listdir(_sys_dev_block_path):
        item_path = os.path.join(_sys_dev_block_path, item)
        is_part = get_file_contents(os.path.join(item_path, 'partition')) == "1"
        target = os.path.basename(os.readlink(item_path))
        if is_part:
            part_path = os.path.join("/dev", target)
            found.append([part_path, part_path, "part"])

    found.sort(key=lambda entry: entry[0])
    return found
def get_devices(_sys_block_path='/sys/block', device=''):
    """
    Captures the block devices listed by ``get_block_devs_sysfs``.
    Additional interesting metadata like sectors, size, vendor,
    solid/rotational, etc. is collected from /sys/block/<devname>

    Returns a dictionary, where keys are the full paths to devices and values
    are dictionaries of facts about each device.

    .. note:: only entries typed ``disk`` or ``mpath`` are reported, plus
              ``loop`` when ``allow_loop_devices()`` is true. Ceph RBD devices
              and mapper devices that ``lvm.get_device_lvs`` knows about are
              left out.

    :param _sys_block_path: sysfs directory holding one entry per block device
    :param device: NOTE(review): accepted but not used anywhere in this
                   function, every device is scanned regardless - confirm
                   whether it was meant to be handed to
                   ``get_block_devs_sysfs``.
    """

    device_facts = {}

    block_devs = get_block_devs_sysfs(_sys_block_path)

    block_types = ['disk', 'mpath']
    if allow_loop_devices():
        block_types.append('loop')

    for block in block_devs:
        # each entry is [kname, name, type]
        devname = os.path.basename(block[0])
        diskname = block[1]
        if block[2] not in block_types:
            continue
        sysdir = os.path.join(_sys_block_path, devname)
        metadata = {}

        # If the device is ceph rbd it gets excluded
        if is_ceph_rbd(diskname):
            continue

        # If the mapper device is a logical volume it gets excluded
        if is_mapper_device(diskname):
            if lvm.get_device_lvs(diskname):
                continue

        # all facts that have no defaults
        # (<metadata key>, <file path relative to sysdir>)
        facts = [('removable', 'removable'),
                 ('ro', 'ro'),
                 ('vendor', 'device/vendor'),
                 ('model', 'device/model'),
                 ('rev', 'device/rev'),
                 ('sas_address', 'device/sas_address'),
                 ('sas_device_handle', 'device/sas_device_handle'),
                 ('support_discard', 'queue/discard_granularity'),
                 ('rotational', 'queue/rotational'),
                 ('nr_requests', 'queue/nr_requests'),
                 ]
        for key, file_ in facts:
            metadata[key] = get_file_contents(os.path.join(sysdir, file_))

        # report the underlying nodes when there are slaves, else the device itself
        device_slaves = os.listdir(os.path.join(sysdir, 'slaves'))
        if device_slaves:
            metadata['device_nodes'] = ','.join(device_slaves)
        else:
            metadata['device_nodes'] = devname

        # count the consecutively numbered sub-directories (0, 1, 2, ...) of
        # independent_access_ranges; stays None when the directory is absent
        metadata['actuators'] = None
        if os.path.isdir(sysdir + "/queue/independent_access_ranges/"):
            actuators = 0
            while os.path.isdir(sysdir + "/queue/independent_access_ranges/" + str(actuators)):
                actuators += 1
            metadata['actuators'] = actuators

        # the scheduler file marks the active scheduler with square brackets
        metadata['scheduler_mode'] = ""
        scheduler = get_file_contents(sysdir + "/queue/scheduler")
        if scheduler is not None:
            m = re.match(r".*?(\[(.*)\])", scheduler)
            if m:
                metadata['scheduler_mode'] = m.group(2)

        metadata['partitions'] = get_partitions_facts(sysdir)

        size = get_file_contents(os.path.join(sysdir, 'size'), 0)

        metadata['sectors'] = get_file_contents(os.path.join(sysdir, 'sectors'), 0)
        fallback_sectorsize = get_file_contents(sysdir + "/queue/hw_sector_size", 512)
        metadata['sectorsize'] = get_file_contents(sysdir +
                                                   "/queue/logical_block_size",
                                                   fallback_sectorsize)
        # the value read from ``size`` is multiplied by a fixed 512, not by
        # the sector size collected above
        metadata['size'] = float(size) * 512
        metadata['human_readable_size'] = human_readable_size(metadata['size'])
        metadata['path'] = diskname
        metadata['locked'] = is_locked_raw_device(metadata['path'])
        metadata['type'] = block[2]

        # some facts from udevadm
        p = udevadm_property(sysdir)
        metadata['id_bus'] = p.get('ID_BUS', '')

        device_facts[diskname] = metadata
    return device_facts
def has_bluestore_label(device_path):
    """
    Boolean to determine if ``device_path`` starts with the BlueStore disk
    signature (``bluestore block device``).

    A directory is logged and reported as ``False``; any other error raised
    while opening or reading the path is left to propagate.
    """
    expected_signature = 'bluestore block device'  # 22 bytes long

    # throws OSError on failure
    logger.info("opening device {} to check for BlueStore label".format(device_path))
    try:
        with open(device_path, "rb") as handle:
            # read first 22 bytes looking for bluestore disk signature
            header = handle.read(22)
    except IsADirectoryError:
        logger.info(f'{device_path} is a directory, skipping.')
        return False

    return header.decode('ascii', 'replace') == expected_signature