Attempt image with ceph/ceph#49954 patch

This commit is contained in:
Nic Anderson
2023-04-03 22:09:41 -04:00
parent 40e4f97dc7
commit 62132d8cf0
3 changed files with 1645 additions and 0 deletions

6
Dockerfile Normal file
View File

@@ -0,0 +1,6 @@
# Stock Ceph v17.2.5 image with two ceph-volume modules replaced by the
# patched copies kept in this repository (see ceph/ceph#49954).
FROM quay.io/ceph/ceph:v17.2.5
# Use the key="value" form; the whitespace-separated form is the legacy syntax.
LABEL org.opencontainers.image.source="https://github.com/nanderson94/ceph-patch"
# Destination paths assume the image's Python 3.6 site-packages layout.
COPY src/ceph-volume/ceph_volume/util/disk.py /usr/lib/python3.6/site-packages/ceph_volume/util/disk.py
COPY src/ceph-volume/ceph_volume/util/device.py /usr/lib/python3.6/site-packages/ceph_volume/util/device.py

View File

@@ -0,0 +1,702 @@
# -*- coding: utf-8 -*-
import logging
import os
from functools import total_ordering
from ceph_volume import sys_info
from ceph_volume.api import lvm
from ceph_volume.util import disk, system
from ceph_volume.util.lsmdisk import LSMDisk
from ceph_volume.util.constants import ceph_disk_guids
from ceph_volume.util.disk import allow_loop_devices
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# One row of the tabular inventory listing. The same template renders both the
# header row (Devices.pretty_report) and each device row (Device.report).
report_template = """
{dev:<25} {size:<12} {device_nodes:<15} {rot!s:<7} {available!s:<9} {model}"""
def encryption_status(abspath):
    """
    Return whatever ``encryption.status()`` reports for ``abspath``.

    The ``encryption`` module is imported inside the function on purpose: it
    imports from this module, so a top-level import would be circular. Having
    the call wrapped here also gives tests a single function to monkeypatch.
    """
    from ceph_volume.util import encryption as _encryption

    status = _encryption.status(abspath)
    return status
class Devices(object):
    """
    Collection of ``Device`` objects, one per entry in ``sys_info.devices``,
    with plain-text and JSON-friendly reporting helpers.
    """

    def __init__(self, filter_for_batch=False, with_lsm=False):
        """
        Build a ``Device`` for every known device path.

        :param filter_for_batch: keep only devices whose
                                 ``available_lvm_batch`` is truthy
        :param with_lsm: forwarded to each ``Device``
        """
        # Query LVM and lsblk once and hand the same listings to every Device
        # so that they do not each have to run the commands themselves.
        shared = dict(
            lvs=lvm.get_lvs(),
            lsblk_all=disk.lsblk_all(),
            all_devices_vgs=lvm.get_all_devices_vgs(),
        )
        if not sys_info.devices:
            sys_info.devices = disk.get_devices()

        collected = []
        for dev_path in sys_info.devices.keys():
            collected.append(Device(dev_path, with_lsm, **shared))
        if filter_for_batch:
            collected = [dev for dev in collected if dev.available_lvm_batch]
        self.devices = collected

    def pretty_report(self):
        """Return the inventory as a text table: header row, then one row per device."""
        header = report_template.format(
            dev='Device Path',
            size='Size',
            rot='rotates',
            model='Model name',
            available='available',
            device_nodes='Device nodes',
        )
        rows = [entry.report() for entry in sorted(self.devices)]
        return ''.join([header] + rows)

    def json_report(self):
        """Return a list with the ``json_report()`` of every device, sorted."""
        return [entry.json_report() for entry in sorted(self.devices)]
@total_ordering
class Device(object):
    """
    A block device, partition or logical volume together with the data
    gathered about it: ``sys_api`` (from ``sys_info.devices``), ``disk_api``
    (``lsblk``), ``blkid_api`` (``blkid``, loaded lazily) and the LVM view
    (``lv_api``, ``lvs``, ``vgs``). The ``available*`` and
    ``rejected_reasons*`` attributes hold the verdict on whether the device
    may be consumed.

    Instances sort with available devices first and by path otherwise; two
    instances are equal when their paths are equal.
    """

    pretty_template = "\n{attr:<25} {value}"

    # attributes of the instance included in json_report()/pretty_report()
    report_fields = [
        'ceph_device',
        'rejected_reasons',
        'available',
        'path',
        'sys_api',
        'device_id',
        'lsm_data',
    ]
    # keys of ``sys_api`` included in pretty_report()
    pretty_report_sys_fields = [
        'actuators',
        'human_readable_size',
        'model',
        'removable',
        'ro',
        'rotational',
        'sas_address',
        'scheduler_mode',
        'vendor',
    ]

    # define some class variables; mostly to enable the use of autospec in
    # unittests
    lvs = []

    def __init__(self, path, with_lsm=False, lvs=None, lsblk_all=None, all_devices_vgs=None):
        """
        :param path: ``/dev/...`` path of a device/partition/LV, or ``vg/lv``
        :param with_lsm: when true, also collect libstoragemgmt data
        :param lvs: optional pre-fetched list of LVs to search instead of
                    querying LVM for this path
        :param lsblk_all: optional pre-fetched ``lsblk`` records
        :param all_devices_vgs: optional pre-fetched PV/VG listing
        """
        self.path = path
        # LVs can have a vg/lv path, while disks will have /dev/sda
        self.symlink = None
        # check if we are a symlink
        if os.path.islink(self.path):
            self.symlink = self.path
            real_path = os.path.realpath(self.path)
            # check if we are not a device mapper
            if "dm-" not in real_path:
                self.path = real_path
        if not sys_info.devices:
            if self.path:
                sys_info.devices = disk.get_devices(device=self.path)
            else:
                sys_info.devices = disk.get_devices()
        self.sys_api = sys_info.devices.get(self.path, {})
        # Always define device_nodes: partitions and LVs are not keys of
        # sys_info.devices, and report() would otherwise fail with an
        # AttributeError. An empty string formats cleanly in report_template.
        self.device_nodes = self.sys_api.get('device_nodes', '')
        self.partitions = self._get_partitions()
        self.lv_api = None
        self.lvs = [] if not lvs else lvs
        self.lsblk_all = lsblk_all
        self.all_devices_vgs = all_devices_vgs
        self.vgs = []
        self.vg_name = None
        self.lv_name = None
        self.disk_api = {}
        self.blkid_api = None
        self._exists = None
        self._is_lvm_member = None
        self.ceph_device = False
        self._parse()
        self.lsm_data = self.fetch_lsm(with_lsm)

        self.available_lvm, self.rejected_reasons_lvm = self._check_lvm_reject_reasons()
        self.available_raw, self.rejected_reasons_raw = self._check_raw_reject_reasons()
        self.available = self.available_lvm and self.available_raw
        self.rejected_reasons = list(set(self.rejected_reasons_lvm +
                                         self.rejected_reasons_raw))

        self.device_id = self._get_device_id()

    def fetch_lsm(self, with_lsm):
        '''
        Attempt to fetch libstoragemgmt (LSM) metadata, and return to the caller
        as a dict. An empty dict is passed back to the caller if ``with_lsm``
        is false, the path does not exist, or it is not a device. Otherwise
        the report produced by ``LSMDisk.json_report()`` is returned.
        '''
        if not with_lsm or not self.exists or not self.is_device:
            return {}

        lsm_disk = LSMDisk(self.path)

        return lsm_disk.json_report()

    def __lt__(self, other):
        '''
        Implementing this method and __eq__ allows the @total_ordering
        decorator to turn the Device class into a totally ordered type.
        This can be slower than implementing all comparison operations.
        This sorting should put available devices before unavailable devices
        and sort on the path otherwise (str sorting).
        '''
        if not isinstance(other, Device):
            # let Python try the reflected operation / raise TypeError
            return NotImplemented
        if self.available == other.available:
            return self.path < other.path
        return self.available and not other.available

    def __eq__(self, other):
        """Devices are equal when their paths are equal."""
        if not isinstance(other, Device):
            return NotImplemented
        return self.path == other.path

    def __hash__(self):
        return hash(self.path)

    def load_blkid_api(self):
        """Populate ``blkid_api`` on first use; later calls are no-ops."""
        if self.blkid_api is None:
            self.blkid_api = disk.blkid(self.path)

    def _find_lsblk_entry(self):
        """
        Return the ``lsblk`` record describing ``self.path``.

        The pre-fetched ``lsblk_all`` listing is searched by device name
        first; when it is missing or has no matching entry, ``lsblk`` is
        queried for this path directly so that the data of an unrelated
        device is never used.
        """
        wanted = os.path.basename(self.path)
        for entry in self.lsblk_all or []:
            if entry['NAME'] == wanted:
                return entry
        return disk.lsblk(self.path)

    def _parse(self):
        """
        Resolve what ``self.path`` refers to and fill in ``sys_api``,
        ``lv_api``/``lvs`` (for LVs) or ``disk_api`` and LVM membership (for
        everything else), then attach the ``CephDiskDevice`` helper.
        """
        lv = None
        if not self.sys_api:
            # if no device was found check if we are a partition
            partname = self.path.split('/')[-1]
            for info in sys_info.devices.values():
                part = info['partitions'].get(partname, {})
                if part:
                    self.sys_api = part
                    break

        if self.lvs:
            for _lv in self.lvs:
                # if the path is not absolute, we have 'vg/lv', let's use LV name
                # to get the LV.
                if self.path[0] == '/':
                    if _lv.lv_path == self.path:
                        lv = _lv
                        break
                else:
                    vgname, lvname = self.path.split('/')
                    if _lv.lv_name == lvname and _lv.vg_name == vgname:
                        lv = _lv
                        break
        else:
            if self.path[0] == '/':
                lv = lvm.get_single_lv(filters={'lv_path': self.path})
            else:
                vgname, lvname = self.path.split('/')
                lv = lvm.get_single_lv(filters={'lv_name': lvname,
                                                'vg_name': vgname})

        if lv:
            self.lv_api = lv
            self.lvs = [lv]
            self.path = lv.lv_path
            self.vg_name = lv.vg_name
            self.lv_name = lv.name
            self.ceph_device = lvm.is_ceph_device(lv)
        else:
            self.lvs = []
            dev = self._find_lsblk_entry()
            self.disk_api = dev
            device_type = dev.get('TYPE', '')
            # always check if this is an lvm member
            valid_types = ['part', 'disk']
            if allow_loop_devices():
                valid_types.append('loop')
            if device_type in valid_types:
                self._set_lvm_membership()

        self.ceph_disk = CephDiskDevice(self)

    def __repr__(self):
        prefix = 'Unknown'
        if self.is_lv:
            prefix = 'LV'
        elif self.is_partition:
            prefix = 'Partition'
        elif self.is_device:
            prefix = 'Raw Device'
        return '<%s: %s>' % (prefix, self.path)

    def pretty_report(self):
        """
        Return a multi-line text report: the ``report_fields`` attributes
        (except ``sys_api``), the selected ``sys_api`` keys, and one section
        per logical volume.
        """
        def format_value(v):
            if isinstance(v, list):
                return ', '.join(v)
            else:
                return v

        def format_key(k):
            return k.strip('_').replace('_', ' ')

        output = ['\n====== Device report {} ======\n'.format(self.path)]
        output.extend(
            [self.pretty_template.format(
                attr=format_key(k),
                value=format_value(v)) for k, v in vars(self).items() if k in
                self.report_fields and k not in ('disk_api', 'sys_api')])
        output.extend(
            [self.pretty_template.format(
                attr=format_key(k),
                value=format_value(v)) for k, v in self.sys_api.items() if k in
                self.pretty_report_sys_fields])
        for lv in self.lvs:
            output.append("\n--- Logical Volume ---")
            output.extend(
                [self.pretty_template.format(
                    attr=format_key(k),
                    value=format_value(v)) for k, v in lv.report().items()])
        return ''.join(output)

    def report(self):
        """Return this device as one row of the inventory table."""
        return report_template.format(
            dev=self.path,
            size=self.size_human,
            rot=self.rotational,
            available=self.available,
            model=self.model,
            device_nodes=self.device_nodes
        )

    def json_report(self):
        """Return a dict of the ``report_fields`` attributes plus an ``lvs`` list."""
        output = {k.strip('_'): v for k, v in vars(self).items() if k in
                  self.report_fields}
        output['lvs'] = [lv.report() for lv in self.lvs]
        return output

    def _get_device_id(self):
        """
        Please keep this implementation in sync with get_device_id() in
        src/common/blkdev.cc
        """
        props = ['ID_VENDOR', 'ID_MODEL', 'ID_MODEL_ENC', 'ID_SERIAL_SHORT', 'ID_SERIAL',
                 'ID_SCSI_SERIAL']
        p = disk.udevadm_property(self.path, props)
        if p.get('ID_MODEL', '').startswith('LVM PV '):
            p['ID_MODEL'] = p.get('ID_MODEL_ENC', '').replace('\\x20', ' ').strip()
        if 'ID_VENDOR' in p and 'ID_MODEL' in p and 'ID_SCSI_SERIAL' in p:
            dev_id = '_'.join([p['ID_VENDOR'], p['ID_MODEL'],
                               p['ID_SCSI_SERIAL']])
        elif 'ID_MODEL' in p and 'ID_SERIAL_SHORT' in p:
            dev_id = '_'.join([p['ID_MODEL'], p['ID_SERIAL_SHORT']])
        elif 'ID_SERIAL' in p:
            dev_id = p['ID_SERIAL']
            if dev_id.startswith('MTFD'):
                # Micron NVMes hide the vendor
                dev_id = 'Micron_' + dev_id
        else:
            # the else branch should fallback to using sysfs and ioctl to
            # retrieve device_id on FreeBSD. Still figuring out if/how the
            # python ioctl implementation does that on FreeBSD
            dev_id = ''
        # normalise: no spaces, no runs of underscores
        dev_id = dev_id.replace(' ', '_')
        while '__' in dev_id:
            dev_id = dev_id.replace('__', '_')
        return dev_id

    def _set_lvm_membership(self):
        """
        Work out, once, whether this device or any of its partitions is a PV
        that belongs to a VG, recording the VGs and LVs found.
        """
        if self._is_lvm_member is None:
            # this is contentious, if a PV is recognized by LVM but has no
            # VGs, should we consider it as part of LVM? We choose not to
            # here, because most likely, we need to use VGs from this PV.
            self._is_lvm_member = False
            device_to_check = [self.path]
            device_to_check.extend(self.partitions)

            # a pv can only be in one vg, so this should be safe
            # FIXME: While the above assumption holds, sda1 and sda2
            # can each host a PV and VG. I think the vg_name property is
            # actually unused (not 100% sure) and can simply be removed
            if not self.all_devices_vgs:
                self.all_devices_vgs = lvm.get_all_devices_vgs()
            for path in device_to_check:
                # Reset per path: a match found for one path must not be
                # counted again for the following partitions.
                vgs = None
                for dev_vg in self.all_devices_vgs:
                    if dev_vg.pv_name == path:
                        vgs = [dev_vg]
                if vgs:
                    self.vgs.extend(vgs)
                    self.vg_name = vgs[0]
                    self._is_lvm_member = True
                    self.lvs.extend(lvm.get_device_lvs(path))
            if self.lvs:
                self.ceph_device = any(
                    lv.tags.get('ceph.osd_id') for lv in self.lvs)

    def _get_partitions(self):
        """
        For block devices LVM can reside on the raw block device or on a
        partition. Return a list of paths to be checked for a pv.
        """
        path_dir = os.path.dirname(self.path)
        return [os.path.join(path_dir, partition)
                for partition in self.sys_api.get('partitions', {})]

    @property
    def exists(self):
        return os.path.exists(self.path)

    @property
    def has_fs(self):
        self.load_blkid_api()
        return 'TYPE' in self.blkid_api

    @property
    def has_gpt_headers(self):
        self.load_blkid_api()
        return self.blkid_api.get("PTTYPE") == "gpt"

    @property
    def rotational(self):
        rotational = self.sys_api.get('rotational')
        if rotational is None:
            # fall back to lsblk if not found in sys_api
            # default to '1' if no value is found with lsblk either
            rotational = self.disk_api.get('ROTA', '1')
        return rotational == '1'

    @property
    def model(self):
        return self.sys_api['model']

    @property
    def size_human(self):
        return self.sys_api['human_readable_size']

    @property
    def size(self):
        return self.sys_api['size']

    @property
    def parent_device(self):
        """``/dev/<PKNAME>`` from lsblk, or None when lsblk gave no parent."""
        if 'PKNAME' in self.disk_api:
            return '/dev/%s' % self.disk_api['PKNAME']
        return None

    @property
    def lvm_size(self):
        """
        If this device was made into a PV it would lose 1GB in total size
        due to the 1GB physical extent size we set when creating volume groups
        """
        size = disk.Size(b=self.size)
        lvm_size = disk.Size(gb=size.gb.as_int()) - disk.Size(gb=1)
        return lvm_size

    @property
    def is_lvm_member(self):
        if self._is_lvm_member is None:
            self._set_lvm_membership()
        return self._is_lvm_member

    @property
    def is_ceph_disk_member(self):
        """
        True when the device (or, if it has partitions, any lsblk entry whose
        name is a prefix of one of its partition names) carries a ceph
        PARTLABEL or one of the ceph-disk PARTTYPE GUIDs.
        """
        def is_member(device):
            return 'ceph' in device.get('PARTLABEL', '') or \
                device.get('PARTTYPE', '') in ceph_disk_guids

        partitions = self.sys_api.get("partitions")
        if not partitions:
            return is_member(self.disk_api)
        # If we come from Devices(), self.lsblk_all is set already.
        # Otherwise, we have to grab the data.
        details = self.lsblk_all or disk.lsblk_all()
        return any(
            part.startswith(dev['NAME']) and is_member(dev)
            for part in partitions
            for dev in details
        )

    @property
    def has_bluestore_label(self):
        return disk.has_bluestore_label(self.path)

    @property
    def is_mapper(self):
        return self.path.startswith(('/dev/mapper', '/dev/dm-'))

    @property
    def device_type(self):
        """
        Device type from the first source that has data: ``sys_api['type']``,
        then lsblk ``TYPE``, then blkid ``TYPE``. None when none has data.
        """
        self.load_blkid_api()
        if 'type' in self.sys_api:
            return self.sys_api['type']
        elif self.disk_api:
            return self.disk_api['TYPE']
        elif self.blkid_api:
            return self.blkid_api['TYPE']
        return None

    @property
    def is_mpath(self):
        return self.device_type == 'mpath'

    @property
    def is_lv(self):
        return self.lv_api is not None

    @property
    def is_partition(self):
        self.load_blkid_api()
        if self.disk_api:
            return self.disk_api['TYPE'] == 'part'
        elif self.blkid_api:
            return self.blkid_api['TYPE'] == 'part'
        return False

    @property
    def is_device(self):
        self.load_blkid_api()
        # only answer when lsblk or blkid produced any data at all
        if self.disk_api or self.blkid_api:
            valid_types = ['disk', 'device', 'mpath']
            if allow_loop_devices():
                valid_types.append('loop')
            return self.device_type in valid_types
        return False

    @property
    def is_acceptable_device(self):
        return self.is_device or self.is_partition

    @property
    def is_encrypted(self):
        """
        Only correct for LVs, device mappers, and partitions. Will report a ``None``
        for raw devices.
        """
        self.load_blkid_api()
        crypt_reports = [self.blkid_api.get('TYPE', ''), self.disk_api.get('FSTYPE', '')]
        if self.is_lv:
            # if disk APIs are reporting this is encrypted use that:
            if 'crypto_LUKS' in crypt_reports:
                return True
            # if ceph-volume created this, then a tag would let us know
            elif self.lv_api.encrypted:
                return True
            return False
        elif self.is_partition:
            return 'crypto_LUKS' in crypt_reports
        elif self.is_mapper:
            active_mapper = encryption_status(self.path)
            if active_mapper:
                # normalize a bit to ensure same values regardless of source
                encryption_type = active_mapper['type'].lower().strip('12')  # turn LUKS1 or LUKS2 into luks
                return encryption_type in ['plain', 'luks']
            else:
                return False
        else:
            return None

    @property
    def used_by_ceph(self):
        # only filter out data devices as journals could potentially be reused
        osd_ids = [lv.tags.get("ceph.osd_id") is not None for lv in self.lvs
                   if lv.tags.get("ceph.type") in ["data", "block"]]
        return any(osd_ids)

    @property
    def vg_free_percent(self):
        if self.vgs:
            return [vg.free_percent for vg in self.vgs]
        else:
            return [1]

    @property
    def vg_size(self):
        if self.vgs:
            return [vg.size for vg in self.vgs]
        else:
            # TODO fix this...we can probably get rid of vg_free
            return self.vg_free

    @property
    def vg_free(self):
        '''
        Returns the free space in all VGs on this device. If no VGs are
        present, returns the disk size.
        '''
        if self.vgs:
            return [vg.free for vg in self.vgs]
        else:
            # We could also query 'lvmconfig
            # --typeconfig full' and use allocations -> physical_extent_size
            # value to project the space for a vg
            # assuming 4M extents here
            extent_size = 4194304
            vg_free = int(self.size / extent_size) * extent_size
            if self.size % extent_size == 0:
                # If the extent size divides size exactly, deduct one extent for
                # LVM metadata
                vg_free -= extent_size
            return [vg_free]

    @property
    def has_partitions(self):
        '''
        Boolean to determine if a given device has partitions.
        '''
        return bool(self.sys_api.get('partitions'))

    def _check_generic_reject_reasons(self):
        """Return the list of reasons that rule the device out for any use."""
        reasons = [
            ('id_bus', 'usb', 'id_bus'),
            ('ro', '1', 'read-only'),
            ('locked', 1, 'locked'),
        ]
        rejected = [reason for (k, v, reason) in reasons if
                    self.sys_api.get(k, '') == v]
        if self.is_acceptable_device:
            # reject disks smaller than 5GB
            if int(self.sys_api.get('size', 0)) < 5368709120:
                rejected.append('Insufficient space (<5GB)')
        else:
            rejected.append("Device type is not acceptable. It should be raw device or partition")
        if self.is_ceph_disk_member:
            rejected.append("Used by ceph-disk")

        try:
            if self.has_bluestore_label:
                rejected.append('Has BlueStore device label')
        except OSError as e:
            # likely failed to open the device. assuming it is BlueStore is the safest option
            # so that a possibly-already-existing OSD doesn't get overwritten
            logger.error('failed to determine if device {} is BlueStore. device should not be used to avoid false negatives. err: {}'.format(self.path, e))
            rejected.append('Failed to determine if device is BlueStore')

        if self.is_partition:
            try:
                if disk.has_bluestore_label(self.parent_device):
                    rejected.append('Parent has BlueStore device label')
            except OSError as e:
                # likely failed to open the device. assuming the parent is BlueStore is the safest
                # option so that a possibly-already-existing OSD doesn't get overwritten
                logger.error('failed to determine if partition {} (parent: {}) has a BlueStore parent. partition should not be used to avoid false negatives. err: {}'.format(self.path, self.parent_device, e))
                rejected.append('Failed to determine if parent device is BlueStore')

        if self.has_gpt_headers:
            rejected.append('Has GPT headers')
        if self.has_partitions:
            rejected.append('Has partitions')
        return rejected

    def _check_lvm_reject_reasons(self):
        """Return ``(available, reasons)`` for use through LVM."""
        rejected = []
        if self.vgs:
            available_vgs = [vg for vg in self.vgs if int(vg.vg_free_count) > 10]
            if not available_vgs:
                rejected.append('Insufficient space (<10 extents) on vgs')
        else:
            # only check generic if no vgs are present. Vgs might hold lvs and
            # that might cause 'locked' to trigger
            rejected.extend(self._check_generic_reject_reasons())

        return len(rejected) == 0, rejected

    def _check_raw_reject_reasons(self):
        """Return ``(available, reasons)`` for use as a raw device."""
        rejected = self._check_generic_reject_reasons()
        if len(self.vgs) > 0:
            rejected.append('LVM detected')

        return len(rejected) == 0, rejected

    @property
    def available_lvm_batch(self):
        if self.sys_api.get("partitions"):
            return False
        if system.device_is_mounted(self.path):
            return False
        return self.is_device or self.is_lv
class CephDiskDevice(object):
    """
    Detect devices that have been created by ceph-disk, report their type
    (journal, data, etc..). Requires a ``Device`` object as input.
    """

    def __init__(self, device):
        self.device = device
        # memo for ``is_member``; None means "not computed yet"
        self._is_ceph_disk_member = None

    @property
    def partlabel(self):
        """
        In containers, the 'PARTLABEL' attribute might not be detected
        correctly via ``lsblk``, so we poke at the value with ``lsblk`` first,
        falling back to ``blkid`` (which works correctly in containers).
        """
        lsblk_partlabel = self.device.disk_api.get('PARTLABEL')
        if lsblk_partlabel:
            return lsblk_partlabel
        return self.device.blkid_api.get('PARTLABEL', '')

    @property
    def parttype(self):
        """
        Seems like older versions do not detect PARTTYPE correctly (assuming the
        info in util/disk.py#lsblk is still valid).
        Simply resolve to using blkid since lsblk will throw an error if asked
        for an unknown column.
        """
        return self.device.blkid_api.get('PARTTYPE', '')

    @property
    def is_member(self):
        """
        True when the partition label contains ``ceph`` or the partition type
        is one of the known ceph-disk GUIDs. The answer is computed once and
        remembered, whichever way it turns out.
        """
        if self._is_ceph_disk_member is None:
            self._is_ceph_disk_member = (
                'ceph' in self.partlabel or
                self.parttype in ceph_disk_guids
            )
        return self._is_ceph_disk_member

    @property
    def type(self):
        """
        Kind of ceph-disk partition: the first known type name found in the
        partition label, else the last dotted component of the type recorded
        for the PARTTYPE GUID, else ``unknown``.
        """
        types = [
            'data', 'wal', 'db', 'lockbox', 'journal',
            # ceph-disk uses 'ceph block' when placing data in bluestore, but
            # keeps the regular OSD files in 'ceph data' :( :( :( :(
            'block',
        ]
        for t in types:
            if t in self.partlabel:
                return t
        label = ceph_disk_guids.get(self.parttype, {})
        return label.get('type', 'unknown').split('.')[-1]

View File

@@ -0,0 +1,937 @@
import logging
import os
import re
import stat
import time
from ceph_volume import process
from ceph_volume.api import lvm
from ceph_volume.util.system import get_file_contents
logger = logging.getLogger(__name__)
# The blkid CLI tool has some oddities which prevent having one common call
# to extract the information, so separate utilities are used instead. The
# `udev` type of output is needed in older versions of blkid (v 2.23) that will
# not work correctly with just the ``-p`` flag to bypass the cache, for example.
# Xenial doesn't have this problem as it uses a newer blkid version.
def get_partuuid(device):
    """
    Ask ``blkid`` (bypassing its cache) for the PARTUUID of ``device``.

    A partition usually carries a persistent PARTUUID that can later be handed
    back to ``blkid`` to find the actual device again. The command's output
    lines are joined with spaces and stripped; the return code is ignored.
    """
    command = ['blkid', '-c', '/dev/null', '-s', 'PARTUUID', '-o', 'value', device]
    stdout, _stderr, _returncode = process.call(command)
    return ' '.join(stdout).strip()
def _blkid_parser(output):
"""
Parses the output from a system ``blkid`` call, requires output to be
produced using the ``-p`` flag which bypasses the cache, mangling the
names. These names are corrected to what it would look like without the
``-p`` flag.
Normal output::
/dev/sdb1: UUID="62416664-cbaf-40bd-9689-10bd337379c3" TYPE="xfs" [...]
"""
# first spaced separated item is garbage, gets tossed:
output = ' '.join(output.split()[1:])
# split again, respecting possible whitespace in quoted values
pairs = output.split('" ')
raw = {}
processed = {}
mapping = {
'UUID': 'UUID',
'TYPE': 'TYPE',
'PART_ENTRY_NAME': 'PARTLABEL',
'PART_ENTRY_UUID': 'PARTUUID',
'PART_ENTRY_TYPE': 'PARTTYPE',
'PTTYPE': 'PTTYPE',
}
for pair in pairs:
try:
column, value = pair.split('=')
except ValueError:
continue
raw[column] = value.strip().strip().strip('"')
for key, value in raw.items():
new_key = mapping.get(key)
if not new_key:
continue
processed[new_key] = value
return processed
def blkid(device):
    """
    Run ``blkid`` on ``device`` and return a dict shaped like ``lsblk`` output.

    ``lsblk()`` should be preferred in most cases; this exists for the corner
    cases where ``blkid`` knows something ``lsblk`` does not. The call uses
    ``-p`` (and an empty cache file) to bypass the cache, which makes blkid
    report differently-named keys, e.g. ``PART_ENTRY_NAME`` instead of
    ``PARTLABEL``. The known keys are translated back by the parser, so the
    result is less rich than what ``lsblk`` offers.

    Label name to expected output chart:

        cache bypass name               expected name

        UUID                            UUID
        TYPE                            TYPE
        PART_ENTRY_NAME                 PARTLABEL
        PART_ENTRY_UUID                 PARTUUID
    """
    command = ['blkid', '-c', '/dev/null', '-p', device]
    stdout, _stderr, _returncode = process.call(command)
    joined = ' '.join(stdout)
    return _blkid_parser(joined)
def get_part_entry_type(device):
    """
    Return the ``ID_PART_ENTRY_TYPE`` value ``blkid`` reports for ``device``.

    The "low level" probe (cache bypassed) is run with the ``udev`` output
    format. That format is meant for udev rules, but it is the only consistent
    way to retrieve the GUID used by ceph-disk to identify devices. An empty
    string is returned when no output line carries the key.
    """
    marker = 'ID_PART_ENTRY_TYPE='
    command = ['blkid', '-c', '/dev/null', '-p', '-o', 'udev', device]
    stdout, _stderr, _returncode = process.call(command)
    first_match = next((line for line in stdout if marker in line), None)
    if first_match is None:
        return ''
    return first_match.split('=')[-1].strip()
def get_device_from_partuuid(partuuid):
    """
    Ask ``blkid`` (bypassing its cache) which device carries ``partuuid``.

    The command's output lines are joined with spaces and stripped.
    """
    token = 'PARTUUID="%s"' % partuuid
    command = ['blkid', '-c', '/dev/null', '-t', token, '-o', 'device']
    stdout, _stderr, _returncode = process.call(command)
    return ' '.join(stdout).strip()
def remove_partition(device):
    """
    Removes a partition using parted

    :param device: A ``Device()`` object
    :raises RuntimeError: when the partition number cannot be determined
    """
    # 'ID_PART_ENTRY_NUMBER' is sometimes missing from the output of
    # `udevadm info --query=property` because of a race condition. Not an ideal
    # fix, but polling a few times before giving up gets around the issue.
    for _attempt in range(10):
        partition_number = udevadm_property(device.path).get('ID_PART_ENTRY_NUMBER')
        if partition_number:
            break
        time.sleep(0.2)
    else:
        raise RuntimeError('Unable to detect the partition number for device: %s' % device.path)

    command = ['parted', device.parent_device, '--script', '--', 'rm', partition_number]
    process.run(command)
def _stat_is_device(stat_obj):
"""
Helper function that will interpret ``os.stat`` output directly, so that other
functions can call ``os.stat`` once and interpret that result several times
"""
return stat.S_ISBLK(stat_obj)
def _lsblk_parser(line):
"""
Parses lines in lsblk output. Requires output to be in pair mode (``-P`` flag). Lines
need to be whole strings, the line gets split when processed.
:param line: A string, with the full line from lsblk output
"""
# parse the COLUMN="value" output to construct the dictionary
pairs = line.split('" ')
parsed = {}
for pair in pairs:
try:
column, value = pair.split('=')
except ValueError:
continue
parsed[column] = value.strip().strip().strip('"')
return parsed
def device_family(device):
    """
    List ``device`` together with the devices ``lsblk`` associates with it.

    ``device`` is assumed to be a parent device; making sure it is not a
    partition is up to the caller. Each entry is the parsed ``lsblk`` line
    holding the NAME (as a full path), PARTLABEL and TYPE columns.
    """
    wanted_columns = ','.join(['NAME', 'PARTLABEL', 'TYPE'])
    stdout, _stderr, _returncode = process.call(
        ['lsblk', '-P', '-p', '-o', wanted_columns, device]
    )
    return [_lsblk_parser(line) for line in stdout]
def udevadm_property(device, properties=None):
    """
    Query udevadm for information about device properties.
    Optionally pass a list of properties to return. A requested property might
    not be returned if not present.

    Expected output format::
        # udevadm info --query=property --name=/dev/sda                                  :(
        DEVNAME=/dev/sda
        DEVTYPE=disk
        ID_ATA=1
        ID_BUS=ata
        ID_MODEL=SK_hynix_SC311_SATA_512GB
        ID_PART_TABLE_TYPE=gpt
        ID_PART_TABLE_UUID=c8f91d57-b26c-4de1-8884-0c9541da288c
        ID_PATH=pci-0000:00:17.0-ata-3
        ID_PATH_TAG=pci-0000_00_17_0-ata-3
        ID_REVISION=70000P10
        ID_SERIAL=SK_hynix_SC311_SATA_512GB_MS83N71801150416A
        TAGS=:systemd:
        USEC_INITIALIZED=16117769
        ...

    :param device: device path handed to ``udevadm``
    :param properties: names to keep; ``None`` or empty keeps everything
    :returns: dict of property name to value
    """
    ret = {}
    for line in _udevadm_info(device):
        # Only the first '=' separates name from value. Lines without one
        # (blank lines, for instance) carry no property and are skipped
        # rather than crashing the tuple unpacking.
        name, separator, value = line.partition('=')
        if not separator:
            continue
        if not properties or name in properties:
            ret[name] = value
    return ret
def _udevadm_info(device):
    """
    Run ``udevadm info --query=property`` for ``device`` and return its
    standard output as given by ``process.call``; stderr and the return code
    are discarded.
    """
    command = ['udevadm', 'info', '--query=property', device]
    stdout = process.call(command)[0]
    return stdout
def lsblk(device, columns=None, abspath=False):
    """
    Return the ``lsblk`` record of a single device.

    :param device: the device to report on
    :param columns: optional list of columns, forwarded to ``lsblk_all``
    :param abspath: forwarded to ``lsblk_all``
    :returns: the first record ``lsblk_all`` produced for ``device``
    :raises RuntimeError: when the report holds no record
    """
    result = lsblk_all(device=device,
                       columns=columns,
                       abspath=abspath)
    if not result:
        raise RuntimeError(f"{device} not found in lsblk report")

    return result[0]
def lsblk_all(device='', columns=None, abspath=False):
    """
    Create a dictionary of identifying values for a device using ``lsblk``.
    Each supported column is a key, in its *raw* format (all uppercase
    usually).  ``lsblk`` has support for certain "columns" (in blkid these
    would be labels), and these columns vary between distributions and
    ``lsblk`` versions. The newer versions support a richer set of columns,
    while older ones were a bit limited.

    These are a subset of lsblk columns which are known to work on both CentOS 7 and Xenial:

         NAME  device name
        KNAME  internal kernel device name
       PKNAME  internal kernel parent device name
      MAJ:MIN  major:minor device number
       FSTYPE  filesystem type
   MOUNTPOINT  where the device is mounted
        LABEL  filesystem LABEL
         UUID  filesystem UUID
           RO  read-only device
           RM  removable device
        MODEL  device identifier
         SIZE  size of the device
        STATE  state of the device
        OWNER  user name
        GROUP  group name
         MODE  device node permissions
    ALIGNMENT  alignment offset
       MIN-IO  minimum I/O size
       OPT-IO  optimal I/O size
      PHY-SEC  physical sector size
      LOG-SEC  logical sector size
         ROTA  rotational device
        SCHED  I/O scheduler name
      RQ-SIZE  request queue size
         TYPE  device type
     DISC-ALN  discard alignment offset
    DISC-GRAN  discard granularity
     DISC-MAX  discard max bytes
    DISC-ZERO  discard zeroes data

    There is a bug in ``lsblk`` where using all the available (supported)
    columns will result in no output (!), in order to workaround this the
    following columns have been removed from the default reporting columns:

    * RQ-SIZE (request queue size)
    * MIN-IO  minimum I/O size
    * OPT-IO  optimal I/O size

    These should be available however when using `columns`. For example::

        >>> lsblk('/dev/sda1', columns=['OPT-IO'])
        {'OPT-IO': '0'}

    Normal CLI output, as filtered by the flags in this function will look like ::

        $ lsblk -P -o NAME,KNAME,PKNAME,MAJ:MIN,FSTYPE,MOUNTPOINT
        NAME="sda1" KNAME="sda1" MAJ:MIN="8:1" FSTYPE="ext4" MOUNTPOINT="/"

    :param device: optional device to restrict the report to (queried with
                   ``--nodeps``); every device is reported when empty
    :param columns: A list of columns to report as keys in its original form.
    :param abspath: Set the flag for absolute paths on the report
    :returns: list with one parsed dict per output line
    :raises RuntimeError: when ``lsblk`` exits with a non-zero status
    """
    # PKNAME is listed once: asking for the same column twice only makes
    # lsblk print the same pair twice, which parses to the same dictionary.
    default_columns = [
        'NAME', 'KNAME', 'PKNAME', 'MAJ:MIN', 'FSTYPE', 'MOUNTPOINT', 'LABEL',
        'UUID', 'RO', 'RM', 'MODEL', 'SIZE', 'STATE', 'OWNER', 'GROUP', 'MODE',
        'ALIGNMENT', 'PHY-SEC', 'LOG-SEC', 'ROTA', 'SCHED', 'TYPE', 'DISC-ALN',
        'DISC-GRAN', 'DISC-MAX', 'DISC-ZERO', 'PARTLABEL'
    ]
    columns = columns or default_columns
    # -P -> Produce pairs of COLUMN="value"
    # -p -> Return full paths to devices, not just the names, when ``abspath`` is set
    # -o -> Use the columns specified or default ones provided by this function
    base_command = ['lsblk', '-P']
    if abspath:
        base_command.append('-p')
    base_command.extend(['-o', ','.join(columns)])
    if device:
        base_command.extend(['--nodeps', device])

    out, err, rc = process.call(base_command)

    if rc != 0:
        raise RuntimeError(f"Error: {err}")

    return [_lsblk_parser(line) for line in out]
def is_device(dev):
    """
    Boolean to determine if a given device is a block device (**not**
    a partition!)

    For example: /dev/sda would return True, but not /dev/sdc1
    """
    dev_prefix = '/dev/'
    # guard clauses: the path must exist and live under /dev/
    if not os.path.exists(dev) or not dev.startswith(dev_prefix):
        return False
    # loop devices only count when they have been explicitly allowed
    is_loop = dev[len(dev_prefix):].startswith('loop')
    if is_loop and not allow_loop_devices():
        return False

    # decide from the file mode of the path itself (symlinks not followed)
    mode = os.lstat(dev).st_mode
    return _stat_is_device(mode)
def is_partition(dev):
    """
    Boolean to determine if a given device is a partition, like /dev/sda1

    ``lsblk`` is consulted first. Only when it reports no TYPE is the answer
    derived from ``stat``: the path must be a block device whose
    ``/sys/dev/block/<major>:<minor>/partition`` entry exists.
    """
    if not os.path.exists(dev):
        return False
    # use lsblk first, fall back to using stat
    TYPE = lsblk(dev).get('TYPE')
    if TYPE:
        return TYPE == 'part'

    # fallback to stat
    stat_obj = os.stat(dev)
    # Anything that is not a block device cannot be a partition. Partitions
    # are block devices themselves, so the sysfs check below has to run for
    # block devices rather than be skipped for them.
    if not _stat_is_device(stat_obj.st_mode):
        return False

    major = os.major(stat_obj.st_rdev)
    minor = os.minor(stat_obj.st_rdev)
    return os.path.exists('/sys/dev/block/%d:%d/partition' % (major, minor))
def is_ceph_rbd(dev):
    """
    Tell whether ``dev`` looks like a ceph RBD device such as /dev/rbd0.

    Only the path prefix is inspected.
    """
    rbd_prefix = '/dev/rbd'
    return dev.startswith(rbd_prefix)
class BaseFloatUnit(float):
    """
    ``float`` subclass used to represent a size in one particular unit.

    The unit suffix shown by ``str()`` is not stored anywhere: it is whatever
    follows ``Float`` in the name of the (sub)class.
    """

    def __repr__(self):
        cls_name = type(self).__name__
        return "<%s(%s)>" % (cls_name, self.__float__())

    def __str__(self):
        unit_suffix = type(self).__name__.split('Float')[-1]
        return "{size:.2f} {suffix}".format(size=self.__float__(), suffix=unit_suffix)

    def as_int(self):
        """Return the value truncated to an ``int``."""
        return int(self.real)

    def as_float(self):
        """Return the value as stored."""
        return self.real
class FloatB(BaseFloatUnit):
    """Value whose ``str()`` carries the ``B`` suffix."""


class FloatMB(BaseFloatUnit):
    """Value whose ``str()`` carries the ``MB`` suffix."""


class FloatGB(BaseFloatUnit):
    """Value whose ``str()`` carries the ``GB`` suffix."""


class FloatKB(BaseFloatUnit):
    """Value whose ``str()`` carries the ``KB`` suffix."""


class FloatTB(BaseFloatUnit):
    """Value whose ``str()`` carries the ``TB`` suffix."""


class FloatPB(BaseFloatUnit):
    """Value whose ``str()`` carries the ``PB`` suffix."""
class Size(object):
    """
    Helper to provide an interface for different sizes given a single initial
    input. Allows for comparison between different size objects, which avoids
    the need to convert sizes before comparison (e.g. comparing megabytes
    against gigabytes).

    Common comparison operators are supported::

        >>> hd1 = Size(gb=400)
        >>> hd2 = Size(gb=500)
        >>> hd1 > hd2
        False
        >>> hd1 < hd2
        True
        >>> hd1 == hd2
        False
        >>> hd1 == Size(gb=400)
        True

    The Size object can also be multiplied or divided. The result is a new
    object, the operands are left untouched::

        >>> hd1
        <Size(400.00 GB)>
        >>> hd1 * 2
        <Size(800.00 GB)>
        >>> hd1
        <Size(400.00 GB)>

    Additions and subtractions are only supported between Size objects::

        >>> Size(gb=224) - Size(gb=100)
        <Size(124.00 GB)>
        >>> Size(gb=1) + Size(mb=300)
        <Size(1.29 GB)>

    Can also display a human-readable representation, with automatic detection
    on best suited unit, or alternatively, specific unit representation::

        >>> s = Size(mb=2211)
        >>> s
        <Size(2.16 GB)>
        >>> s.mb
        <FloatMB(2211.0)>
        >>> print("Total size: %s" % s.mb)
        Total size: 2211.00 MB
        >>> print("Total size: %s" % s)
        Total size: 2.16 GB
    """

    @classmethod
    def parse(cls, size):
        """
        Build a ``Size`` from a string made of a number followed by an
        optional unit. Two-letter units (``kb``, ``mb``, ``gb``, ``tb``,
        ``pb``) and one-letter units (``b``, ``k``, ``m``, ``g``, ``t``,
        ``p``) are accepted in any case; without a unit the number is taken
        as bytes.

        :raises ValueError: when the numeric part cannot be converted by
                            ``float()``
        """
        if (len(size) > 2 and
                size[-2].lower() in ['k', 'm', 'g', 't', 'p'] and
                size[-1].lower() == 'b'):
            return cls(**{size[-2:].lower(): float(size[0:-2])})
        elif size[-1].lower() in ['b', 'k', 'm', 'g', 't', 'p']:
            return cls(**{size[-1].lower(): float(size[0:-1])})
        else:
            return cls(b=float(size))

    def __init__(self, multiplier=1024, **kw):
        """
        :param multiplier: ratio between two consecutive units (1024 by
                           default)
        :param kw: a single ``unit=value`` pair, e.g. ``gb=400``. Only the
                   first keyword argument is taken into account.
        """
        self._multiplier = multiplier
        # create a mapping of units-to-multiplier, skip bytes as that is
        # calculated initially always and does not need to convert
        aliases = [
            [('k', 'kb', 'kilobytes'), self._multiplier],
            [('m', 'mb', 'megabytes'), self._multiplier ** 2],
            [('g', 'gb', 'gigabytes'), self._multiplier ** 3],
            [('t', 'tb', 'terabytes'), self._multiplier ** 4],
            [('p', 'pb', 'petabytes'), self._multiplier ** 5]
        ]
        # and mappings for units-to-formatters, including bytes and aliases for
        # each
        format_aliases = [
            [('b', 'bytes'), FloatB],
            [('kb', 'kilobytes'), FloatKB],
            [('mb', 'megabytes'), FloatMB],
            [('gb', 'gigabytes'), FloatGB],
            [('tb', 'terabytes'), FloatTB],
            [('pb', 'petabytes'), FloatPB],
        ]
        self._formatters = {}
        for key, value in format_aliases:
            for alias in key:
                self._formatters[alias] = value
        self._factors = {}
        for key, value in aliases:
            for alias in key:
                self._factors[alias] = value

        for k, v in kw.items():
            self._convert(v, k)
            # only pursue the first occurrence
            break

    def _convert(self, size, unit):
        """
        Convert any size down to bytes so that other methods can rely on bytes
        being available always, regardless of what they pass in, avoiding the
        need for a mapping of every permutation.
        """
        if unit in ['b', 'bytes']:
            # bytes are stored exactly as given (an int stays an int)
            self._b = size
            return
        factor = self._factors[unit]
        self._b = float(size * factor)

    def _get_best_format(self):
        """
        Go through all the supported units, and use the first one whose value
        does not exceed 1024. This allows to represent size in the most
        readable format available. Sizes too big for every unit are reported
        in petabytes.
        """
        for unit in ['b', 'kb', 'mb', 'gb', 'tb', 'pb']:
            if getattr(self, unit) > 1024:
                continue
            return getattr(self, unit)
        # more than 1024 PB: use the largest unit instead of returning None,
        # which used to render as "<Size(None)>"
        return self.pb

    def __repr__(self):
        return "<Size(%s)>" % self._get_best_format()

    def __str__(self):
        return "%s" % self._get_best_format()

    def __format__(self, spec):
        return str(self._get_best_format()).__format__(spec)

    def __int__(self):
        return int(self._b)

    def __float__(self):
        # __float__ must return a real float: bytes given as an int are
        # stored untouched, and returning them as-is makes float() raise
        # TypeError
        return float(self._b)

    def __lt__(self, other):
        if isinstance(other, Size):
            return self._b < other._b
        else:
            return self.b < other

    def __le__(self, other):
        if isinstance(other, Size):
            return self._b <= other._b
        else:
            return self.b <= other

    def __eq__(self, other):
        if isinstance(other, Size):
            return self._b == other._b
        else:
            return self.b == other

    def __ne__(self, other):
        if isinstance(other, Size):
            return self._b != other._b
        else:
            return self.b != other

    def __ge__(self, other):
        if isinstance(other, Size):
            return self._b >= other._b
        else:
            return self.b >= other

    def __gt__(self, other):
        if isinstance(other, Size):
            return self._b > other._b
        else:
            return self.b > other

    def __add__(self, other):
        if isinstance(other, Size):
            _b = self._b + other._b
            return Size(b=_b)
        raise TypeError('Cannot add "Size" object with int')

    def __sub__(self, other):
        if isinstance(other, Size):
            _b = self._b - other._b
            return Size(b=_b)
        raise TypeError('Cannot subtract "Size" object from int')

    def __mul__(self, other):
        if isinstance(other, Size):
            raise TypeError('Cannot multiply with "Size" object')
        _b = self._b * other
        return Size(b=_b)

    def __truediv__(self, other):
        # Size / Size is a plain ratio, Size / number is a new Size
        if isinstance(other, Size):
            return self._b / other._b
        _b = self._b / other
        return Size(b=_b)

    def __div__(self, other):
        # Python 2 spelling of the division operator
        if isinstance(other, Size):
            return self._b / other._b
        _b = self._b / other
        return Size(b=_b)

    def __bool__(self):
        return self.b != 0

    def __nonzero__(self):
        # Python 2 spelling of __bool__
        return self.__bool__()

    def __getattr__(self, unit):
        """
        Calculate units on the fly, relies on the fact that ``bytes`` has been
        converted at instantiation. Units that don't exist will trigger an
        ``AttributeError``
        """
        if unit.startswith('_'):
            # No unit starts with an underscore, so this is internal state
            # (``_formatters``, ``_factors``, ``_b``) or a dunder probe that
            # is genuinely missing. Looking at ``self._formatters`` here would
            # re-enter __getattr__ forever on an instance that did not go
            # through __init__ (as copy and pickle create them).
            raise AttributeError('Size object has not attribute "%s"' % unit)
        try:
            formatter = self._formatters[unit]
        except KeyError:
            raise AttributeError('Size object has not attribute "%s"' % unit)
        if unit in ['b', 'bytes']:
            return formatter(self._b)
        try:
            factor = self._factors[unit]
        except KeyError:
            raise AttributeError('Size object has not attribute "%s"' % unit)
        return formatter(float(self._b) / factor)
def human_readable_size(size):
    """
    Take a size in bytes, and transform it into a human readable size with up
    to two decimals of precision.

    :param size: number of bytes (int or float)
    :returns: string such as ``"1.50 KB"``; the unit goes from ``B`` up to
              ``PB``, anything bigger is still expressed in ``PB``
    """
    suffixes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
    largest = len(suffixes) - 1
    index = 0
    # Stop dividing once the largest suffix is reached: one division per
    # suffix would scale values of 1024 PB and more once too often and label
    # them with the wrong magnitude (1024 PB used to come out as "1.00 PB").
    while size >= 1024 and index < largest:
        size = size / 1024
        index += 1
    return "{size:.2f} {suffix}".format(
        size=size,
        suffix=suffixes[index])
def size_from_human_readable(s):
    """
    Turn a human readable string like ``"10 G"`` into a ``Size``. Spaces are
    ignored and the unit is the last character (``p``, ``t``, ``g``, ``m`` or
    ``k``, in any case). A string ending with a digit is taken as bytes.

    :returns: a ``Size`` object, or ``None`` when the unit is not recognized
    """
    compact = s.replace(' ', '')
    last = compact[-1]
    if last.isdigit():
        return Size(b=float(compact))
    # the number is converted before looking at the unit, so a malformed
    # number fails even when the unit is unknown
    amount = float(compact[:-1])
    keyword_by_unit = {'p': 'pb', 't': 'tb', 'g': 'gb', 'm': 'mb', 'k': 'kb'}
    keyword = keyword_by_unit.get(last.lower())
    if keyword is None:
        return None
    return Size(**{keyword: amount})
def get_partitions_facts(sys_block_path):
    """
    Gather facts about the partitions found in ``sys_block_path``. An entry of
    that directory counts as a partition when it holds a non-empty
    ``partition`` file.

    :param sys_block_path: directory of the parent device, whose entries are
                           inspected
    :returns: dictionary keyed by entry name; each value has the ``start``,
              ``sectors``, ``sectorsize``, ``size``, ``human_readable_size``
              and ``holders`` keys
    """
    facts = {}
    for entry in os.listdir(sys_block_path):
        entry_path = os.path.join(sys_block_path, entry)
        marker = os.path.join(entry_path, 'partition')
        if not os.path.exists(marker):
            continue
        if not get_file_contents(marker):
            continue
        part = {
            'start': get_file_contents(entry_path + "/start", 0),
            'sectors': get_file_contents(entry_path + "/size", 0),
            'sectorsize': get_file_contents(
                entry_path + "/queue/logical_block_size"),
        }
        if not part['sectorsize']:
            part['sectorsize'] = get_file_contents(
                entry_path + "/queue/hw_sector_size", 512)
        # the size is computed with a fixed 512 factor, not with 'sectorsize'
        size_in_bytes = float(part['sectors']) * 512
        part['size'] = size_in_bytes
        part['human_readable_size'] = human_readable_size(size_in_bytes)
        part['holders'] = os.listdir(entry_path + '/holders')
        facts[entry] = part
    return facts
def is_mapper_device(device_name):
    """
    Tell whether ``device_name`` is a path under ``/dev/mapper`` or a
    ``/dev/dm-*`` path

    :param device_name: device path, as a string
    """
    mapper_prefixes = ('/dev/mapper', '/dev/dm-')
    return device_name.startswith(mapper_prefixes)
def is_locked_raw_device(disk_path):
    """
    A device can be locked by a third party software like a database.
    To detect that case, the device is opened in Read/Write and exclusive mode

    :param disk_path: path of the device to probe
    :returns: ``1`` when opening (or closing) the device raises ``OSError``,
              ``0`` otherwise
    """
    try:
        handle = os.open(disk_path, os.O_RDWR | os.O_EXCL, 0)
        os.close(handle)
    except OSError:
        return 1
    return 0
class AllowLoopDevices(object):
    """
    Callable reporting whether loop devices may be used as disks, driven by
    the ``CEPH_VOLUME_ALLOW_LOOP_DEVICES`` environment variable. The state is
    kept on the class: once enabled it stays enabled, and the warning is
    logged only the first time.
    """
    # becomes True the first time the environment variable enables the feature
    allow = False
    # becomes True once the warning below has been logged
    warned = False

    @classmethod
    def __call__(cls):
        setting = os.environ.get("CEPH_VOLUME_ALLOW_LOOP_DEVICES", "false")
        if setting.lower() in ("false", 'no', '0'):
            # nothing new is enabled by this call; report the recorded state
            return cls.allow
        cls.allow = True
        if cls.warned:
            return cls.allow
        logger.warning(
            "CEPH_VOLUME_ALLOW_LOOP_DEVICES is set in your "
            "environment, so we will allow the use of unattached loop"
            " devices as disks. This feature is intended for "
            "development purposes only and will never be supported in"
            " production. Issues filed based on this behavior will "
            "likely be ignored."
        )
        cls.warned = True
        return cls.allow
# Module-level callable: ``allow_loop_devices()`` returns whether loop devices
# may be used, as decided by ``AllowLoopDevices.__call__``.
allow_loop_devices = AllowLoopDevices()
def get_block_devs_sysfs(_sys_block_path='/sys/block', _sys_dev_block_path='/sys/dev/block', device=''):
    """
    List block devices and partitions by walking sysfs.

    :param _sys_block_path: directory with one entry per block device
    :param _sys_dev_block_path: directory of links, inspected for partitions
    :param device: when given, only this entry of ``_sys_block_path`` is
                   inspected instead of the whole directory
    :returns: list of ``[kname, name, type]`` lists sorted on the first item.
              ``name`` differs from ``kname`` for device mapper devices, where
              it is the ``/dev/mapper/<name>`` path.
    """
    def has_mpath_holder(dev_name, holder_names):
        # the holder type is the first field of its dm uuid, e.g.
        # /sys/block/sdy/holders/dm-8/dm/uuid
        for holder in holder_names:
            uuid = get_file_contents(os.path.join(_sys_block_path, dev_name, f'holders/{holder}/dm/uuid'))
            if uuid.split('-')[0].lower() == 'mpath':
                return True
        return False

    entries = []

    # First pass: devices that are _not_ partitions
    candidates = [device] if device else os.listdir(_sys_block_path)
    for dev in candidates:
        kname = os.path.join("/dev", dev)
        if not os.path.exists(kname):
            continue
        # devices held by a multipath device are left out
        if has_mpath_holder(dev, os.listdir(os.path.join(_sys_block_path, dev, 'holders'))):
            continue
        name, type_ = kname, 'disk'
        dm_dir = os.path.join(_sys_block_path, dev, 'dm')
        if os.path.isdir(dm_dir):
            # device mapper: the type comes from the uuid, the name from 'name'
            type_ = get_file_contents(os.path.join(dm_dir, 'uuid')).split('-')[0].lower()
            name = os.path.join("/dev/mapper", get_file_contents(os.path.join(dm_dir, 'name')))
        if dev.startswith('loop'):
            # loop devices must be allowed, and attached (have a 'loop' entry)
            attached_marker = os.path.join(_sys_block_path, dev, 'loop')
            if not allow_loop_devices() or not os.path.exists(attached_marker):
                continue
            type_ = 'loop'
        entries.append([kname, name, type_])

    # Second pass: devices that _are_ partitions
    for item in os.listdir(_sys_dev_block_path):
        item_path = os.path.join(_sys_dev_block_path, item)
        flag = get_file_contents(os.path.join(item_path, 'partition'))
        # the link is resolved for every entry, partition or not
        part_path = os.path.join("/dev", os.path.basename(os.readlink(item_path)))
        if flag == "1":
            entries.append([part_path, part_path, "part"])

    entries.sort(key=lambda entry: entry[0])
    return entries
def get_devices(_sys_block_path='/sys/block', device=''):
    """
    Capture the block devices reported by ``get_block_devs_sysfs()`` together
    with metadata (sectors, size, vendor, solid/rotational, etc.) collected
    from ``<_sys_block_path>/<device>``.

    Only ``disk`` and ``mpath`` devices are kept, plus ``loop`` devices when
    ``allow_loop_devices()`` allows them. ceph RBD devices and mapper devices
    that are logical volumes are excluded.

    :param _sys_block_path: sysfs directory with one entry per block device
    :param device: accepted but not used by this function
    :returns: dictionary where keys are the full paths to devices and values
              are dictionaries of facts
    """
    device_facts = {}
    block_devs = get_block_devs_sysfs(_sys_block_path)
    wanted_types = ['disk', 'mpath']
    if allow_loop_devices():
        wanted_types.append('loop')

    # all facts that have no defaults
    # (<name>, <path relative to the sysfs directory of the device>)
    fact_files = [
        ('removable', 'removable'),
        ('ro', 'ro'),
        ('vendor', 'device/vendor'),
        ('model', 'device/model'),
        ('rev', 'device/rev'),
        ('sas_address', 'device/sas_address'),
        ('sas_device_handle', 'device/sas_device_handle'),
        ('support_discard', 'queue/discard_granularity'),
        ('rotational', 'queue/rotational'),
        ('nr_requests', 'queue/nr_requests'),
    ]

    for kname, diskname, dev_type in block_devs:
        if dev_type not in wanted_types:
            continue
        # If the device is ceph rbd it gets excluded
        if is_ceph_rbd(diskname):
            continue
        # If the mapper device is a logical volume it gets excluded
        if is_mapper_device(diskname) and lvm.get_device_lvs(diskname):
            continue

        devname = os.path.basename(kname)
        sysdir = os.path.join(_sys_block_path, devname)

        metadata = {
            key: get_file_contents(os.path.join(sysdir, relpath))
            for key, relpath in fact_files
        }

        slaves = os.listdir(os.path.join(sysdir, 'slaves'))
        metadata['device_nodes'] = ','.join(slaves) if slaves else devname

        # one numbered sub-directory per actuator: count them from 0 up to
        # the first one that is missing
        metadata['actuators'] = None
        ranges_dir = sysdir + "/queue/independent_access_ranges/"
        if os.path.isdir(ranges_dir):
            count = 0
            while os.path.isdir(ranges_dir + str(count)):
                count += 1
            metadata['actuators'] = count

        # the scheduler in use is the one between square brackets
        metadata['scheduler_mode'] = ""
        scheduler = get_file_contents(sysdir + "/queue/scheduler")
        if scheduler is not None:
            active = re.match(r".*?(\[(.*)\])", scheduler)
            if active:
                metadata['scheduler_mode'] = active.group(2)

        metadata['partitions'] = get_partitions_facts(sysdir)

        size = get_file_contents(os.path.join(sysdir, 'size'), 0)
        metadata['sectors'] = get_file_contents(os.path.join(sysdir, 'sectors'), 0)
        # hw_sector_size is only the fallback for logical_block_size
        metadata['sectorsize'] = get_file_contents(
            sysdir + "/queue/logical_block_size",
            get_file_contents(sysdir + "/queue/hw_sector_size", 512))
        # the size is computed with a fixed 512 factor, not with 'sectorsize'
        metadata['size'] = float(size) * 512
        metadata['human_readable_size'] = human_readable_size(metadata['size'])
        metadata['path'] = diskname
        metadata['locked'] = is_locked_raw_device(diskname)
        metadata['type'] = dev_type

        # some facts from udevadm
        metadata['id_bus'] = udevadm_property(sysdir).get('ID_BUS', '')

        device_facts[diskname] = metadata
    return device_facts
def has_bluestore_label(device_path):
    """
    Check whether ``device_path`` starts with the BlueStore disk signature.

    :param device_path: path of the device (or file) to read
    :returns: ``True`` when the first 22 bytes are the signature, ``False``
              otherwise, including when the path is a directory
    :raises OSError: when the path cannot be opened or read for any reason
                     other than being a directory
    """
    expected_signature = 'bluestore block device'  # 22 bytes long
    logger.info("opening device {} to check for BlueStore label".format(device_path))
    try:
        with open(device_path, "rb") as fd:
            # only the first 22 bytes matter; bytes that are not ASCII are
            # replaced, so they can never match the signature
            leading_text = fd.read(22).decode('ascii', 'replace')
    except IsADirectoryError:
        logger.info(f'{device_path} is a directory, skipping.')
        return False
    return leading_text == expected_signature