#!/usr/bin/python
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
"""Manage Ceph OSD journal partitions and journal locations.

The script accepts exactly two arguments: a mode and a Python-literal dict.

* ``partitions`` with keys ``disk_path`` and ``journals`` (a list of sizes
  in MiB): repartition the disk unless the existing layout already matches.
* ``location`` with keys ``data_path``, ``journal_path`` and ``osdid`` (an
  int): mount the OSD data partition and point its ``journal`` symlink at
  the requested journal partition.
"""

import ast
import os
import os.path
import re
import subprocess
import sys

DEVICE_NAME_NVME = "nvme"

#########
# Utils #
#########


def command(arguments, **kwargs):
    """Execute a command and capture stdout, stderr & return code.

    :param arguments: argument list handed to ``subprocess.Popen``.
    :param kwargs: extra keyword arguments forwarded to ``subprocess.Popen``.
    :returns: tuple ``(stdout, stderr, returncode)``.
    """
    # Default to text mode: on Python 3 the pipes would otherwise yield bytes
    # and every str-pattern regex applied to the output would raise TypeError.
    # Callers may still override this explicitly.
    kwargs.setdefault("universal_newlines", True)
    process = subprocess.Popen(
        arguments,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
    out, err = process.communicate()
    return out, err, process.returncode


def get_input(arg, valid_keys):
    """Convert the input to a dict and perform basic validation.

    :param arg: string holding a Python literal; literal ``\\n`` sequences
                are turned into real newlines before parsing.
    :param valid_keys: keys that must all be present in the parsed dict.
    :returns: the parsed dict, or None when the string cannot be parsed, is
              not a dict, or lacks one of ``valid_keys``.
    """
    json_string = arg.replace("\\n", "\n")
    try:
        input_dict = ast.literal_eval(json_string)
    except Exception:
        # Any parsing failure simply means the input is invalid.
        return None

    # A list or string containing the key names must not pass validation:
    # callers index the result by key.
    if not isinstance(input_dict, dict):
        return None
    if not all(k in input_dict for k in valid_keys):
        return None

    return input_dict


def get_partition_uuid(dev):
    """Return the PARTUUID reported by ``blkid`` for dev, or None."""
    output, _, _ = command(['blkid', dev])
    match = re.search(r'PARTUUID="(.+?)"', output)
    if match is None:
        return None
    return match.group(1)


def device_path_to_device_node(device_path):
    """Resolve a device path to its device node via ``readlink -f``.

    :returns: the resolved path with trailing whitespace stripped, or None
              if running the helper commands raised an exception.
    """
    try:
        # Let udev finish processing the device before resolving links.
        command(["udevadm", "settle", "-E", device_path])
        out, _, _ = command(["readlink", "-f", device_path])
        out = out.rstrip()
    except Exception:
        # Best effort: callers receive None when resolution is impossible.
        return None

    return out


def _require_device_node(device_path):
    """Resolve device_path to a device node or terminate with status 1."""
    node = device_path_to_device_node(device_path)
    if not node:
        print("Failed to resolve device node for %s, aborting" % device_path)
        sys.exit(1)
    return node


def _partition_node(disk_node, index):
    """Return the node name of partition number index on disk_node."""
    # NVMe devices separate the partition number with a 'p'.
    if DEVICE_NAME_NVME in disk_node:
        return '{}p{}'.format(disk_node, index)
    return '{}{}'.format(disk_node, index)


###########################################
# Manage Journal Disk Partitioning Scheme #
###########################################

DISK_BY_PARTUUID = "/dev/disk/by-partuuid/"
JOURNAL_UUID = '45b0969e-9b03-4f30-b4c6-b4b80ceff106'  # Type of a journal partition


def is_partitioning_correct(disk_path, partition_sizes):
    """Validate the existence and size of journal partitions.

    :param disk_path: device path of the journal disk.
    :param partition_sizes: expected partition sizes in MiB, in order.
    :returns: True when the disk has a GPT table and partition i has the
              i-th expected size, False otherwise.
    """

    # Obtain the device node from the device path.
    disk_node = _require_device_node(disk_path)

    # Check that partition table format is GPT
    command(["udevadm", "settle", "-E", disk_node])
    output, _, _ = command(["parted", "-s", disk_node, "print"])
    if not re.search('Partition Table: gpt', output):
        print("Format of disk node %s is not GPT, zapping disk" % disk_node)
        return False

    # Check that each partition size matches the one in input
    for partition_index, size in enumerate(partition_sizes, 1):
        partition_node = _partition_node(disk_node, partition_index)

        command(["udevadm", "settle", "-E", partition_node])
        cmd = ["parted", "-s", partition_node, "unit", "MiB", "print"]
        output, _, _ = command(cmd)

        # The node and size are escaped so they are matched literally.
        regex = (r"^Disk " + re.escape(partition_node) + r":\s*" +
                 re.escape(str(size)) + r"[\.0]*MiB")
        if not re.search(regex, output, re.MULTILINE):
            print("Journal partition %(node)s size is not %(size)s, "
                  "zapping disk" % {"node": partition_node, "size": size})
            return False

    command(["udevadm", "settle", "-t", "10"])
    return True


def create_partitions(disk_path, partition_sizes):
    """Recreate partitions.

    Writes a new GPT table to the disk, removes by-partuuid symlinks that
    resolve to it and creates one partition per entry of partition_sizes.
    Terminates the process with status 1 if the table or a partition cannot
    be created.

    :param disk_path: device path of the journal disk.
    :param partition_sizes: partition sizes in MiB, in creation order.
    """

    # Obtain the device node from the device path.
    disk_node = _require_device_node(disk_path)

    # Issue: After creating a new partition table on a device, Udev does not
    # always remove old symlinks (i.e. to previous partitions on that device).
    # Also, even if links are erased before zapping the disk, some of them will
    # be recreated even though there is no partition to back them!
    # Therefore, we have to remove the links AFTER we erase the partition table
    # Issue: DISK_BY_PARTUUID directory is not present at all if there are no
    # GPT partitions on the storage node so nothing to remove in this case
    links = []
    if os.path.isdir(DISK_BY_PARTUUID):
        candidates = [os.path.join(DISK_BY_PARTUUID, name)
                      for name in os.listdir(DISK_BY_PARTUUID)]
        links = [link for link in candidates if os.path.islink(link)]

    # Erase all partitions on current node by creating a new GPT table
    _, err, ret = command(["parted", "-s", disk_node, "mktable", "gpt"])
    if ret:
        print("Error erasing partition table of %(node)s\n"
              "Return code: %(ret)s reason: %(reason)s" %
              {"node": disk_node, "ret": ret, "reason": err})
        sys.exit(1)

    # Erase old symlinks
    for link in links:
        if disk_node in os.path.realpath(link):
            os.remove(link)

    # Create partitions in order
    used_space_mib = 1  # leave 1 MB at the beginning of the disk
    for num, size in enumerate(partition_sizes, 1):
        cmd = ['parted', '-s', disk_node, 'unit', 'mib',
               'mkpart', 'primary',
               str(used_space_mib), str(used_space_mib + size)]
        _, err, ret = command(cmd)
        parms = {"disk_node": disk_node,
                 "start": used_space_mib,
                 "end": used_space_mib + size,
                 "reason": err}
        # Report success only once the return code has been checked.
        if ret:
            print("Failed to create partition with "
                  "start=%(start)s, end=%(end)s "
                  "on %(disk_node)s reason: %(reason)s" % parms)
            sys.exit(1)
        print("Created partition from start=%(start)s MiB to end=%(end)s MiB"
              " on %(disk_node)s" % parms)

        # Set partition type to ceph journal
        # noncritical operation, it makes 'ceph-disk list' output correct info
        cmd = ['sgdisk',
               '--change-name={num}:ceph journal'.format(num=num),
               '--typecode={num}:{uuid}'.format(
                   num=num,
                   uuid=JOURNAL_UUID,
               ),
               disk_node]
        _, err, ret = command(cmd)
        if ret:
            print("WARNING: Failed to set partition name and typecode")
        used_space_mib += size


###########################
# Manage Journal Location #
###########################

OSD_PATH = "/var/lib/ceph/osd/"


def mount_data_partition(data_path, osdid):
    """Mount an OSD data partition and return the mounted path.

    The partition is mounted as xfs on ``OSD_PATH + "ceph-<osdid>"`` unless
    the output of ``mount`` already lists it there. Terminates the process
    with status 1 if mounting fails.
    """

    # Obtain the device node from the device path.
    data_node = _require_device_node(data_path)

    mount_path = OSD_PATH + "ceph-" + str(osdid)
    output, _, _ = command(['mount'])
    # Match node and mount point as whole words so that e.g. /dev/sda1 does
    # not match a /dev/sda10 line and ceph-1 does not match ceph-10.
    regex = ("^" + re.escape(data_node) + r"\s.*" +
             re.escape(mount_path) + r"(\s|$)")
    if not re.search(regex, output, re.MULTILINE):
        cmd = ['mount', '-t', 'xfs', data_node, mount_path]
        _, _, ret = command(cmd)
        params = {"node": data_node, "path": mount_path}
        if ret:
            print("Failed to mount %(node)s to %(path)s, aborting" % params)
            sys.exit(1)
        else:
            print("Mounted %(node)s to %(path)s" % params)
    return mount_path


def is_location_correct(path, journal_path, osdid):
    """Check if location points to the correct device.

    :param path: mount point of the OSD data partition.
    :param journal_path: device path of the expected journal partition.
    :param osdid: unused; kept for interface compatibility.
    :returns: True when ``<path>/journal`` resolves to the journal node.
    """

    # Obtain the device node from the device path.
    journal_node = device_path_to_device_node(journal_path)

    return os.path.realpath(path + "/journal") == journal_node


def fix_location(mount_point, journal_path, osdid):
    """Move the journal to the new partition.

    Re-creates the ``journal`` symlink, updates ``journal_uuid``, zeroes the
    start of the journal partition and runs ``ceph-osd --mkjournal``.
    Terminates the process with status 1 if the journal partition cannot be
    identified or the symlink cannot be created.
    """

    # Obtain the device node from the device path.
    journal_node = _require_device_node(journal_path)

    # Fix symlink
    path = mount_point + "/journal"  # 'journal' symlink path used by ceph-osd
    journal_uuid = get_partition_uuid(journal_node)
    if journal_uuid is None:
        # Without a PARTUUID there is no by-partuuid target to link to.
        print("Failed to get partition uuid of %s" % journal_node)
        sys.exit(1)
    new_target = DISK_BY_PARTUUID + journal_uuid
    params = {"path": path, "target": new_target}
    try:
        if os.path.lexists(path):
            os.unlink(path)  # delete the old symlink
        os.symlink(new_target, path)
    except OSError:
        print("Failed to create symlink: %(path)s -> %(target)s" % params)
        sys.exit(1)
    print("Symlink created: %(path)s -> %(target)s" % params)

    # Fix journal_uuid
    path = mount_point + "/journal_uuid"
    try:
        with open(path, 'w') as f:
            f.write(journal_uuid)
    except (IOError, OSError):
        # The operation is noncritical, it only makes 'ceph-disk list'
        # display complete output. We log and continue.
        params = {"path": path, "uuid": journal_uuid}
        print("WARNING: Failed to set uuid of %(path)s to %(uuid)s" % params)

    # Clean the journal partition
    # even if erasing the partition table, if another journal was present here
    # it's going to be reused. Journals are always bigger than 100MB.
    command(['dd', 'if=/dev/zero', 'of=%s' % journal_node,
             'bs=1M', 'count=100'])

    # Format the journal
    cmd = ['/usr/bin/ceph-osd', '-i', str(osdid),
           '--pid-file', '/var/run/ceph/osd.%s.pid' % osdid,
           '-c', '/etc/ceph/ceph.conf',
           '--cluster', 'ceph',
           '--mkjournal']
    _, err, ret = command(cmd)
    params = {"journal_node": journal_node,
              "osdid": osdid,
              "ret": ret,
              "reason": err}
    if not ret:
        print("Prepared new journal partition: %(journal_node)s "
              "for osd id: %(osdid)s" % params)
    else:
        print("Error initializing journal node: "
              "%(journal_node)s for osd id: %(osdid)s "
              "ceph-osd return code: %(ret)s reason: %(reason)s" % params)


########
# Main #
########

def main(argv):
    """Parse and validate the arguments, then run the requested mode.

    :param argv: command line arguments without the program name; expected
                 to be ``[mode, literal_dict]``.
    """
    # parse and validate arguments
    err = False
    partitions = None
    location = None
    if len(argv) != 2:
        err = True
    elif argv[0] == "partitions":
        valid_keys = ['disk_path', 'journals']
        partitions = get_input(argv[1], valid_keys)
        if not partitions:
            err = True
        elif not isinstance(partitions['journals'], list):
            err = True
    elif argv[0] == "location":
        valid_keys = ['data_path', 'journal_path', 'osdid']
        location = get_input(argv[1], valid_keys)
        if not location:
            err = True
        elif not isinstance(location['osdid'], int):
            err = True
    else:
        err = True
    if err:
        print("Command intended for internal use only")
        sys.exit(-1)

    if partitions:
        # Recreate partitions only if the existing ones don't match input
        if not is_partitioning_correct(partitions['disk_path'],
                                       partitions['journals']):
            create_partitions(partitions['disk_path'], partitions['journals'])
        else:
            print("Partition table for %s is correct, "
                  "no need to repartition" %
                  device_path_to_device_node(partitions['disk_path']))
    elif location:
        # we need to have the data partition mounted & we can let it mounted
        mount_point = mount_data_partition(location['data_path'],
                                           location['osdid'])
        # Update journal location only if link point to another partition
        if not is_location_correct(mount_point,
                                   location['journal_path'],
                                   location['osdid']):
            print("Fixing journal location for "
                  "OSD id: %(id)s" % {"id": location['osdid']})
            fix_location(mount_point,
                         location['journal_path'],
                         location['osdid'])
        else:
            print("Journal location for %s is correct, "
                  "no need to change it" % location['data_path'])


if __name__ == "__main__":
    main(sys.argv[1:])