#!/usr/bin/python3
#
# Copyright 2014-2017 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Refer to the README and COPYING files for full details of the license
#

"""
Usage: fc-scan [-v|-h] [--no-devices]

Perform SCSI scan on Fibre Channel scsi_hosts and devices, adding new LUNs and
updating sizes of existing devices. This procedure will not remove existing
LUNs. Must run as root.

Options:
  -v            enable verbose logging
  -h            display this help and exit
  --no-devices  do not scan devices

Exit codes:
  0         scanned fc_hosts and devices successfully
  1         scanning some hosts or devices failed
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import glob
import logging
import os
import random
import sys

from vdsm import utils
from vdsm.common import concurrent

# Logger shared by all functions in this script. The level and the output
# format are configured in main() using logging.basicConfig().
log = logging.getLogger("fc-scan")

# Testing shows that 64 workers is the optimal value. Compared with the
# older fc-scan using a thread per FC port, this is about 10x faster on an
# idle system, and about 19x faster on a system with high I/O load.
#
# We can use a smaller number (e.g. 32) for most of the benefit, but a
# bigger thread pool is more resilient in case scanning some devices is
# very slow.
#
# workers  old/idle  old/load  new/idle    new/load
# -------------------------------------------------
#       1         -         -    19.926      41.250
#       2    10.971    26.097    10.796      21.453
#       4         -         -     5.913      11.538
#       8         -         -     3.467       6.168
#      16         -         -     2.134       3.387
#      32         -         -     1.434       2.087
#      64         -         -     1.081       1.356
#     128         -         -     0.855       0.965
#     256         -         -     0.819       0.917
#     512         -         -     0.880       0.963
#
# For more info see:
# https://bugzilla.redhat.com/1598266#c63
#
# However, on older systems with a lower number of cpus, fc-scan seems to
# take too many resources. Let's limit the number of workers to 1 worker
# per cpu, and never use more than 64 workers.
MAX_WORKERS = min(64, os.sysconf('SC_NPROCESSORS_CONF'))


class Error(Exception):
    """
    Raised when scanning at least one host, or rescanning at least one
    device, did not succeed.
    """


def main(args):
    """
    Scan fc_hosts and, unless disabled, rescan their devices.

    args is the list of command line arguments, without the program name:

      -h            print the module usage text and return
      -v            log at DEBUG level instead of INFO
      --no-devices  skip rescanning devices after scanning the hosts

    Returns without scanning anything if no fc_host is found. Error raised
    by scan_hosts() or rescan_devices() is not handled here.
    """
    if '-h' in args:
        print(__doc__)
        return

    skip_devices = "--no-devices" in args
    verbose = '-v' in args

    if verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(level=level, format="%(name)s: %(message)s")

    fc_hosts = find_fc_hosts()
    if not fc_hosts:
        log.debug("No fc_host found")
        return

    # If scanning the hosts fails, Error propagates from here and the
    # devices are not rescanned.
    scan_hosts(fc_hosts)

    if skip_devices:
        return

    rescan_devices()


# Scanning hosts.

def find_fc_hosts():
    """
    Return a list with the names (e.g. "host3") of all entries matching
    /sys/class/fc_host/host*. The list is empty if nothing matches.
    """
    host_dirs = glob.glob("/sys/class/fc_host/host*")
    return list(map(os.path.basename, host_dirs))


def scan_hosts(hosts):
    """
    Run scan_scsi_host() for every host in hosts using concurrent.tmap(),
    passing max_workers=MAX_WORKERS.

    Raises Error if the value of any result is false.
    """
    outcomes = concurrent.tmap(
        scan_scsi_host, hosts, max_workers=MAX_WORKERS, name="scan")

    # scan_scsi_host() returns False for a host that could not be scanned.
    for outcome in outcomes:
        if not outcome.value:
            raise Error("Scanning hosts failed")


def scan_scsi_host(host):
    """
    Scan a single scsi_host by writing "- - -" to
    /sys/class/scsi_host/<host>/scan.

    Returns True if the write succeeded. Any Exception is logged and
    reported by returning False instead of being raised.
    """
    try:
        scan_file = "/sys/class/scsi_host/{}/scan".format(host)
        log.debug("Scanning %s", host)
        with utils.stopwatch("Scanned %s" % host, log=log):
            fd = os.open(scan_file, os.O_WRONLY)
            try:
                os.write(fd, b"- - -")
            finally:
                os.close(fd)
    except OSError as err:
        # Failure opening or writing the scan file; the error text is
        # logged without a traceback.
        log.error("Scanning %s failed: %s", host, err)
        succeeded = False
    except Exception:
        # Anything else is unexpected, so keep the traceback in the log.
        log.exception("Scanning %s failed", host)
        succeeded = False
    else:
        succeeded = True

    return succeeded


# Rescanning fc devices.

def rescan_devices():
    """
    Collect the rescan files found by find_fc_devices() and run
    rescan_scsi_device() on each of them using concurrent.tmap(), passing
    max_workers=MAX_WORKERS.

    Raises Error if the value of any result is false.
    """
    # This takes about 0.4 seconds on a system with 1300 devices.
    log.debug("Looking up devices")
    with utils.stopwatch("Looking up devices", log=log):
        rescan_files = list(find_fc_devices())

    # Collecting all devices before scanning introduces a small delay, but
    # shuffling them distributes scans evenly and may improve concurrency
    # during scanning, reducing the chance that all workers are blocked on
    # inaccessible or slow devices.
    random.shuffle(rescan_files)

    outcomes = concurrent.tmap(
        rescan_scsi_device,
        rescan_files,
        max_workers=MAX_WORKERS,
        name="rescan")

    # rescan_scsi_device() returns False for a device that could not be
    # rescanned.
    for outcome in outcomes:
        if not outcome.value:
            raise Error("Rescanning devices failed")


def find_fc_devices():
    """
    Return a generator yielding the resolved path (os.path.realpath) of
    every "rescan" file matching
    /sys/class/fc_host/*/device/rport-*/target*/*/rescan.
    """
    matches = glob.iglob(
        "/sys/class/fc_host/*/device/rport-*/target*/*/rescan")
    return (os.path.realpath(match) for match in matches)


def rescan_scsi_device(path):
    """
    Rescan a single device by writing "1" to its rescan file at path.

    Returns True if the write succeeded. Any Exception is logged and
    reported by returning False instead of being raised.
    """
    try:
        log.debug("Rescanning device %s", path)
        with utils.stopwatch("Rescanned device %s" % path, log=log):
            fd = os.open(path, os.O_WRONLY)
            try:
                os.write(fd, b"1")
            finally:
                os.close(fd)
    except OSError as err:
        # Failure opening or writing the rescan file; the error text is
        # logged without a traceback.
        log.error("Rescanning %s failed: %s", path, err)
        succeeded = False
    except Exception:
        # Anything else is unexpected, so keep the traceback in the log.
        log.exception("Rescanning %s failed", path)
        succeeded = False
    else:
        succeeded = True

    return succeeded


if __name__ == '__main__':
    # A scan failure is reported as a single log line and exit code 1.
    # Any other exception is not handled here.
    cli_args = sys.argv[1:]
    try:
        main(cli_args)
    except Error as err:
        log.error("%s", err)
        sys.exit(1)
