#!/usr/bin/env bash

# version 2023-03-21

# rgw-restore-bucket-index is an EXPERIMENTAL tool to use in case
# bucket index entries for objects in the bucket are somehow lost. It
# is expected to be needed and used rarely. A bucket name is provided
# and the data pool for that bucket is scanned for all head objects
# matching the bucket's marker. The rgw object name is then extracted
# from the rados object name, and `radosgw-admin object reindex ...`
# is used to add the bucket index entry.
#
# Because this script must process json objects, the `jq` tool must be
# installed on the system.
#
# Usage: $0 -b <bucket-name> [-l <rados-ls-file>] [-p <data-pool-name>] [-y]
#
# This tool is designed to be interactive, allowing the user to
# examine the list of objects to be reindexed before
# proceeding. However, if the "-y" option is provided, the
# script will not prompt the user and will simply proceed.

# A SIGTERM delivered to the top-level shell (this is how super_exit
# aborts the script) removes the temporary files and exits with
# failure. The handler text is only evaluated when the signal arrives.
trap 'clean ; exit 1' TERM
TOP_PID=$$
export TOP_PID

# IMPORTANT: the C locale fixes the order produced by 'sort', and
# 'ceph-diff-sorted' relies on that ordering
LC_ALL=C
export LC_ALL

# temporary work files, made unique per run by the PID suffix; they
# are removed by clean() as long as clean_temps is non-empty
export \
  bkt_entry="/tmp/rgwrbi-bkt-entry.$$" \
  bkt_inst="/tmp/rgwrbi-bkt-inst.$$" \
  bkt_inst_new="/tmp/rgwrbi-bkt-inst-new.$$" \
  obj_list="/tmp/rgwrbi-object-list.$$" \
  zone_info="/tmp/rgwrbi-zone-info.$$" \
  clean_temps=1

# how many seconds it takes for a pending bucket index op to be
# completed through the dir_suggest mechanism
pending_op_secs=120

# make sure radosgw-admin is available; `command -v` is a shell
# builtin, so the check does not depend on the optional `which`
# utility being installed (a missing `which` would otherwise be
# reported as a missing radosgw-admin)
if ! command -v radosgw-admin >/dev/null 2>&1 ;then
  echo 'Error: must have command `radosgw-admin` installed and on $PATH for operation.'
  exit 1
fi

# make sure jq is available
if ! command -v jq >/dev/null 2>&1 ;then
  echo 'Error: must have command `jq` installed and on $PATH for json parsing.'
  exit 1
fi


# Removes the temporary work files named by bkt_entry, bkt_inst,
# bkt_inst_new, obj_list and zone_info. Does nothing when clean_temps
# is empty. Files that do not exist are silently ignored (rm -f).
clean() {
  if [ -n "$clean_temps" ] ;then
    # quoted so that a path containing whitespace or glob characters
    # is removed as exactly one file; "--" ends option parsing
    rm -f -- "$bkt_entry" "$bkt_inst" "$bkt_inst_new" "$obj_list" "$zone_info"
  fi
}

# Sends SIGTERM to the process whose PID is stored in TOP_PID (the
# top-level shell of this script), so the script can be aborted even
# from inside a subshell such as a command substitution.
super_exit() {
  kill -TERM "$TOP_PID"
}

# Writes the command-line help text to stderr and then aborts the
# script through super_exit.
usage() {
  {
    printf '\n'
    printf 'Usage: %s -b <bucketname> [-l <rados-ls-file>] [-p <pool>] [-y]\n' "$0"
    printf '\n'
    printf '%s\n' \
      "where:" \
      "  -b <bucketname>     Required - The name of the bucket to operate on" \
      "  -l <rados-ls-file>  Optional - A file with the output of 'rados ls -p <pool>'" \
      "  -p <pool>           Optional - If not provided, will be inferred from bucket and zone information." \
      "  -y                  Optional - Proceed with correction without prompting the user" \
      "                      USE WITH CAUTION."
    printf '\n'
  } >&2
  super_exit
}

# strips the starting and ending double quotes from a string, so:
#   "dog"   -> dog
#   "dog    -> "dog
#   d"o"g   -> d"o"g
#   "do"g"  -> do"g
# Only $1 is examined; the result is written to stdout.
strip_quotes() {
  # printf rather than echo: echo would treat a value such as -n or -e
  # as one of its own options and print nothing
  printf '%s\n' "$1" | sed 's/^"\(.*\)"$/\1/'
}

# Determines the name of the data pool and writes it to stdout. A pool
# given on the command line has the highest priority, but that case is
# handled by the caller, which only invokes this function when no pool
# was supplied; this function takes no arguments. Here the
# "explicit_placement" in the bucket instance data is tried first, and
# finally the "placement_rule" in the bucket instance data, which is
# looked up in the placement pools reported by `radosgw-admin zone get`.
#
# Globals:  bkt_inst (file read), zone_info (file overwritten)
# Outputs:  the pool name on stdout; an error message on stderr
# Returns:  0 when a pool was found; otherwise calls super_exit and
#           returns 1
get_pool() {
  local expl_pool plmt_rule plmt_pool plmt_class pool

  # explicit_placement; "jq -r" prints the raw string and "// empty"
  # turns a missing (null) value into no output instead of "null"
  expl_pool=$(jq -r '.data.bucket_info.bucket.explicit_placement.data_pool // empty' "$bkt_inst")
  if [ -n "$expl_pool" ] ;then
    printf '%s\n' "$expl_pool"
    return 0
  fi

  # placement_rule is split on "/": the first field selects the
  # placement pool entry and the second, if present, the storage class
  plmt_rule=$(jq -r '.data.bucket_info.placement_rule // empty' "$bkt_inst")
  plmt_pool=${plmt_rule%%/*}
  plmt_class=""
  case "$plmt_rule" in
    */*)
      plmt_class=${plmt_rule#*/}
      plmt_class=${plmt_class%%/*}
      ;;
  esac
  if [ -z "$plmt_class" ] ;then
    plmt_class=STANDARD
  fi

  radosgw-admin zone get >"$zone_info" 2>/dev/null

  # The placement and class names are handed to jq as variables rather
  # than spliced into the program, so names containing characters such
  # as "-" or quotes cannot change the meaning of the filter. An exact
  # key match is preferred so that, e.g., "default-placement" is not
  # resolved to "default-placement-ec"; the substring match is kept as
  # a fallback. The first candidate that has a data pool wins.
  pool=$(jq -r --arg placement "$plmt_pool" --arg class "$plmt_class" '
    (.placement_pools // []) as $pools
    | [ $pools[] | select(.key == $placement) ] as $exact
    | (if ($exact | length) > 0
       then $exact
       else [ $pools[] | select(.key | contains($placement)) ]
       end)
    | map(.val.storage_classes[$class].data_pool // empty)
    | .[0] // empty
  ' "$zone_info")

  if [ -z "$pool" ] ;then
    # stderr, because stdout is what the caller captures as the pool
    echo "ERROR: unable to determine pool." >&2
    super_exit
    return 1
  fi
  printf '%s\n' "$pool"
}

bucket=""
pool=""
lsoutput=""
# initialized explicitly so that a "proceed" variable inherited from
# the environment cannot silently skip the confirmation prompt
proceed=""

# Parses the command-line options given as arguments and stores the
# results in the globals bucket, lsoutput, pool and proceed.
#   -b <bucketname>     sets bucket
#   -l <rados-ls-file>  sets lsoutput; exits with status 1 if the file
#                       does not exist
#   -p <pool>           sets pool
#   -y                  sets proceed=1
# An unknown option or a missing option argument prints an error and
# calls usage. OPTIND is left pointing past the parsed options.
parse_args() {
  local o
  OPTIND=1
  while getopts ":b:l:p:y" o; do
    case "${o}" in
      b)
        bucket="${OPTARG}"
      ;;
      l)
        if [ -e "${OPTARG}" ]; then
          lsoutput="${OPTARG}"
        else
          echo
          echo "ERROR: Provided 'rados ls' output file name does not exist. ${OPTARG}"
          exit 1
        fi
      ;;
      p)
        pool="${OPTARG}"
      ;;
      y)
        echo "NOTICE: This tool is currently considered EXPERIMENTAL."
        proceed=1
      ;;
      :)
        # with a leading ":" in the option string, getopts reports a
        # missing argument as ":" and puts the option letter in OPTARG
        echo
        echo "ERROR: Option -${OPTARG} requires an argument."
        usage
      ;;
      *)
        # an unknown option is reported as "?" with the offending
        # letter in OPTARG
        echo
        echo "ERROR: Unrecognized argument: -${OPTARG}"
        usage
      ;;
    esac
  done
}

parse_args "$@"
shift $((OPTIND-1))

if [ -z "$bucket" ]; then
  echo
  echo "ERROR: Bucket option ( -b ) is required."
  usage
fi

# read bucket entry metadata; "jq -r" prints the raw string and
# "// empty" maps a missing (null) field to no output, so the check
# below also catches metadata that lacks the field
radosgw-admin metadata get "bucket:$bucket" >"$bkt_entry" 2>/dev/null
marker=$(jq -r '.data.bucket.marker // empty' "$bkt_entry")
bucket_id=$(jq -r '.data.bucket.bucket_id // empty' "$bkt_entry")
if [ -z "$marker" ] || [ -z "$bucket_id" ] ;then
    echo "ERROR: unable to read entry-point metadata for bucket \"$bucket\"."
    clean
    exit 1
fi

echo "marker is $marker"
echo "bucket_id is $bucket_id"

# read bucket instance metadata
radosgw-admin metadata get "bucket.instance:${bucket}:$bucket_id" >"$bkt_inst" 2>/dev/null

# handle versioned buckets
bkt_flags=$(jq -r '.data.bucket_info.flags // empty' "$bkt_inst")
if [ -z "$bkt_flags" ] ;then
    echo "ERROR: unable to read instance metadata for bucket \"$bucket\"."
    # remove the temp files on this exit path as on every other one
    clean
    exit 1
fi

# mask bit indicating it's a versioned bucket
is_versioned=$(( $bkt_flags & 2))
if [ "$is_versioned" -ne 0 ] ;then
    echo "Error: this bucket appears to be versioned, and this tool cannot work with versioned buckets."
    clean
    exit 1
fi

# examine number of bucket index shards
num_shards=$(jq ".data.bucket_info.num_shards" "$bkt_inst")
echo "number of bucket index shards is $num_shards"

# determine data pool; a pool given with -p takes priority over the
# one derived from the bucket and zone information
if [ -z "$pool" ]; then
  pool=$(get_pool)
fi
echo "data pool is $pool"

# The marker is data, not a pattern: escape every character that is
# special in a basic regular expression (plus the "/" used as the sed
# delimiter below) so that, e.g., a dot in the marker only matches a dot.
marker_re=$(printf '%s\n' "$marker" | sed 's|[][\.*^$/]|\\&|g')

# search the data pool for all of the head objects that begin with the
# marker that are not in namespaces (indicated by an extra underscore)
# and then strip away all but the rgw object name; only matching lines
# are printed (sed -n ... p)
obj_filter="s/^${marker_re}_\([^_].*\)/\1/p"
if [ -z "$lsoutput" ]; then
  ( rados -p "$pool" ls | sed -n "$obj_filter" >"$obj_list" ) 2>/dev/null
else
  # use the previously captured 'rados ls' output instead of the pool
  ( sed -n "$obj_filter" <"$lsoutput" >"$obj_list" ) 2>/dev/null
fi

# handle the case where the resulting object list file is empty
if [ ! -s "$obj_list" ] ;then
    echo "NOTICE: No head objects for bucket \"$bucket\" were found in pool \"$pool\", so nothing was recovered."
    clean
    exit 0
fi

if [ -z "$proceed" ] ;then
    # warn user and get permission to proceed
    echo "NOTICE: This tool is currently considered EXPERIMENTAL."
    echo "The list of objects that we will attempt to restore can be found in \"$obj_list\"."
    echo "Please review the object names in that file (either below or in another window/terminal) before proceeding."
    while true ; do
        # read fails when stdin reaches end-of-file (e.g. the script is
        # run without a terminal); without this check the loop would
        # spin forever on an empty response. -r keeps backslashes in
        # the response literal.
        if ! read -r -p "Type \"proceed!\" to proceed, \"view\" to view object list, or \"q\" to quit: " action ;then
            echo
            echo "Error: unable to read a response; exiting without making changes."
            clean
            exit 1
        fi
        case "$action" in
            q)
                echo "Exiting..."
                clean
                exit 0
                ;;
            view)
                echo "Viewing..."
                less "$obj_list"
                ;;
            "proceed!")
                echo "Proceeding..."
                break
                ;;
            *)
                echo "Error: response \"$action\" is not understood."
                ;;
        esac
    done
fi

# execute object reindex on all of the head objects and remember when
# it finished
radosgw-admin object reindex "--bucket=$bucket" "--objects-file=$obj_list" 2>/dev/null
reindex_done=$(date +%s)

# note: large is 2^30
export large=1073741824

# held in an array so that every argument stays a single word when the
# command is run, whatever characters the bucket name contains
listcmd=(radosgw-admin bucket list "--bucket=$bucket" --allow-unordered "--max-entries=$large")

if [ -n "$proceed" ] ;then
    # non-interactive: wait for pending index ops, then list the
    # bucket (output discarded)
    sleep "$pending_op_secs"
    "${listcmd[@]}" >/dev/null 2>/dev/null
else
    echo "NOTICE: Bucket stats are currently incorrect. They can be restored with the following command after 2 minutes:"
    echo "    ${listcmd[*]}"

    while true ; do
        # read fails at end-of-file on stdin; treat that like "no"
        # instead of looping forever on an empty response
        if ! read -r -p "Would you like to take the time to recalculate bucket stats now? [yes/no] " action ;then
            echo
            break
        fi
        if [ "$action" == "no" ] ;then
            break
        elif [ "$action" == "yes" ] ;then
            # make sure at least $pending_op_secs since reindex completed
            now=$(date +%s)
            sleep_time=$(( pending_op_secs - now + reindex_done ))
            if [ "$sleep_time" -gt 0 ] ;then
                sleep "$sleep_time"
            fi

            "${listcmd[@]}" >/dev/null 2>/dev/null
            break
        else
            echo "Error: response \"$action\" is not understood."
        fi
    done
fi

clean
echo Done
