#!/usr/bin/env python3
#
# infer_mv
# Copyright 2019-2021 by Larry Hastings
#
# A sample script demonstrating one possible use for "correlate".
# See usage for more information.
#
# Part of the "correlate" package:
# http://github.com/larryhastings/correlate

import correlate
from datetime import datetime
import os.path
from os.path import join, normpath
import shlex
import sys

def usage():
    """Print the command-line help for this script to stdout, then exit.

    The program name shown is the basename of ``sys.argv[0]``.  This
    function never returns: it always ends with ``sys.exit(0)``.
    """
    program = os.path.basename(sys.argv[0])
    help_lines = (
        "Usage:",
        " ".join(("   ", program, "<source_dir>", "<file>", "[<file2> ...]")),
        "",
        "Produces a shell script on stdout that, if executed,",
        "renames <file> based on correlating to filenames in <source_dir>.",
        "(Why a shell script?  So you can edit the results before you commit.)",
        "",
        "Supports recursively renaming directory trees.",
        "Preserves extension from each file.",
    )
    for text in help_lines:
        print(text)
    sys.exit(0)

# We need a source directory plus at least one file to rename.  This is an
# explicit length check rather than an `assert`, because asserts are stripped
# under `python -O` and an empty file list would then slip through.
# usage() prints the help text and exits, so nothing below runs on bad input.
if len(sys.argv) < 3:
    usage()

source_dir = sys.argv[1]
files = sys.argv[2:]


# The correlator whose dataset_a / dataset_b are filled in below.
c = correlate.Correlator()

def _find_filenames(filenames):
    output_filenames = []
    for filename in filenames:
        if os.path.isdir(filename):
            for dirpath, dirnames, filenames in os.walk(filename):
                if dirpath.startswith(("./", ".\\")):
                    dirpath = dirpath[2:]
                dirnames.sort()
                output_filenames.extend(_find_filenames((join(dirpath, filename) for filename in filenames)))
            continue
        output_filenames.append(filename)

    output_filenames = [normpath(f) for f in output_filenames]
    output_filenames.sort()
    return output_filenames

# sort the filenames before setting them,
# just to try and preserve some presentation for the user
def filenames_to_keys(filenames, dataset):
    """Add every file named by *filenames* to *dataset*, keyed by its path.

    filenames is an iterable of file and/or directory paths; it is expanded
    into a sorted list of files by _find_filenames().  For each file the
    extension is removed and path separators are replaced by spaces; that
    string is handed to correlate.str_to_keys(), and the keys it returns are
    stored with dataset.set_keys(keys, filename).
    """
    for filename in _find_filenames(filenames):
        # splitext() only strips a real extension from the last path
        # component.  Cutting at the last "." anywhere in the path would
        # yield an empty string for names with no dot at all, and would
        # truncate at a dot inside a directory name.
        stem = os.path.splitext(filename)[0]
        stem = stem.replace("/", " ")
        stem = stem.replace("\\", " ")  # hello, windows users!
        keys = correlate.str_to_keys(stem)
        dataset.set_keys(keys, filename)

# old names: the files given on the command line go into dataset_a
filenames_to_keys(files, c.dataset_a)

# new names: scan source_dir from inside it, so the paths stored in
# dataset_b are relative to source_dir
old_dir = os.getcwd()
os.chdir(source_dir)
try:
    filenames_to_keys((".",), c.dataset_b)
finally:
    # always go back to where we started, even if the scan raised
    os.chdir(old_dir)

result = c.correlate()

# Directory bookkeeping, filled in later while the mv commands are printed.
old_dirs_to_rmdir = set()
new_dirs_created = set()

# unmatched_b entries are re-rooted under source_dir for display;
# unmatched_a entries are used exactly as the correlator reports them.
unmatched_source_files = [
    normpath(join(source_dir, name)) for name in result.unmatched_b
]
unmatched_destination_files = result.unmatched_a
correct_names = []
renames = []

for match in result.matches:
    src = match.value_a

    # Take the matched name from value_b but keep src's own extension, so
    # e.g. a directory of MP3s can be used to rename a directory of FLACs.
    dst = os.path.splitext(match.value_b)[0] + os.path.splitext(src)[1]

    if src != dst:
        renames.append((src, dst, match.score))
    else:
        # already has the right name; nothing to move
        correct_names.append(dst)



def print_list(list, description):
    """Print a commented summary line for *list*, then one line per entry.

    list is a sized iterable of filename strings.  (The parameter name
    shadows the builtin; it is kept so existing callers are unaffected.)
    description is a singular phrase such as "unmatched source file".
    Unless there is exactly one entry, every "file" in it becomes "files"
    and every "has" becomes "have".

    The summary line is "# <count> <description>" followed by ":" when
    entries follow, or "." when the list is empty.  Each entry is printed
    shell-quoted after a "#   " prefix.
    """
    count = len(list)
    label = description
    if count != 1:
        label = label.replace("file", "files")
        label = label.replace("has", "have")
    ending_punctuation = ":" if count != 0 else "."
    print(f"# {count} {label}{ending_punctuation}")

    for name in list:
        # shlex.quote() keeps a newline as-is inside the quotes.  In a "#"
        # comment that would end the comment and leave the rest of the name
        # on an executable line, so re-prefix every continuation line.
        quoted = shlex.quote(name).replace("\n", "\n#   ")
        print(f"#   {quoted}")


# Header of the generated shell script: shebang, timestamp, and the command
# line that produced it.
print("#!/bin/sh")
print("#")
print("#", datetime.now().strftime("%Y/%m/%d %H:%M:%S"))
print("# produced using")

quoted = [shlex.quote(a) for a in sys.argv[1:]]
quoted.insert(0, "#  % infer_mv")
line = " ".join(quoted)
# keep the echoed command line to a single terminal-width row
if len(line) > 78:
    line = line[:73] + "[...]"
# An argument containing a newline would end the comment and leave its
# remainder on an executable line; keep every continuation line commented.
line = line.replace("\n", "\n#    ")
print(line)

# Summary of what the correlation found, all as comments.
print()
print(f"# {len(result.matches)} matches")
print_list(unmatched_source_files, "unmatched source file")
print_list(unmatched_destination_files, "unmatched destination file we can't rename")
print_list(correct_names, "file that already has the correct name")
if not renames:
    print("\n# nothing to do!")

if not (unmatched_source_files or unmatched_destination_files):
    print("#\n# great! a perfect match.")

def interesting_dir(dir):
    """Say whether *dir* is a directory name worth acting on.

    A falsy *dir* (for example the empty string) is returned unchanged.
    Otherwise the result is False for "." and "..", and True for any
    other value.
    """
    if not dir:
        return dir
    return dir != "." and dir != ".."

# Emit the commands.  Every command gets "--" so that a path beginning with
# "-" is treated as an operand rather than parsed as an option.
for src, dst, score in renames:
    old_dir = os.path.dirname(src)
    if interesting_dir(old_dir):
        old_dirs_to_rmdir.add(old_dir)

    new_dir = os.path.dirname(dst)
    if interesting_dir(new_dir) and new_dir not in new_dirs_created:
        new_dirs_created.add(new_dir)
        # -p creates missing parent directories and does not fail when the
        # directory already exists
        print("mkdir", "-p", "--", shlex.quote(new_dir))

    print()
    print(f"# score {score}")
    print("mv", "--", shlex.quote(src), shlex.quote(dst))

# never remove a directory we have just moved files into
old_dirs_to_rmdir -= new_dirs_created
# Reverse order puts "a/b" before "a", so a nested directory is removed
# before the parent that contains it.
for old_dir in sorted(old_dirs_to_rmdir, reverse=True):
    print("rmdir", "--", shlex.quote(old_dir))

