#!/usr/bin/env python
# PYTHON_ARGCOMPLETE_OK
"""
CLI definition
"""
import scriptconfig as scfg # NOQA
import ubelt as ub
from simple_dvc.api import SimpleDVC
[docs]
class SimpleDVC_CLI(scfg.ModalCLI):
"""
A DVC CLI that uses our simplified (and more permissive) interface.
The main advantage is that you can run these commands outside a DVC repo as
long as you point to a valid in-repo path.
"""
[docs]
class Add(scfg.DataConfig):
    """
    Add data to the DVC repo.
    """
    __command__ = 'add'

    # Positional argument (position=1) declared with nargs='+'.
    paths = scfg.Value(
        [],
        position=1,
        nargs='+',
        help='Input files / directories to add',
    )
    # Flag-style option with the short alias "v".
    verbose = scfg.Value(
        0,
        isflag=True,
        short_alias=['v'],
        help='verbosity',
    )

    @classmethod
    def main(cls, cmdline=1, **kwargs):
        """
        Parse the ``add`` arguments and hand them to ``SimpleDVC.add``.

        Args:
            cmdline: forwarded unchanged to ``cls.cli`` as ``cmdline``.
            **kwargs: forwarded to ``cls.cli`` as its ``data`` argument.
        """
        args = cls.cli(cmdline=cmdline, data=kwargs, strict=True)
        # A default-constructed SimpleDVC is used; the repo is not given here.
        SimpleDVC().add(args.paths, verbose=args.verbose)
[docs]
class Pull(scfg.DataConfig):
    """
    Pull data from a DVC remote.
    """
    __command__ = 'pull'
    paths = scfg.Value([], nargs='+', position=1, help='Data to attempt to pull')
    verbose = scfg.Value(0, short_alias=['v'], isflag=True, help='verbosity')
    # Short aliases are written without a leading dash, like every other
    # alias in this file (the original '-j' was inconsistent with them).
    jobs = scfg.Value('default', short_alias=['j'], help='Number of jobs to run simultaneously. The default value is 4 * cpu_count()')
    remote = scfg.Value(None, short_alias=['r'], help='Remote storage to pull from')
    force = scfg.Value(False, isflag=True, short_alias=['f'], help='Do not prompt when removing working directory files.')
    recursive = scfg.Value(False, isflag=True, short_alias=['R'], help='Pull cache for subdirectories of the specified directory')
    allow_missing = scfg.Value(False, isflag=True, help='Ignore errors if some of the files or directories are missing')

    @classmethod
    def main(cls, cmdline=1, **kwargs):
        """
        Parse the ``pull`` arguments and call ``SimpleDVC.pull``.

        Args:
            cmdline: forwarded unchanged to ``cls.cli`` as ``cmdline``.
            **kwargs: forwarded to ``cls.cli`` as its ``data`` argument.
        """
        config = cls.cli(cmdline=cmdline, data=kwargs, strict=True)
        dvc = SimpleDVC()
        # Only pass along the config entries that ``dvc.pull`` can accept as
        # keyword arguments; ``paths`` is always given positionally.
        pull_kwargs = ub.compatible(config, dvc.pull)
        dvc.pull(config.paths, **pull_kwargs)
[docs]
class Push(scfg.DataConfig):
    """
    Push data to a DVC remote.
    """
    __command__ = 'push'
    paths = scfg.Value([], nargs='+', position=1, help='Data to attempt to push')
    verbose = scfg.Value(0, short_alias=['v'], isflag=True, help='verbosity')
    # Short aliases are written without a leading dash, like every other
    # alias in this file (the original '-j' was inconsistent with them).
    jobs = scfg.Value('default', short_alias=['j'], help='Number of jobs to run simultaneously. The default value is 4 * cpu_count()')
    # Help text corrected: data is pushed *to* the remote, not from it.
    remote = scfg.Value(None, short_alias=['r'], help='Remote storage to push to')
    # force = scfg.Value(False, isflag=True, short_alias=['f'], help='Do not prompt when removing working directory files.')
    recursive = scfg.Value(False, isflag=True, short_alias=['R'], help='Push cache for subdirectories of the specified directory')
    # allow_missing = scfg.Value(False, isflag=True, help='Ignore errors if some of the files or directories are missing')

    @classmethod
    def main(cls, cmdline=1, **kwargs):
        """
        Parse the ``push`` arguments and call ``SimpleDVC.push``.

        Args:
            cmdline: forwarded unchanged to ``cls.cli`` as ``cmdline``.
            **kwargs: forwarded to ``cls.cli`` as its ``data`` argument.
        """
        config = cls.cli(cmdline=cmdline, data=kwargs, strict=True)
        dvc = SimpleDVC()
        # Only pass along the config entries that ``dvc.push`` can accept as
        # keyword arguments; ``paths`` is always given positionally.
        push_kwargs = ub.compatible(config, dvc.push)
        dvc.push(config.paths, **push_kwargs)
[docs]
class Request(scfg.DataConfig):
    """
    Like pull, but only tries to pull if the requested file doesn't exist.
    """
    __command__ = 'request'
    paths = scfg.Value([], nargs='+', position=1, help='Data to attempt to pull. Individual args can be a YAML list.')
    remote = scfg.Value(None, short_alias=['r'], help='remote to pull from if needed')
    verbose = scfg.Value(0, short_alias=['v'], isflag=True, help='verbosity')
    pull = scfg.Value(0, isflag=True, help='if True, pull instead of request.')

    @classmethod
    def main(cls, cmdline=1, **kwargs):
        """
        Parse the ``request`` arguments, expand the given paths, and call
        ``SimpleDVC.request`` on the result.

        Args:
            cmdline: forwarded unchanged to ``cls.cli`` as ``cmdline``.
            **kwargs: forwarded to ``cls.cli`` as its ``data`` argument.
        """
        config = cls.cli(cmdline=cmdline, data=kwargs, strict=True)
        import rich
        if config.verbose:
            rich.print(ub.urepr(config, nl=1))
        dvc = SimpleDVC()

        from kwutil.util_path import coerce_patterned_paths
        resolved_paths = coerce_patterned_paths(config.paths, globfallback=True)
        if config.verbose:
            print('resolved_paths = {}'.format(ub.urepr(resolved_paths, nl=1)))

        request_kwargs = {'verbose': config.verbose, 'pull': config.pull}
        if config.remote is not None:
            # ``--remote`` used to be parsed but never forwarded, so it was
            # silently ignored. Forward it only when it was given, and only
            # if ``dvc.request`` is able to accept a ``remote`` keyword, so
            # the default invocation behaves exactly as before.
            request_kwargs.update(
                ub.compatible({'remote': config.remote}, dvc.request))
        dvc.request(resolved_paths, **request_kwargs)
[docs]
class CacheDir(scfg.DataConfig):
    """
    Print the cache directory
    """
    __command__ = 'cache_dir'

    # Positional argument (position=1); defaults to the current directory.
    dvc_root = scfg.Value(
        '.',
        help='get the cache path for this DVC repo',
        position=1,
    )

    @classmethod
    def main(cls, cmdline=1, **kwargs):
        """
        Parse the ``cache_dir`` arguments and print ``SimpleDVC.cache_dir``.

        Args:
            cmdline: forwarded unchanged to ``cls.cli`` as ``cmdline``.
            **kwargs: forwarded to ``cls.cli`` as its ``data`` argument.
        """
        args = cls.cli(cmdline=cmdline, data=kwargs, strict=True)
        repo = SimpleDVC(dvc_root=args.dvc_root)
        cache_dpath = repo.cache_dir
        print(cache_dpath)
[docs]
class ListSidecars(scfg.DataConfig):
    """
    List all sidecars associated with a path.
    """
    __command__ = 'sidecars'

    # Positional argument (position=1); defaults to the current directory.
    path = scfg.Value(
        '.',
        help='sidecar file',
        position=1,
    )

    @classmethod
    def main(cls, cmdline=1, **kwargs):
        """
        Parse the ``sidecars`` arguments and print each sidecar path on its
        own line.

        Args:
            cmdline: forwarded unchanged to ``cls.cli`` as ``cmdline``.
            **kwargs: forwarded to ``cls.cli`` as its ``data`` argument.
        """
        args = cls.cli(cmdline=cmdline, data=kwargs, strict=True)
        # The DVC object is built from the query path itself.
        repo = SimpleDVC.coerce(args.path)
        found_sidecars = repo.sidecar_paths(args.path)
        # Print as we iterate rather than collecting first.
        for sidecar_fpath in found_sidecars:
            print(sidecar_fpath)
[docs]
class ValidateSidecar(scfg.DataConfig):
    """
    Validate that everything marked in a sidecar file looks ok.
    """
    __command__ = 'validate_sidecar'
    path = scfg.Value(None, position=1, help='path associated with sidecars to validate')
    check_hash = scfg.Value(False, isflag=True, help='if true also check hashes')

    @classmethod
    def main(cls, cmdline=1, **kwargs):
        """
        Find the sidecars associated with ``path`` and print a summary of
        the references in each one.

        For every sidecar, the first item produced by
        ``dvc._sidecar_references`` is used as the summary record. When
        more items follow, counts over those remaining items are attached
        to it (``n_file_co_exists``, ``n_file_cache_exists`` and, with
        ``check_hash``, ``n_file_checksum_ok``) before it is printed.

        Args:
            cmdline: forwarded unchanged to ``cls.cli`` as ``cmdline``.
            **kwargs: forwarded to ``cls.cli`` as its ``data`` argument.
        """
        import rich
        config = cls.cli(cmdline=cmdline, data=kwargs, strict=True)
        dvc = SimpleDVC.coerce(config.path)
        from kwutil import util_progress

        # Collect all sidecar paths up front so the next loop has a
        # concrete list to report progress over.
        pman0 = util_progress.ProgressManager()
        with pman0:
            sidecar_fpaths = list(pman0.progiter(
                dvc.sidecar_paths(config.path), desc='find sidecars'))

        def process_item(item):
            """
            When hash checking is enabled, set ``item['checksum_ok']`` in
            place; otherwise leave the item untouched.
            """
            if not config.check_hash:
                return
            if item['cache_exists']:
                got_md5 = ub.hash_file(item['cache_path'], hasher='md5')
                # NOTE(review): prefix match rather than equality; presumably
                # the recorded md5 may carry a suffix after the hex digest
                # -- confirm against the sidecar format.
                item['checksum_ok'] = item['md5'].startswith(got_md5)
            else:
                # No cached file to hash: the result is unknown, not False.
                item['checksum_ok'] = None

        pman1 = util_progress.ProgressManager('progiter', verbose=3)
        for sidecar_fpath in pman1.progiter(sidecar_fpaths, desc='read sidecar'):
            print(f'Validate: sidecar_fpath={sidecar_fpath}')
            item_gen = dvc._sidecar_references(sidecar_fpath)

            # A bare ``next(item_gen)`` would raise StopIteration and abort
            # the whole run if this sidecar produced no references; skip
            # such a sidecar instead.
            first_item = next(item_gen, None)
            if first_item is None:
                print(f'No references found in sidecar_fpath={sidecar_fpath}')
                continue
            process_item(first_item)

            # The first item may declare how many items follow; use it as
            # the progress total when available.
            total = first_item['nfiles'] if 'nfiles' in first_item else None

            items = [first_item]
            pman2 = util_progress.ProgressManager()
            with pman2:
                for item in pman2.progiter(item_gen, total=total):
                    process_item(item)
                    items.append(item)

            if len(items) > 1:
                remaining = items[1:]
                first_item['n_file_co_exists'] = sum(
                    f['co_exists'] for f in remaining)
                first_item['n_file_cache_exists'] = sum(
                    f['cache_exists'] for f in remaining)
                if config.check_hash:
                    # Items whose checksum could not be computed (None) are
                    # excluded from the count.
                    first_item['n_file_checksum_ok'] = sum(
                        f['checksum_ok'] for f in remaining
                        if f['checksum_ok'] is not None)
            rich.print(ub.urepr(first_item, nl=1))
# registery = DVC_RegisteryCLI
from simple_dvc.registery import DVC_RegisteryCLI as registery
from simple_dvc.cache_validate import DvcCacheValidateCLI
from simple_dvc.discover_ssh_remote import DiscoverSshRemoteCLI
# Add a subset of the registry to the top level?
# Find = registery.Find
# Expose the modal CLI's entry point under the conventional module-level name.
main = SimpleDVC_CLI.main

if __name__ == '__main__':
    # CommandLine:
    #     python ~/code/simple_dvc/simple_dvc/main.py
    main()