Source code for simple_dvc.demo



def init_randomized_dvc_repo(demo_root, with_git=False, reset=False):
    """
    Builds a medium complexity dvc repo.

    The repo is built once inside a cached "precon" directory under the
    ``simpledvc`` application directory, and is then copied to ``demo_root``.
    Later calls with the same configuration reuse the cached build.

    Args:
        demo_root (str | PathLike): destination of the demo repo. Anything
            already at this location is deleted and replaced.
        with_git (bool): if True, the repo is also a git repo and the
            generated files are committed.
        reset (bool): if True, discard the cached build and rebuild it.

    Returns:
        ub.Path: the path to the populated demo repo (``demo_root``).

    todo: implement some tests
    """
    import random

    import ubelt as ub
    from simple_dvc import SimpleDVC

    # Accept plain strings / pathlib paths; the code below relies on
    # ub.Path-only methods such as ``delete`` and ``ensuredir``.
    demo_root = ub.Path(demo_root)

    # Fixed seed so the random directory layout is reproducible.
    rng = random.Random(10998676167967)

    precon_dpath = ub.Path.appdir('simpledvc', 'precon')
    config = {
        'with_git': with_git,
        '__internal_version__': 4,
    }
    hashid = ub.hash_data(config, base='hex')[0:8]
    precon_dvc_root = precon_dpath / f'demo_{hashid}'
    precon_stamp = ub.CacheStamp(
        f'precon_demo_{hashid}', dpath=precon_dpath, depends=config)

    if reset:
        precon_stamp.clear()
        precon_dvc_root.delete()

    if precon_stamp.expired() or not precon_dvc_root.exists():
        # Build in a staging area first
        dvc_root = precon_dvc_root
        dvc_root.delete()

        if with_git:
            dvc_root.ensuredir()
            ub.cmd('git init', cwd=dvc_root, verbose=2)
            ub.cmd('git config --local user.email "sdvc-tester@kitware.com"', cwd=dvc_root, verbose=2)
            ub.cmd('git config --local user.name "Simple DVC Tester"', cwd=dvc_root, verbose=2)
            ub.cmd('git branch -m main', cwd=dvc_root, verbose=2)

        SimpleDVC.init(dvc_root, no_scm=not with_git)
        dvc = SimpleDVC.coerce(dvc_root)

        if with_git:
            # autostage makes dvc ``git add`` its metafiles, so it only
            # applies when the repo is backed by git.
            ub.cmd('dvc config core.autostage true', cwd=dvc.dpath, verbose=3)
        ub.cmd('dvc config cache.type symlink,reflink,hardlink,copy', cwd=dvc.dpath, verbose=3)
        ub.cmd('dvc config cache.protected true', cwd=dvc.dpath, verbose=2)
        ub.cmd('dvc config core.analytics false', cwd=dvc.dpath, verbose=2)
        ub.cmd('dvc config core.check_update false', cwd=dvc.dpath, verbose=2)

        # Build basic data
        print('Writing demo repo structure')
        (dvc_root / 'test-set1').ensuredir()
        assets_dpath = (dvc_root / 'test-set1/assets').ensuredir()
        for idx in range(1, 21):
            fpath = assets_dpath / f'asset_{idx:03d}.data'
            fpath.write_text(str(idx) * 100)
        manifest_fpath = (dvc_root / 'test-set1/manifest.txt')
        manifest_fpath.write_text('pretend-data')

        root_fpath = dvc_root / 'root_file'
        root_fpath.write_text('----' * 100)

        # Each random node path becomes nested ``dir_*`` directories that
        # end in a single ``file_*.data`` leaf.
        root_dpath = dvc_root / 'root_dir'
        node_paths = random_nested_paths(rng=rng)
        for node_path in node_paths:
            rel_fpath = ub.Path(*[f'dir_{n}' for n in node_path[0:-1]]) / ('file_' + str(node_path[-1]) + '.data')
            fpath = root_dpath / rel_fpath
            fpath.parent.ensuredir()
            fpath.write_text(str(node_path))
        print('Finished writing demo repo structure')

        print('Adding demo repo structure')
        dvc.add(root_dpath)
        dvc.add(root_fpath)
        dvc.add(manifest_fpath)
        dvc.add(assets_dpath)

        if with_git:
            ub.cmd('cat .dvc/config', cwd=dvc.dpath, verbose=3)
            ub.cmd('git add .dvc/config', cwd=dvc.dpath, verbose=3)
            ub.cmd('git status', cwd=dvc.dpath, verbose=3)
            ub.cmd('git commit -am "initial commit"', cwd=dvc.dpath, verbose=3)
            ub.cmd('git status', cwd=dvc.dpath, verbose=3)

        # Mark the cached build as complete so later calls can reuse it.
        precon_stamp.renew()

    # Always hand the caller a fresh copy of the cached build.
    demo_root.delete()
    demo_root.parent.ensuredir()
    precon_dvc_root.copy(demo_root)
    return demo_root
def random_nested_paths(num=30, rng=None):
    """
    Use networkx to make a random complex directory structure.

    A random directed graph is generated and reduced to a spanning
    arborescence. Every source-to-sink walk through that tree is returned
    as one path.

    Args:
        num (int): number of nodes in the random file system
        rng (None | int): random state / seed

    Returns:
        List[List[int]]: A list of "paths", which are represented as list of
            "names".

    CommandLine:
        xdoctest -m simple_dvc.demo random_nested_paths

    Example:
        >>> from simple_dvc.demo import *  # NOQA
        >>> import ubelt as ub
        >>> node_paths = random_nested_paths(num=10, rng=123)
        >>> print(f'node_paths = {ub.urepr(node_paths, nl=1)}')
        node_paths = [
            [2, 7, 0, 1],
            [2, 7, 0, 6, 8, 3],
            [2, 4],
            [2, 5],
            [2, 7, 0, 6, 8, 9],
        ]
    """
    import networkx as nx
    import ubelt as ub

    digraph = nx.erdos_renyi_graph(num, p=0.2, directed=True, seed=rng)

    # NOTE(review): presumably networkx 3.2 needs explicit edge weights for
    # the arborescence routine -- confirm before removing this flag.
    WORKAROUND_NX_3_2_REGRESSION = 1
    if WORKAROUND_NX_3_2_REGRESSION:
        for _, _, attrs in digraph.edges(data=True):
            attrs['weight'] = 1.0

    try:
        arbor = nx.minimum_spanning_arborescence(digraph)
    except Exception:
        # Ensure an arborescence will exist: chain together one
        # representative node from each strongly connected component.
        representatives = [
            min(component)
            for component in nx.strongly_connected_components(digraph)
        ]
        for src, dst in ub.iter_window(representatives, 2):
            digraph.add_edge(src, dst, weight=1)
        arbor = nx.minimum_spanning_arborescence(digraph)

    # Nodes without predecessors start a path; nodes without successors
    # end one.
    roots = [node for node in arbor.nodes if len(arbor.pred[node]) == 0]
    leaves = [node for node in arbor.nodes if len(arbor.succ[node]) == 0]

    node_paths = []
    for leaf in leaves:
        for root in roots:
            edge_paths = list(nx.all_simple_edge_paths(arbor, root, leaf))
            if not edge_paths:
                continue
            # Turn the first edge path into the nodes it visits, in order.
            visited = [tail for (tail, _head) in edge_paths[0]]
            visited.append(leaf)
            node_paths.append(visited)
    return node_paths
def simple_demo_repo(dvc_root):
    """
    Build a simple repo using only standard dvc commands for upstream MWEs

    Args:
        dvc_root (str | PathLike): directory to create the repo in. It must
            not exist yet.

    Raises:
        AssertionError: if ``dvc_root`` already exists.
    """
    import ubelt as ub

    # Accept plain strings / pathlib paths; ``ensuredir`` below is a
    # ub.Path-only method.
    dvc_root = ub.Path(dvc_root)
    assert not dvc_root.exists(), 'directory must not exist yet'
    dvc_root.ensuredir()

    def cmd(command):
        # Run every command through the system shell from inside the repo.
        return ub.cmd(command, cwd=dvc_root, verbose=2, system=True)

    cmd('git init')
    cmd('dvc init')
    cmd('dvc config core.autostage true')
    cmd('dvc config cache.type symlink,reflink,hardlink,copy')
    cmd('dvc config cache.protected true')
    cmd('dvc config core.analytics false')
    cmd('dvc config core.check_update false')

    # Build basic data
    (dvc_root / 'test-set1').ensuredir()
    assets_dpath = (dvc_root / 'test-set1/assets').ensuredir()
    for idx in range(1, 21):
        fpath = assets_dpath / f'asset_{idx:03d}.data'
        fpath.write_text(str(idx) * 100)
    manifest_fpath = (dvc_root / 'test-set1/manifest.txt')
    manifest_fpath.write_text('pretend-data')
    root_fpath = dvc_root / 'root_file'
    root_fpath.write_text('----' * 100)

    cmd(f'dvc add {root_fpath}')
    cmd(f'dvc add {manifest_fpath}')
    cmd(f'dvc add {assets_dpath}')
    cmd('git commit -am "initial commit"')
def mwe():
    """
    Rebuild the simple demo repo from scratch and run a verbose
    ``dvc cache migrate`` inside it.
    """
    import ubelt as ub

    # Build a simple fresh dvc repo
    repo_dpath = ub.Path.appdir('simpledvc', 'simple_demo')
    repo_dpath.delete()
    simple_demo_repo(repo_dpath)

    ub.cmd('dvc cache migrate -vvv', cwd=repo_dpath, verbose=3, system=True)