Преглед изворни кода

Add option to use URI list file for migration.

Stefano Cossu пре 7 година
родитељ
комит
8e3b811f80
3 измењених фајлова са 38 додато и 27 уклоњено
  1. 9 10
      lakesuperior/api/admin.py
  2. 19 13
      lakesuperior/migrator.py
  3. 10 4
      lsup-admin

+ 9 - 10
lakesuperior/api/admin.py

@@ -28,18 +28,17 @@ def stats():
     return repo_stats
 
 
-def migrate(src, dest, start=('/',), **kwargs):
+def migrate(src, dest, start_pts=None, list_file=None, **kwargs):
     """
     Migrate an LDP repository to a new LAKEsuperior instance.
 
     See :py:meth:`Migrator.__init__`.
     """
-    start_pts = (
-            (start,)
-            if not isinstance(start, list) and not isinstance(start, tuple)
-            else start)
-
-    return Migrator(src, dest, start_pts, **kwargs).migrate()
-
-
-
+    if start_pts:
+        if not isinstance(
+                start_pts, list) and not isinstance(start_pts, tuple):
+            start_pts = (start_pts,)
+    elif not list_file:
+        start_pts = ('/',)
+
+    return Migrator(src, dest, **kwargs).migrate(start_pts, list_file)

+ 19 - 13
lakesuperior/migrator.py

@@ -71,8 +71,7 @@ class Migrator:
 
 
     def __init__(
-            self, src, dest, start_pts, zero_binaries=False,
-            compact_uris=False):
+            self, src, dest, zero_binaries=False, compact_uris=False):
         """
         Set up base paths and clean up existing directories.
 
@@ -84,9 +83,6 @@ class Migrator:
         it must be a writable directory. It will be deleted and recreated. If
         it does not exist, it will be created along with its parents if
         missing.
-        :param start_pts: (tuple|list) List of starting points to retrieve
-        resources from. It would typically be the repository root in case of a
-        full dump or one or more resources in the repository for a partial one.
         :param binary_handling: (string) One of ``include``, ``truncate`` or
         ``split``.
         :param compact_uris: (bool) NOT IMPLEMENTED. Whether the process should
@@ -130,7 +126,6 @@ class Migrator:
         env.app_globals.nonrdfly.bootstrap()
 
         self.src = src.rstrip('/')
-        self.start_pts = start_pts
         self.zero_binaries = zero_binaries
 
         from lakesuperior.api import resource as rsrc_api
@@ -140,22 +135,33 @@ class Migrator:
 
 
 
-    def migrate(self):
+    def migrate(self, start_pts=None, list_file=None):
         """
         Migrate the database.
 
         This method creates a fully functional and configured LAKEsuperior
-        environment contained in a folder from an LDP repository.
+        data set contained in a folder from an LDP repository.
+
+        :param tuple|list start_pts: List of starting points to retrieve
+        resources from. It would typically be the repository root in case of a
+        full dump or one or more resources in the repository for a partial one.
+        :param str list_file: path to a local file containing a list of URIs,
+        one per line.
         """
         self._ct = 0
         with StoreWrapper(env.app_globals.rdfly.store):
-            for start in self.start_pts:
-                if not start.startswith('/'):
-                    raise ValueError(
+            if start_pts:
+                for start in start_pts:
+                    if not start.startswith('/'):
+                        raise ValueError(
                             'Starting point {} does not begin with a slash.'
                             .format(start))
 
-                self._crawl(start)
+                    self._crawl(start)
+            elif list_file:
+                with open(list_file, 'r') as fp:
+                    for uri in fp:
+                        self._crawl(uri.strip().replace(self.src, ''))
         self._remove_temp_options()
         logger.info('Dumped {} resources.'.format(self._ct))
 
@@ -249,6 +255,7 @@ class Migrator:
         # Now, crawl through outbound links.
         # LDP-NR fcr:metadata must be checked too.
         for pred, obj in gr.predicate_objects():
+            #import pdb; pdb.set_trace()
             obj_uid = obj.replace(ibase, '')
             if (
                     isinstance(obj, URIRef)
@@ -258,7 +265,6 @@ class Migrator:
                     and pred not in self.ignored_preds
             ):
                 print('Object {} will be crawled.'.format(obj_uid))
-                #import pdb; pdb.set_trace()
                 self._crawl(urldefrag(obj_uid).url)
 
 

+ 10 - 4
lsup-admin

@@ -123,16 +123,21 @@ def copy():
 @click.argument('src')
 @click.argument('dest')
 @click.option(
-    '--start', '-s', default='/', show_default=True,
+    '--start', '-s', show_default=True,
     help='Starting point for looking for resources in the repository.\n'
     'The default `/` value starts at the root, i.e. migrates the whole '
     'repository.')
+@click.option(
+    '--list-file', '-l', help='Path to a local file containing URIs to be '
+    'used as starting points, one per line. Use this alternatively to `-s`. '
+    'The URIs can be relative to the repository root (e.g. `/a/b/c`) or fully '
+    'qualified (e.g. `https://example.edu/fcrepo/rest/a/b/c`).')
 @click.option(
     '--zero-binaries', '-z', is_flag=True,
     help='If set, binaries are created as zero-byte files in the proper '
     'folder structure rather than having their full content copied.')
 @click_log.simple_verbosity_option(logger)
-def migrate(src, dest, start, zero_binaries):
+def migrate(src, dest, start, list_file, zero_binaries):
     '''
     Migrate an LDP repository to LAKEsuperior.
 
@@ -147,9 +152,10 @@ def migrate(src, dest, start, zero_binaries):
     from this location.
     '''
     logger.info('Migrating {} into a new repository on {}.'.format(
-        src, dest))
+            src, dest))
     entries = admin_api.migrate(
-            src, dest, start=start, zero_binaries=zero_binaries)
+            src, dest, start_pts=start, list_file=list_file,
+            zero_binaries=zero_binaries)
     logger.info('Migrated {} resources.'.format(entries))
     logger.info('''Migration complete. To start the new repository, from the
     directory you launched this script run: