Prechádzať zdrojové kódy

Skip and output HTTP errors without aborting.

Stefano Cossu pred 7 rokmi
rodič
commit
8f0f9b26dd
2 zmenené súbory: 36 pridaní a 16 odobraní
  1. 28 14
      lakesuperior/migrator.py
  2. 8 2
      lsup-admin

+ 28 - 14
lakesuperior/migrator.py

@@ -71,7 +71,8 @@ class Migrator:
 
 
     def __init__(
-            self, src, dest, zero_binaries=False, compact_uris=False):
+            self, src, dest, zero_binaries=False, compact_uris=False,
+            skip_errors=False):
         """
         Set up base paths and clean up existing directories.
 
@@ -127,6 +128,7 @@ class Migrator:
 
         self.src = src.rstrip('/')
         self.zero_binaries = zero_binaries
+        self.skip_errors = skip_errors
 
         from lakesuperior.api import resource as rsrc_api
         self.rsrc_api = rsrc_api
@@ -185,7 +187,11 @@ class Migrator:
         iuri = ibase + uid
 
         rsp = requests.head(uri)
-        rsp.raise_for_status()
+        if not self.skip_errors:
+            rsp.raise_for_status()
+        elif rsp.status_code > 399:
+            print('Error retrieving resource {} headers: {} {}'.format(
+                uri, rsp.status_code, rsp.text))
 
         # Determine LDP type.
         ldp_type = 'ldp_nr'
@@ -214,10 +220,14 @@ class Migrator:
         # links.
         get_uri = (
                 uri if ldp_type == 'ldp_rs' else '{}/fcr:metadata'.format(uri))
-        get_req = requests.get(get_uri)
-        get_req.raise_for_status()
-
-        data = get_req.content.replace(
+        get_rsp = requests.get(get_uri)
+        if not self.skip_errors:
+            get_rsp.raise_for_status()
+        elif get_rsp.status_code > 399:
+            print('Error retrieving resource {} body: {} {}'.format(
+                uri, get_rsp.status_code, get_rsp.text))
+
+        data = get_rsp.content.replace(
                 self.src.encode('utf-8'), ibase.encode('utf-8'))
         #logger.debug('Localized data: {}'.format(data.decode('utf-8')))
         gr = Graph(identifier=iuri).parse(data=data, format='turtle')
@@ -231,10 +241,14 @@ class Migrator:
                         nsc['ebucore'].hasMimeType,
                         default='application/octet-stream'))
             else:
-                bin_resp = requests.get(uri)
-                bin_resp.raise_for_status()
-                data = bin_resp.content
-                mimetype = bin_resp.headers.get('content-type')
+                bin_rsp = requests.get(uri)
+                if not self.skip_errors:
+                    bin_rsp.raise_for_status()
+                elif bin_rsp.status_code > 399:
+                    print('Error retrieving resource {} body: {} {}'.format(
+                        uri, bin_rsp.status_code, bin_rsp.text))
+                data = bin_rsp.content
+                mimetype = bin_rsp.headers.get('content-type')
 
             self.rsrc_api.create_or_replace(
                     uid, mimetype=mimetype, provided_imr=provided_imr,
@@ -248,10 +262,6 @@ class Migrator:
             self.rsrc_api.create_or_replace(
                     uid, mimetype=mimetype, stream=BytesIO(data))
 
-        self._ct += 1
-        if self._ct % 10 ==0:
-            print('{} resources processed.'.format(self._ct))
-
         # Now, crawl through outbound links.
         # LDP-NR fcr:metadata must be checked too.
         for pred, obj in gr.predicate_objects():
@@ -265,6 +275,10 @@ class Migrator:
                     and pred not in self.ignored_preds
             ):
                 print('Object {} will be crawled.'.format(obj_uid))
+                self._ct += 1
+                if self._ct % 10 ==0:
+                    print('{} resources processed.'.format(self._ct))
+
                 self._crawl(urldefrag(obj_uid).url)
 
 

+ 8 - 2
lsup-admin

@@ -136,8 +136,14 @@ def copy():
     '--zero-binaries', '-z', is_flag=True,
     help='If set, binaries are created as zero-byte files in the proper '
     'folder structure rather than having their full content copied.')
+@click.option(
+    '--skip-errors', '-e', is_flag=True,
+    help='If set, when the application encounters an error while retrieving '
+    'a resource from the source repository, it will log the error rather than '
+    'quitting. Other exceptions caused by the application will terminate the '
+    'process as usual.')
 @click_log.simple_verbosity_option(logger)
-def migrate(src, dest, start, list_file, zero_binaries):
+def migrate(src, dest, start, list_file, zero_binaries, skip_errors):
     '''
     Migrate an LDP repository to LAKEsuperior.
 
@@ -155,7 +161,7 @@ def migrate(src, dest, start, list_file, zero_binaries):
             src, dest))
     entries = admin_api.migrate(
             src, dest, start_pts=start, list_file=list_file,
-            zero_binaries=zero_binaries)
+            zero_binaries=zero_binaries, skip_errors=skip_errors)
     logger.info('Migrated {} resources.'.format(entries))
     logger.info('''Migration complete. To start the new repository, from the
     directory you launched this script run: