
WIP: First major pass at full-scale migration.

* Rename dump() to migrate() in lsup-admin
* Migration creates a fully functioning LS instance with configuration
* Create self-standing Migrator class
* Refactor access to env globals to allow alternative configurations
* Add exists() and get_metadata() methods in resource API
* Add switches to disable checks
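A minimal usage sketch of the renamed entry point follows; the source URL and
destination path are illustrative only, not part of the commit:

    from lakesuperior.api import admin as admin_api

    # Crawl a source LDP repository and build a new, fully configured
    # LAKEsuperior instance under the destination folder. Extra keyword
    # arguments (e.g. binary_handling) are forwarded to Migrator.
    entries = admin_api.migrate(
            'http://localhost:8080/fcrepo/rest', '/tmp/lsup_dest',
            start=('/',), binary_handling='include')
    print('{} resources migrated.'.format(entries))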
Stefano Cossu, 6 years ago
parent
commit
92c9a35981
6 changed files with 326 additions and 181 deletions
  1. lakesuperior/api/admin.py (+13, -147)
  2. lakesuperior/api/resource.py (+35, -12)
  3. lakesuperior/migrator.py (+245, -0)
  4. lakesuperior/model/ldp_factory.py (+1, -0)
  5. lakesuperior/model/ldpr.py (+23, -13)
  6. lsup-admin (+9, -9)

+ 13 - 147
lakesuperior/api/admin.py

@@ -1,43 +1,26 @@
 import logging
-import os
 
-import click_log
-from contextlib import ExitStack
-from shutil import rmtree
-
-import lmdb
-import requests
-
-from rdflib import Graph, URIRef
-
-import lakesuperior.env_setup
-
-from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.env import env
-from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
+from lakesuperior.migrator import Migrator
 from lakesuperior.store.ldp_nr.default_layout import DefaultLayout as FileLayout
+from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
 
-__doc__ = '''
+__doc__ = """
 Admin API.
 
 This module contains maintenance utilities and stats.
-'''
+"""
 
 logger = logging.getLogger(__name__)
-app_globals = env.app_globals
-
-_ignore_list = (
-    nsc['fcrepo'].hasParent,
-    nsc['fcrepo'].hasTransactionProvider,
-)
 
 
 def stats():
-    '''
+    """
     Get repository statistics.
 
     @return dict Store statistics, resource statistics.
-    '''
+    """
+    # Delayed import, to allow running under an alternative configuration.
+    import lakesuperior.env_setup
     repo_stats = {'rsrc_stats': env.app_globals.rdfly.count_rsrc()}
     with TxnManager(env.app_globals.rdf_store) as txn:
         repo_stats['store_stats'] = env.app_globals.rdf_store.stats()
@@ -45,136 +28,19 @@ def stats():
     return repo_stats
 
 
-def dump(
-        src, dest, start=('/',), binary_handling='include',
-        compact_uris=False):
-    '''
-    Dump a whole LDP repository or parts of it to disk.
+def migrate(src, dest, start=('/',), **kwargs):
+    """
+    Migrate an LDP repository to a new LAKEsuperior instance.
 
-    @param src (rdflib.term.URIRef) Webroot of source repository. This must
-    correspond to the LDP root node (for Fedora it can be e.g.
-    `http://localhost:8080fcrepo/rest/`) and is used to determine if URIs
-    retrieved are managed by this repository.
-    @param dest (str) Local path of the destination. If the location exists it
-    must be a writable directory. It will be deleted and recreated. If it does
-    not exist, it will be created along with its parents if missing.
-    @param start (tuple|list) List of starting points to retrieve resources
-    from. It would typically be the repository root in case of a full dump
-    or one or more resources in the repository for a partial one.
-    @param binary_handling (string) One of 'include', 'truncate' or 'split'.
-    @param compact_uris (bool) NOT IMPLEMENTED. Whether the process should
-    attempt to compact URIs generated with broken up path segments. If the UID
-    matches a pattern such as `/12/34/56/123456...` it is converted to
-    `/123456...`. This would remove a lot of cruft caused by the pairtree
-    segments. Note that this will change the publicly exposed URIs. If
-    durability is a concern, a rewrite directive can be added to the HTTP
-    server that proxies the WSGI endpoint.
-    '''
+    See :py:meth:`Migrator.__init__`.
+    """
     # 1. Retrieve list of resources.
     start_pts = (
             (start,)
             if not isinstance(start, list) and not isinstance(start, tuple)
             else start)
 
-    dbpath = '{}/ldprs_store'.format(dest)
-    rmtree(dbpath, ignore_errors=True)
-    os.makedirs(dbpath)
-    fpath = '{}/ldpnr_store'.format(dest)
-    rmtree(fpath, ignore_errors=True)
-    os.makedirs(fpath)
-
-    with lmdb.open(
-            dbpath, 1024 ** 4, metasync=False, readahead=False,
-            meminit=False) as db:
-        for start in start_pts:
-            if not start.startswith('/'):
-                raise ValueError(
-                        'Starting point {} does not begin with a slash.'
-                        .format(start))
-
-            _gather_refs(db, src, start, dest)
-        entries = db.stat()['entries']
-        logger.info('Dumped {} resources.'.format(entries))
-
-    return entries
-
-
-def _gather_refs(db, base, path, dest):
-    '''
-    Get the UID of a resource and its relationships recursively.
-
-    This method recurses into itself each time a reference to a resource
-    managed by the repository is encountered.
-
-    @param base (string) Base URL of repository. This is used to determine
-    whether encountered URI terms are repository-managed.
-    @param path (string) Path, relative to base URL, of the resource to gather.
-    @param dest (string) Local path for RDF database and non-RDF files.
-    '''
-    pfx = base.rstrip('/')
-    # Public URI of source repo.
-    uri = pfx + path
-    # Internal URI of destination.
-    iuri = URIRef(uri.replace(pfx, nsc['fcres']))
-    ibase = base.replace(pfx, nsc['fcres'])
-
-    rsp = requests.head(uri)
-    rsp.raise_for_status()
-
-    # Determine LDP type.
-    ldp_type = 'ldp_nr'
-    for link in requests.utils.parse_header_links(rsp.headers.get('link')):
-        if (
-                link.get('rel') == 'type'
-                and link.get('url') == str(nsc['ldp'].RDFSource)):
-            ldp_type = 'ldp_rs'
-            break
-
-    # Get the whole RDF document now because we have to know all outbound
-    # links.
-    get_uri = uri if ldp_type == 'ldp_rs' else '{}/fcr:metadata'.format(uri)
-    get_req = requests.get(get_uri)
-    get_req.raise_for_status()
-
-    data = get_req.content.replace(base.encode('utf-8'), ibase.encode('utf-8'))
-    logger.debug('Localized data: {}'.format(data.decode('utf-8')))
-    gr = Graph(identifier=iuri).parse(data=data, format='turtle')
-
-    # First store the resource, so when we recurse, a resource referring back
-    # to this resource will skip it as already existing and avoid an infinite
-    # loop.
-    #
-    # The RDF data stream inserted is the turtle-serialized bytestring as it
-    # comes from the request.
-    with db.begin(write=True) as txn:
-        with txn.cursor() as cur:
-            if not cur.set_key(iuri.encode('utf-8')):
-                cur.put(uri.encode('utf-8'), data)
+    return Migrator(src, dest, start_pts, **kwargs).migrate()
 
-    # Grab binary.
-    if ldp_type == 'ldp_nr':
-        bin_resp = requests.get('{}/fcr:content'.format(uri))
-        bin_resp.raise_for_status()
 
-        # @FIXME Use a more robust checking mechanism. Maybe offer the option
-        # to verify the content checksum.
-        cnt_hash = gr.value(iuri, nsc['premis'].hasMessageDigest).replace(
-                'urn:sha1:', '')
-        fpath = FileLayout.local_path('{}/ldpnr_store'.format(dest), cnt_hash)
-        os.makedirs(os.path.dirname(fpath), exist_ok=True)
-        with open(fpath, 'wb') as f:
-            f.write(bin_resp.content)
 
-    # Now, crawl through outbound links.
-    # LDP-NR fcr:metadata must be checked too.
-    for pred, obj in gr.predicate_objects():
-        if (
-                isinstance(obj, URIRef)
-                and obj.startswith(iuri)
-                and pred not in _ignore_list):
-            with db.begin() as txn:
-                with txn.cursor() as cur:
-                    # Avoid ∞
-                    if cur.set_key(obj.encode('utf-8')):
-                        continue
-            _gather_refs(db, base, obj.replace(ibase, ''), dest)
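A note on the app_globals refactor visible above (and in resource.py below):
the removed module-level alias bound env.app_globals once, at import time,
freezing whatever configuration was loaded first, whereas the Migrator swaps a
fresh AppGlobals into env at runtime. A self-contained sketch of the
difference, using stand-in objects rather than the real classes:

    class Env:
        """Stand-in for lakesuperior.env.env."""

    env = Env()
    env.app_globals = 'globals from the default configuration'

    alias = env.app_globals           # bound once, at import time
    env.app_globals = 'globals built by the Migrator'

    print(alias)            # stale: still the default globals
    print(env.app_globals)  # current: picks up the runtime swap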

+ 35 - 12
lakesuperior/api/resource.py

@@ -20,7 +20,6 @@ from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
 
 
 logger = logging.getLogger(__name__)
-app_globals = env.app_globals
 
 __doc__ = '''
 Primary API for resource manipulation.
@@ -75,9 +74,9 @@ def transaction(write=False):
             # update timestamps on resources.
             env.timestamp = arrow.utcnow()
             env.timestamp_term = Literal(env.timestamp, datatype=XSD.dateTime)
-            with TxnManager(app_globals.rdf_store, write=write) as txn:
+            with TxnManager(env.app_globals.rdf_store, write=write) as txn:
                 ret = fn(*args, **kwargs)
-            if len(app_globals.changelog):
+            if len(env.app_globals.changelog):
                 job = Thread(target=process_queue)
                 job.start()
             logger.debug('Deleting timestamp: {}'.format(getattr(env, 'timestamp')))
@@ -94,8 +93,8 @@ def process_queue():
     '''
     lock = Lock()
     lock.acquire()
-    while len(app_globals.changelog):
-        send_event_msg(*app_globals.changelog.popleft())
+    while len(env.app_globals.changelog):
+        send_event_msg(*env.app_globals.changelog.popleft())
     lock.release()
 
 
@@ -118,11 +117,35 @@ def send_event_msg(remove_trp, add_trp, metadata):
     subjects = set(remove_dict.keys()) | set(add_dict.keys())
     for rsrc_uri in subjects:
         logger.debug('Processing event for subject: {}'.format(rsrc_uri))
-        app_globals.messenger.send(rsrc_uri, **metadata)
+        env.app_globals.messenger.send(rsrc_uri, **metadata)
 
 
 ### API METHODS ###
 
+@transaction()
+def exists(uid):
+    '''
+    Return whether a resource exists (is stored) in the repository.
+
+    @param uid (string) Resource UID.
+    '''
+    try:
+        exists = LdpFactory.from_stored(uid).is_stored
+    except ResourceNotExistsError:
+        exists = False
+    return exists
+
+
+@transaction()
+def get_metadata(uid):
+    '''
+    Get metadata (admin triples) of an LDPR resource.
+
+    @param uid (string) Resource UID.
+    '''
+    return LdpFactory.from_stored(uid).metadata
+
+
 @transaction()
 def get(uid, repr_options={}):
     '''
@@ -263,11 +286,11 @@ def delete(uid, soft=True):
     '''
     # If referential integrity is enforced, grab all inbound relationships
     # to break them.
-    refint = app_globals.rdfly.config['referential_integrity']
+    refint = env.app_globals.rdfly.config['referential_integrity']
     inbound = True if refint else inbound
     repr_opts = {'incl_inbound' : True} if refint else {}
 
-    children = app_globals.rdfly.get_descendants(uid)
+    children = env.app_globals.rdfly.get_descendants(uid)
 
     if soft:
         rsrc = LdpFactory.from_stored(uid, repr_opts)
@@ -276,16 +299,16 @@ def delete(uid, soft=True):
         for child_uri in children:
             try:
                 child_rsrc = LdpFactory.from_stored(
-                    app_globals.rdfly.uri_to_uid(child_uri),
+                    env.app_globals.rdfly.uri_to_uid(child_uri),
                     repr_opts={'incl_children' : False})
             except (TombstoneError, ResourceNotExistsError):
                 continue
             child_rsrc.bury_rsrc(inbound, tstone_pointer=rsrc.uri)
     else:
-        ret = app_globals.rdfly.forget_rsrc(uid, inbound)
+        ret = env.app_globals.rdfly.forget_rsrc(uid, inbound)
         for child_uri in children:
-            child_uid = app_globals.rdfly.uri_to_uid(child_uri)
-            ret = app_globals.rdfly.forget_rsrc(child_uid, inbound)
+            child_uid = env.app_globals.rdfly.uri_to_uid(child_uri)
+            ret = env.app_globals.rdfly.forget_rsrc(child_uid, inbound)
 
     return ret
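Assuming an environment that is already set up (e.g. via
lakesuperior.env_setup), the two new API methods added above could be
exercised as follows; the UID is hypothetical:

    from lakesuperior.api import resource as rsrc_api

    uid = '/postcards/1984'  # hypothetical resource UID

    # exists() traps ResourceNotExistsError and returns a boolean.
    if rsrc_api.exists(uid):
        # get_metadata() returns the stored admin triples of the LDPR.
        print(rsrc_api.get_metadata(uid))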
 

+ 245 - 0
lakesuperior/migrator.py

@@ -0,0 +1,245 @@
+import logging
+import shutil
+
+from io import BytesIO
+from contextlib import ContextDecorator
+from os import path
+
+import lmdb
+import requests
+import yaml
+
+from rdflib import Graph, URIRef
+
+from lakesuperior.dictionaries.namespaces import ns_collection as nsc
+from lakesuperior.env import env
+from lakesuperior.globals import AppGlobals
+from lakesuperior.config_parser import parse_config
+from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
+
+
+logger = logging.getLogger(__name__)
+
+
+class StoreWrapper(ContextDecorator):
+    """
+    Open and close a store.
+    """
+    def __init__(self, store):
+        self.store = store
+
+    def __enter__(self):
+        self.store.open(
+                env.config['application']['store']['ldp_rs'])
+
+    def __exit__(self, *exc):
+        self.store.close()
+
+
+class Migrator:
+    """
+    Class to handle a database migration.
+
+    This class holds progress state and shared variables as it crawls
+    through linked resources on an LDP server.
+
+    Since a repository migration can be a very long operation, and the total
+    number of resources cannot be known in advance through LDP interaction
+    alone, a progress ticker outputs the count of processed resources at
+    regular intervals.
+    """
+
+    """
+    LMDB database parameters.
+
+    See :meth:`lmdb.Environment.__init__`
+    """
+    db_params = {
+        'map_size': 1024 ** 4,
+        'metasync': False,
+        'readahead': False,
+        'meminit': False,
+    }
+
+    """List of predicates to ignore when looking for links."""
+    ignored_preds = (
+        nsc['fcrepo'].hasParent,
+        nsc['fcrepo'].hasTransactionProvider,
+    )
+
+
+    def __init__(
+            self, src, dest, start_pts, binary_handling='include',
+            compact_uris=False):
+        """
+        Set up base paths and clean up existing directories.
+
+        :param src: (URIRef) Webroot of source repository. This must
+        correspond to the LDP root node (for Fedora it can be e.g.
+        ``http://localhost:8080/fcrepo/rest/``) and is used to determine
+        whether retrieved URIs are managed by this repository.
+        :param dest: (str) Destination repository path. If the location exists
+        it must be a writable directory. It will be deleted and recreated. If
+        it does not exist, it will be created along with its parents if
+        missing.
+        :param start_pts: (tuple|list) List of starting points to retrieve
+        resources from. It would typically be the repository root in case of a
+        full dump or one or more resources in the repository for a partial one.
+        :param binary_handling: (string) One of ``include``, ``truncate`` or
+        ``split``.
+        :param compact_uris: (bool) NOT IMPLEMENTED. Whether the process should
+        attempt to compact URIs generated with broken up path segments. If the
+        UID matches a pattern such as `/12/34/56/123456...` it is converted to
+        `/123456...`. This would remove a lot of cruft caused by the pairtree
+        segments. Note that this will change the publicly exposed URIs. If
+        durability is a concern, a rewrite directive can be added to the HTTP
+        server that proxies the WSGI endpoint.
+        """
+        # Set up repo folder structure and copy default configuration to
+        # destination file.
+        cur_dir = path.dirname(path.dirname(path.abspath(__file__)))
+        self.dbpath = '{}/data/ldprs_store'.format(dest)
+        self.fpath = '{}/data/ldpnr_store'.format(dest)
+        config_dir = self.config_dir = '{}/etc'.format(dest)
+
+        shutil.rmtree(dest, ignore_errors=True)
+        shutil.copytree(
+                '{}/etc.defaults'.format(cur_dir), config_dir)
+
+        # Modify and overwrite destination configuration.
+        orig_config = parse_config(config_dir)
+        orig_config['application']['store']['ldp_rs']['location'] = self.dbpath
+        orig_config['application']['store']['ldp_nr']['path'] = self.fpath
+        # This sets a "hidden" configuration property that bypasses all server
+        # management on resource load: referential integrity, server-managed
+        # triples, etc. This will be removed at the end of the migration.
+        orig_config['application']['store']['ldp_rs']['disable_checks'] = True
+
+        with open('{}/application.yml'.format(config_dir), 'w') as config_file:
+            config_file.write(yaml.dump(orig_config['application']))
+
+        env.config = parse_config(config_dir)
+        env.app_globals = AppGlobals(env.config)
+
+        with TxnManager(env.app_globals.rdf_store, write=True) as txn:
+            env.app_globals.rdfly.bootstrap()
+            env.app_globals.rdfly.store.close()
+        env.app_globals.nonrdfly.bootstrap()
+
+        self.src = src.rstrip('/')
+        self.start_pts = start_pts
+
+        from lakesuperior.api import resource as rsrc_api
+        self.rsrc_api = rsrc_api
+        print('Environment: {}'.format(env))
+        print('Resource API Environment: {}'.format(self.rsrc_api.env))
+
+
+    def migrate(self):
+        """
+        Migrate the database.
+
+        This method creates a fully functional and configured LAKEsuperior
+        environment contained in a folder from an LDP repository.
+        """
+        self._ct = 0
+        with StoreWrapper(env.app_globals.rdfly.store):
+            for start in self.start_pts:
+                if not start.startswith('/'):
+                    raise ValueError(
+                            'Starting point {} does not begin with a slash.'
+                            .format(start))
+
+                self._crawl(start)
+        #self._remove_temp_options()
+        logger.info('Dumped {} resources.'.format(self._ct))
+
+        return self._ct
+
+
+    def _crawl(self, uid):
+        """
+        Get the contents of a resource and its relationships recursively.
+
+        This method recurses into itself each time a reference to a resource
+        managed by the repository is encountered.
+
+        :param uid: (string) Path relative to the source server webroot,
+        pointing to the resource to crawl; effectively the resource UID.
+        """
+        ibase = str(nsc['fcres'])
+        # Public URI of source repo.
+        uri = self.src + uid
+        # Internal URI of destination.
+        iuri = ibase + uid
+
+        rsp = requests.head(uri)
+        rsp.raise_for_status()
+
+        # Determine LDP type.
+        ldp_type = 'ldp_nr'
+        try:
+            for link in requests.utils.parse_header_links(
+                    rsp.headers.get('link')):
+                if (
+                        link.get('rel') == 'type'
+                        and link.get('url') == str(nsc['ldp'].RDFSource)):
+                    # Resource is an LDP-RS.
+                    ldp_type = 'ldp_rs'
+                    break
+        except TypeError:
+            raise ValueError('URI {} is not an LDP resource.'.format(uri))
+
+        # Get the whole RDF document now because we have to know all outbound
+        # links.
+        get_uri = (
+                uri if ldp_type == 'ldp_rs' else '{}/fcr:metadata'.format(uri))
+        get_req = requests.get(get_uri)
+        get_req.raise_for_status()
+
+        data = get_req.content.replace(
+                self.src.encode('utf-8'), ibase.encode('utf-8'))
+        #logger.debug('Localized data: {}'.format(data.decode('utf-8')))
+        gr = Graph(identifier=iuri).parse(data=data, format='turtle')
+
+        # Grab binary and set new resource parameters.
+        if ldp_type == 'ldp_nr':
+            bin_resp = requests.get('{}/fcr:content'.format(uri))
+            bin_resp.raise_for_status()
+            data = bin_resp.content
+            mimetype = bin_resp.headers.get('content-type')
+        else:
+            mimetype = 'text/turtle'
+
+        # Store the resource, so when we recurse, a resource referring back
+        # to this resource will skip it as already existing and avoid an
+        # infinite loop.
+        # @TODO This can be improved by creating a resource API method for
+        # creating a resource from an RDFLib graph. Here we had to deserialize
+        # the RDF data to gather information but have to pass the original
+        # serialized stream, which has to be deserialized again in the model.
+        self.rsrc_api.create_or_replace(
+                uid, mimetype=mimetype, stream=BytesIO(data))
+
+        self._ct += 1
+        if self._ct % 10 == 0:
+            print('{} resources processed.'.format(self._ct))
+
+        # Now, crawl through outbound links.
+        # LDP-NR fcr:metadata must be checked too.
+        for pred, obj in gr.predicate_objects():
+            uid = obj.replace(ibase, '')
+            if (
+                    isinstance(obj, URIRef)
+                    and obj.startswith(iuri)
+                    and not self.rsrc_api.exists(uid) # Avoid ∞
+                    and pred not in self.ignored_preds):
+                self._crawl(uid)
+
+
+    def _remove_temp_options(self):
+        """Remove temporary options from the destination configuration."""
+        config = parse_config(self.config_dir)
+        del config['application']['store']['ldp_rs']['disable_checks']
+        with open(
+                '{}/application.yml'.format(self.config_dir),
+                'w') as config_file:
+            config_file.write(yaml.dump(config['application']))
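The admin API wrapper shown earlier is a thin shim over this class, which can
also be driven directly. A sketch with illustrative values; note that
instantiating Migrator wipes and recreates the destination directory:

    from lakesuperior.migrator import Migrator

    migrator = Migrator(
            src='http://localhost:8080/fcrepo/rest',
            dest='/tmp/lsup_dest',
            start_pts=('/',),
            binary_handling='include')
    count = migrator.migrate()
    print('{} resources migrated.'.format(count))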

+ 1 - 0
lakesuperior/model/ldp_factory.py

@@ -87,6 +87,7 @@ class LdpFactory:
         @param stream (IOStream | None) The provided data stream. This can be
         RDF or non-RDF content, or None. In the latter case, an empty container
         is created.
+        @param **kwargs Arguments passed to the LDP class constructor.
         '''
         uri = nsc['fcres'][uid]
 

+ 23 - 13
lakesuperior/model/ldpr.py

@@ -121,6 +121,9 @@ class Ldpr(metaclass=ABCMeta):
 
         self.provided_imr = provided_imr
 
+        # Disable all internal checks e.g. for raw I/O.
+        self.disable_checks = rdfly.config.get('disable_checks', False)
+
 
     @property
     def rsrc(self):
@@ -345,22 +348,29 @@ class Ldpr(metaclass=ABCMeta):
         @param create_only (boolean) Whether this is a create-only operation.
         '''
         create = create_only or not self.is_stored
-        ev_type = RES_CREATED if create else RES_UPDATED
 
-        self._add_srv_mgd_triples(create)
-        ref_int = rdfly.config['referential_integrity']
-        if ref_int:
-            self._check_ref_int(ref_int)
+        if not self.disable_checks:
+            ev_type = RES_CREATED if create else RES_UPDATED
+            self._add_srv_mgd_triples(create)
+            ref_int = rdfly.config['referential_integrity']
+            if ref_int:
+                self._check_ref_int(ref_int)
 
-        # Delete existing triples if replacing.
-        if not create:
-            rdfly.truncate_rsrc(self.uid)
+            # Delete existing triples if replacing.
+            if not create:
+                rdfly.truncate_rsrc(self.uid)
 
-        remove_trp = {
-            (self.uri, nsc['fcrepo'].lastModified, None),
-            (self.uri, nsc['fcrepo'].lastModifiedBy, None),
-        }
-        add_trp = set(self.provided_imr.graph) | self._containment_rel(create)
+            remove_trp = {
+                (self.uri, nsc['fcrepo'].lastModified, None),
+                (self.uri, nsc['fcrepo'].lastModifiedBy, None),
+            }
+            add_trp = (
+                    set(self.provided_imr.graph)
+                    | self._containment_rel(create))
+        else:
+            remove_trp = set()
+            add_trp = self.provided_imr.graph
+            ev_type = None
 
         self._modify_rsrc(ev_type, remove_trp, add_trp)
         new_gr = Graph()
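The new branch above is the consumer of the 'hidden' disable_checks option
that the migrator writes into the destination configuration. Reduced to its
control flow, the logic is roughly the following (a simplified stand-in, not
the actual Ldpr method):

    def select_update(provided_graph, server_managed_trp, disable_checks):
        # Checks on: merge server-managed triples and emit an update event.
        # Checks off (raw migration I/O): store the provided graph verbatim,
        # with no event and no referential integrity enforcement.
        if not disable_checks:
            return 'update', set(provided_graph) | server_managed_trp
        return None, set(provided_graph)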

+ 9 - 9
lsup-admin

@@ -6,20 +6,14 @@ import logging
 import os
 import sys
 
-import lakesuperior.env_setup
-
 from lakesuperior.api import admin as admin_api
 from lakesuperior.config_parser import config
-from lakesuperior.globals import AppGlobals
 from lakesuperior.env import env
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
 
 logger = logging.getLogger(__name__)
 click_log.basic_config(logger)
 
-rdfly = env.app_globals.rdfly
-nonrdfly = env.app_globals.nonrdfly
-
 
 @click.group()
 def admin():
@@ -36,6 +30,9 @@ def bootstrap():
 
     Additional scaffolding files may be parsed to create initial contents.
     '''
     click.echo(
             click.style(
                 'WARNING: This operation will WIPE ALL YOUR DATA.\n',
@@ -46,6 +43,8 @@ def bootstrap():
         click.echo('Aborting.')
         sys.exit(1)
 
+    # Set up the default environment only after the user confirms the wipe;
+    # env.app_globals does not exist before this import runs.
+    import lakesuperior.env_setup
+
+    rdfly = env.app_globals.rdfly
+    nonrdfly = env.app_globals.nonrdfly
+
     click.echo('Initializing graph store at {}'.format(rdfly.store.path))
     with TxnManager(env.app_globals.rdf_store, write=True) as txn:
         rdfly.bootstrap()
@@ -135,7 +134,7 @@ def copy():
     'folder structure. If set to `skip`, binaries are not exported. Data '
     'folders are not created.')
 @click_log.simple_verbosity_option(logger)
-def dump(src, dest, start, binaries):
+def migrate(src, dest, start, binaries):
     '''
     Dump a repository or parts of it to disk.
 
@@ -143,7 +142,8 @@ def dump(src, dest, start, binaries):
     another LDP-compatible implementation.
     '''
     logger.info('Dumping database.')
-    entries = admin_api.dump(src, dest, start, binaries)
+    entries = admin_api.migrate(
+            src, dest, start=start, binary_handling=binaries)
     logger.info('Dumped {} resources.'.format(entries))
 
 
@@ -165,8 +165,8 @@ admin.add_command(check_fixity)
 admin.add_command(check_refint)
 admin.add_command(cleanup)
 admin.add_command(copy)
-admin.add_command(dump)
 admin.add_command(load)
+admin.add_command(migrate)
 admin.add_command(stats)
 
 if __name__ == '__main__':