Stefano Cossu преди 6 години
родител
ревизия
bfea22b044

+ 0 - 33
lakesuperior/model/ldpr.py

@@ -604,39 +604,6 @@ class Ldpr(metaclass=ABCMeta):
         return self.RES_CREATED if create else self.RES_UPDATED
 
 
-    #def _create_rsrc(self):
-    #    '''
-    #    Create a new resource by comparing an empty graph with the provided
-    #    IMR graph.
-    #    '''
-    #    self._modify_rsrc(self.RES_CREATED, add_trp=self.provided_imr.graph)
-
-    #    # Set the IMR contents to the "add" triples.
-    #    #self.imr = self.provided_imr.graph
-
-    #    return self.RES_CREATED
-
-
-    #def _replace_rsrc(self):
-    #    '''
-    #    Replace a resource.
-
-    #    The existing resource graph is removed except for the protected terms.
-    #    '''
-    #    # The extracted IMR is used as a "minus" delta, so protected predicates
-    #    # must be removed.
-    #    for p in self.protected_pred:
-    #        self.imr.remove(p)
-
-    #    delta = self._dedup_deltas(self.imr.graph, self.provided_imr.graph)
-    #    self._modify_rsrc(self.RES_UPDATED, *delta)
-
-    #    # Set the IMR contents to the "add" triples.
-    #    #self.imr = delta[1]
-
-    #    return self.RES_UPDATED
-
-
     def _bury_rsrc(self, inbound, tstone_pointer=None):
         '''
         Delete a single resource and create a tombstone.

+ 0 - 156
lakesuperior/store_layouts/ldp_rs/base_rdf_layout.py

@@ -1,156 +0,0 @@
-import logging
-
-from abc import ABCMeta, abstractmethod
-
-from flask import current_app
-from rdflib import Graph
-from rdflib.namespace import RDF
-from rdflib.query import ResultException
-from rdflib.resource import Resource
-from rdflib.term import URIRef
-
-from lakesuperior.dictionaries.namespaces import ns_collection as nsc
-from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
-from lakesuperior.exceptions import ResourceNotExistsError
-from lakesuperior.store_layouts.ldp_rs.bdb_connector import BdbConnector
-from lakesuperior.store_layouts.ldp_rs.sqlite_connector import SqliteConnector
-from lakesuperior.toolbox import Toolbox
-
-
-
-class BaseRdfLayout(metaclass=ABCMeta):
-    '''
-    This class exposes an interface to build graph store layouts. It also
-    provides the basics of the triplestore connection.
-
-    Some store layouts are provided. New ones aimed at specific uses
-    and optimizations of the repository may be developed by extending this
-    class and implementing all its abstract methods.
-
-    A layout is implemented via application configuration. However, once
-    contents are ingested in a repository, changing a layout will most likely
-    require a migration.
-
-    The custom layout must be in the lakesuperior.store_layouts.rdf
-    package and the class implementing the layout must be called
-    `StoreLayout`. The module name is the one defined in the app
-    configuration.
-
-    E.g. if the configuration indicates `simple_layout` the application will
-    look for
-    `lakesuperior.store_layouts.rdf.simple_layout.SimpleLayout`.
-
-    Some method naming conventions:
-
-    - Methods starting with `get_` return a resource.
-    - Methods starting with `list_` return an iterable or generator of URIs.
-    - Methods starting with `select_` return an iterable or generator with
-      table-like data such as from a SELECT statement.
-    - Methods starting with `ask_` return a boolean value.
-    '''
-
-    _logger = logging.getLogger(__name__)
-
-
-    ## MAGIC METHODS ##
-
-    def __init__(self, conn, config):
-        '''Initialize the graph store and a layout.
-
-        NOTE: `rdflib.Dataset` requires a RDF 1.1 compliant store with support
-        for Graph Store HTTP protocol
-        (https://www.w3.org/TR/sparql11-http-rdf-update/). Blazegraph supports
-        this only in the (currently unreleased) 2.2 branch. It works with Jena,
-        which is currently the reference implementation.
-        '''
-        self.config = config
-        self._conn = conn
-        self.store = self._conn.store
-
-        #self.UNION_GRAPH_URI = self._conn.UNION_GRAPH_URI
-        self.ds = self._conn.ds
-        self.ds.namespace_manager = nsm
-
-
-    ## INTERFACE METHODS ##
-
-    # Implementers of custom layouts should look into these methods to
-    # implement.
-
-    @abstractmethod
-    def extract_imr(self, uri, strict=True, incl_inbound=False,
-                incl_children=True, embed_children=False, incl_srv_mgd=True):
-        '''
-        Extract an in-memory resource from the dataset restricted to a subject.
-
-        Some filtering operations are carried out in this method for
-        performance purposes (e.g. `incl_children` and `embed_children`, i.e.
-        the IMR will never have those properties). Others, such as
-        server-managed triples, are kept in the IMR until they are filtered out
-        when the graph is output with `Ldpr.out_graph`.
-
-        @param uri (URIRef) Resource URI.
-        @param strict (boolean) If set to True, an empty result graph will
-        raise a `ResourceNotExistsError`; if a tombstone is found, a
-        `TombstoneError` is raised. Otherwise, the raw graph is returned.
-        @param incl_inbound (boolean) Whether to pull triples that have the
-        resource URI as their object.
-        @param incl_children (boolean) Whether to include all children
-        indicated by `ldp:contains`. This is only effective if `incl_srv_mgd`
-        is True.
-        @param embed_children (boolean) If this and `incl_children` are True,
-        the full graph is retrieved for each of the children.
-        @param incl_srv_mgd (boolean) Whether to include server-managed
-        triples.
-        '''
-        pass
-
-
-    #@abstractmethod
-    def get_version_info(self, urn):
-        '''
-        Get version information about a resource (`fcr:versions`)
-        '''
-        pass
-
-
-    #@abstractmethod
-    def get_version(self, urn):
-        '''
-        Get a historic snapshot (version) of a resource.
-        '''
-        pass
-
-
-    @abstractmethod
-    def ask_rsrc_exists(self, urn):
-        '''
-        Ask if a resource is stored in the graph store.
-
-        @param urn (rdflib.term.URIRef) The internal URN of the resource to be
-        queried.
-
-        @return boolean
-        '''
-        pass
-
-
-    #@abstractmethod
-    def modify_dataset(self, remove_trp=Graph(), add_trp=Graph(),
-            types=set()):
-        '''
-        Adds and/or removes triples from the persistent data set.
-
-        NOTE: This method can apply to an arbitrary graph including multiple
-        resources.
-
-        @param remove_trp (rdflib.Graph) Triples to be removed.
-        @param add_trp (rdflib.Graph) Triples to be added.
-        @param types (iterable(rdflib.term.URIRef)) RDF types of the resource
-        that may be relevant to the layout strategy. These can be anything
-        since they are just used to inform the layout and not actually stored.
-        If this is an empty set, the merge graph is used.
-        '''
-        pass
-
-

+ 0 - 237
lakesuperior/store_layouts/ldp_rs/default_layout.py

@@ -1,237 +0,0 @@
-from copy import deepcopy
-from pprint import pformat
-
-from flask import current_app, g, request
-from rdflib import Graph
-from rdflib.namespace import RDF, XSD
-from rdflib.query import ResultException
-from rdflib.resource import Resource
-from rdflib.term import Literal, URIRef, Variable
-
-from lakesuperior.dictionaries.namespaces import ns_collection as nsc
-from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
-from lakesuperior.dictionaries.srv_mgd_terms import (srv_mgd_subjects,
-        srv_mgd_predicates, srv_mgd_types)
-from lakesuperior.exceptions import (InvalidResourceError, InvalidTripleError,
-        ResourceNotExistsError, TombstoneError)
-from lakesuperior.store_layouts.ldp_rs.base_rdf_layout import BaseRdfLayout
-
-
-class DefaultLayout(BaseRdfLayout):
-    '''
-    This is the default layout.
-
-    Main triples are stored in a `main` graph; metadata in the `meta` graph;
-    and historic snapshots (versions) in `historic`.
-    '''
-
-    HIST_GRAPH_URI = nsc['fcg'].historic
-    MAIN_GRAPH_URI = nsc['fcg'].main
-    META_GRAPH_URI = nsc['fcg'].metadata
-
-    # @TODO This will allow routing triples to certain named graphs depending
-    # on predicates and types.
-    term_routes = {
-        'p': {
-            nsc['fcrepo'].contains: META_GRAPH_URI,
-            nsc['fcrepo'].hasVersion: META_GRAPH_URI,
-            nsc['fcrepo'].hasVersionLabel: META_GRAPH_URI,
-            nsc['fcsystem'].fragmentOf: META_GRAPH_URI,
-            nsc['premis'].hasMessageDigest: META_GRAPH_URI,
-        },
-        't': {
-        },
-    }
-
-
-    def extract_imr(self, uri, strict=True, incl_inbound=False,
-                incl_children=True, embed_children=False, incl_srv_mgd=True):
-        '''
-        See base_rdf_layout.extract_imr.
-        '''
-        inbound_construct = '\n?s1 ?p1 ?s .' if incl_inbound else ''
-        inbound_qry = '\nOPTIONAL { ?s1 ?p1 ?s . } .' if incl_inbound else ''
-
-        # Include and/or embed children.
-        embed_children_trp = embed_children_qry = ''
-        if incl_srv_mgd and incl_children:
-            incl_children_qry = ''
-
-            # Embed children.
-            if embed_children:
-                embed_children_trp = '?c ?cp ?co .'
-                embed_children_qry = '''
-                OPTIONAL {{
-                  ?s ldp:contains ?c .
-                  {}
-                }}
-                '''.format(embed_children_trp)
-        else:
-            incl_children_qry = '\nFILTER ( ?p != ldp:contains )' \
-
-        q = '''
-        CONSTRUCT {{
-            ?s ?p ?o .{inb_cnst}
-            {embed_chld_t}
-            ?s fcrepo:writable true .
-            ?f ?fp ?fo .
-        }}
-        WHERE {{
-          GRAPH ?g {{
-            ?s ?p ?o .{inb_qry}{incl_chld}{embed_chld}
-            OPTIONAL {{
-              ?f fcsystem:fragmentOf ?s ;
-                ?fp ?fo .
-            }}
-          }}
-        }}
-        '''.format(inb_cnst=inbound_construct,
-                inb_qry=inbound_qry, incl_chld=incl_children_qry,
-                embed_chld_t=embed_children_trp, embed_chld=embed_children_qry)
-
-        try:
-            qres = self._conn.query(q, initBindings={'s': uri})
-        except ResultException:
-            # RDFlib bug: https://github.com/RDFLib/rdflib/issues/775
-            gr = Graph()
-        else:
-            gr = qres.graph
-
-        #self._logger.debug('Found resource: {}'.format(
-        #        gr.serialize(format='turtle').decode('utf-8')))
-        if strict and not len(gr):
-            raise ResourceNotExistsError(uri)
-
-        rsrc = Resource(gr, uri)
-
-        # Check if resource is a tombstone.
-        if rsrc[RDF.type : nsc['fcsystem'].Tombstone]:
-            if strict:
-                raise TombstoneError(
-                        g.tbox.uri_to_uuid(rsrc.identifier),
-                        rsrc.value(nsc['fcrepo'].created))
-            else:
-                self._logger.info('No resource found: {}'.format(uri))
-        elif rsrc.value(nsc['fcsystem'].tombstone):
-            if strict:
-                raise TombstoneError(
-                        g.tbox.uri_to_uuid(
-                            rsrc.value(nsc['fcsystem'].tombstone).identifier),
-                        rsrc.value(nsc['fcrepo'].created))
-            else:
-                self._logger.info('Tombstone found: {}'.format(uri))
-
-        return rsrc
-
-
-    def ask_rsrc_exists(self, urn):
-        '''
-        See base_rdf_layout.ask_rsrc_exists.
-        '''
-        self._logger.info('Checking if resource exists: {}'.format(urn))
-
-        return bool(self._conn.query(
-            'ASK { GRAPH ?g { ?s a fcrepo:Resource . }}', initBindings={
-                's': urn, 'g': self.MAIN_GRAPH_URI}))
-
-
-    def get_version_info(self, urn):
-        '''
-        See base_rdf_layout.get_version_info.
-        '''
-        q = '''
-        CONSTRUCT {
-          ?s fcrepo:hasVersion ?v .
-          ?v ?p ?o .
-        } WHERE {
-          GRAPH fcg:metadata {
-            ?s fcrepo:hasVersion ?v .
-            ?v ?p ?o .
-          }
-        }
-        '''
-        try:
-            rsp = self.ds.query(q, initBindings={'s': urn})
-        except ResultException:
-            # RDFlib bug: https://github.com/RDFLib/rdflib/issues/775
-            rsp = Graph()
-
-        if not len(rsp):
-            raise ResourceNotExistsError(
-                    urn, 'No version found for this resource.')
-        else:
-            return rsp.graph
-
-
-    def get_version(self, urn, ver_uid):
-        '''
-        See base_rdf_layout.get_version.
-        '''
-        q = '''
-        CONSTRUCT {
-          ?v ?p ?o .
-        } WHERE {
-          GRAPH fcg:metadata {
-            ?s fcrepo:hasVersion ?v .
-            ?v fcrepo:hasVersionLabel ?uid .
-          }
-          GRAPH fcg:historic {
-            ?v ?p ?o .
-          }
-        }
-        '''
-        try:
-            rsp = self.ds.query(q, initBindings={
-                's': urn, 'uid': Literal(ver_uid)})
-        except ResultException:
-            # RDFlib bug: https://github.com/RDFLib/rdflib/issues/775
-            rsp = Graph()
-
-        if not len(rsp):
-            raise ResourceNotExistsError(
-                urn,
-                'No version found for this resource with the given label.')
-        else:
-            return rsp.graph
-
-
-    def modify_dataset(self, remove_trp=Graph(), add_trp=Graph(),
-            types={nsc['fcrepo'].Resource}):
-        '''
-        See base_rdf_layout.modify_dataset.
-        '''
-        #self._logger.debug('Remove triples: {}'.format(pformat(
-        #        set(remove_trp))))
-        #self._logger.debug('Add triples: {}'.format(pformat(
-        #        set(add_trp))))
-
-        if not types:
-            # @FIXME This is terrible, but I can't get Fuseki to update the
-            # default graph without using a variable.
-            #target_gr = self.ds.graph(self.UNION_GRAPH_URI)
-            target_gr = {
-                self.ds.graph(self.HIST_GRAPH_URI),
-                self.ds.graph(self.META_GRAPH_URI),
-                self.ds.graph(self.MAIN_GRAPH_URI),
-            }
-        elif nsc['fcrepo'].Metadata in types:
-            target_gr = {self.ds.graph(self.META_GRAPH_URI)}
-        elif nsc['fcrepo'].Version in types:
-            target_gr = {self.ds.graph(self.HIST_GRAPH_URI)}
-        else:
-            target_gr = {self.ds.graph(self.MAIN_GRAPH_URI)}
-
-        for gr in target_gr:
-            gr -= remove_trp
-            gr += add_trp
-
-        # @TODO Override by triple.
-        #for t in add_trp:
-        #    # Override target graph by triple.
-        #    if t[1] in self.term_routes['p']:
-        #        trp_target_gr = self.ds.graph(self.term_routes['p'][t[1]])
-        #    elif t[1] == RDF.type and t[2] in self.term_routes['t']:
-        #        trp_target_gr = self.ds.graph(self.term_routes['t'][t[2]])
-        #    else:
-        #        trp_target_gr = target_gr
-        #    trp_target_gr.add(t)

+ 0 - 170
lakesuperior/store_layouts/ldp_rs/full_provenance_layout.py

@@ -1,170 +0,0 @@
-import arrow
-
-from uuid import uuid4
-
-from rdflib import Dataset, Graph
-from rdflib.namespace import FOAF, RDF, XSD
-from rdflib.plugins.sparql import prepareQuery
-from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
-from rdflib.term import URIRef, Literal
-
-from lakesuperior.dictionaries.namespaces import ns_collection as nsc
-from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
-from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout
-
-
-class FullProvenanceLayout(BaseRdfLayout):
-    '''This is an implementation of the
-    [graph-per-resource pattern](http://patterns.dataincubator.org/book/graph-per-resource.html)
-    which stores each LDP resource in a separate graph, with a "main" graph
-    to keep track of resource metadata.
-    '''
-
-    DEFAULT_AGENT_URI = nsc['lake'].defaultAgent
-    MAIN_GRAPH_URI = nsc['fcg'].meta
-
-
-    ## MAGIC METHODS ##
-
-    def __init__(self):
-        self.main_graph = self.ds.graph(self.MAIN_GRAPH_URI)
-
-
-    ## PUBLIC METHODS ##
-
-    def ask_rsrc_exists(self, uuid):
-        '''Return whether the resource exists.
-
-        @param uuid Resource UUID.
-
-        @return boolean
-        '''
-        res = self.ds.graph(self.UNION_GRAPH_URI).resource(nsc['fcres'][uuid])
-
-        return len(res) > 0
-
-
-    def get_rsrc(self, uuid):
-        '''Get a resource graph.
-        '''
-        res = self.ds.graph(self.UNION_GRAPH_URI).query(
-            'CONSTRUCT WHERE { ?s ?p ?o }',
-            initBindings={'s' : nsc['fcres'][uuid]}
-        )
-
-        return self.globalize_graph(res.graph)
-
-
-    def put_rsrc(self, uuid, data, format='text/turtle', base_types=None,
-            agent=None):
-        '''Create a resource graph.
-
-        If the resource UUID exists already, it is either overwritten or a
-        version snapshot is created, depending on the parameters.
-        '''
-        if agent is None:
-            agent = self.DEFAULT_AGENT_URI
-
-        res_urn = nsc['fcres'][uuid]
-
-        # If there is a statement by this agent about this resource, replace
-        # its contents.
-        if self._get_res_stmt_by_agent(res_urn, agent):
-            pass # @TODO
-
-
-        # If the graph URI does not exist, create a new resource.
-        else:
-            # Create a new UUID for the statement set.
-            stmset_uri = nsc['stmset'][str(uuid4())]
-
-            # Create a temp graph to store the loaded data. For some reason,
-            # loading directly into the stored graph throws an assertion error.
-            tmp_g = Graph()
-            tmp_g.parse(data=data.decode('utf-8'), format=format,
-                    publicID=str(res_urn))
-
-            # Create the graph and add the data.
-            g = self.ds.graph(stmset_uri)
-            g += tmp_g
-
-            # Add metadata.
-            ts = arrow.utcnow()
-            main_graph = self.ds.graph(self.MAIN_GRAPH_URI)
-
-            main_graph.add((stmset_uri, FOAF.primaryTopic, res_urn))
-            main_graph.add((stmset_uri, RDF.type, nsc['prov'].Entity))
-            main_graph.add(
-                    (stmset_uri, nsc['prov'].generatedAtTime,
-                    Literal(ts, datatype=XSD.dateTime)))
-            main_graph.add(
-                    (stmset_uri, nsc['prov'].wasAttributedTo, agent))
-
-
-        #self.create_version(res_urn)
-
-        if base_types:
-            for type_uri in self.base_types:
-                main_graph.add((stmset_uri, RDF.type, type_uri))
-
-        # @TODO Create containment triples
-
-        self.conn.store.commit()
-
-
-
-    #def create_version(self, res_urn):
-    #    '''Swap out previous version if existing, and create new version
-    #    dependency.'''
-    #    main_graph = ds.graph(URIRef('urn:lake:' + self.MAIN_GRAPH_NAME))
-    #    prv_res_urn = self.select_current_graph_for_res(res_urn)
-
-    #    if prv_res_urn:
-    #        main_graph.remove((prv_res_urn, RDF.type, nsc['lake'].Resource))
-    #        main_graph.add((prv_res_urn, RDF.type, nsc['lake'].Snapshot))
-
-    #        main_graph.add((res_urn, RDF.type, nsc['lake'].Resource))
-    #        main_graph.add((res_urn, nsc['lake'].previousVersion, prv_res_urn))
-
-
-    #def select_current_graph_for_res(self, urn):
-    #    '''Select the current graph URI for a given resource.'''
-    #    qry = '''
-    #    SELECT ?g {
-    #      GRAPH ?mg { ?g a ?gt . }
-    #      GRAPH ?g { ?s ?p ?o . }
-    #    }
-    #    LIMIT 1
-    #    '''
-    #    rsp = self.ds.query(qry, initBindings={
-    #        'mg' : URIRef('urn:lake:' + self.MAIN_GRAPH_NAME),
-    #        'gt' : RESOURCE_TYPE_URI,
-    #        's' : urn
-    #    })
-
-    #    return list(rsp[0][0])
-
-
-    def _ask_res_stmt_by_agent_exists(self, res_urn, agent):
-        '''Ask if any statements have been made by a certain agent about a
-        certain resource.
-
-        @param rdflib.term.URIRef res_urn Resource URN.
-        @param rdflib.term.URIRef agent Agent URI.
-
-        @return boolean
-        '''
-        return self.query('''
-        ASK {
-          GRAPH ?mg {
-              ?g prov:wasAttributedTo ?a .
-          }
-          GRAPH ?g {
-              ?s ?p ?o .
-          }
-        }
-        ''', initBindings={
-            'a' : agent,
-            's' : res_urn,
-        })
-

+ 2 - 7
lakesuperior/store_layouts/ldp_rs/rsrc_centric_layout.py

@@ -1,22 +1,17 @@
 import logging
 
 from collections import defaultdict
-from copy import deepcopy
-from urllib.parse import quote
-
-import requests
 
 from flask import g
 from rdflib import Graph
 from rdflib.namespace import RDF
 from rdflib.query import ResultException
 from rdflib.resource import Resource
-from rdflib.term import URIRef, Literal
+from rdflib.term import Literal
 
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
-from lakesuperior.dictionaries.namespaces import ns_pfx_sparql
-from lakesuperior.exceptions import (InvalidResourceError, InvalidTripleError,
+from lakesuperior.exceptions import (InvalidResourceError,
         ResourceNotExistsError, TombstoneError)
 
 

+ 14 - 0
tests/data/sparql_update/illegal_update.sparql

@@ -0,0 +1,14 @@
+PREFIX dc: <http://purl.org/dc/elements/1.1/>
+
+DELETE {
+  <> dc:title "Hello" .
+}
+INSERT {
+  <> dc:title "Ciao" .
+}
+WHERE {
+  <> dc:title ?t .
+}
+
+
+

+ 3 - 0
tests/siege.txt

@@ -0,0 +1,3 @@
+# Use with Siege, e.g.: siege -f tests/siege.txt -t25 -c8
+
+http://localhost:8000/ldp/pomegranate