7 tahun lalu · 8ebb7f94a4
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -9,6 +9,13 @@ commands (i.e. they are in the virtualenv ``PATH``).
 
				 The tools are currently not directly available on Docker instances (*TODO add
			
 
				 instructions and/or code changes to access them*).
			
 
				 
			
 
				+``lsup-server``
			
 
				+---------------
			
 
				+
			
 
				+Single-threaded server. Use this for testing, debugging, or to multiplex via
			
 
				+WSGI in a Windows environment. For non-Windows production environments, use
			
 
				+``fcrepo``.
			
 
				+
			
 
				 ``fcrepo``
			
 
				 ----------
			
 
				 
			
@@ -27,25 +34,34 @@ In the case an init script is used, ``coilmq`` (belonging to a 3rd party
 
				 package) needs to be launched as well; unless a message broker is already set
			
 
				 up, or if messaging is disabled in the configuration.
			
 
				 
			
 
				+**Note:** This command is not available in Windows because Gunicorn is not
			
 
				+available in Windows. Windows users should look for alternative WSGI servers
			
 
				+to run the single-threaded service (``lsup-server``) over multiple processes
			
 
				+and/or threads.
			
 
				+
			
 
				+**Note:** This is the only command line tool that is not added to the ``PATH``
			
 
				+environment variable in Unix systems (because it is not cross-platform). It
			
 
				+must be invoked by using its full path.
			
 
				+
			
 
				 ``lsup-admin``
			
 
				 --------------
			
 
				 
			
 
				 ``lsup-admin`` is the principal repository management tool. It is
			
 
				 self-documented, so this is just a redundant overview::
			
 
				 
			
 
				-    $ lsup-admin
			
 
				-    Usage: lsup-admin [OPTIONS] COMMAND [ARGS]...
			
 
				+   $ lsup-admin
			
 
				+   Usage: lsup-admin [OPTIONS] COMMAND [ARGS]...
			
 
				 
			
 
				-    Options:
			
 
				-      --help  Show this message and exit.
			
 
				+   Options:
			
 
				+     --help  Show this message and exit.
			
 
				 
			
 
				-    Commands:
			
 
				-      bootstrap     Bootstrap binary and graph stores.
			
 
				-      check_fixity  [STUB] Check fixity of a resource.
			
 
				-      check_refint  Check referential integrity.
			
 
				-      cleanup       [STUB] Clean up orphan database items.
			
 
				-      migrate       Migrate an LDP repository to Lakesuperior.
			
 
				-      stats         Print repository statistics.
			
 
				+   Commands:
			
 
				+     bootstrap     Bootstrap binary and graph stores.
			
 
				+     check_fixity  [STUB] Check fixity of a resource.
			
 
				+     check_refint  Check referential integrity.
			
 
				+     cleanup       [STUB] Clean up orphan database items.
			
 
				+     migrate       Migrate an LDP repository to Lakesuperior.
			
 
				+     stats         Print repository statistics.
			
 
				 
			
 
				 All entries marked ``[STUB]`` are not yet implemented, however the
			
 
				 ``lsup-admin <command> --help`` command will issue a description of what
			
@@ -59,18 +75,38 @@ native Python API.
 
				 ``lsup-benchmark``
			
 
				 ------------------
			
 
				 
			
 
				-``lsup-benchmark`` is used to run performance tests in a predictable way.
			
 
				-
			
 
				-The command has no options but prompts the user for a few settings
			
 
				-interactively (N.B. this may change in favor of parameters).
			
 
				+This command is used to run performance tests in a predictable way.
			
 
				+
			
 
				+The command line options can be queried with the ``--help`` option::
			
 
				+
			
 
				+   Usage: lsup-benchmark [OPTIONS]
			
 
				+
			
 
				+   Options:
			
 
				+   -e, --endpoint TEXT       LDP endpoint. Default: http://localhost:8000/ldp
			
 
				+   -c, --count INTEGER       Number of resources to ingest. Default: 10000
			
 
				+   -p, --parent TEXT         Path to the container resource under which the new
			
 
				+                             resources will be created. It must begin with a
			
 
				+                             slash (`/`) character. Default: /pomegranate
			
 
				+   -d, --delete-container    Delete container resource and its children if
			
 
				+                             already existing. By default, the container is not
			
 
				+                             deleted and new resources are added to it.
			
 
				+   -m, --method TEXT         HTTP method to use. Case insensitive. Either PUT
			
 
				+                             or POST. Default: PUT
			
 
				+   -s, --graph-size INTEGER  Number of triples in each graph. Default: 200
			
 
				+   -t, --resource-type TEXT  Type of resources to ingest. One of `r` (only LDP-
			
 
				+                             RS, i.e. RDF), `n` (only  LDP-NR, i.e. binaries),
			
 
				+                             or `b` (50/50% of both). Default: r
			
 
				+   -g, --graph               Plot a graph of ingest timings. The graph figure
			
 
				+                             is displayed on screen with basic manipulation and
			
 
				+                             save options.
			
 
				+   --help                    Show this message and exit.
			
 
				 
			
 
				 The benchmark tool is able to create RDF sources, or non-RDF, or an equal mix
			
 
				-of them, via POST or PUT, in the currently running Lakesuperior server. It
			
 
				-runs single-threaded.
			
 
				+of them, via POST or PUT, in a given LDP endpoint. It runs single-threaded.
			
 
				 
			
 
				 The RDF sources are randomly generated graphs of consistent size and
			
 
				 complexity. They include a mix of in-repository references, literals, and
			
 
				-external URIs. Each graph has 200 triples.
			
 
				+external URIs. Each graph has 200 triples by default.
			
 
				 
			
 
				 The non-RDF sources are randomly generated 1024x1024 pixel PNG images.
			
 
				 
			
--- a/lakesuperior/util/benchmark.py
+++ b/lakesuperior/util/benchmark.py
@@ -1,12 +1,15 @@
 
				-#!/usr/bin/env python
			
 
				+#!/usr/bin/env python3
			
 
				+
			
 
				 import sys
			
 
				-sys.path.append('.')
			
 
				 
			
 
				 from uuid import uuid4
			
 
				 
			
 
				 import arrow
			
 
				+import click
			
 
				 import requests
			
 
				 
			
 
				+from matplotlib import pyplot as plt
			
 
				+
			
 
				 from lakesuperior.util.generators import (
			
 
				         random_image, random_graph, random_utf8_string)
			
 
				 
			
@@ -14,49 +17,82 @@ __doc__ = '''
 
				 Benchmark script to measure write performance.
			
 
				 '''
			
 
				 
			
 
				-default_n = 10000
			
 
				-#webroot = 'http://localhost:8080/rest'
			
 
				-webroot = 'http://localhost:8000/ldp'
			
 
				-#webroot = 'http://localhost:5000/ldp'
			
 
				-container_uri = webroot + '/pomegranate'
			
 
				-
			
 
				-def run():
			
 
				-    sys.stdout.write('How many children? [{}] >'.format(default_n))
			
 
				-    choice = input().lower()
			
 
				-    n = int(choice) if choice else default_n
			
 
				-
			
 
				-    sys.stdout.write('Delete container? [n] >')
			
 
				-    choice = input().lower()
			
 
				-    del_cont = choice or 'n'
			
 
				-
			
 
				-    sys.stdout.write('POST or PUT? [PUT] >')
			
 
				-    choice = input().lower()
			
 
				-    if choice and choice.lower() not in ('post', 'put'):
			
 
				-        raise ValueError('Not a valid verb.')
			
 
				-    method = choice.lower() or 'put'
			
 
				-
			
 
				-    sys.stdout.write('RDF Sources (r), Non-RDF (n), or Both 50/50 (b)? [r] >')
			
 
				-    choice = input().lower()
			
 
				-    res_type = choice or 'r'
			
 
				-
			
 
				-    if del_cont  == 'y':
			
 
				+def_endpoint = 'http://localhost:8000/ldp'
			
 
				+def_ct = 10000
			
 
				+def_parent = '/pomegranate'
			
 
				+def_gr_size = 200
			
 
				+
			
 
				+
			
 
				+@click.command()
			
 
				+@click.option(
			
 
				+    '--endpoint', '-e', default=def_endpoint,
			
 
				+    help=f'LDP endpoint. Default: {def_endpoint}')
			
 
				+@click.option(
			
 
				+    '--count', '-c', default=def_ct,
			
 
				+    help='Number of resources to ingest. Default: {def_ct}')
			
 
				+@click.option(
			
 
				+    '--parent', '-p', default=def_parent,
			
 
				+    help='Path to the container resource under which the new resources will be '
			
 
				+        'created. It must begin with a slash (`/`) character. '
			
 
				+        f'Default: {def_parent}')
			
 
				+@click.option(
			
 
				+    '--delete-container', '-d', is_flag=True,
			
 
				+    help='Delete container resource and its children if already existing. By '
			
 
				+    'default, the container is not deleted and new resources are added to it.')
			
 
				+@click.option(
			
 
				+    '--method', '-m', default='put',
			
 
				+    help='HTTP method to use. Case insensitive. Either PUT '
			
 
				+    f'or POST. Default: PUT')
			
 
				+@click.option(
			
 
				+    '--graph-size', '-s', default=def_gr_size,
			
 
				+    help=f'Number of triples in each graph. Default: {def_gr_size}')
			
 
				+@click.option(
			
 
				+    '--resource-type', '-t', default='r',
			
 
				+    help='Type of resources to ingest. One of `r` (only LDP-RS, i.e. RDF), '
			
 
				+    '`n` (only  LDP-NR, i.e. binaries), or `b` (50/50% of both). '
			
 
				+    'Default: r')
			
 
				+@click.option(
			
 
				+    '--graph', '-g', is_flag=True, help='Plot a graph of ingest timings. '
			
 
				+    'The graph figure is displayed on screen with basic manipulation and save '
			
 
				+    'options.')
			
 
				+
			
 
				+def run(
			
 
				+        endpoint, count, parent, method, delete_container,
			
 
				+        graph_size, resource_type, graph):
			
 
				+
			
 
				+    container_uri = endpoint + parent
			
 
				+
			
 
				+    method = method.lower()
			
 
				+    if method not in ('post', 'put'):
			
 
				+        raise ValueError(f'HTTP method not supported: {method}')
			
 
				+
			
 
				+    if delete_container:
			
 
				         requests.delete(container_uri, headers={'prefer': 'no-tombstone'})
			
 
				     requests.put(container_uri)
			
 
				 
			
 
				-    print('Inserting {} children.'.format(n))
			
 
				+    print(f'Inserting {count} children under {container_uri}.')
			
 
				 
			
 
				-    # URI used to establish an in-repo relationship.
			
 
				+    # URI used to establish an in-repo relationship. This is set to
			
 
				+    # the most recently created resource in each loop.
			
 
				     ref = container_uri
			
 
				-    size = 200 # Size of graph.
			
 
				 
			
 
				     wclock_start = arrow.utcnow()
			
 
				+    if graph:
			
 
				+        print('Results will be plotted.')
			
 
				+        # Plot coordinates: X is request count, Y is request timing.
			
 
				+        px = []
			
 
				+        py = []
			
 
				+        plt.xlabel('Requests')
			
 
				+        plt.ylabel('ms per request')
			
 
				+        plt.title('FCREPO Benchmark')
			
 
				+
			
 
				     try:
			
 
				-        for i in range(1, n + 1):
			
 
				+        for i in range(1, count + 1):
			
 
				             url = '{}/{}'.format(container_uri, uuid4()) if method == 'put' \
			
 
				                     else container_uri
			
 
				 
			
 
				-            if res_type == 'r' or (res_type == 'b' and i % 2 == 0):
			
 
				-                data = random_graph(size, ref).serialize(format='ttl')
			
 
				+            if resource_type == 'r' or (resource_type == 'b' and i % 2 == 0):
			
 
				+                data = random_graph(graph_size, ref).serialize(format='ttl')
			
 
				                 headers = {'content-type': 'text/turtle'}
			
 
				             else:
			
 
				                 img = random_image(name=uuid4(), ts=16, ims=512)
			
@@ -81,10 +117,17 @@ def run():
 
				             rsp.raise_for_status()
			
 
				             ref = rsp.headers['location']
			
 
				             if i % 10 == 0:
			
 
				+                avg10 = (tcounter - prev_tcounter) / 10
			
 
				                 print(
			
 
				                     f'Record: {i}\tTime elapsed: {tcounter}\t'
			
 
				-                    f'Per resource: {(tcounter - prev_tcounter) / 10}')
			
 
				+                    f'Per resource: {avg10}')
			
 
				                 prev_tcounter = tcounter
			
 
				+
			
 
				+                if graph:
			
 
				+                    px.append(i)
			
 
				+                    # Divide by 1000 for µs → ms
			
 
				+                    py.append(avg10.microseconds // 1000)
			
 
				+
			
 
				     except KeyboardInterrupt:
			
 
				         print('Interrupted after {} iterations.'.format(i))
			
 
				 
			
@@ -93,5 +136,20 @@ def run():
 
				     print(f'Total time spent ingesting resources: {tcounter}')
			
 
				     print(f'Average time per resource: {tcounter.total_seconds()/i}')
			
 
				 
			
 
				+    if graph:
			
 
				+        if resource_type == 'r':
			
 
				+            type_label = 'LDP-RS'
			
 
				+        elif resource_type == 'n':
			
 
				+            type_label = 'LDP-NR'
			
 
				+        else:
			
 
				+            type_label = 'LDP-RS + LDP-NR'
			
 
				+        label = (
			
 
				+            f'{container_uri}; {method.upper()}; {graph_size} trp/graph; '
			
 
				+            f'{type_label}')
			
 
				+        plt.plot(px, py, label=label)
			
 
				+        plt.legend()
			
 
				+        plt.show()
			
 
				+
			
 
				+
			
 
				 if __name__ == '__main__':
			
 
				     run()