浏览代码

Update LC performance tests and some doc details.

scossu 2 周之前
父节点
当前提交
0001c8ab00

+ 6 - 5
README.md

@@ -153,15 +153,16 @@ be used. The directory must exist.
 If unspecified, it is set to 3.
 
 `LSUP_MDB_MAPSIZE` Virtual memory map size. It is recommended to leave this
-alone. By default, it is set to 1Tb for 64-bit systems and 4Gb for 32-bit
-systems. The map size by itself does not use up any extra resources.
+alone, unless you are running Valgrind or other tools that limit memory usage.
+The map size by itself does not preallocate any resources and is safe to
+increase beyond the physical capacity of the host system. By default, it is set
+to 1 TB for 64-bit systems and 4 GB for 32-bit systems.
 
 
 ### C API Documentation
 
-Almost all header files are documented. Run `doxygen` (see
-[Doxygen](https://www.doxygen.nl/index.html)) to generate HTML documentation in
-`docs/html`.
+Run `doxygen` (see [Doxygen](https://www.doxygen.nl/index.html)) to generate
+HTML documentation in `docs/html`.
 
 
 ### Python API Documentation

+ 16 - 18
docs/dev/NOTES.md

@@ -1,32 +1,30 @@
 # Performance test data
 
-VMWare running ArchLinux on MacBook Pro, 4-core Intel(R) Core(TM) i7-1068NG7
-CPU @ 2.30GHz, 8Gb RAM, SSD
+Arch Linux on a Dell Optiplex 3020 workstation, 4-core Intel(R) Core(TM) i5-4590
+CPU @ 3.30GHz, 16 GB RAM, SSD
 
-Decode
-LoC Children's Subjects SKOS/RDF
+Decode LoC Children's Subjects SKOS/RDF
 [NT](https://id.loc.gov/download/authorities/childrensSubjects.skosrdf.nt.gz)
 and
 [TTL](https://id.loc.gov/download/authorities/childrensSubjects.skosrdf.ttl.gz)
 (379,163 triples):
 
-NT format: 19s
-TTL format: 25s
+NT format: 1.08"
+TTL format: 1.6"
 
-Decode LoC Subject Headings (10,116,071 triples)
-
-NT: 8'33"
-TTL: 11'4"
+Decode LoC Subject Headings
+[NT](https://id.loc.gov/download/authorities/subjects.skosrdf.nt.gz)
+and
+[TTL](https://id.loc.gov/download/authorities/subjects.skosrdf.ttl.gz)
+(10,116,071 triples):
 
-TTL takes 29%-31% longer than NT.
+NT: 29.5"
+TTL: 43.2"
 
-Script (replace `<file path>` and `<format>` accordingly):
+These measurements were taken with the Python scripts in the `test` folder:
 
 ```
-from lsup_rdf import env_init
-from lsup_rdf.graph import Graph
-
-env_init()
-with open("<file path>") as fh:
-    gr = Graph.from_rdf(fh, "<format>")
+time python test/massif<n>_<nt|ttl>.py
 ```
+
+Each script simply parses the RDF file and loads it into an in-memory graph.

+ 32 - 32
docs/dev/deps.dot

@@ -5,45 +5,45 @@ digraph "source tree" {
     fontsize="16";
     fontname="Helvetica";
 	clusterrank="local";
-	"py_lsup_rdf" -> "py_graph"
-	"term" -> "namespace"
-	"store" -> "store_mdb"
-	"store_htable" -> "buffer"
-	"namespace" -> "hashmap"
-	"profile" -> "lsup_rdf"
-	"py_graph" -> "codec_ttl"
+	"environment" -> "term"
+	"lsup_rdf" -> "codec_ttl"
 	"store_mdb" -> "store_interface"
-	"py_term" -> "py_namespace"
 	"grammar_nt" -> "codec"
-	"py_namespace" -> "namespace"
-	"term" -> "buffer"
-	"parser_nt" -> "codec"
-	"term" -> "tpl"
-	"store_mdb" -> "buffer"
-	"environment" -> "bootstrap"
+	"lsup_rdf" -> "codec_nt"
+	"namespace" -> "hashmap"
 	"py_graph" -> "graph"
-	"lsup_rdf" -> "codec_ttl"
-	"graph" -> "store"
-	"parser_nt" -> "tokens_nt"
-	"graph" -> "environment"
-	"py_term" -> "term"
-	"codec_ttl" -> "parser_ttl"
-	"store_htable" -> "hashmap"
-	"grammar_ttl" -> "codec"
+	"codec_nt" -> "parser_nt"
 	"buffer" -> "core"
+	"term" -> "buffer"
 	"graph" -> "term"
-	"lsup_rdf" -> "codec_nt"
+	"store_htable" -> "store_interface"
+	"py_graph" -> "codec_ttl"
+	"parser_ttl" -> "tokens_ttl"
+	"store_htable" -> "hashmap"
 	"py_graph" -> "codec_nt"
-	"core" -> "log"
-	"py_triple" -> "py_term"
+	"py_lsup_rdf" -> "py_graph"
 	"py_graph" -> "py_triple"
-	"codec_nt" -> "parser_nt"
-	"codec" -> "graph"
-	"store" -> "store_htable"
+	"store_htable" -> "buffer"
+	"store" -> "store_mdb"
+	"codec_ttl" -> "parser_ttl"
+	"term" -> "tpl"
 	"store_interface" -> "environment"
-	"parser_ttl" -> "codec"
-	"store_htable" -> "store_interface"
 	"namespace" -> "core"
-	"environment" -> "term"
-	"parser_ttl" -> "tokens_ttl"
+	"store" -> "store_htable"
+	"grammar_ttl" -> "codec"
+	"py_term" -> "py_namespace"
+	"graph" -> "environment"
+	"term" -> "namespace"
+	"store_mdb" -> "buffer"
+	"graph" -> "store"
+	"core" -> "log"
+	"parser_ttl" -> "codec"
+	"py_triple" -> "py_term"
+	"parser_nt" -> "codec"
+	"profile" -> "lsup_rdf"
+	"codec" -> "graph"
+	"parser_nt" -> "tokens_nt"
+	"py_namespace" -> "namespace"
+	"environment" -> "bootstrap"
+	"py_term" -> "term"
 }

二进制
docs/dev/deps.pdf


二进制
docs/massif_decode_loc_childrensSubjects_skos_nt.png


二进制
docs/massif_decode_loc_childrensSubjects_skos_ttl.png


二进制
docs/massif_decode_loc_subjects_skos_nt.png


二进制
docs/massif_decode_loc_subjects_skos_ttl.png


+ 1 - 1
include/graph.h

@@ -195,7 +195,7 @@ LSUP_GraphIterator *
 LSUP_graph_add_init_txn (void *txn, LSUP_Graph *gr);
 
 
-/// Non-transactional version of #LSUP_graph_init_txn.
+/// Non-transactional version of #LSUP_graph_add_init_txn().
 #define LSUP_graph_add_init(...) LSUP_graph_add_init_txn (NULL, __VA_ARGS__)
 
 

+ 6 - 0
test/massif1_nt.py

@@ -0,0 +1,6 @@
+from lsup_rdf import env_init
+from lsup_rdf.graph import Graph
+
+env_init()
+with open("/data/lsup/loc/childrensSubjects.skosrdf.nt") as fh:
+    gr = Graph.from_rdf(fh, "nt")

+ 6 - 0
test/massif1_ttl.py

@@ -0,0 +1,6 @@
+from lsup_rdf import env_init
+from lsup_rdf.graph import Graph
+
+env_init()
+with open("/data/lsup/loc/childrensSubjects.skosrdf.ttl") as fh:
+    gr = Graph.from_rdf(fh, "ttl")

+ 6 - 0
test/massif2_nt.py

@@ -0,0 +1,6 @@
+from lsup_rdf import env_init
+from lsup_rdf.graph import Graph
+
+env_init()
+with open("/data/lsup/loc/subjects.skosrdf.nt") as fh:
+    gr = Graph.from_rdf(fh, "nt")

+ 6 - 0
test/massif2_ttl.py

@@ -0,0 +1,6 @@
+from lsup_rdf import env_init
+from lsup_rdf.graph import Graph
+
+env_init()
+with open("/data/lsup/loc/subjects.skosrdf.ttl") as fh:
+    gr = Graph.from_rdf(fh, "ttl")