swh.indexer.metadata_dictionary.npm module#
- class swh.indexer.metadata_dictionary.npm.NpmMapping(log_suffix='')[source]#
Bases: JsonMapping, SingleFileIntrinsicMapping
dedicated class for NPM (package.json) mapping and translation
- name = 'npm'#
- mapping = {'author': rdflib.term.URIRef('http://schema.org/author'), 'author.email': rdflib.term.URIRef('http://schema.org/email'), 'author.name': rdflib.term.URIRef('http://schema.org/name'), 'bugs': rdflib.term.URIRef('https://codemeta.github.io/terms/issueTracker'), 'contributors': rdflib.term.URIRef('http://schema.org/contributor'), 'cpu': rdflib.term.URIRef('http://schema.org/processorRequirements'), 'description': rdflib.term.URIRef('http://schema.org/description'), 'engines': rdflib.term.URIRef('http://schema.org/runtimePlatform'), 'homepage': rdflib.term.URIRef('http://schema.org/url'), 'keywords': rdflib.term.URIRef('http://schema.org/keywords'), 'license': rdflib.term.URIRef('http://schema.org/license'), 'name': rdflib.term.URIRef('http://schema.org/name'), 'os': rdflib.term.URIRef('http://schema.org/operatingSystem'), 'repository': rdflib.term.URIRef('http://schema.org/codeRepository'), 'version': rdflib.term.URIRef('http://schema.org/version')}#
- string_fields: List[str] = ['name', 'version', 'description', 'email']#
List of fields that are simple strings, and don’t need any normalization.
- uri_fields: List[str] = ['homepage']#
List of fields that are simple URIs, and don’t need any normalization.
- normalize_repository(d)[source]#
https://docs.npmjs.com/cli/v11/configuring-npm/package-json#repository
>>> NpmMapping().normalize_repository({ ... 'type': 'git', ... 'url': 'https://example.org/foo.git' ... }) rdflib.term.URIRef('git+https://example.org/foo.git') >>> NpmMapping().normalize_repository( ... 'gitlab:foo/bar') rdflib.term.URIRef('git+https://gitlab.com/foo/bar.git') >>> NpmMapping().normalize_repository( ... 'foo/bar') rdflib.term.URIRef('git+https://github.com/foo/bar.git')
- normalize_bugs(d)[source]#
https://docs.npmjs.com/cli/v11/configuring-npm/package-json#bugs
>>> NpmMapping().normalize_bugs({ ... 'url': 'https://example.org/bugs/', ... 'email': 'bugs@example.org' ... }) rdflib.term.URIRef('https://example.org/bugs/') >>> NpmMapping().normalize_bugs( ... 'https://example.org/bugs/') rdflib.term.URIRef('https://example.org/bugs/')
- translate_author(graph: Graph, root, d)[source]#
-
>>> from pprint import pprint >>> root = URIRef("http://example.org/test-software") >>> graph = Graph() >>> NpmMapping().translate_author(graph, root, { ... 'name': 'John Doe', ... 'email': 'john.doe@example.org', ... 'url': 'https://example.org/~john.doe', ... }) >>> prettyprint_graph(graph, root) { "@id": ..., "http://schema.org/author": { "@list": [ { "@type": "http://schema.org/Person", "http://schema.org/email": "john.doe@example.org", "http://schema.org/name": "John Doe", "http://schema.org/url": { "@id": "https://example.org/~john.doe" } } ] } } >>> graph = Graph() >>> NpmMapping().translate_author(graph, root, ... 'John Doe <john.doe@example.org> (https://example.org/~john.doe)' ... ) >>> prettyprint_graph(graph, root) { "@id": ..., "http://schema.org/author": { "@list": [ { "@type": "http://schema.org/Person", "http://schema.org/email": "john.doe@example.org", "http://schema.org/name": "John Doe", "http://schema.org/url": { "@id": "https://example.org/~john.doe" } } ] } } >>> graph = Graph() >>> NpmMapping().translate_author(graph, root, { ... 'name': 'John Doe', ... 'email': 'john.doe@example.org', ... 'url': 'https:\\\\example.invalid/~john.doe', ... }) >>> prettyprint_graph(graph, root) { "@id": ..., "http://schema.org/author": { "@list": [ { "@type": "http://schema.org/Person", "http://schema.org/email": "john.doe@example.org", "http://schema.org/name": "John Doe" } ] } }
- normalize_description(description)[source]#
Try to re-decode description as UTF-16, as this is a somewhat common mistake that causes issues in the database because of null bytes in JSON. >>> NpmMapping().normalize_description("foo bar") rdflib.term.Literal('foo bar') >>> NpmMapping().normalize_description( ... "\ufffd\ufffd#\x00 \x00f\x00o\x00o\x00 \x00b\x00a\x00r\x00\r\x00 \x00" ... ) rdflib.term.Literal('foo bar') >>> NpmMapping().normalize_description( ... "\ufffd\ufffd\x00#\x00 \x00f\x00o\x00o\x00 \x00b\x00a\x00r\x00\r\x00 " ... ) rdflib.term.Literal('foo bar') >>> NpmMapping().normalize_description( ... # invalid UTF-16 and meaningless UTF-8: ... "\ufffd\ufffd\x00#\x00\x00\x00 \x00\x00\x00\x00f\x00\x00\x00\x00" ... ) is None True >>> NpmMapping().normalize_description( ... # ditto (but looks like little-endian at first) ... "\ufffd\ufffd#\x00\x00\x00 \x00\x00\x00\x00f\x00\x00\x00\x00\x00" ... ) is None True >>> NpmMapping().normalize_description(None) is None True
- normalize_license(s)[source]#
https://docs.npmjs.com/cli/v11/configuring-npm/package-json#license
>>> NpmMapping().normalize_license('MIT') rdflib.term.URIRef('https://spdx.org/licenses/MIT')
- normalize_keywords(lst)[source]#
https://docs.npmjs.com/cli/v11/configuring-npm/package-json#keywords
>>> NpmMapping().normalize_keywords(['foo', 'bar']) [rdflib.term.Literal('foo'), rdflib.term.Literal('bar')]