# Copyright (c) 2009, Digital Enterprise Research Institute (DERI),
# NUI Galway
# All rights reserved.
# author: Cosmin Basca
# email: cosmin.basca@gmail.com
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with
# the distribution.
# * Neither the name of DERI nor the
# names of its contributors may be used to endorse or promote
# products derived from this software without specific prior
# written permission.
# THIS SOFTWARE IS PROVIDED BY DERI ''AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DERI BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
# OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.
# -*- coding: utf-8 -*-
import logging
from datetime import datetime, date, time
import decimal
import re
from urlparse import urlparse
from uuid import uuid4
from surf.namespace import get_namespace, get_namespace_url
from surf.namespace import get_fallback_namespace, SURF
from surf.rdf import BNode, Literal, Namespace, URIRef
__author__ = 'Cosmin Basca'
# ----------------------------------------------------------------------------------------------------------------------
#
# module constants
#
# ----------------------------------------------------------------------------------------------------------------------
#: the attribute regex pattern representing a direct edge or property: {{ATTRIBUTE_NAME}}
#: (written as a raw string so that the ``\-`` escape reaches the regex engine as-is
#: instead of relying on Python leaving an unrecognised string escape untouched)
pattern_direct = re.compile(r'^[a-z0-9]+_[a-zA-Z0-9_\-]+$', re.DOTALL)
#: the attribute regex pattern representing an inverse edge or property: is_{{ATTRIBUTE_NAME}}_of
pattern_inverse = re.compile(r'^is_[a-z0-9]+_[a-zA-Z0-9_\-]+_of$', re.DOTALL)

#: :func:`de_camel_case` method: keep the case of the input
DE_CAMEL_CASE_DEFAULT = 2 ** 0
#: :func:`de_camel_case` method: lower-case the result
DE_CAMEL_CASE_FORCE_LOWER_CASE = 2 ** 1

#: camel case word boundary: either two upper case letters followed by a lower case
#: one (e.g. ``LPa`` in ``XMLParser``) or a lower case letter followed by an upper
#: case one (e.g. ``lC`` in ``camelCase``)
pattern = re.compile(r'([A-Z][A-Z][a-z])|([a-z][A-Z])')
# ----------------------------------------------------------------------------------------------------------------------
#
# module functions
#
# ----------------------------------------------------------------------------------------------------------------------
def namespace_split(uri):
    """
    Split `uri` into its registered `namespace` and the trailing local name.

    Works like :func:`uri_split`, but the first element of the result is the second
    item of the pair returned by :func:`surf.namespace.get_namespace` instead of
    the first one.

    The uri is cut at its last ``#`` when it contains one, otherwise at its last ``/``.

    .. code-block:: python

        >>> print namespace_split('http://mynamespace/ns#some_property')
        (rdflib.URIRef('http://mynamespace/ns#'), 'some_property')

    :param str uri: the uri
    :return: a (namespace, predicate) tuple. Types: (:class:`rdflib.term.URIRef`, str)
    :rtype: tuple
    :raises ValueError: if `uri` contains neither ``#`` nor ``/``
    """
    if uri.rfind('#') == -1:
        separator = '/'
    else:
        separator = '#'
    # rsplit yields a single item when the separator is absent, so the unpacking
    # below fails with ValueError for such input
    head, local_name = uri.rsplit(separator, 1)
    registered = get_namespace('%s%s' % (head, separator))
    return registered[1], local_name
def uri_split(uri):
    """
    Split `uri` into a namespace key and the trailing local name.

    The uri is cut at its last ``#`` when it contains one, otherwise at its last ``/``.
    Everything up to and including that separator is handed to
    :func:`surf.namespace.get_namespace`; the first item of its result is returned
    as the first element (a prefix such as ``'NS1'`` in the example below).

    .. code-block:: python

        >>> print uri_split('http://mynamespace/ns#some_property')
        ('NS1', 'some_property')

    :param uri: the uri
    :type uri: :class:`rdflib.term.URIRef` or basestring
    :return: a (namespace key, remainder) tuple. Types: (str, str)
    :rtype: tuple
    :raises ValueError: if `uri` contains neither ``#`` nor ``/``
    """
    if uri.rfind('#') == -1:
        separator = '/'
    else:
        separator = '#'
    # rsplit yields a single item when the separator is absent, so the unpacking
    # below fails with ValueError for such input
    head, local_name = uri.rsplit(separator, 1)
    registered = get_namespace('%s%s' % (head, separator))
    return registered[0], local_name
def uri_to_classname(uri):
    """
    Build a class name out of a `uri`.

    The namespace key obtained from :func:`uri_split` is title-cased and its dashes
    are turned into underscores; the local name is appended unchanged.

    .. code-block:: python

        >>> # prints Ns1some_class, where Ns1 is the namespace (not registered, assigned automatically)
        >>> print uri_to_classname('http://mynamespace/ns#some_class')
        Ns1some_class

    :param str uri: the uri
    :return: the class name derived from the given uri
    :rtype: str
    """
    prefix, local_name = uri_split(uri)
    safe_prefix = prefix.title().replace('-', '_')
    return '%s%s' % (safe_prefix, local_name)
def attr2rdf(attr_name):
    """
    Convert an `attribute name` in the form:

    .. code-block:: python

        # direct predicate
        instance1.foaf_name
        # inverse predicate
        instance2.is_foaf_title_of

    to

    .. code-block:: xml

        <!-- direct predicate -->
        <http://xmlns.com/foaf/spec/#term_name>
        <!-- inverse predicate -->
        <http://xmlns.com/foaf/spec/#term_title>

    :param str attr_name: the attribute name to convert to *RDF*
    :return: a (uri, direct) tuple, where `direct` is True for a direct predicate and
             False for an inverse one. The uri is None when the namespace lookup fails;
             (None, None) is returned when `attr_name` matches neither
             :attr:`pattern_inverse` nor :attr:`pattern_direct`.
    :rtype: tuple
    """
    def to_rdf(name):
        prefix, predicate = name.split('_', 1)
        ns = get_namespace_url(prefix)
        try:
            return ns[predicate]
        except Exception:
            # best effort: a prefix that cannot be resolved yields no uri.
            # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt
            # and SystemExit are not swallowed.
            return None

    if pattern_inverse.match(attr_name):
        # Strip only the leading ``is_`` and the last ``_of``. Replacing every
        # occurrence would also mangle names that merely contain those sequences
        # (``is_foaf_basis_of`` must give ``foaf_basis``, not ``foaf_basof``).
        inner = attr_name[len('is_'):attr_name.rfind('_of')]
        return to_rdf(inner), False
    if pattern_direct.match(attr_name):
        return to_rdf(attr_name), True
    return None, None
def rdf2attr(uri, direct):
    """
    Inverse of :func:`attr2rdf`: build the attribute name for `uri`.

    The name is ``<prefix>_<local name>``, with the prefix taken from
    :func:`uri_split` and lower-cased. For an inverse edge the name is wrapped as
    ``is_<name>_of``.

    .. code-block:: python

        >>> print rdf2attr('http://xmlns.com/foaf/spec/#term_name',True)
        foaf_name
        >>> print rdf2attr('http://xmlns.com/foaf/spec/#term_title',False)
        is_foaf_title_of

    :param uri: the given `uri`
    :type uri: :class:`rdflib.term.URIRef` or str
    :param bool direct: whether this is a direct or inverse edge or property
    :return: the python attribute name
    :rtype: str
    """
    prefix, local_name = uri_split(uri)
    name = '%s_%s' % (prefix.lower(), local_name)
    if direct:
        return name
    return 'is_%s_of' % name
def is_attr_direct(attr_name):
    """
    Tell whether `attr_name` denotes a direct edge / property.

    Only :attr:`pattern_inverse` is consulted: every name that does not follow the
    inverse convention (``is_..._of``) is reported as direct.

    .. code-block:: python

        >>> is_attr_direct('foaf_name')
        True
        >>> is_attr_direct('is_foaf_name_of')
        False

    :param str attr_name: the attribute name to check
    :return: True if `attr_name` is a direct edge / property
    :rtype: bool
    """
    inverse_match = pattern_inverse.match(attr_name)
    return inverse_match is None
def uri_to_class(uri):
    """
    Create a new `class object` for the supplied `uri`.

    The class is named after :func:`uri_to_classname`, has no explicit base classes
    and carries the uri in its ``uri`` class attribute.

    .. code-block:: python

        >>> print uri_to_class('http://mynamespace/ns#some_class')
        surf.util.Ns1some_class

    :param str uri: the given `uri`
    :return: the python class for the given `uri`
    :rtype: type
    """
    class_name = str(uri_to_classname(uri))
    attributes = {'uri': uri}
    return type(class_name, (), attributes)
def uuid_subject(namespace=None):
    """
    Generate a unique subject in the provided `namespace`, based on :func:`uuid.uuid4`.

    If `namespace` is empty or not given, then the namespace returned by
    :func:`surf.namespace.get_fallback_namespace` is used. Anything that is not
    already a `Namespace` instance is wrapped into one first.

    .. code-block:: python

        >>> from surf import namespace as ns
        >>> print uuid_subject(ns.SIOC)
        http://rdfs.org/sioc/ns#1b6ca1d5-41ed-4768-b86a-42185169faff

    :param namespace: the given namespace
    :type namespace: None or :class:`rdflib.namespace.Namespace` or str or unicode
    :return: the *RDF* subject identifier in the specified namespace
    :rtype: :class:`rdflib.term.URIRef`
    """
    target = namespace if namespace else get_fallback_namespace()
    if not isinstance(target, Namespace):
        target = Namespace(target)
    unique_id = str(uuid4())
    return target[unique_id]
def de_camel_case(camel_case, delim=' ', method=DE_CAMEL_CASE_FORCE_LOWER_CASE):
    """
    Insert `delim` at the word boundaries of a camel cased string.

    Boundaries are located with the module level :attr:`pattern` regex; `delim` is
    inserted after the first character of every match. A string without any boundary
    gets no delimiter inserted.

    :param str camel_case: the camel cased string, None is passed through as None
    :param str delim: the delimiter
    :param int method: when equal to `DE_CAMEL_CASE_FORCE_LOWER_CASE` (the default)
                       the result is lower-cased, any other value keeps the case
    :return: the normalized string
    :rtype: str
    """
    if camel_case is None:
        return None

    def split_boundary(match):
        chunk = match.group()
        return chunk[:1] + delim + chunk[1:]

    spaced = pattern.sub(split_boundary, camel_case)
    if method == DE_CAMEL_CASE_FORCE_LOWER_CASE:
        return spaced.lower()
    return spaced
def is_uri(uri):
    """
    Check whether the given `uri` is a *URI* reference.

    The value is parsed with `urlparse`; it qualifies only if the scheme, the network
    location and the path are all non-empty (so ``http://example.org`` without a path
    does not qualify).

    :param str uri: the given `uri`
    :return: True if a *URI* reference
    :rtype: bool
    """
    scheme, netloc, path = urlparse(uri)[:3]
    return bool(scheme and netloc and path)
def pretty_rdf(uri):
    """
    Return a short, readable form of the given URI.

    An object exposing a ``subject`` attribute is replaced by that subject first.
    For a value whose type is exactly `URIRef` the result is ``prefix:symbol``, with
    the prefix coming from :func:`uri_split` and lower-cased. When the prefix starts
    with ``NS`` (presumably an automatically assigned, unregistered namespace) only
    the symbol is returned. Any other kind of value yields an empty string.

    :param str uri: the given `uri`
    :return: the python prettified `uri` representation
    :rtype: str
    """
    target = uri.subject if hasattr(uri, 'subject') else uri
    # exact type check on purpose: subclasses of URIRef are not prettified
    if type(target) is not URIRef:
        return ''
    prefix, symbol = uri_split(target)
    if unicode(prefix).startswith('NS'):
        return symbol
    return prefix.lower() + ':' + symbol
def value_to_rdf(value):
    """
    Convert the value to an :mod:`rdflib` compatible type if appropriate.

    - `URIRef` and `BNode` instances are returned as they are
    - strings, numbers, booleans, date / time values and decimals become a `Literal`
    - a non-empty list or tuple is read as ``(value, language, datatype)``, the last
      two items being optional
    - a dict is read through its ``"value"``, ``"language"`` and ``"datatype"`` keys;
      it is converted whenever ``"value"`` is present and not None, so falsy values
      such as ``0``, ``''`` or ``False`` are converted too
    - anything else (including an empty list / tuple or a dict without a value) is
      returned unchanged

    :param object value: the value
    :return: the converted value (if possible)
    :rtype: :class:`rdflib.term.Literal` or :class:`rdflib.term.BNode` or :class:`rdflib.term.URIRef` or object
    """
    if isinstance(value, (URIRef, BNode)):
        return value

    if isinstance(value, (basestring, str, unicode, float, int, long, bool, datetime, date, time, decimal.Decimal)):
        # NOTE(review): ``type(value) is basestring`` can never be true (basestring is
        # abstract, no object has exactly that type), so the base64 branch below is
        # unreachable and ``string_conforms_to_base64`` is never called. It is left
        # untouched because enabling it would change how plain strings are typed -
        # confirm the intended behaviour before switching to ``isinstance``.
        if type(value) is basestring and string_conforms_to_base64(value):
            return Literal(value, datatype=URIRef('http://www.w3.org/2001/XMLSchema#base64Binary'))
        return Literal(value)

    if isinstance(value, (list, tuple)):
        if not value:
            # nothing to convert; previously this raised IndexError on value[0]
            return value
        language = value[1] if len(value) > 1 else None
        datatype = value[2] if len(value) > 2 else None
        return Literal(value[0], lang=language, datatype=datatype)

    if isinstance(value, dict):
        val = value.get("value")
        if val is None:
            return value
        # ``is None`` rather than truthiness, so that 0, '' and False are converted
        return Literal(val, lang=value.get("language"), datatype=value.get("datatype"))

    return value
def json_to_rdflib(json_object):
    """
    Convert a json result entry to an :mod:`rdfLib` type.

    The ``"type"`` entry selects the conversion:

    - ``uri``: a `URIRef` built from ``"value"``
    - ``literal``: a `Literal` built from ``"value"``, with the language taken from
      ``"xml:lang"`` when that key is present
    - ``typed-literal``: a `Literal` built from ``"value"`` with ``"datatype"`` as datatype
    - ``bnode``: a `BNode` built from ``"value"``

    :param dict json_object: the *JSON* object
    :return: the converted value, or None for an unknown type
    :rtype: :class:`rdflib.term.Literal` or :class:`rdflib.term.BNode` or :class:`rdflib.term.URIRef` or None
    :raises ValueError: if `json_object` has no ``"type"`` entry
    """
    try:
        kind = json_object["type"]
    except KeyError:
        raise ValueError("No type specified")

    if kind == 'uri':
        return URIRef(json_object["value"])

    if kind == 'literal':
        if "xml:lang" not in json_object:
            return Literal(json_object["value"])
        return Literal(json_object["value"], lang=json_object['xml:lang'])

    if kind == 'typed-literal':
        return Literal(json_object["value"], datatype=URIRef(json_object['datatype']))

    if kind == 'bnode':
        return BNode(json_object["value"])

    return None
class Single(object):
    """
    Descriptor for easy access to attributes with single value.

    Reading returns the ``first`` member of the wrapped attribute, writing stores the
    given value in the wrapped attribute and deleting resets it to an empty list.
    """

    def __init__(self, attr):
        # a URIRef is translated to the matching direct attribute name,
        # any other value is used as the attribute name as it is
        self.attr = rdf2attr(attr, True) if isinstance(attr, URIRef) else attr

    def __get__(self, obj, type=None):
        values = getattr(obj, self.attr)
        return values.first

    def __set__(self, obj, value):
        setattr(obj, self.attr, value)

    def __delete__(self, obj):
        setattr(obj, self.attr, [])
def single(attr):
    """
    Function style alias for :class:`Single`.

    :param attr: the given attribute
    :type attr: :class:`rdflib.term.URIRef` or str
    :return: a :class:`Single` instance wrapping `attr`
    :rtype: :class:`Single`
    """
    descriptor = Single(attr)
    return descriptor