"""
R help system.
"""
import os
from collections import namedtuple
import re
import sqlite3
import typing
import warnings
import rpy2.rinterface as rinterface
from rpy2.rinterface import StrSexpVector
from rpy2.robjects.packages_utils import (get_packagepath,
_libpaths,
_packages)
from collections import OrderedDict
# Query the version of the running R. The major and minor components are
# parsed here and deleted a few lines below, so nothing else can read them.
tmp = rinterface.baseenv['R.Version']()
tmp_major = int(tmp[tmp.do_slot('names').index('major')][0])
tmp_minor = float(tmp[tmp.do_slot('names').index('minor')][0])
# R's `readRDS`, used in this module to read the `.rds` / `.rdx` files
# found in the directory of an installed package.
readRDS = rinterface.baseenv['readRDS']
del tmp
del tmp_major
del tmp_minor
# R's `eval`, used to evaluate parsed R expressions.
_eval = rinterface.baseenv['eval']
# Rd tags for which `Page` does not warn when they occur more than once.
NON_UNIQUE_TAGS = set((r'\alias', r'\keyword', r'\section'))
def quiet_require(name: str, lib_loc: typing.Optional[str] = None) -> bool:
    """ Load an R package /quietly/ (suppressing messages to the console).

    - name: name of the R package (inserted as-is in the R call)
    - lib_loc: path to the R library in which the package should be
      looked for (default: None, which is passed to R as NULL). A value
      already written as a quoted R string literal is used untouched.

    Returns the result of evaluating R's `require()`.
    """
    if lib_loc is None:
        r_lib_loc = 'NULL'
    elif (len(lib_loc) >= 2
          and lib_loc[0] == lib_loc[-1]
          and lib_loc[0] in ('"', "'")):
        # Already an R string literal: keep it as it is, so that callers
        # that did the quoting themselves keep working.
        r_lib_loc = lib_loc
    else:
        # A bare path pasted in the expression is not valid R code.
        # Turn it into an R string literal, escaping backslashes first
        # and then double quotes.
        r_lib_loc = '"%s"' % (lib_loc
                              .replace('\\', '\\\\')
                              .replace('"', '\\"'))
    expr_txt = ('suppressPackageStartupMessages(base::require(%s, lib.loc=%s))'
                % (name, r_lib_loc))
    expr = rinterface.parse(expr_txt)
    ok = _eval(expr)
    return ok
# Load the R package 'tools'; the objects below are looked up in its namespace.
quiet_require('tools')
_get_namespace = rinterface.baseenv['getNamespace']
_lazyload_dbfetch = rinterface.baseenv['lazyLoadDBfetch']
tools_ns = _get_namespace(StrSexpVector(('tools',)))
_Rd_db = tools_ns['Rd_db']
_Rd_deparse = tools_ns['.Rd_deparse']
# Paths to metadata files, relative to the directory of an installed package.
__rd_meta = os.path.join('Meta', 'Rd.rds')
__package_meta = os.path.join('Meta', 'package.rds')
# Row of the form "<name>: <text>", where <name> is made of letters, digits,
# dots and underscores and does not start with a digit. `Page.arguments()`
# takes such a row as the beginning of the entry for an argument.
p_newarg = re.compile(r'^\s*([a-zA-Z\._][a-zA-Z0-9\._]*?)\s*:\s*(.+?)\s*$')
# Row with leading whitespace followed by non-blank content.
# `Page.arguments()` takes such a row as the continuation of a description.
p_desc = re.compile(r'^\s+([^\s]+.*?)\s*$')
def _Rd2txt(section_doc):
    """ Render a fragment of R documentation as rows of text.

    `section_doc` is handed to the R function `Rd2txt` (from the
    namespace of the package 'tools') with `fragment=True`. The text is
    written to a temporary file through an R connection, and the content
    of that file is read back.

    Returns the list of rows in the file, as given by `readlines()`
    (line terminators are kept). The temporary file is always removed.
    """
    base = rinterface.baseenv
    tempfilename = base['tempfile']()[0]
    filecon = base['file'](tempfilename, open='w')
    try:
        try:
            # The value returned by Rd2txt is not needed: only the content
            # written to the connection matters.
            tools_ns['Rd2txt'](section_doc, out=filecon, fragment=True)
            base['flush'](filecon)
        finally:
            # Close the R connection whether or not the rendering worked,
            # so a failure does not leave the connection open.
            base['close'](filecon)
        with open(tempfilename) as fh:
            section_rows = fh.readlines()
    finally:
        os.unlink(tempfilename)
    return section_rows
def create_metaRd_db(dbcon) -> None:
    """ Create the tables and indexes used to store R help pages.

    Three tables are created: `package`, `rd_meta` (indexed on `type`)
    and `rd_alias_meta` (indexed on `alias`). The changes are committed
    before returning.

    - dbcon: database connection (assumed to be SQLite - may or may not
      work with other databases)
    """
    ddl_statements = (
        '''
        CREATE TABLE package (
        name TEXT UNIQUE,
        title TEXT,
        version TEXT,
        description TEXT
        );
        ''',
        '''
        CREATE TABLE rd_meta (
        id INTEGER, file TEXT UNIQUE, name TEXT, type TEXT, title TEXT, encoding TEXT,
        package_rowid INTEGER
        );
        ''',
        '''
        CREATE INDEX type_idx ON rd_meta (type);
        ''',
        '''
        CREATE TABLE rd_alias_meta (
        rd_meta_rowid INTEGER, alias TEXT
        );
        ''',
        '''
        CREATE INDEX alias_idx ON rd_alias_meta (alias);
        ''',
    )
    # Order matters: each index is created after the table it refers to.
    for statement in ddl_statements:
        dbcon.execute(statement)
    dbcon.commit()
def populate_metaRd_db(package_name: str, dbcon,
                       package_path: typing.Optional[str] = None) -> None:
    """ Store the meta-information about an R package in a database.

    One row is added to the table `package` (name, title, version,
    description), one row per documentation file is added to `rd_meta`,
    and one row per alias is added to `rd_alias_meta` (aliases are what
    the R help system is organised around). Nothing is committed here.

    - package_name: a string
    - dbcon: a database connection
    - package_path: path the R package installation (default: None,
      in which case the path is looked up from the name of the package)
    """
    if package_path is None:
        package_path = get_packagepath(package_name)

    # Package-level information, read from the DESCRIPTION entry.
    package_rds = readRDS(
        StrSexpVector((os.path.join(package_path, __package_meta),))
    )
    description = package_rds[
        package_rds.do_slot('names').index('DESCRIPTION')
    ]
    description_fields = description.do_slot('names')
    package_values = tuple(
        description[description_fields.index(field)]
        for field in ('Package', 'Title', 'Version', 'Description')
    )
    cursor = dbcon.execute('insert into package values (?,?,?,?)',
                           package_values)
    package_rowid = cursor.lastrowid

    # Information about each documentation file, stored column-wise.
    rd_rds = readRDS(
        StrSexpVector((os.path.join(package_path, __rd_meta),))
    )
    rd_fields = rd_rds.do_slot("names")
    meta_columns = tuple(
        rd_rds[rd_fields.index(field)]
        for field in ('File', 'Name', 'Type', 'Title', 'Encoding')
    )
    alias_column = rd_rds[rd_fields.index('Aliases')]

    for row_i in range(len(rd_rds[0])):
        row_values = (
            (row_i, )
            + tuple(column[row_i] for column in meta_columns)
            + (package_rowid, )
        )
        cursor = dbcon.execute('insert into rd_meta values (?,?,?,?,?,?,?)',
                               row_values)
        # Each alias points back to the row just inserted in rd_meta.
        rd_rowid = cursor.lastrowid
        for alias in alias_column[row_i]:
            dbcon.execute('insert into rd_alias_meta values (?,?)',
                          (rd_rowid, alias))
# Name / value pair. `Page.arguments()` uses it to associate the name of an
# argument with the rows of text describing it.
Item = namedtuple('Item', 'name value')
class Page(object):
    """ An R documentation page.

    The original R structure is a nested sequence of components,
    corresponding to the latex-like .Rd file.

    A help page is divided into sections. The names of the sections
    are the keys of the dict attribute 'sections', and a given section
    can be extracted with the square-bracket operator.

    In R, the S3 class 'Rd' is the closest entity to this class.
    """

    def __init__(self, struct_rdb: rinterface.ListSexpVector,
                 _type: str = ''):
        """ Build a page from the R structure for a documentation entry.

        - struct_rdb: the R list with the components of the page
        - _type: type of the documentation entry (default: '')

        Components are stored by Rd tag. When a tag occurs several times
        only the last component is kept, and a warning is issued unless
        the tag is in NON_UNIQUE_TAGS.
        """
        # R's `[` (1-based index) rather than Python indexing: the
        # component is extracted by R itself.
        r_subset = rinterface.baseenv['[']
        sections = OrderedDict()
        for elt_i in range(len(struct_rdb)):
            elt = r_subset(struct_rdb, elt_i+1)
            rd_tag = elt[0].do_slot("Rd_tag")[0]
            if rd_tag in sections and rd_tag not in NON_UNIQUE_TAGS:
                warnings.warn('Section of the R doc duplicated: %s' % rd_tag)
            sections[rd_tag] = elt
        self._sections = sections
        self._type = _type

    def _section_get(self):
        return self._sections

    sections = property(_section_get, None, None,
                        'Sections in the help page, as a dict.')

    def __getitem__(self, item):
        """ Get a section """
        return self.sections[item]

    def arguments(self) -> typing.List[Item]:
        """ Get the arguments and descriptions as a list of Item objects.

        The section with the arguments is rendered as text and the rows
        are parsed. The value of each Item is the list of the rows of
        text with the description of the argument. The list is empty
        if the page has no such section.
        """
        res: typing.List[Item] = list()
        section_doc = self._sections.get(r'\arguments')
        if section_doc is None:
            return res
        section_rows = _Rd2txt(section_doc)
        if len(section_rows) < 3:
            return res

        arg_name = None
        arg_desc = None
        # The first two rows are skipped
        # (presumably the header of the section - to be confirmed).
        for row in section_rows[2:]:
            if arg_name is None:
                # Not within an entry: look for the beginning of one.
                m = p_newarg.match(row)
                if m:
                    arg_name, first_row = m.groups()
                    arg_desc = [first_row]
            elif p_desc.match(row):
                # Within an entry: one more row for the description.
                arg_desc.append(row.strip())
            else:
                # Any other row ends the current entry.
                res.append(Item(arg_name, arg_desc))
                arg_name = None
                arg_desc = None
        if arg_name is not None:
            # The last entry ran until the end of the section.
            res.append(Item(arg_name, arg_desc))
        return res

    def _get_section(self, section: str):
        """ Render a section as text.

        Returns the list of rows of text for the section, or an empty
        string if the page has no such section.
        """
        section_doc = self._sections.get(section, None)
        if section_doc is None:
            return ''
        return _Rd2txt(section_doc)

    def description(self) -> str:
        """ Get the description of the entry
        (rows of text, or '' if there is no such section). """
        return self._get_section(r'\description')

    def details(self) -> str:
        """ Get the section Details for the documentation entry
        (rows of text, or '' if there is no such section). """
        return self._get_section(r'\details')

    def title(self) -> str:
        """ Get the title
        (rows of text, or '' if there is no such section). """
        return self._get_section(r'\title')

    def value(self) -> str:
        """ Get the value returned
        (rows of text, or '' if there is no such section). """
        return self._get_section(r'\value')

    def seealso(self) -> str:
        """ Get the other documentation entries recommended
        (rows of text, or '' if there is no such section). """
        return self._get_section(r'\seealso')

    def usage(self) -> str:
        """ Get the usage for the object
        (rows of text, or '' if there is no such section). """
        return self._get_section(r'\usage')

    def items(self):
        """ iterator through the sections names and content
        in the documentation Page. """
        return self.sections.items()

    def iteritems(self):
        """ iterator through the sections names and content
        in the documentation Page. (deprecated, use items()) """
        # stacklevel=2: the warning is reported at the line of the caller.
        warnings.warn('Use the method items().', DeprecationWarning,
                      stacklevel=2)
        return self.sections.items()

    def to_docstring(
            self,
            section_names: typing.Optional[typing.Tuple[str, ...]] = None
    ) -> str:
        """ section_names: list of section names to consider. If None
        all sections are used.

        Returns a string that can be used as a Python docstring.

        Each section is a title (the name of the section without its
        leading backslash) underlined with dashes, followed by the
        strings found in the section, each followed by a space.
        A KeyError is raised if a name is not a section of the page. """
        s = []

        if section_names is None:
            section_names = self.sections.keys()

        def walk(tree):
            # Depth-first traversal collecting the strings in the tree.
            if isinstance(tree, str):
                s.append(tree)
                s.append(' ')
            else:
                for elt in tree:
                    walk(elt)

        for name in section_names:
            name_str = name[1:] if name.startswith('\\') else name
            s.extend((name_str, os.linesep,
                      '-' * len(name_str), os.linesep,
                      os.linesep))
            walk(self.sections[name])
            s.extend((os.linesep, os.linesep))
        return ''.join(s)
class Package(object):
    """
    The R documentation page (aka help) for a package.

    The meta-information about the documentation in the package is
    loaded in an in-memory SQLite database when an instance is created,
    and pages are fetched by alias with the method `fetch()`.
    """
    __package_path = None
    __package_name = None
    __aliases_info = 'aliases.rds'
    __hsearch_meta = os.path.join('Meta', 'hsearch.rds')
    __paths_info = 'paths.rds'
    __anindex_info = 'AnIndex'

    def __package_name_get(self):
        return self.__package_name

    name = property(__package_name_get, None, None,
                    'Name of the package as known by R')

    def __init__(self, package_name: str,
                 package_path: typing.Optional[str] = None):
        """
        - package_name: name of the R package
        - package_path: path to the installed R package (default: None,
          in which case the path is looked up from the name)
        """
        self.__package_name = package_name
        if package_path is None:
            package_path = get_packagepath(package_name)
        self.__package_path = package_path

        rd_meta_dbcon = sqlite3.connect(':memory:')
        create_metaRd_db(rd_meta_dbcon)
        populate_metaRd_db(package_name,
                           rd_meta_dbcon,
                           package_path=package_path)
        self._dbcon = rd_meta_dbcon

        path = os.path.join(package_path, 'help', package_name + '.rdx')
        self._rdx = readRDS(StrSexpVector((path, )))

    def fetch(self, alias: str) -> Page:
        """ Fetch the documentation page associated with a given alias.

        For S4 classes, the class name is *often* suffixed with '-class'.
        For example, the alias to the documentation for the class
        AnnotatedDataFrame in the package Biobase is
        'AnnotatedDataFrame-class'.

        Raises HelpNotFoundError if the alias is not known for the
        package. If several entries have the alias, the first one
        returned by the database is used.
        """
        c = self._dbcon.execute(
            'SELECT rd_meta_rowid, alias FROM rd_alias_meta WHERE alias=?',
            (alias, )
        )
        res_alias = c.fetchall()
        if not res_alias:
            raise HelpNotFoundError(
                'No help could be fetched',
                topic=alias, package=self.__package_name
            )

        c = self._dbcon.execute(
            'SELECT file, name, type FROM rd_meta WHERE rowid=?',
            (res_alias[0][0], )
        )
        # since the selection is on a verified rowid we are sure to
        # exactly get one row
        rd_file, _, _type = c.fetchall()[0]
        # The key is the name of the file without its last 3 characters
        # (presumably the extension '.Rd' - to be confirmed).
        rkey = rd_file[:-3]

        rpath = StrSexpVector(
            (os.path.join(self.package_path,
                          'help',
                          f'{self.__package_name}.rdb'),)
        )

        rdx_names = self._rdx.do_slot('names')
        rdx_variables = self._rdx[rdx_names.index('variables')]
        # R function ignoring its argument, passed as the last argument
        # of lazyLoadDBfetch.
        devnull_func = _eval(rinterface.parse('function(x) {}'))
        res = _lazyload_dbfetch(
            rdx_variables[rdx_variables.do_slot('names').index(rkey)],
            rpath,
            self._rdx[rdx_names.index("compressed")],
            devnull_func
        )
        return Page(res, _type=_type)

    package_path = property(lambda self: str(self.__package_path),
                            None, None,
                            'Path to the installed R package')

    def __repr__(self):
        return 'R package %s %s' % (self.__package_name,
                                    super().__repr__())
class HelpNotFoundError(KeyError):
    """ Exception raised when a help topic cannot be found.

    In addition to the message, the exception carries the topic looked
    for and the package in which it was looked for (both default to
    None), as the attributes `topic` and `package`.
    """

    def __init__(self, msg, topic=None, package=None):
        super().__init__(msg)
        self.topic, self.package = topic, package
def pages(topic):
    """ Get help pages corresponding to a given topic.

    All packages available in each one of the R library paths are
    searched for the topic (used as an alias).

    - topic: the alias to look for

    Returns a tuple of Page objects, one for each package with a help
    page for the topic (an empty tuple if there is none).
    """
    res = list()
    for path in _libpaths():
        lib_loc = StrSexpVector((path,))
        for name in _packages(**{'all.available': True,
                                 'lib.loc': lib_loc}):
            # The package is read from the library being listed, so that
            # a package installed at several locations is looked at in
            # each one of them rather than always at the same place.
            pack = Package(name, package_path=os.path.join(path, name))
            try:
                page = pack.fetch(topic)
            except HelpNotFoundError:
                # No help page for the topic in that package.
                continue
            res.append(page)
    return tuple(res)
def docstring(
        package: Package, alias: str,
        sections: typing.Tuple[str, ...] = (r'\usage',
                                            r'\arguments')) -> str:
    """ Build a Python docstring from the R documentation for an alias.

    - package: a Package (anything else is passed to Package() to
      build one)
    - alias: the alias to fetch in the package
    - sections: names of the sections of the help page to include

    Returns the string built by the method to_docstring() of the page.
    """
    help_package = (package if isinstance(package, Package)
                    else Package(package))
    return help_package.fetch(alias).to_docstring(sections)