Source code for rpy2.robjects.help

"""
R help system.

"""
import enum
import os
from collections import namedtuple
import re
import sqlite3
import typing
import warnings

import rpy2.rinterface as rinterface
from rpy2.rinterface import StrSexpVector

from rpy2.robjects.packages_utils import (get_packagepath,
                                          _libpaths,
                                          _packages)
from collections import OrderedDict

tmp = rinterface.baseenv['R.Version']()
tmp_major = int(tmp[tmp.do_slot('names').index('major')][0])
tmp_minor = float(tmp[tmp.do_slot('names').index('minor')][0])
readRDS = rinterface.baseenv['readRDS']

del(tmp)
del(tmp_major)
del(tmp_minor)

_eval = rinterface.baseenv['eval']

NON_UNIQUE_TAGS = set((r'\alias', r'\keyword', r'\section'))


def quiet_require(name: str, lib_loc: typing.Optional[str] = None) -> bool:
    """ Load an R package /quietly/ (suppressing messages to the console). """
    if lib_loc is None:
        lib_loc = "NULL"
    expr_txt = ('suppressPackageStartupMessages(base::require(%s, lib.loc=%s))'
                % (name, lib_loc))
    expr = rinterface.parse(expr_txt)
    ok = _eval(expr)
    return ok


quiet_require('tools')
_get_namespace = rinterface.baseenv['getNamespace']
_lazyload_dbfetch = rinterface.baseenv['lazyLoadDBfetch']

tools_ns = _get_namespace(StrSexpVector(('tools',)))
_Rd_db = tools_ns['Rd_db']
_Rd_deparse = tools_ns['.Rd_deparse']

__rd_meta = os.path.join('Meta', 'Rd.rds')
__package_meta = os.path.join('Meta', 'package.rds')

p_newarg = re.compile(r'^\s*([a-zA-Z\._][a-zA-Z0-9\._]*?)\s*:\s*(.+?)\s*$')
p_desc = re.compile(r'^\s+([^\s]+.*?)\s*$')


def _Rd2txt(section_doc):
    tempfilename = rinterface.baseenv['tempfile']()[0]
    filecon = rinterface.baseenv['file'](tempfilename, open='w')
    try:
        tools_ns['Rd2txt'](section_doc, out=filecon, fragment=True)[0].split('\n')
        rinterface.baseenv['flush'](filecon)
        rinterface.baseenv['close'](filecon)
        with open(tempfilename) as fh:
            section_rows = fh.readlines()
    finally:
        os.unlink(tempfilename)
    return section_rows


def create_metaRd_db(dbcon) -> None:
    """ Create an database to store R help pages.

    dbcon: database connection (assumed to be SQLite - may or may not work
           with other databases)
    """
    dbcon.execute('''
CREATE TABLE package (
name TEXT UNIQUE,
title TEXT,
version TEXT,
description TEXT
);
''')
    dbcon.execute('''
CREATE TABLE rd_meta (
id INTEGER, file TEXT UNIQUE, name TEXT, type TEXT, title TEXT, encoding TEXT,
package_rowid INTEGER
);
''')
    dbcon.execute('''
CREATE INDEX type_idx ON rd_meta (type);
''')
    dbcon.execute('''
CREATE TABLE rd_alias_meta (
rd_meta_rowid INTEGER, alias TEXT
);
''')
    dbcon.execute('''
CREATE INDEX alias_idx ON rd_alias_meta (alias);
''')
    dbcon.commit()


def populate_metaRd_db(package_name: str, dbcon,
                       package_path: typing.Optional[str] = None) -> None:
    """ Populate a database with the meta-information
    associated with an R package: version, description, title, and
    aliases (those are what the R help system is organised around).

    - package_name: a string
    - dbcon: a database connection
    - package_path: path the R package installation (default: None)
    """
    if package_path is None:
        package_path = get_packagepath(package_name)

    rpath = StrSexpVector((os.path.join(package_path,
                                        __package_meta),))

    rds = readRDS(rpath)
    desc = rds[rds.do_slot('names').index('DESCRIPTION')]
    db_res = dbcon.execute('insert into package values (?,?,?,?)',
                           (desc[desc.do_slot('names').index('Package')],
                            desc[desc.do_slot('names').index('Title')],
                            desc[desc.do_slot('names').index('Version')],
                            desc[desc.do_slot('names').index('Description')],
                            ))
    package_rowid = db_res.lastrowid

    rpath = StrSexpVector((os.path.join(package_path,
                                        __rd_meta),))

    rds = readRDS(rpath)
    FILE_I = rds.do_slot("names").index('File')
    NAME_I = rds.do_slot("names").index('Name')
    TYPE_I = rds.do_slot("names").index('Type')
    TITLE_I = rds.do_slot("names").index('Title')
    ENCODING_I = rds.do_slot("names").index('Encoding')
    ALIAS_I = rds.do_slot("names").index('Aliases')
    for row_i in range(len(rds[0])):
        db_res = dbcon.execute('insert into rd_meta values (?,?,?,?,?,?,?)',
                               (row_i,
                                rds[FILE_I][row_i],
                                rds[NAME_I][row_i],
                                rds[TYPE_I][row_i],
                                rds[TITLE_I][row_i],
                                rds[ENCODING_I][row_i],
                                package_rowid))
        rd_rowid = db_res.lastrowid
        for alias in rds[ALIAS_I][row_i]:
            dbcon.execute('insert into rd_alias_meta values (?,?)',
                          (rd_rowid, alias))


Item = namedtuple('Item', 'name value')


[docs]class Page(object):
    """ An R documentation page.
    The original R structure is a nested sequence of components,
    corresponding to the latex-like .Rd file

    An help page is divided into sections, the names for the sections
    are the keys for the dict attribute 'sections', and a given section
    can be extracted with the square-bracket operator.

    In R, the S3 class 'Rd' is the closest entity to this class.
    """

    def __init__(self, struct_rdb: rinterface.ListSexpVector,
                 _type: str = ''):
        sections = OrderedDict()
        for elt_i in range(len(struct_rdb)):
            elt = rinterface.baseenv['['](struct_rdb, elt_i+1)
            rd_tag = elt[0].do_slot("Rd_tag")[0]
            if rd_tag == r'\section':
                rd_section = rd_tag[0][2:]
            if rd_tag in sections and rd_tag not in NON_UNIQUE_TAGS:
                warnings.warn('Section of the R doc duplicated: %s' % rd_tag)
            sections[rd_tag] = elt
        self._sections = sections
        self._type = _type

    def _section_get(self):
        return self._sections

    sections = property(_section_get, None, None,
                        'Sections in the in help page, as a dict.')

    def __getitem__(self, item):
        """ Get a section """
        return self.sections[item]

[docs]    def arguments(self) -> typing.List[Item]:
        """ Get the arguments and descriptions as a list of Item objects. """
        section_doc = self._sections.get(r'\arguments')
        res = list()
        if section_doc is None:
            return res
        else:
            arg_name = None
            arg_desc = None
            section_rows = _Rd2txt(section_doc)
            if len(section_rows) < 3:
                return res
            for row in section_rows[2: ]:
                if arg_name is None:
                    m = p_newarg.match(row)
                    if m:
                        arg_name = m.groups()[0]
                        arg_desc = [m.groups()[1]]
                else:
                    if p_desc.match(row):
                        arg_desc.append(row.strip())
                    else:
                        res.append(
                            Item(arg_name, arg_desc)
                        )
                        arg_name = None
                        arg_desc = None

            if arg_name is not None:
                res.append(
                    Item(arg_name, arg_desc)
                )
        return res

    def _get_section(self, section: str):
        section_doc = self._sections.get(section, None)
        if section_doc is None:
            res = ''
        else:
            res = _Rd2txt(section_doc)
        return res

[docs]    def description(self) -> str:
        """ Get the description of the entry """
        return self._get_section(r'\description')

[docs]    def title(self) -> str:
        """ Get the title """
        return self._get_section(r'\title')

[docs]    def value(self) -> str:
        """ Get the value returned """
        return self._get_section(r'\value')

[docs]    def seealso(self) -> str:
        """ Get the other documentation entries recommended """
        return self._get_section(r'\seealso')

[docs]    def usage(self) -> str:
        """ Get the usage for the object """
        return self._get_section(r'\usage')

[docs]    def iteritems(self):
        """ iterator through the sections names and content
        in the documentation Page. """
        return self.sections.iteritems

[docs]    def to_docstring(self,
                     section_names: typing.Optional[typing.Tuple[str, ...]] = None
    ) -> str:
        """ section_names: list of section names to consider. If None
        all sections are used.

        Returns a string that can be used as a Python docstring. """
        s = []

        if section_names is None:
            section_names = self.sections.keys()

        def walk(tree):
            if not isinstance(tree, str):
                for elt in tree:
                    walk(elt)
            else:
                s.append(tree)
                s.append(' ')

        for name in section_names:
            name_str = name[1:] if name.startswith('\\') else name
            s.append(name_str)
            s.append(os.linesep)
            s.append('-' * len(name_str))
            s.append(os.linesep)
            s.append(os.linesep)
            walk(self.sections[name])
            s.append(os.linesep)
            s.append(os.linesep)
        return ''.join(s)


[docs]class Package(object):
    """
    The R documentation page (aka help) for a package.
    """
    __package_path = None
    __package_name = None
    __aliases_info = 'aliases.rds'
    __hsearch_meta = os.path.join('Meta', 'hsearch.rds')
    __paths_info = 'paths.rds'
    __anindex_info = 'AnIndex'

    def __package_name_get(self):
        return self.__package_name

    name = property(__package_name_get, None, None,
                    'Name of the package as known by R')

    def __init__(self, package_name: str,
                 package_path: typing.Optional[str] = None):
        self.__package_name = package_name
        if package_path is None:
            package_path = get_packagepath(package_name)
        self.__package_path = package_path

        rd_meta_dbcon = sqlite3.connect(':memory:')
        create_metaRd_db(rd_meta_dbcon)
        populate_metaRd_db(package_name,
                           rd_meta_dbcon,
                           package_path=package_path)
        self._dbcon = rd_meta_dbcon

        path = os.path.join(package_path, 'help', package_name + '.rdx')
        self._rdx = readRDS(StrSexpVector((path, )))

[docs]    def fetch(self, alias: str) -> Page:
        """ Fetch the documentation page associated with a given alias.

        For S4 classes, the class name is *often* suffixed with '-class'.
        For example, the alias to the documentation for the class
        AnnotatedDataFrame in the package Biobase is
        'AnnotatedDataFrame-class'.
        """

        c = self._dbcon.execute(
            'SELECT rd_meta_rowid, alias FROM rd_alias_meta WHERE alias=?',
            (alias, )
        )
        res_alias = c.fetchall()
        if len(res_alias) == 0:
            raise HelpNotFoundError(
                'No help could be fetched',
                topic=alias, package=self.__package_name
            )

        c = self._dbcon.execute(
            'SELECT file, name, type FROM rd_meta WHERE rowid=?',
            (res_alias[0][0], )
        )
        # since the selection is on a verified rowid we are sure to
        # exactly get one row
        res = c.fetchall()
        rkey = StrSexpVector((res[0][0][:-3], ))
        _type = res[0][2]
        rpath = StrSexpVector((os.path.join(self.package_path,
                                            'help',
                                            self.__package_name + '.rdb'),))

        rdx_variables = (
            self._rdx[self._rdx.do_slot('names').index('variables')]
        )
        _eval = rinterface.baseenv['eval']
        devnull_func = rinterface.parse('function(x) {}')
        devnull_func = _eval(devnull_func)
        res = _lazyload_dbfetch(
            rdx_variables[rdx_variables.do_slot('names').index(rkey[0])],
            rpath,
            self._rdx[self._rdx.do_slot('names').index("compressed")],
            devnull_func
        )
        p_res = Page(res, _type=_type)
        return p_res

    package_path = property(lambda self: str(self.__package_path),
                            None, None,
                            'Path to the installed R package')

    def __repr__(self):
        r = 'R package %s %s' % (self.__package_name,
                                 super(Package, self).__repr__())
        return r


class HelpNotFoundError(KeyError):
    """ Exception raised when an help topic cannot be found. """
    def __init__(self, msg, topic=None, package=None):
        super(HelpNotFoundError, self).__init__(msg)
        self.topic = topic
        self.package = package


[docs]def pages(topic):
    """ Get help pages corresponding to a given topic. """
    res = list()

    for path in _libpaths():
        for name in _packages(**{'all.available': True,
                                 'lib.loc': StrSexpVector((path,))}):
            # TODO: what if the same package is installed
            #       at different locations ?
            pack = Package(name)
            try:
                page = pack.fetch(topic)
                res.append(page)
            except HelpNotFoundError as hnfe:
                pass

    return tuple(res)


def docstring(package: Package, alias: str,
              sections: typing.Tuple[str, ...] = (r'\usage',
                                                  r'\arguments')
) -> str:
    """Fetch the R documentation for an alias in a package."""
    if not isinstance(package, Package):
        package = Package(package)
    page = package.fetch(alias)
    return page.to_docstring(sections)