{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "9282de00",
"metadata": {
"execution": {
"iopub.execute_input": "2023-06-26T02:19:21.541196Z",
"iopub.status.busy": "2023-06-26T02:19:21.540803Z",
"iopub.status.idle": "2023-06-26T02:19:22.426627Z",
"shell.execute_reply": "2023-06-26T02:19:22.425887Z"
}
},
"outputs": [],
"source": [
"from functools import partial\n",
"from rpy2.ipython import html\n",
"html.html_rdataframe=partial(html.html_rdataframe, table_class=\"docutils\")"
]
},
{
"cell_type": "markdown",
"id": "7b2aab03",
"metadata": {},
"source": [
"# tidyr in Python"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "fa8a91a1",
"metadata": {
"execution": {
"iopub.execute_input": "2023-06-26T02:19:22.430570Z",
"iopub.status.busy": "2023-06-26T02:19:22.430114Z",
"iopub.status.idle": "2023-06-26T02:19:24.431882Z",
"shell.execute_reply": "2023-06-26T02:19:24.431206Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/rpy2/robjects/lib/dplyr.py:27: UserWarning: This was designed againt dplyr versions starting with 1.0 but you have 1.1.2\n",
" warnings.warn(\n",
"/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/rpy2/robjects/lib/tidyr.py:12: UserWarning: This was designed againt tidyr versions starting with 1.2. but you have 1.3.0\n",
" warnings.warn(\n"
]
}
],
"source": [
"from rpy2.robjects.lib.tidyr import DataFrame"
]
},
{
"cell_type": "markdown",
"id": "112d787b",
"metadata": {},
"source": [
"(note: `dplyr` is implicitly used by `tidyr`.)\n",
"\n",
"In addition to that, and because this tutorial is in a notebook,\n",
"we initialize HTML rendering for R objects (pretty display of\n",
"R data frames)."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "44540102",
"metadata": {
"execution": {
"iopub.execute_input": "2023-06-26T02:19:24.435777Z",
"iopub.status.busy": "2023-06-26T02:19:24.435166Z",
"iopub.status.idle": "2023-06-26T02:19:24.440299Z",
"shell.execute_reply": "2023-06-26T02:19:24.439628Z"
}
},
"outputs": [],
"source": [
"import rpy2.ipython.html\n",
"rpy2.ipython.html.init_printing()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "e8d223de",
"metadata": {
"execution": {
"iopub.execute_input": "2023-06-26T02:19:24.443220Z",
"iopub.status.busy": "2023-06-26T02:19:24.442875Z",
"iopub.status.idle": "2023-06-26T02:19:24.454536Z",
"shell.execute_reply": "2023-06-26T02:19:24.453877Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"DataFrame with 3 rows and\n",
" 3 columns:\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" x | \n",
" y | \n",
" z | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" a | \n",
" 3 | \n",
" 6 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" b | \n",
" 4 | \n",
" 7 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" b | \n",
" 5 | \n",
" 8 | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
" [RTYPES.VECSXP]\n",
"R classes: ('data.frame',)\n",
"[StrSexpVector, IntSexpVector, IntSexpVector]\n",
" x: \n",
" [RTYPES.STRSXP]\n",
" y: \n",
" [RTYPES.INTSXP]\n",
" z: \n",
" [RTYPES.INTSXP]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from collections import OrderedDict\n",
"from rpy2.robjects.vectors import (StrVector,\n",
" IntVector)\n",
"dataf = DataFrame(OrderedDict(x=StrVector((\"a\", \"b\", \"b\")),\n",
" y=IntVector((3, 4, 5)),\n",
"\t\t z=IntVector((6, 7, 8))))\n",
"dataf"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "e285d4b7",
"metadata": {
"execution": {
"iopub.execute_input": "2023-06-26T02:19:24.457653Z",
"iopub.status.busy": "2023-06-26T02:19:24.457310Z",
"iopub.status.idle": "2023-06-26T02:19:24.481327Z",
"shell.execute_reply": "2023-06-26T02:19:24.480629Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"DataFrame with 3 rows and\n",
" 3 columns:\n",
"\n",
" \n",
" \n",
" | \n",
" | \n",
" z | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 6 | \n",
" 3 | \n",
" NA_integer_ | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 7 | \n",
" NA_integer_ | \n",
" 4 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 8 | \n",
" NA_integer_ | \n",
" 5 | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
" [RTYPES.VECSXP]\n",
"R classes: ('data.frame',)\n",
"[IntSexpVector, IntSexpVector, IntSexpVector]\n",
" z: \n",
" [RTYPES.INTSXP]\n",
" a: \n",
" [RTYPES.INTSXP]\n",
" b: \n",
" [RTYPES.INTSXP]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataf.spread('x', 'y')"
]
},
{
"cell_type": "markdown",
"id": "7c5b8eaf",
"metadata": {},
"source": [
"**Reuse. Get things done. Don't reimplement.**"
]
}
],
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.17"
}
},
"nbformat": 4,
"nbformat_minor": 5
}