library(reticulate)
library(ggplot2)
# The GitHub Actions workflow (current publish.yml) does not set up a venv.
# **uncomment** the following line if you run it in VSCode
#use_virtualenv("./.venv", required=TRUE) # tells R reticulate to use this Python virtual environment
Python and R together
Starting with R code
df_r <- read.csv("./data/covid.csv")
nrow(df_r)
[1] 20780
Some Python code
import pandas as pd
df_python_covid = pd.read_csv("./data/covid.csv")
df_python_covid.shape
(20780, 6)
df_python_covid.isna().sum()
date 0
state 2153
tests 0
cases 0
hospitalizations 0
deaths 0
dtype: int64
df_python_covid.dropna(inplace=True)
df_python_covid.isna().sum()
date 0
state 0
tests 0
cases 0
hospitalizations 0
deaths 0
dtype: int64
df_python_covid_agg = df_python_covid.groupby('state')['deaths'].sum()
df_python_covid_agg.head(5)
state
AK 305
AL 10148
AR 5319
AZ 16328
CA 54124
Name: deaths, dtype: int64
df_python_covid_agg.index
Index(['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI', 'IA',
'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 'MN', 'MO',
'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH', 'OK',
'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA', 'WI',
'WV', 'WY'],
dtype='object', name='state')
How to exchange dataframes between Python and R
# R code
nrow(reticulate::py$df_python_covid)
[1] 18627
back_to_r = reticulate::py$df_python_covid_agg
head(back_to_r, 5)
AK AL AR AZ CA
305 10148 5319 16328 54124
# Python code
type(r.df_r)
<class 'pandas.core.frame.DataFrame'>
Multi-index example
mpg - miles per gallon - is a dataset included in R
# R code
head(mpg,5)
# A tibble: 5 × 11
manufacturer model displ year cyl trans drv cty hwy fl class
<chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compa…
2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compa…
3 audi a4 2 2008 4 manual(m6) f 20 31 p compa…
4 audi a4 2 2008 4 auto(av) f 21 30 p compa…
5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compa…
# Python code
df_mpg = r.mpg
#df_mpg.shape
#df_mpg.info()
df_mpg_grouped = df_mpg.groupby(['manufacturer','class'])['year'].value_counts()
df_mpg_grouped_reset_index = df_mpg_grouped.reset_index()
df_mpg_grouped.head(5)
manufacturer class year
audi compact 1999 8
2008 7
midsize 2008 2
1999 1
chevrolet 2seater 2008 3
Name: count, dtype: int64
df_mpg_grouped_reset_index.head(5)
manufacturer class year count
0 audi compact 1999 8
1 audi compact 2008 7
2 audi midsize 2008 2
3 audi midsize 1999 1
4 chevrolet 2seater 2008 3
# R code
head(reticulate::py$df_mpg_grouped, 5) # multi-index
('audi', 'compact', 1999) ('audi', 'compact', 2008)
8 7
('audi', 'midsize', 2008) ('audi', 'midsize', 1999)
2 1
('chevrolet', '2seater', 2008)
3
head(reticulate::py$df_mpg_grouped_reset_index, 5) # collapsed multi-index
manufacturer class year count
1 audi compact 1999 8
2 audi compact 2008 7
3 audi midsize 2008 2
4 audi midsize 1999 1
5 chevrolet 2seater 2008 3
#reticulate::py_last_error()