User Guide: 2 Density-based filtering

‘ggpmisc’ 0.2.16

Pedro J. Aphalo

2017-09-17

Introduction

This vignette demonstrates the performance of the default arguments of stat_dens2d_filter() with artificial data sets of different sizes, drawn from different theoretical distributions. It was initially used for testing, but it also shows what to expect in different situations.

Preliminaries

library(ggpmisc)
library(ggplot2)
library(tibble)

We define a function to simplify the generation of random data sets.

# Generate a reproducible artificial data set for the examples below.
#
# Arguments:
#   nrow  Requested number of observations; an odd value is increased by one
#         so that the two groups are of equal size.
#   rfun  Random number generator, called as rfun(n, ...) once for x and
#         once for y.
#   ...   Additional arguments forwarded unchanged to rfun.
#
# Value: a tibble with columns x, y and group, where group is "A" for the
# first half of the rows and "B" for the second half.
#
# Side effect: the seed of the global random number generator is set to 1001
# on every call, so repeated calls with the same arguments return the same
# data.
make_data_tbl <- function(nrow = 100, rfun = rnorm, ...) {
  n_obs <- if (nrow %% 2) nrow + 1 else nrow
  half <- n_obs / 2

  set.seed(1001)

  # Draw x before y so that the sequence of random numbers is preserved.
  x_draw <- rfun(n_obs, ...)
  y_draw <- rfun(n_obs, ...)

  tibble::tibble(
    x = x_draw,
    y = y_draw,
    group = rep(c("A", "B"), times = c(half, half))
  )
}

As we will draw many points on the plotting area, we change the default theme to an uncluttered one.

# Make the black-and-white theme the default for all subsequent plots. The
# value returned by theme_set() is kept in old_theme so that the earlier
# default can be restored later.
old_theme <- ggplot2::theme_set(ggplot2::theme_bw())

Tests with different data sets

Number of observations

By default the fraction of observations kept is 1/10.

# Six observations, default arguments.
ggplot(make_data_tbl(6), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(color = "red")

# Six observations, keeping one half of them.
ggplot(make_data_tbl(6), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(
    color = "red",
    keep.fraction = 1 / 2
  )

# 20 observations, default arguments.
ggplot(make_data_tbl(20), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(color = "red")

# 100 observations, default arguments.
ggplot(make_data_tbl(100), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(color = "red")

# 500 observations, default arguments.
ggplot(make_data_tbl(500), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(color = "red")

# 2000 observations, default arguments.
ggplot(make_data_tbl(2000), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(color = "red")

# 2000 observations, keeping only 1% of them.
ggplot(make_data_tbl(2000), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(
    color = "red",
    keep.fraction = 0.01
  )

# 2000 observations with keep.sparse = FALSE (per the ggpmisc documentation
# this retains observations from the densest regions instead of the
# sparsest ones).
ggplot(make_data_tbl(2000), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(
    color = "red",
    keep.sparse = FALSE
  )

# Both filters on the same plot: keep.sparse = FALSE in red, the default
# setting in blue.
ggplot(make_data_tbl(2000), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(
    color = "red",
    keep.sparse = FALSE
  ) +
  stat_dens2d_filter(color = "blue")

# 2000 observations, keep.sparse = FALSE and only 1% of them kept.
ggplot(make_data_tbl(2000), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(
    color = "red",
    keep.fraction = 0.01,
    keep.sparse = FALSE
  )

# 10000 observations, default arguments.
ggplot(make_data_tbl(10000), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(color = "red")

# 10000 observations, keeping only 1% of them.
ggplot(make_data_tbl(10000), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(
    color = "red",
    keep.fraction = 0.01
  )

Random draws from different theoretical distributions

# Uniform distribution, 1000 observations.
ggplot(
  make_data_tbl(1000, rfun = runif),
  aes(x = x, y = y)
) +
  geom_point() +
  stat_dens2d_filter(color = "red", keep.fraction = 0.1)

# Gamma distribution with shape = 2.
ggplot(
  make_data_tbl(1000, rfun = rgamma, shape = 2),
  aes(x = x, y = y)
) +
  geom_point() +
  stat_dens2d_filter(color = "red", keep.fraction = 0.1)

# Gamma distribution with shape = 6.
ggplot(
  make_data_tbl(1000, rfun = rgamma, shape = 6),
  aes(x = x, y = y)
) +
  geom_point() +
  stat_dens2d_filter(color = "red", keep.fraction = 0.1)

# Beta distribution with shape1 = 3 and shape2 = 12.
ggplot(
  make_data_tbl(1000, rfun = rbeta, shape1 = 3, shape2 = 12),
  aes(x = x, y = y)
) +
  geom_point() +
  stat_dens2d_filter(color = "red", keep.fraction = 0.1)

Transformed scales

# Beta-distributed data (shape1 = 3, shape2 = 12) plotted with a log10
# scale on the y axis.
ggplot(
  make_data_tbl(1000, rfun = rbeta, shape1 = 3, shape2 = 12),
  aes(x = x, y = y)
) +
  geom_point() +
  stat_dens2d_filter(color = "red", keep.fraction = 0.1) +
  scale_y_log10()