Skip to contents

Optimize threshold for clean data extraction.

Usage

optimal_threshold(
  refdata,
  outliers,
  var_col = NULL,
  warn = FALSE,
  verbose = FALSE,
  plot = FALSE
)

Arguments

refdata

dataframe. Species data frame from precleaned analysis.

outliers

datacleaner. Output of the multidetect function in which the outliers have been flagged.

var_col

string. The column holding the species names when the species dataset is a data frame rather than a list. See pred_extract for extracting environmental data.

warn

logical. If TRUE, a warning indicates whether absolute outliers were obtained at a low threshold. Default FALSE.

verbose

logical. If TRUE, messages about the outlier flagging are displayed. Default FALSE.

plot

logical. If TRUE, show a plot of the loess-fitted function with its minima and maxima (optimal threshold and clean data). Default FALSE.

Value

Either a list or dataframe of cleaned records for multiple species.

Examples


if (FALSE) { # \dontrun{

# Load the two example datasets used throughout this example.
data(jdsdata)
data(efidata)

# Combine both datasets, telling match_datasets which columns hold the
# coordinates, species names, country/site identifiers and dates.
matchdata <- match_datasets(
  datasets = list(jds = jdsdata, efi = efidata),
  lats = "lat",
  lons = "lon",
  species = c("speciesname", "scientificName"),
  country = c("JDS4_site_ID"),
  date = c("sampling_date", "Date")
)

# Check the species names in the matched data.
datacheck <- check_names(matchdata, colsp = "species", pct = 90, merge = TRUE)

# Read the shapefile passed to pred_extract as the bounding box.
db <- sf::st_read(
  system.file("extdata/danube/basinfinal.shp", package = "specleanr"),
  quiet = TRUE
)

# Load the raster of environmental predictors.
worldclim <- terra::rast(
  system.file("extdata/worldclim.tiff", package = "specleanr")
)

# Extract the environmental data for the checked records.
rdata <- pred_extract(
  data = datacheck,
  raster = worldclim,
  lat = "decimalLatitude",
  lon = "decimalLongitude",
  colsp = "speciescheck",
  bbox = db,
  multiple = TRUE,
  minpts = 10,
  list = TRUE,
  merge = FALSE
)

# Flag outliers in the 'bio6' variable with several detection methods.
out_df <- multidetect(
  data = rdata,
  multiple = TRUE,
  var = "bio6",
  output = "outlier",
  exclude = c("x", "y"),
  methods = c("zscore", "adjbox", "iqr", "semiqr", "hampel")
)

# Extract the optimal threshold for each species.
threshopt <- optimal_threshold(refdata = rdata, outliers = out_df)
} # }