Optimize threshold for clean data extraction.
Usage
optimal_threshold(
refdata,
outliers,
var_col = NULL,
warn = FALSE,
verbose = FALSE,
plot = FALSE
)
Arguments
- refdata: `dataframe`. Species data frame from the precleaned analysis.
- outliers: `datacleaner`. Datacleaner output with outliers flagged by the `multidetect` function.
- var_col: `string`. A column with species names if the `dataset` for species is a dataframe, not a list. See `pred_extract` for extracting environmental data. Default NULL.
- warn: `logical`. If TRUE, a warning is issued indicating whether absolute outliers were obtained at a low threshold. Default FALSE.
- verbose: `logical`. If TRUE, messages about the outlier flagging will be displayed. Default FALSE.
- plot: `logical`. If TRUE, show a plot of the loess fitted function with minima and maxima (optimal threshold and clean data). Default FALSE.
Examples
if (FALSE) { # \dontrun{
data(jdsdata)
data(efidata)
matchdata <- match_datasets(datasets = list(jds = jdsdata, efi = efidata),
lats = 'lat',
lons = 'lon',
species = c('speciesname','scientificName'),
country= c('JDS4_site_ID'),
date=c('sampling_date', 'Date'))
datacheck <- check_names(matchdata, colsp= 'species', pct = 90, merge =TRUE)
db <- sf::st_read(system.file('extdata/danube/basinfinal.shp', package='specleanr'), quiet=TRUE)
worldclim <- terra::rast(system.file('extdata/worldclim.tiff', package='specleanr'))
rdata <- pred_extract(data = datacheck,
raster= worldclim ,
lat = 'decimalLatitude',
lon= 'decimalLongitude',
colsp = 'speciescheck',
bbox = db,
multiple = TRUE,
minpts = 10,
list=TRUE,
merge=F)
out_df <- multidetect(data = rdata, multiple = TRUE,
var = 'bio6',
output = 'outlier',
exclude = c('x','y'),
methods = c('zscore', 'adjbox','iqr', 'semiqr','hampel'))
#extracting optimal threshold for each species
threshopt <- optimal_threshold(refdata = rdata, outliers = out_df)
} # }