Aims

By the end of this practical, you should feel comfortable:

Load data and packages

library(Distance)
## Loading required package: mrds
## This is mrds 2.2.3
## Built: R 4.0.2; ; 2020-08-01 10:33:56 UTC; unix
## 
## Attaching package: 'Distance'
## The following object is masked from 'package:mrds':
## 
##     create.bins
library(dsm)
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-33. For overview type 'help("mgcv-package")'.
## Loading required package: numDeriv
## This is dsm 2.3.1.9007
## Built: R 4.0.2; ; 2020-10-01 05:44:34 UTC; unix
library(ggplot2)
library(patchwork)
library(knitr)

Load the data and the fitted detection function objects from the previous exercises:

load("spermwhale.RData")
load("df-models.RData")

Exploratory analysis

We can plot the covariates together using the following code (don’t worry too much about understanding what that code is doing at the moment).

# make a list to hold our plots
p <- list()

# make a plot for each covariate
for(covname in c("Depth", "SST", "NPP", "DistToCAS", "EKE")){
  # make
  p[[covname]] <- ggplot() +
    # covariates are plotted as tiles
    geom_tile(aes_string(x="x", y="y", fill=covname), data=predgrid) + 
    geom_point(aes(x=x, y=y, size=size),
               alpha=0.6,
               data=subset(obs, size>0))+
    # remove grey background etc
    theme_minimal() +
    # remove axis labels and fiddle with the legend
    theme(axis.title.x=element_blank(),
          axis.text.x=element_blank(),
          axis.ticks.x=element_blank(),
          axis.title.y=element_blank(),
          axis.text.y=element_blank(),
          axis.ticks.y=element_blank(),
          legend.position="right", legend.key.width=unit(0.005, "npc")) +
    # make the fill scale be colourblind friendly
    scale_fill_viridis_c()
}

# using patchwork to stick the plots together
p[["Depth"]] + p[["SST"]] + p[["NPP"]] + p[["DistToCAS"]] + p[["EKE"]]  + plot_layout(ncol = 3, nrow=2)

We could improve these plots by adding a map of the USA but this will do for now!

Pre-model fitting

As we did in the previous exercise, we must remove the observations from the spatial data that we excluded when we fitted the detection function, i.e. those observations at distances greater than the truncation.

obs <- obs[obs$distance <= df_hn$ddf$meta.data$width, ]

Here we used the value of the truncation stored in the detection function object (df_hn$ddf), but we could also use the numeric value (which we can find by checking the model’s summary()).

Again note that if you want to fit DSMs using detection functions with different truncation distances, then you’ll need to reload the spermwhale.RData and do the truncation again for that detection function.

Our new friend +

We can build a really big model using + to include all the terms that we want in the model. We can check what covariates are available to us by using head() to look at the segment table:

head(segs)
##   Sample.Label          CenterTime SegmentID     Depth  DistToCAS      SST
## 1            1 2004-06-24 07:27:04         1  118.5027 14468.1533 15.54390
## 2            2 2004-06-24 08:08:04         2  119.4853 10262.9648 15.88358
## 3            3 2004-06-24 09:03:18         3  177.2779  6900.9829 16.21920
## 4            4 2004-06-24 09:51:27         4  527.9562  1055.4124 16.45468
## 5            5 2004-06-24 10:25:39         5  602.6378  1112.6293 16.62554
## 6            6 2004-06-24 11:00:22         6 1094.4402   707.5795 16.83725
##            EKE      NPP        x        y   Effort         X        Y  Survey
## 1 0.0014442616 1908.129 214544.0 689074.3 10288.91 -70.48980 40.18245 en04395
## 2 0.0014198086 1889.540 222654.3 682781.0 10288.91 -70.39681 40.12377 en04395
## 3 0.0011704842 1842.057 230279.9 675473.3 10288.91 -70.30994 40.05597 en04395
## 4 0.0004101589 1823.942 239328.9 666646.3 10288.91 -70.20708 39.97406 en04395
## 5 0.0002553244 1721.949 246686.5 659459.2 10288.91 -70.12361 39.90731 en04395
## 6 0.0006556266 1400.281 254307.0 652547.2 10288.91 -70.03713 39.84294 en04395
##        TransectID Beaufort
## 1 en0439520040624      1.4
## 2 en0439520040624      2.3
## 3 en0439520040624      1.2
## 4 en0439520040624      1.0
## 5 en0439520040624      1.2
## 6 en0439520040624      1.0

We can then fit a model with the available covariates in it, each as an s() term.

dsm_nb_xy_ms <- dsm(count~s(x,y, bs="ts") +
                          s(Depth, bs="ts") +
                          s(DistToCAS, bs="ts") +
                          s(SST, bs="ts") +
                          s(EKE, bs="ts") +
                          s(NPP, bs="ts"),
                    df_hn, segs, obs,
                    family=nb())
summary(dsm_nb_xy_ms)
## 
## Family: Negative Binomial(0.114) 
## Link function: log 
## 
## Formula:
## count ~ s(x, y, bs = "ts") + s(Depth, bs = "ts") + s(DistToCAS, 
##     bs = "ts") + s(SST, bs = "ts") + s(EKE, bs = "ts") + s(NPP, 
##     bs = "ts") + offset(off.set)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -20.7732     0.2295   -90.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                    edf Ref.df Chi.sq  p-value    
## s(x,y)       1.8636924     29 19.141 3.05e-05 ***
## s(Depth)     3.4176460      9 46.263  < 2e-16 ***
## s(DistToCAS) 0.0000801      9  0.000   0.9067    
## s(SST)       0.0002076      9  0.000   0.5403    
## s(EKE)       0.8563344      9  5.172   0.0134 *  
## s(NPP)       0.0001018      9  0.000   0.7822    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0947   Deviance explained = 39.3%
## -REML = 382.76  Scale est. = 1         n = 949

Notes:

  1. We are using bs="ts" to use the shrinkage thin plate regression spline. More technical detail on these smooths can be found on their manual page ?smooth.construct.ts.smooth.spec.
  2. We have not specified basis complexity (k) at the moment. Note that if you want to specify the same complexity for multiple terms, it’s often easier to create a variable that can then be provided to k (for example, specify k1 <- 15 and then set k=k1 in the required s() terms).

Plot

Let’s plot the smooths from this model:

plot(dsm_nb_xy_ms, pages=1)
Smooths for `dsm_nb_xy_ms`.

Smooths for dsm_nb_xy_ms.

Notes:

  1. Setting shade=TRUE gives prettier confidence bands (by default shade=FALSE).
  2. As with vis.gam() the response is on the link scale.
  3. scale=0 puts each plot on a different \(y\)-axis scale, making it easier to see the effects. Setting scale=-1 (the default) will put the plots on a common \(y\)-axis scale (so you can gauge relative importance).

We can also plot the bivariate smooth of x and y using vis.gam():

vis.gam(dsm_nb_xy_ms, view=c("x","y"), plot.type="contour", too.far=0.1, 
        main="s(x,y) (link scale)", asp=1)
Fitted surface with all environmental covariates, and neg-binomial response distribution.

Fitted surface with all environmental covariates, and neg-binomial response distribution.

Compare this plot to the equivalent plot generated in the previous exercise when only x and y were included in the model.


Comparison to previous s(x,y) model

Refitting the model from the previous exercise:

dsm_nb_xy <- dsm(count~s(x, y, k=25),
                 ddf.obj=df_hn, segment.data = segs, 
                 observation.data=obs,
                 family=nb())

Select terms

As was covered in the lectures, we can select terms by (approximate) \(p\)-values and by looking for terms that have EDFs significantly less than 1 (those which have been shrunk).

Remove the terms that are non-significant at this level and re-run the above checks, summaries and plots to see what happens. It’s helpful to make notes for yourself as you go.


Removing terms in the shrunk model

First let’s remove terms in dsm_nb_xy_ms via \(p\)-value first. I’ll insist that the \(p\)-values be significant at the 0.05 level (corresponding to looking for one or more * in the summary table):

summary(dsm_nb_xy_ms)
## 
## Family: Negative Binomial(0.114) 
## Link function: log 
## 
## Formula:
## count ~ s(x, y, bs = "ts") + s(Depth, bs = "ts") + s(DistToCAS, 
##     bs = "ts") + s(SST, bs = "ts") + s(EKE, bs = "ts") + s(NPP, 
##     bs = "ts") + offset(off.set)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -20.7732     0.2295   -90.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                    edf Ref.df Chi.sq  p-value    
## s(x,y)       1.8636924     29 19.141 3.05e-05 ***
## s(Depth)     3.4176460      9 46.263  < 2e-16 ***
## s(DistToCAS) 0.0000801      9  0.000   0.9067    
## s(SST)       0.0002076      9  0.000   0.5403    
## s(EKE)       0.8563344      9  5.172   0.0134 *  
## s(NPP)       0.0001018      9  0.000   0.7822    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0947   Deviance explained = 39.3%
## -REML = 382.76  Scale est. = 1         n = 949

First I’ll remove DistToCAS as a covariate that has a non-significant \(p\)-value:

dsm_nb_xy_ms_p1_2 <- dsm(count~s(x,y, bs="ts") +
                             s(Depth, bs="ts") +
                             #s(DistToCAS, bs="ts") +
                             s(SST, bs="ts") +
                             s(EKE, bs="ts") +
                             s(NPP, bs="ts"),
                      df_hn, segs, obs,
                      family=nb())
summary(dsm_nb_xy_ms_p1_2)
## 
## Family: Negative Binomial(0.114) 
## Link function: log 
## 
## Formula:
## count ~ s(x, y, bs = "ts") + s(Depth, bs = "ts") + s(SST, bs = "ts") + 
##     s(EKE, bs = "ts") + s(NPP, bs = "ts") + offset(off.set)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -20.7732     0.2295   -90.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                edf Ref.df Chi.sq  p-value    
## s(x,y)   1.8636872     29 19.141 3.03e-05 ***
## s(Depth) 3.4176347      9 46.261  < 2e-16 ***
## s(SST)   0.0008110      9  0.000   0.5402    
## s(EKE)   0.8563280      9  5.172   0.0134 *  
## s(NPP)   0.0000292      9  0.000   0.7823    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0947   Deviance explained = 39.3%
## -REML = 382.76  Scale est. = 1         n = 949

Now removing SST:

dsm_nb_xy_ms_p1_3 <- dsm(count~s(x,y, bs="ts") +
                             s(Depth, bs="ts") +
                             #s(DistToCAS, bs="ts") +
                             #s(SST, bs="ts") +
                             s(EKE, bs="ts") +
                             s(NPP, bs="ts"),
                      df_hn, segs, obs,
                      family=nb())
summary(dsm_nb_xy_ms_p1_3)
## 
## Family: Negative Binomial(0.114) 
## Link function: log 
## 
## Formula:
## count ~ s(x, y, bs = "ts") + s(Depth, bs = "ts") + s(EKE, bs = "ts") + 
##     s(NPP, bs = "ts") + offset(off.set)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -20.7732     0.2295   -90.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                edf Ref.df Chi.sq  p-value    
## s(x,y)   1.8636956     29 19.140 3.05e-05 ***
## s(Depth) 3.4176478      9 46.263  < 2e-16 ***
## s(EKE)   0.8563358      9  5.171   0.0134 *  
## s(NPP)   0.0002895      9  0.000   0.7822    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0947   Deviance explained = 39.3%
## -REML = 382.76  Scale est. = 1         n = 949

Finally NPP:

dsm_nb_xy_ms_p1_4 <- dsm(count~s(x,y, bs="ts") +
                             s(Depth, bs="ts") +
                             #s(DistToCAS, bs="ts") +
                             #s(SST, bs="ts") +
                             s(EKE, bs="ts"),# +
                             #s(NPP, bs="ts"),
                      df_hn, segs, obs,
                      family=nb())
summary(dsm_nb_xy_ms_p1_4)
## 
## Family: Negative Binomial(0.114) 
## Link function: log 
## 
## Formula:
## count ~ s(x, y, bs = "ts") + s(Depth, bs = "ts") + s(EKE, bs = "ts") + 
##     offset(off.set)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -20.7732     0.2295   -90.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##             edf Ref.df Chi.sq  p-value    
## s(x,y)   1.8637     29 19.145 3.06e-05 ***
## s(Depth) 3.4176      9 46.266  < 2e-16 ***
## s(EKE)   0.8563      9  5.171   0.0134 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0947   Deviance explained = 39.3%
## -REML = 382.76  Scale est. = 1         n = 949

Now we’re done! Note that the EDFs of the other terms don’t change as we remove the terms here since we’re removing terms that had almost zero EDF, so had very little influence on the model. We’ll see below if we remove a more wiggly smooth to begin with, other smooth’s EDFs (and shapes) will change when we refit.

We won’t see much difference if we removed the terms by EDF sequentially rather than using \(p\)-values. In that case we would remove NPP before SST (looking back to the output of summary(dsm_nb_xy_ms) above. Trying that, we see again that we should remove SST next and end up with the same model.

dsm_nb_xy_ms_p1_sstvar <- dsm(count~s(x,y, bs="ts") +
                              s(Depth, bs="ts") +
                              #s(DistToCAS, bs="ts") +
                              s(SST, bs="ts") +
                              s(EKE, bs="ts"),# +
                              #s(NPP, bs="ts"),
                      df_hn, segs, obs,
                      family=nb())
summary(dsm_nb_xy_ms_p1_sstvar)
## 
## Family: Negative Binomial(0.114) 
## Link function: log 
## 
## Formula:
## count ~ s(x, y, bs = "ts") + s(Depth, bs = "ts") + s(SST, bs = "ts") + 
##     s(EKE, bs = "ts") + offset(off.set)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -20.7732     0.2295   -90.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                edf Ref.df Chi.sq  p-value    
## s(x,y)   1.864e+00     29 19.145 3.05e-05 ***
## s(Depth) 3.418e+00      9 46.266  < 2e-16 ***
## s(SST)   3.518e-05      9  0.000   0.5403    
## s(EKE)   8.563e-01      9  5.172   0.0134 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0947   Deviance explained = 39.3%
## -REML = 382.76  Scale est. = 1         n = 949

Decide on a significance level that you’ll use to discard terms in the model. It’s easiest to either comment out the terms that are to be removed (using #) or by copying the code chunk above and pasting it below.

Having removed one smooth and reviewed your model, you may decide you wish to remove another. Repeat the process of removing a term and looking at plots and diagnostics again.

Try doing the same thing but using \(p\)-values. Are the resulting models different? Why?

Compare these results to the diagram in the lecture notes. Do your results differ?


Path dependency example

Let’s instead start with a model without s(x,y) (these models are sometimes referred to as “habitat models”)

dsm_nb_noxy_ms_p1_1 <- dsm(count~s(Depth, bs="ts") +
                             s(DistToCAS, bs="ts") +
                             s(SST, bs="ts") +
                             s(EKE, bs="ts") +
                             s(NPP, bs="ts"),
                      df_hn, segs, obs,
                      family=nb())
summary(dsm_nb_noxy_ms_p1_1)
## 
## Family: Negative Binomial(0.115) 
## Link function: log 
## 
## Formula:
## count ~ s(Depth, bs = "ts") + s(DistToCAS, bs = "ts") + s(SST, 
##     bs = "ts") + s(EKE, bs = "ts") + s(NPP, bs = "ts") + offset(off.set)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -20.7748     0.2281  -91.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                    edf Ref.df Chi.sq  p-value    
## s(Depth)     3.415e+00      9 37.049  < 2e-16 ***
## s(DistToCAS) 3.939e-04      9  0.000   0.5289    
## s(SST)       6.483e-05      9  0.000   0.7359    
## s(EKE)       8.659e-01      9  5.534   0.0106 *  
## s(NPP)       2.814e+00      9 26.034 1.76e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0965   Deviance explained = 40.7%
## -REML = 383.05  Scale est. = 1         n = 949

We can see in contrast to the models with s(x,y) in them we have NPP significant and with a much larger EDF. First I’ll remove DistToCAS as a covariate that has a non-significant \(p\)-value:

dsm_nb_noxy_ms_p1_2 <- dsm(count~s(Depth, bs="ts") +
                             #s(DistToCAS, bs="ts") +
                             s(SST, bs="ts") +
                             s(EKE, bs="ts") +
                             s(NPP, bs="ts"),
                      df_hn, segs, obs,
                      family=nb())
summary(dsm_nb_noxy_ms_p1_2)
## 
## Family: Negative Binomial(0.115) 
## Link function: log 
## 
## Formula:
## count ~ s(Depth, bs = "ts") + s(SST, bs = "ts") + s(EKE, bs = "ts") + 
##     s(NPP, bs = "ts") + offset(off.set)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -20.7748     0.2281  -91.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                edf Ref.df Chi.sq  p-value    
## s(Depth) 3.4147153      9 37.051  < 2e-16 ***
## s(SST)   0.0002467      9  0.000   0.7358    
## s(EKE)   0.8658841      9  5.534   0.0106 *  
## s(NPP)   2.8143394      9 26.038 1.74e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0965   Deviance explained = 40.7%
## -REML = 383.05  Scale est. = 1         n = 949

Now removing SST:

dsm_nb_noxy_ms_p1_3 <- dsm(count~s(Depth, bs="ts") +
                             #s(DistToCAS, bs="ts") +
                             #s(SST, bs="ts") +
                             s(EKE, bs="ts") +
                             s(NPP, bs="ts"),
                      df_hn, segs, obs,
                      family=nb())
summary(dsm_nb_noxy_ms_p1_3)
## 
## Family: Negative Binomial(0.115) 
## Link function: log 
## 
## Formula:
## count ~ s(Depth, bs = "ts") + s(EKE, bs = "ts") + s(NPP, bs = "ts") + 
##     offset(off.set)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -20.7748     0.2281  -91.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##             edf Ref.df Chi.sq  p-value    
## s(Depth) 3.4147      9 37.051  < 2e-16 ***
## s(EKE)   0.8659      9  5.534   0.0106 *  
## s(NPP)   2.8143      9 26.043 1.76e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0965   Deviance explained = 40.7%
## -REML = 383.04  Scale est. = 1         n = 949

Now we’re done! As we can see we have a rather different model. Let’s keep hold of this for when we look into predictions in the final practical.

As briefly mentioned in the lectures, we know that all of these covariates are related to each other in a non-linear way (so using a correlation coefficient-based cutoff may not reveal this issue).

We can look at the concurvity: the degree to which one term in the model can be explained by another (note we’re talking about the modelled smooths not the raw data here). As an example, we can easily see from the maps above that the covariates we’re using vary smoothly in space, so we could model them as a response with space (s(x,y)) as a predictor.

The vis.concurvity function in the dsm package lets us look at this by plotting a measure of concurvity between terms (on a scale from 0 to 1):

vis.concurvity(dsm_nb_xy_ms)

Darker reads indicate more concurvity, lighter colours less. We read the plot as follows: for a term on the \(x\)-axis, how much can we use that to explain each of the terms on the \(y\)-axis. So, for example s(x,y) can be used to explain almost all of the other covariate smooths very well, aside from s(EKE) (looking up the second column, dark reds). Where as each of the terms is fairly bad at explaining s(x,y) (first second row, light oranges). The para terms are for any “parametric” (i.e., non-smooth) terms in the model (if we included a factor or linear term for some reason).

We can also see from this plot that NPP and SST are really highly concurve, so we might want to be weary of these. One strategy in this case would be to include only one of them in our “base” models and compare the resulting final models after term selection.

Let’s just try that quickly here (I’ve omitted all of the output here, but commented the ordering of the removal). I’m using the same \(p\)-value rules as above.

dsm_nb_xy_ms_nosst <- dsm(count~s(x,y, bs="ts") +
                              s(Depth, bs="ts") +
                              #s(DistToCAS, bs="ts") + # 1
                              s(EKE, bs="ts"),# +
                              #s(NPP, bs="ts"), # 2
                      df_hn, segs, obs,
                      family=nb())
summary(dsm_nb_xy_ms_nosst)
## 
## Family: Negative Binomial(0.114) 
## Link function: log 
## 
## Formula:
## count ~ s(x, y, bs = "ts") + s(Depth, bs = "ts") + s(EKE, bs = "ts") + 
##     offset(off.set)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -20.7732     0.2295   -90.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##             edf Ref.df Chi.sq  p-value    
## s(x,y)   1.8637     29 19.145 3.06e-05 ***
## s(Depth) 3.4176      9 46.266  < 2e-16 ***
## s(EKE)   0.8563      9  5.171   0.0134 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0947   Deviance explained = 39.3%
## -REML = 382.76  Scale est. = 1         n = 949

Now doing the same removing NPP from the outset:

dsm_nb_xy_ms_nonpp <- dsm(count~s(x,y, bs="ts") +
                              s(Depth, bs="ts") +
                              #s(DistToCAS, bs="ts") + # 1
                              s(EKE, bs="ts"), # + 
                              #s(SST, bs="ts"), # 2
                      df_hn, segs, obs,
                      family=nb())
summary(dsm_nb_xy_ms_nonpp)
## 
## Family: Negative Binomial(0.114) 
## Link function: log 
## 
## Formula:
## count ~ s(x, y, bs = "ts") + s(Depth, bs = "ts") + s(EKE, bs = "ts") + 
##     offset(off.set)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -20.7732     0.2295   -90.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##             edf Ref.df Chi.sq  p-value    
## s(x,y)   1.8637     29 19.145 3.06e-05 ***
## s(Depth) 3.4176      9 46.266  < 2e-16 ***
## s(EKE)   0.8563      9  5.171   0.0134 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0947   Deviance explained = 39.3%
## -REML = 382.76  Scale est. = 1         n = 949

Okay, so no difference, but worth checking! This is probably more of an issue if you have terms that are more wiggly but are not significant in the model.


Selecting the response distribution

We can see how well a response distribution performs by comparing quantile-quantile plots (q-q plots). The qq.gam function can create these plots for you.

qq.gam(dsm_nb_xy_ms, asp=1, rep=100)

The rep argument gives us the grey “envelope” that allows us to determine how far away the points are from the line.

Try this out with your own models, comparing the results between two different response distributions (remember that you can change the response distribution using family= in the dsm() and our two usual options are tw() and nb()).


Comparing response distributions

First let’s fit a model using tw() as the response, I’ll again condense the term selection and comment the ordering of removal, using the same \(p\)-value rule as above.

dsm_tw_xy_ms <- dsm(count~s(x,y, bs="ts") +
                          s(Depth, bs="ts") +
                          #s(DistToCAS, bs="ts") + # 1
                          #s(SST, bs="ts") + # 2
                          s(EKE, bs="ts"),# +
                          #s(NPP, bs="ts"), # 3
                    df_hn, segs, obs,
                    family=tw())
summary(dsm_tw_xy_ms)
## 
## Family: Tweedie(p=1.279) 
## Link function: log 
## 
## Formula:
## count ~ s(x, y, bs = "ts") + s(Depth, bs = "ts") + s(EKE, bs = "ts") + 
##     offset(off.set)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -20.673      0.234  -88.33   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##             edf Ref.df     F  p-value    
## s(x,y)   1.8969     29 0.707 1.84e-05 ***
## s(Depth) 3.6949      9 5.024  < 2e-16 ***
## s(EKE)   0.8106      9 0.470   0.0216 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.105   Deviance explained = 34.9%
## -REML = 385.09  Scale est. = 4.5733    n = 949

Okay, so we’ve ended up with a model with the same smooths included. Now let’s compare quantile-quantile plots, side-by-side:

par(mfrow=c(1,2))

# set the random seed for this, see below for why
set.seed(1233)

qq.gam(dsm_tw_xy_ms, asp=1, rep=200, main="Tweedie")
qq.gam(dsm_nb_xy_ms, asp=1, rep=200, main="Negative binomial")

Note that each time you run this code you get different grey bands, as the process that generates the bands uses random sampling. For this reason I’m setting the seed so we are looking at the same picture. I think it’s fine to do this for e.g., publication provided, of course, you don’t fish through multiple seeds looking for an interesting plot!

I think from these I’m inclined to say I prefer the negative binomial, but only slightly. We’ll keep both models for comparison later.

Note I’m setting asp=1 for these plots to ensure that the aspect ratio is set. This makes the plots much easier to read.


Comparing models by AIC

As with the detection functions in practical 1, here is a quick function to generate model results tables with response distribution, smooth terms list and AIC:

# similar to yesterday, but let's also add
# in the edfs
summarize_dsm2 <- function(model){

  summ <- summary(model)

  
  dat <- data.frame(response = model$family$family,
                    AIC      = AIC(model))
  
  # take the smooth names
  smooths <- rownames(summ$s.table)
  # remove their ending bracket, add a comma
  smooths <- sub(")", ",", smooths)
  # paste in their edfs, rounded
  smooths <- paste(smooths, signif(summ$s.table[,1], 3))
  # close the bracket again
  smooths <- paste(smooths, ")", sep="")
  # paste them all together in a list into dat
  dat$terms <- paste(smooths, collapse=", ")
  
  return(dat)
}

We can make a list of the models and pass the list to the above function.


Comparing by AIC

I’ve modified the function to be a bit more fancy here. I’ll also load the models from our simple DSM fitting (the previous practical) for comparison.

load("dsms-xy.RData")
# add your models to this list!
model_list <- list(dsm_nb_xy_ms_p1_4, dsm_tw_xy_ms, dsm_nb_noxy_ms_p1_3, dsm_nb_xy, dsm_tw_xy)
# use plyr to go from list to data.frame via summarize_dsm
library(plyr)
summary_table <- ldply(model_list, summarize_dsm2)
# make the row names whatever you like
row.names(summary_table) <- c("`dsm_nb_xy_ms_p1_4` (p-value select))",
                              "`dsm_tw_xy_ms` (p-value model select)",
                              "`dsm_nb_noxy_ms_p1_3` (no xy)",
                              "`dsm_nb_xy` (xy only)",
                              "`dsm_tw_xy` (xy only)")
# sort that table by AIC
summary_table <- summary_table[order(summary_table$AIC),]
# put columns in a nicer order with AIC last
summary_table <- summary_table[, c("terms", "response", "AIC")]
# print it in a nice format
kable(summary_table, 
      caption = "Model selection table")
Model selection table
terms response AIC
dsm_nb_noxy_ms_p1_3 (no xy) s(Depth, 3.41), s(EKE, 0.866), s(NPP, 2.81) Negative Binomial(0.115) 751.1392
dsm_nb_xy_ms_p1_4 (p-value select)) s(x,y, 1.86), s(Depth, 3.42), s(EKE, 0.856) Negative Binomial(0.114) 754.0312
dsm_nb_xy (xy only) s(x,y, 13.9) Negative Binomial(0.093) 785.5534
dsm_tw_xy_ms (p-value model select) s(x,y, 1.9), s(Depth, 3.69), s(EKE, 0.811) Tweedie(p=1.279) 1225.1490
dsm_tw_xy (xy only) s(x,y, 14.5) Tweedie(p=1.292) 1249.1030

Again worth noting here that we can’t compare between the distributions using AIC this table only within distributions.


Extra credit: estimated abundance as a response

So far we have only looked at models with count as the response. Try using a detection function with observation-level covariates and use abundance.est, instead of count, as the response in the chunk below:


Fitting estimated abundance models

I’ll use the same procedure outlined above, using the df_hr_ss_size model that includes sea state and size in a hazard-rate model. I’ll do this for both Tweedie and negative binomial responses. Again I’ll condense the model fitting code and commenting as I go

First with negative binomial:

dsm_nb_xy_ms_ae <- dsm(abundance.est~s(x,y, bs="ts") +
                          s(Depth, bs="ts") +
                          #s(DistToCAS, bs="ts") + # 3
                          #s(SST, bs="ts") + # 1
                          s(EKE, bs="ts"),# +
                          #s(NPP, bs="ts"), # 2
                    df_hr_ss_size, segs, obs,
                    family=nb())
summary(dsm_nb_xy_ms_ae)
## 
## Family: Negative Binomial(0.047) 
## Link function: log 
## 
## Formula:
## abundance.est ~ s(x, y, bs = "ts") + s(Depth, bs = "ts") + s(EKE, 
##     bs = "ts") + offset(off.set)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -20.334      0.207  -98.21   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##             edf Ref.df Chi.sq  p-value    
## s(x,y)   1.3351     29 13.729 0.000151 ***
## s(Depth) 3.5027      9 54.624  < 2e-16 ***
## s(EKE)   0.7718      9  3.157 0.041693 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0669   Deviance explained = 33.1%
## -REML = 505.03  Scale est. = 1         n = 949

Note the different path that this model takes!

Then with Tweedie:

dsm_tw_xy_ms_ae <- dsm(abundance.est~s(x,y, bs="ts") +
                          s(Depth, bs="ts") +
                          #s(DistToCAS, bs="ts") + # 1
                          #s(SST, bs="ts") + # 2
                          s(EKE, bs="ts"),# +
                          #s(NPP, bs="ts"), # 3
                    df_hr_ss_size, segs, obs,
                    family=tw())
summary(dsm_tw_xy_ms_ae)
## 
## Family: Tweedie(p=1.228) 
## Link function: log 
## 
## Formula:
## abundance.est ~ s(x, y, bs = "ts") + s(Depth, bs = "ts") + s(EKE, 
##     bs = "ts") + offset(off.set)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -20.2653     0.2389  -84.82   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##             edf Ref.df     F  p-value    
## s(x,y)   1.9003     29 0.705 1.96e-05 ***
## s(Depth) 3.7719      9 4.832  < 2e-16 ***
## s(EKE)   0.8258      9 0.509   0.0178 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.118   Deviance explained = 34.9%
## -REML =  455.5  Scale est. = 8.6628    n = 949

Comparing quantile-quantile plots…

par(mfrow=c(1,2))

# set the random seed for this, see below for why
set.seed(1233)

qq.gam(dsm_tw_xy_ms_ae, asp=1, rep=200, main="Tweedie")
qq.gam(dsm_nb_xy_ms_ae, asp=1, rep=200, main="Negative binomial")

As above, there’s not much in it (maybe less?). I might slightly prefer negative binomial since the points look a bit closer (again comparing the axis scales). We’ll save both anyway for comparison.


Saving models

Now save the models that you’d like to use to check later: I recommend saving as many models as you can so you can compare the results later.

# add your models here
save(dsm_nb_xy_ms_p1_4, dsm_tw_xy_ms, dsm_nb_noxy_ms_p1_3,
     dsm_nb_xy, dsm_tw_xy, dsm_nb_xy_ms_ae, 
     dsm_tw_xy_ms_ae,
     file="dsms.RData")