\documentclass{beamer}

\usepackage[normalem]{ulem}
\usepackage{hyperref}
\usepackage{qrcode}
\usepackage{tabularx}

\newcommand{\BSD}{BSD}
\newcommand{\EuroBSDcon}{EuroBSDcon}
\newcommand{\NetBSD}{NetBSD}
\newcommand{\NetBSDcurrent}{\NetBSD-current}
\newcommand{\USB}{USB}

\title{How I learned to stop worrying and yank the \USB}
\author{Taylor R Campbell \\
  \texttt{riastradh@NetBSD.org}}
\date{\EuroBSDcon\ 2022 \\
  Vienna, Austria \\
  September 17, 2022}

\begin{document}

\frame{\titlepage}

\begin{frame}
  \frametitle{How I learned to stop worrying and yank the \USB}

  \centering

  \url{https://www.NetBSD.org/gallery/presentations/riastradh/eurobsdcon2022/opendetach.pdf}

  \vspace{\baselineskip}

  \qrcode[height=2in]{https://www.NetBSD.org/gallery/presentations/riastradh/eurobsdcon2022/opendetach.pdf}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Devices in \BSD: autoconf(9) and /dev nodes}

  \begin{itemize}
    \item autoconf(9) instances in kernel: pci0, pchb0, ppb1,
       wsdisplay0, xhci2, \dots
      \begin{itemize}
        \item Bundle of related driver state for a hardware device
        \item Organized in a tree based on hardware
        \item Discovered at boot by bus enumeration and on hotplug
           events
        \item match, attach, detach
      \end{itemize}
    \item /dev nodes (chardevs, blockdevs) for userland interface:
       /dev/uhid0, /dev/ttyU1, /dev/rsd3a, /dev/zero, \dots
      \begin{itemize}
        \item Software interface for userland (char) or file systems
           (block)
        \item State may be:
          \begin{itemize}
            \item backed by autoconf instance
            \item allocated in software: `cloning devices'
            \item stateless: /dev/zero, /dev/null, /dev/mem, \dots
          \end{itemize}
        \item Access bracketed by open and close as files
        \item open, read/write/ioctl/strategy/\dots, close
      \end{itemize}
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{autoconf example: ualea(4)}
\tiny
\begin{verbatim}
static int
ualea_match(device_t parent, cfdata_t match, void *aux)
{
        struct usbif_attach_arg *uiaa = aux;

        if (usb_lookup(ualea_devs, uiaa->uiaa_vendor, uiaa->uiaa_product))
                return UMATCH_VENDOR_PRODUCT;

        return UMATCH_NONE;
}

static void
ualea_attach(device_t parent, device_t self, void *aux)
{
        struct usbif_attach_arg *uiaa = aux;
        struct ualea_softc *sc = device_private(self);
        ...
}

static int
ualea_detach(device_t self, int flags)
{
        ...
}

CFATTACH_DECL_NEW(ualea, sizeof(struct ualea_softc),
    ualea_match, ualea_attach, ualea_detach, NULL);
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
  \frametitle{cdevsw example: ulpt(4)}
\tiny
\begin{verbatim}
static int
ulptopen(dev_t dev, int flag, int mode, struct lwp *l)
{
        struct ulpt_softc *sc = device_lookup_private(&ulpt_cd, ULPTUNIT(dev));

        if (sc == NULL)
                return ENXIO;
        ...
}

static int
ulptclose(dev_t dev, int flag, int mode, struct lwp *l)
{
        ...
}

static int
ulptread(dev_t dev, struct uio *uio, int flags)
{
        struct ulpt_softc *sc = device_lookup_private(&ulpt_cd, ULPTUNIT(dev));
        ...
}

...

const struct cdevsw ulpt_cdevsw = {
        .d_open = ulptopen,
        .d_close = ulptclose,
        .d_read = ulptread,
        .d_write = ulptwrite,
        ...
};
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\begin{verbatim}
pci0
  i915drmkms0
    intelfb0
      wsdisplay0 <- /dev/ttyE0
  xhci0
    usb0
      uhub0
        umass0
          scsibus0
            sd0 <- /dev/sd0a, /dev/sd0b, ...
        umass1
          scsibus1
            sd1 <- /dev/sd1a, /dev/sd1b, ...
    usb1
      uhub1
        uftdi0
          ucom0 <- /dev/ttyU0, /dev/dtyU0
  ...
\end{verbatim}
\end{frame}

\begin{frame}
  \begin{tabular}{ll}
    \textit{device\_t} & \textit{/dev node (amd64)} \\
    \\
    uhid$N$ & /dev/uhid$N$ (chr maj=56 min=$N$) \\
    \\
    ucom$N$ & /dev/ttyU$N$ (chr maj=66 min=$N$) \\
            & /dev/dtyU$N$
              (chr maj=66 min=$\mathtt{0x80000} \mathbin| N$) \\
    \\
    sd$N$ & /dev/sd$N$a (blk maj=4 min=$16N$) \\
          & /dev/sd$N$b (blk maj=4 min=$16N + 1$) \\
          & \quad\vdots \\
          & /dev/rsd$N$a (chr maj=13 min=$16N$) \\
          & /dev/rsd$N$b (chr maj=13 min=$16N + 1$) \\
          & \quad\vdots \\
    \\
    (cloning) & /dev/audio$N$
                (chr maj=42 min=$\mathtt{0x80} \mathbin| N$) \\
    \\
    (stateless) & /dev/null
  \end{tabular}
\end{frame}

\begin{frame}
  \frametitle{Easy timeline}

  \begin{enumerate}
    \item \texttt{foo\_attach} when device plugged in
    \item \texttt{foo\_open} when program opens /dev node
    \item \texttt{foo\_read}/\texttt{write}/\texttt{ioctl} when program
       does I/O on file
    \item \texttt{foo\_close} when program closes file
    \item \texttt{foo\_detach} when device unplugged after no longer in
       use
  \end{enumerate}
\end{frame}

\begin{frame}
  \frametitle{Easy timeline}

  \begin{enumerate}
    \item attach
    \item open
    \item read/write/ioctl
    \item close
    \item detach
  \end{enumerate}
\end{frame}

\begin{frame}
  \frametitle{\sout{Easy} Naive timeline}

  \begin{enumerate}
    \item attach
    \item open
    \item read/write/ioctl
    \item close
    \item detach
  \end{enumerate}
\end{frame}

\begin{frame}
  \frametitle{Complication: device yanked while open?}

  \begin{enumerate}
    \item attach
    \item open
    \item read/write/ioctl
    \item detach
    \item more read/write/ioctl
    \item close
  \end{enumerate}
\end{frame}

\begin{frame}
  \frametitle{Complication: no device to open?}

  \begin{enumerate}
    \item open \only<2->{$\Longrightarrow$ must fail}
    \item attach
    \item detach
    \item open \only<2->{$\Longrightarrow$ must fail}
  \end{enumerate}
\end{frame}

\begin{frame}
  \frametitle{Complication: device yanked in the middle of open?}

  \begin{enumerate}
    \item attach
    \item open called
    \item detach
    \item open returns
      \begin{itemize}
        \item<2-> success?
        \item<3-> failure?
        \item<4-> crash?
      \end{itemize}
  \end{enumerate}
\end{frame}

\begin{frame}
  \frametitle{Complication: concurrent open?}

  \begin{enumerate}
    \item attach
    \item
      \begin{minipage}[t]{0.5\linewidth}
        Thread 1
        \begin{enumerate}
          \item open
          \item read/write/ioctl
          \item close
        \end{enumerate}
      \end{minipage}%
      \begin{minipage}[t]{0.5\linewidth}
        Thread 2
        \begin{enumerate}
          \item open
            \begin{itemize}
              \item<2-> succeed? (multi-open?)
              \item<3-> fail? (exclusive only?)
              \item<4-> crash? (oops)
            \end{itemize}
        \end{enumerate}
      \end{minipage}%
    \item detach
  \end{enumerate}
\end{frame}

\begin{frame}
  \frametitle{Complication: concurrent open and close?}

  If opened multiple times, \texttt{struct cdevsw::d\_close} is called
   for \emph{last} close only, until next open.

  \begin{enumerate}
    \item attach
    \item T1: open
      \only<2->{$\Longrightarrow$ call \texttt{d\_open}}
    \item T2: open
      \only<3->{$\Longrightarrow$ call \texttt{d\_open} again}
    \item T1: close
      \only<4->{$\Longrightarrow$ no driver callback}
    \item T2: close
      \only<5->{$\Longrightarrow$ call \texttt{d\_close}}
    \item detach
  \end{enumerate}
\end{frame}

\begin{frame}
  \frametitle{Complication: open can fail}

  \begin{enumerate}
    \item attach
    \item open called
      \only<2->{$\Longrightarrow$ call \texttt{d\_open}}
    \item open fails
      \only<3->{$\Longrightarrow$ no driver callback---only on
         successful open}
    \item detach
  \end{enumerate}
\end{frame}

\begin{frame}
  \frametitle{Complication: concurrent open and close, but open fails?}

  \begin{enumerate}
    \item attach
    \item T1: open
      \only<2->{$\Longrightarrow$ call \texttt{d\_open}}
    \item T2: open called
      \only<3->{$\Longrightarrow$ call \texttt{d\_open} again}
    \item T1: close
      \only<4->{$\Longrightarrow$ no driver callback}
    \item T2: open fails
      \only<5->{$\Longrightarrow$ call \texttt{d\_close},
        despite failure in \emph{this thread}}
    \item detach
  \end{enumerate}
\end{frame}

\begin{frame}
  \frametitle{Detach}

  \begin{enumerate}
    \item Detach triggered by yanking removable device
    \item Must free resources allocated by attach
    \item<2-> But what if device is still open?
  \end{enumerate}
\end{frame}

\begin{frame}
  \frametitle{Clearing a road for repaving}

  How do you clear a road for repaving?

  \begin{enumerate}
    \item<2->
      \only<2>{Bulldoze it and lay rail for a tram line instead}
      \only<3>{\sout{Bulldoze it and lay rail for a tram line instead}}
      \only<4->{Close it off so no new cars can enter}
    \item<5-> If existing cars are parked, leave a note they need to move
    \item<6-> Wait for all the cars to leave
  \end{enumerate}

  \strut\only<7->{It is now safe to repave the road%
    \only<8->{ and put in a separated bike lane}.}
\end{frame}

\begin{frame}
  \frametitle{Freeing a resource in use}

  How do you free a resource that may be in use?

  \begin{enumerate}
    \item Close it off so no new users can start using it
    \item If existing users are sleeping indefinitely, wake them
    \item Wait for all the users to finish
  \end{enumerate}

  \strut It is now safe to free the resource.
\end{frame}

\begin{frame}
  \frametitle{Detaching an open device}

  How do you free resources of an autoconf instance with open device
   nodes using it?

  \begin{enumerate}
    \item Prevent new opens
    \item Interrupt pending I/O (read/write/ioctl)
    \item Wait for opens and I/O to finish
  \end{enumerate}

  It is now safe to free the resources.

  \pause Difficult---or impossible---to get right inside a driver.

  \pause Many drivers need this fixed.  Can we make it easy to fix
     them all?
\end{frame}

\begin{frame}[fragile]
  \frametitle{device\_t references}
\begin{verbatim}
dev_t dev;              // maj/min num of /dev node
device_t dv;            // autoconf instance ptr
struct foo_softc *sc;   // driver private state

dv = device_lookup(&foo_cd, FOOUNIT(dev));
if (dv == NULL)
        return ENXIO;
sc = device_private(dv);




\end{verbatim}
\end{frame}

\begin{frame}[fragile]
  \frametitle{device\_t references}
\begin{verbatim}
dev_t dev;              // maj/min num of /dev node
device_t dv;            // autoconf instance ptr
struct foo_softc *sc;   // driver private state

dv = device_lookup(&foo_cd, FOOUNIT(dev));
if (dv == NULL)
        return ENXIO;
sc = device_private(dv);

/* dv may be detached and sc freed at this point */


\end{verbatim}
\end{frame}

\begin{frame}[fragile]
  \frametitle{device\_t references}
\begin{verbatim}
dev_t dev;              // maj/min num of /dev node
device_t dv;            // autoconf instance ptr
struct foo_softc *sc;   // driver private state

dv = device_lookup_acquire(&foo_cd, FOOUNIT(dev));
if (dv == NULL)
        return ENXIO;
sc = device_private(dv);

/* dv cannot be detached nor sc freed here */

device_release(dv);
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
  \frametitle{device\_t references and bdevsw/cdevsw d\_open}

\begin{verbatim}
const struct cdevsw foo_cdevsw = {
        .d_open = fooopen,
        ...
        .d_cfdriver = &foo_cd,
        .d_devtounit = dev_minor_unit,
        ...
};
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
  \frametitle{device\_t references and bdevsw/cdevsw d\_open}

\begin{verbatim}
static int
fooopen(dev_t dev, int flag, int mode, struct lwp *l)
{
        device_t dv = device_lookup(&foo_cd,
            dev_minor_unit(dev));
        struct foo_softc *sc;

        if (dv == NULL)
                return ENXIO;
        sc = device_private(dv);

        /* dv and sc stable until return */
        ...
}
\end{verbatim}
\end{frame}

\begin{frame}
  \frametitle{device\_t references and bdevsw/cdevsw d\_open}

  \begin{itemize}
    \item Minimal changes needed to drivers to make device\_lookup safe
       in d\_open:

      \begingroup
        \smallskip\advance\leftskip2em\advance\rightskip2em\relax
        Add d\_cfdriver and d\_devtounit to \makebox{struct cdevsw}.
        \par\smallskip
      \endgroup
    \item<2-> Note: d\_devtounit must match!
    \item<3-> Some prefab d\_devtounit functions:
      \begin{itemize}
        \item dev\_minor\_unit
        \item disklabel\_dev\_unit
        \item tty\_unit
      \end{itemize}
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Digression: revoke(2) and tty security}

  \begin{itemize}
    \item \BSD-specific syscall: revoke(2)
    \item<2-> On boot, getty(8) opens tty and calls login(1)
    \item<3-> On successful authentication, login(1) chowns tty to login
       user
    \item<4-> After logout, getty(8) chowns tty back to root\par
      \strut\only<5->{$\Longrightarrow$ user can't open tty
        \only<7->{\emph{anew}}}
    \item<6-> getty(8) then revokes tty\par
      \strut\only<7->{$\Longrightarrow$ user's \emph{existing} opens of
         tty cease to work}
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Detaching an open device: revoke}

  \begin{itemize}
    \item Detach function must revoke open instances before freeing
      \begin{itemize}
        \item via \texttt{vdevgone} on the device major number and
           minor number range
      \end{itemize}
    \item Forces d\_close to be called
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Closing an open file in use}

  What if read, write, or ioctl is still in progress when close
   happens?

  \pause

  \vspace{\baselineskip}
  Choices of semantics:

  \begin{description}
    \item[Linux] Driver state lingers indefinitely until all pending
       I/O completes.
    \item[\BSD] I/O is interrupted and fails immediately so driver
       state can be freed synchronously.
  \end{description}

  \pause

  Focus on \BSD\ semantics here, not merits of choice.
\end{frame}

\begin{frame}
  \frametitle{Closing an open file in use}

  Driver must:

  \begin{enumerate}
    \item Prevent new I/O operations
    \item Interrupt pending I/O operations
    \item Wait for I/O to finish
  \end{enumerate}

  It is now safe to free the driver state.
\end{frame}

\begin{frame}
  \frametitle{Closing an open file in use}

  \NetBSDcurrent\ helps with this.  Two approaches:

  \begin{itemize}
    \item Legacy drivers: d\_close only.
    \item Newer drivers: d\_cancel and d\_close.
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Legacy drivers: d\_close only}

  On close or revoke, \NetBSDcurrent\ will:

  \begin{itemize}
    \item prevent new I/O operations from starting (d\_open, d\_read,
       d\_write, \dots)
      \pause
    \item call d\_close, which must interrupt pending I/O and wait for
       it to complete.
  \end{itemize}

  \pause\vspace{\baselineskip}

  Problem: Most drivers don't wait.
  \begin{itemize}
    \item Stop-gap: after d\_close returns, \NetBSDcurrent\ will wait
       for any concurrent d\_open, d\_read, d\_write, d\_ioctl, etc.,
       before revoke(2) or vdevgone(9) returns.
  \end{itemize}

  \pause\vspace{\baselineskip}

  Note: for drivers where d\_open can hang indefinitely, such as ttys,
   d\_close must be able to interrupt hanging d\_open!
\end{frame}

\begin{frame}
  \frametitle{Newer drivers: d\_cancel and d\_close}

  On close or revoke, \NetBSDcurrent\ will:

  \begin{itemize}
    \item prevent new I/O operations from starting (d\_open, d\_read,
       d\_write, \dots)
      \pause
    \item call d\_cancel, which must interrupt I/O and return promptly
      \pause
    \item wait for any concurrent d\_open, d\_read, d\_write, d\_ioctl,
       etc., to return
      \pause
    \item call d\_close, which now has \emph{exclusive access} to this
       device (chr/blk, major, minor)
  \end{itemize}

  \pause

  This way, drivers don't need custom logic to wait for pending I/O to
   drain---generic kernel logic takes care of it.

  \pause\vspace{\baselineskip}

  Note: for drivers where d\_open can hang indefinitely, such as ttys,
   d\_cancel must be able to interrupt hanging d\_open!

  New \texttt{ttycancel} function can be used for d\_cancel in most or
   all tty drivers.
\end{frame}

\begin{frame}[fragile]
\small
\begin{verbatim}
static int
uhidread(dev_t dev, struct uio *uio, int flag)
{
        struct uhid_softc *sc =
            device_lookup_private(&uhid_cd, UHIDUNIT(dev));
        ...
        mutex_enter(&sc->sc_lock);
        while (sc->sc_q.c_cc == 0) {
                ...
                if (sc->sc_closing) {
                        mutex_exit(&sc->sc_lock);
                        return EIO;
                }
                error = cv_wait_sig(&sc->sc_cv,
                    &sc->sc_lock);
                if (error)
                        break;
        }
        ...
}
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\small
\begin{verbatim}
static int
uhidcancel(dev_t dev, int flag, int mode, struct lwp *l)
{
        struct uhid_softc *sc =
            device_lookup_private(&uhid_cd, UHIDUNIT(dev));

        if (sc == NULL)
                return 0;

        /* Interrupt pending I/O, make it fail promptly. */
        mutex_enter(&sc->sc_lock);
        sc->sc_closing = true;
        cv_broadcast(&sc->sc_cv);
        mutex_exit(&sc->sc_lock);

        uhidev_stop(sc->sc_hdev);

        return 0;
}
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\begin{verbatim}
static int
uhid_detach(device_t self, int flags)
{
        struct uhid_softc *sc = device_private(self);
        int maj, mn;

        /* locate the major number */
        maj = cdevsw_lookup_major(&uhid_cdevsw);

        /* Forcibly close any open instances. */
        mn = device_unit(self);
        vdevgone(maj, mn, mn, VCHR);

        /* Safe to free resources now! */
        ...
}
\end{verbatim}
\end{frame}

\begin{frame}
  \frametitle{Interrupted open must restart}

  If d\_open sleeps, and d\_cancel or d\_close wakes it
   \pause (e.g., in a tty driver),
   \pause after wakeup, permissions checked before d\_open may have
    changed,
   \pause so d\_open \emph{must} return \texttt{ERESTART} to restart
    the system call and redo the permissions checks.
\end{frame}

\begin{frame}
  \frametitle{New driver contract: summary}

  Set d\_cfdriver and d\_devtounit to match device\_lookup use
   in d\_open; in exchange:
  \begin{itemize}
    \item detach prevents new d\_open from starting
    \item device\_lookup result in d\_open is stable
  \end{itemize}

  Set d\_cancel to interrupt pending I/O (including open) and return
   promptly; in exchange:
  \begin{itemize}
    \item d\_close has exclusive access to (chr/blk, maj, min) triple
       among concurrent devsw functions
    \item No further I/O (including d\_open) possible until d\_close
       returns
  \end{itemize}

  \pause
  (Lots of detailed edge cases handled behind the scenes in
   spec\_vnops.c---very hairy!)
\end{frame}

\begin{frame}
  \frametitle{Usage model}

  \begin{enumerate}
    \item attach
    \item while attached:
      \begin{enumerate}[(a)]
        \item d\_open on first open
          \begin{enumerate}[(i)]
            \item I/O: $(\text{d\_open} \mathbin| \text{d\_read} \mathbin|
              \text{d\_write} \mathbin| \text{d\_ioctl} \mathbin|
              \dots)^*$
            \item d\_cancel---then \NetBSD\ waits for existing I/O to
               finish
          \end{enumerate}
        \item d\_close on last close
      \end{enumerate}
    \item \texttt{vdevgone} returns in detach; no more I/O possible
  \end{enumerate}

  \vspace{2\baselineskip}

  (for drivers with d\_cancel)
\end{frame}

\begin{frame}
  \frametitle{Questions?}

  \centering\scalebox{10}{\rmfamily\large?}
\end{frame}

\end{document}
