/* $NetBSD: uipc_domain.c,v 1.112 2024/12/07 02:31:14 riastradh Exp $ */ /* * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_domain.c 8.3 (Berkeley) 2/14/95 */ #include __KERNEL_RCSID(0, "$NetBSD: uipc_domain.c,v 1.112 2024/12/07 02:31:14 riastradh Exp $"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DECLARE(M_SOCKADDR); MALLOC_DEFINE(M_SOCKADDR, "sockaddr", "socket endpoints"); void pffasttimo(void *); void pfslowtimo(void *); struct domainhead domains = STAILQ_HEAD_INITIALIZER(domains); static struct domain *domain_array[AF_MAX]; callout_t pffasttimo_ch, pfslowtimo_ch; /* * Current time values for fast and slow timeouts. We can use u_int * relatively safely. The fast timer will roll over in 27 years and * the slow timer in 68 years. */ u_int pfslowtimo_now; u_int pffasttimo_now; static struct sysctllog *domain_sysctllog; static void sysctl_net_setup(void); /* ensure successful linkage even without any domains in link sets */ static struct domain domain_dummy; __link_set_add_rodata(domains,domain_dummy); static void domain_init_timers(void) { callout_init(&pffasttimo_ch, CALLOUT_MPSAFE); callout_init(&pfslowtimo_ch, CALLOUT_MPSAFE); callout_reset(&pffasttimo_ch, 1, pffasttimo, NULL); callout_reset(&pfslowtimo_ch, 1, pfslowtimo, NULL); } void domaininit(bool attach) { __link_set_decl(domains, struct domain); struct domain * const * dpp; struct domain *rt_domain = NULL; sysctl_net_setup(); /* * Add all of the domains. Make sure the PF_ROUTE * domain is added last. */ if (attach) { __link_set_foreach(dpp, domains) { if (*dpp == &domain_dummy) continue; if ((*dpp)->dom_family == PF_ROUTE) rt_domain = *dpp; else domain_attach(*dpp); } if (rt_domain) domain_attach(rt_domain); domain_init_timers(); } } /* * Must be called only if domaininit has been called with false and * after all domains have been attached. */ void domaininit_post(void) { domain_init_timers(); } void domain_attach(struct domain *dp) { const struct protosw *pr; STAILQ_INSERT_TAIL(&domains, dp, dom_link); if (dp->dom_family < __arraycount(domain_array)) domain_array[dp->dom_family] = dp; if (dp->dom_init) (*dp->dom_init)(); #ifdef MBUFTRACE if (dp->dom_mowner.mo_name[0] == '\0') { strncpy(dp->dom_mowner.mo_name, dp->dom_name, sizeof(dp->dom_mowner.mo_name)); MOWNER_ATTACH(&dp->dom_mowner); } #endif for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { if (pr->pr_init) (*pr->pr_init)(); } if (max_linkhdr < 16) /* XXX */ max_linkhdr = 16; max_hdr = max_linkhdr + max_protohdr; max_datalen = MHLEN - max_hdr; } struct domain * pffinddomain(int family) { struct domain *dp; if (family < __arraycount(domain_array) && domain_array[family] != NULL) return domain_array[family]; DOMAIN_FOREACH(dp) if (dp->dom_family == family) return dp; return NULL; } const struct protosw * pffindtype(int family, int type) { struct domain *dp; const struct protosw *pr; dp = pffinddomain(family); if (dp == NULL) return NULL; for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_type && pr->pr_type == type) return pr; return NULL; } const struct protosw * pffindproto(int family, int protocol, int type) { struct domain *dp; const struct protosw *pr; const struct protosw *maybe = NULL; if (family == 0) return NULL; dp = pffinddomain(family); if (dp == NULL) return NULL; for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { if ((pr->pr_protocol == protocol) && (pr->pr_type == type)) return pr; if (type == SOCK_RAW && pr->pr_type == SOCK_RAW && pr->pr_protocol == 0 && maybe == NULL) maybe = pr; } return maybe; } void * sockaddr_addr(struct sockaddr *sa, socklen_t *slenp) { const struct domain *dom; if ((dom = pffinddomain(sa->sa_family)) == NULL || dom->dom_sockaddr_addr == NULL) return NULL; return (*dom->dom_sockaddr_addr)(sa, slenp); } const void * sockaddr_const_addr(const struct sockaddr *sa, socklen_t *slenp) { const struct domain *dom; if ((dom = pffinddomain(sa->sa_family)) == NULL || dom->dom_sockaddr_const_addr == NULL) return NULL; return (*dom->dom_sockaddr_const_addr)(sa, slenp); } const struct sockaddr * sockaddr_any_by_family(sa_family_t family) { const struct domain *dom; if ((dom = pffinddomain(family)) == NULL) return NULL; return dom->dom_sa_any; } const struct sockaddr * sockaddr_any(const struct sockaddr *sa) { return sockaddr_any_by_family(sa->sa_family); } const void * sockaddr_anyaddr(const struct sockaddr *sa, socklen_t *slenp) { const struct sockaddr *any; if ((any = sockaddr_any(sa)) == NULL) return NULL; return sockaddr_const_addr(any, slenp); } socklen_t sockaddr_getsize_by_family(sa_family_t af) { switch (af) { case AF_INET: return sizeof(struct sockaddr_in); case AF_INET6: return sizeof(struct sockaddr_in6); case AF_UNIX: return sizeof(struct sockaddr_un); case AF_LINK: return sizeof(struct sockaddr_dl); case AF_APPLETALK: return sizeof(struct sockaddr_at); default: #ifdef DIAGNOSTIC printf("%s: (%s:%u:%u) Unhandled address family=%hhu\n", __func__, curlwp->l_proc->p_comm, curlwp->l_proc->p_pid, curlwp->l_lid, af); #endif return 0; } } #ifdef DIAGNOSTIC static void sockaddr_checklen(const struct sockaddr *sa) { // Can't tell how much was allocated, if it was allocated. if (sa->sa_family == AF_LINK) return; socklen_t len = sockaddr_getsize_by_family(sa->sa_family); if (len == 0 || len == sa->sa_len) return; char buf[512]; sockaddr_format(sa, buf, sizeof(buf)); printf("%s: %p bad len af=%hhu socklen=%hhu len=%u [%s]\n", __func__, sa, sa->sa_family, sa->sa_len, (unsigned)len, buf); } #else #define sockaddr_checklen(sa) ((void)0) #endif struct sockaddr * sockaddr_alloc(sa_family_t af, socklen_t socklen, int flags) { struct sockaddr *sa; socklen_t reallen = MAX(socklen, offsetof(struct sockaddr, sa_data[0])); #ifdef DIAGNOSTIC /* * sockaddr_checklen passes sa to sockaddr_format which * requires it to be fully initialized. * * XXX This should be factored better. */ flags |= M_ZERO; #endif if ((sa = malloc(reallen, M_SOCKADDR, flags)) == NULL) return NULL; sa->sa_family = af; sa->sa_len = reallen; sockaddr_checklen(sa); return sa; } struct sockaddr * sockaddr_copy(struct sockaddr *dst, socklen_t socklen, const struct sockaddr *src) { if (__predict_false(socklen < src->sa_len)) { panic("%s: source too long, %d < %d bytes", __func__, socklen, src->sa_len); } sockaddr_checklen(src); return memcpy(dst, src, src->sa_len); } struct sockaddr * sockaddr_externalize(struct sockaddr *dst, socklen_t socklen, const struct sockaddr *src) { struct domain *dom; dom = pffinddomain(src->sa_family); if (dom != NULL && dom->dom_sockaddr_externalize != NULL) return (*dom->dom_sockaddr_externalize)(dst, socklen, src); return sockaddr_copy(dst, socklen, src); } int sockaddr_cmp(const struct sockaddr *sa1, const struct sockaddr *sa2) { int len, rc; struct domain *dom; if (sa1->sa_family != sa2->sa_family) return sa1->sa_family - sa2->sa_family; dom = pffinddomain(sa1->sa_family); if (dom != NULL && dom->dom_sockaddr_cmp != NULL) return (*dom->dom_sockaddr_cmp)(sa1, sa2); len = MIN(sa1->sa_len, sa2->sa_len); if (dom == NULL || dom->dom_sa_cmplen == 0) { if ((rc = memcmp(sa1, sa2, len)) != 0) return rc; return sa1->sa_len - sa2->sa_len; } if ((rc = memcmp((const char *)sa1 + dom->dom_sa_cmpofs, (const char *)sa2 + dom->dom_sa_cmpofs, MIN(dom->dom_sa_cmplen, len - MIN(len, dom->dom_sa_cmpofs)))) != 0) return rc; return MIN(dom->dom_sa_cmplen + dom->dom_sa_cmpofs, sa1->sa_len) - MIN(dom->dom_sa_cmplen + dom->dom_sa_cmpofs, sa2->sa_len); } struct sockaddr * sockaddr_dup(const struct sockaddr *src, int flags) { struct sockaddr *dst; if ((dst = sockaddr_alloc(src->sa_family, src->sa_len, flags)) == NULL) return NULL; return sockaddr_copy(dst, dst->sa_len, src); } void sockaddr_free(struct sockaddr *sa) { free(sa, M_SOCKADDR); } static int sun_print(char *buf, size_t len, const void *v) { const struct sockaddr_un *sun = v; size_t plen; KASSERT(sun->sun_len >= offsetof(struct sockaddr_un, sun_path[0])); plen = sun->sun_len - offsetof(struct sockaddr_un, sun_path[0]); len = MIN(len, plen); return snprintf(buf, len, "%s", sun->sun_path); } int sockaddr_format(const struct sockaddr *sa, char *buf, size_t len) { size_t plen = 0; if (sa == NULL) return strlcpy(buf, "(null)", len); switch (sa->sa_family) { case AF_LOCAL: plen = strlcpy(buf, "unix: ", len); break; case AF_INET: plen = strlcpy(buf, "inet: ", len); break; case AF_INET6: plen = strlcpy(buf, "inet6: ", len); break; case AF_LINK: plen = strlcpy(buf, "link: ", len); break; case AF_APPLETALK: plen = strlcpy(buf, "atalk: ", len); break; default: return snprintf(buf, len, "(unknown socket family %d)", (int)sa->sa_family); } buf += plen; if (plen > len) len = 0; else len -= plen; switch (sa->sa_family) { case AF_LOCAL: return sun_print(buf, len, sa); case AF_INET: return sin_print(buf, len, sa); case AF_INET6: return sin6_print(buf, len, sa); case AF_LINK: return sdl_print(buf, len, sa); case AF_APPLETALK: return sat_print(buf, len, sa); default: panic("bad family %hhu", sa->sa_family); } } /* * sysctl helper to stuff PF_LOCAL pcbs into sysctl structures */ static void sysctl_dounpcb(struct kinfo_pcb *pcb, const struct socket *so) { const bool allowaddr = get_expose_address(curproc); struct unpcb *unp = sotounpcb(so); struct sockaddr_un *un = unp->unp_addr; memset(pcb, 0, sizeof(*pcb)); pcb->ki_family = so->so_proto->pr_domain->dom_family; pcb->ki_type = so->so_proto->pr_type; pcb->ki_protocol = so->so_proto->pr_protocol; pcb->ki_pflags = unp->unp_flags; COND_SET_VALUE(pcb->ki_pcbaddr, PTRTOUINT64(unp), allowaddr); /* pcb->ki_ppcbaddr = unp has no ppcb... */ COND_SET_VALUE(pcb->ki_sockaddr, PTRTOUINT64(so), allowaddr); pcb->ki_sostate = so->so_state; /* pcb->ki_prstate = unp has no state... */ pcb->ki_rcvq = so->so_rcv.sb_cc; pcb->ki_sndq = so->so_snd.sb_cc; un = (struct sockaddr_un *)pcb->ki_spad; /* * local domain sockets may bind without having a local * endpoint. bleah! */ if (unp->unp_addr != NULL) { /* * We've added one to sun_len when allocating to * hold terminating NUL which we want here. See * makeun(). */ memcpy(un, unp->unp_addr, uimin(sizeof(pcb->ki_spad), unp->unp_addr->sun_len + 1)); } else { un->sun_len = offsetof(struct sockaddr_un, sun_path); un->sun_family = pcb->ki_family; } if (unp->unp_conn != NULL) { un = (struct sockaddr_un *)pcb->ki_dpad; if (unp->unp_conn->unp_addr != NULL) { memcpy(un, unp->unp_conn->unp_addr, uimin(sizeof(pcb->ki_dpad), unp->unp_conn->unp_addr->sun_len + 1)); } else { un->sun_len = offsetof(struct sockaddr_un, sun_path); un->sun_family = pcb->ki_family; } } pcb->ki_inode = unp->unp_ino; COND_SET_VALUE(pcb->ki_vnode, PTRTOUINT64(unp->unp_vnode), allowaddr); COND_SET_VALUE(pcb->ki_conn, PTRTOUINT64(unp->unp_conn), allowaddr); COND_SET_VALUE(pcb->ki_refs, PTRTOUINT64(unp->unp_refs), allowaddr); COND_SET_VALUE(pcb->ki_nextref, PTRTOUINT64(unp->unp_nextref), allowaddr); } static int sysctl_unpcblist(SYSCTLFN_ARGS) { struct file *fp, *np, *dfp; struct socket *so; struct kinfo_pcb pcb; char *dp; size_t len, needed, elem_size, out_size; int error, elem_count, pf, type; if (namelen == 1 && name[0] == CTL_QUERY) return sysctl_query(SYSCTLFN_CALL(rnode)); if (namelen != 4) return SET_ERROR(EINVAL); if (oldp != NULL) { len = *oldlenp; elem_size = name[2]; elem_count = name[3]; if (elem_size != sizeof(pcb)) return SET_ERROR(EINVAL); } else { len = 0; elem_size = sizeof(pcb); elem_count = INT_MAX; } error = 0; dp = oldp; out_size = elem_size; needed = 0; if (name - oname != 4) return SET_ERROR(EINVAL); pf = oname[1]; type = oname[2]; /* * allocate dummy file descriptor to make position in list. */ sysctl_unlock(); if ((dfp = fgetdummy()) == NULL) { sysctl_relock(); return SET_ERROR(ENOMEM); } /* * there's no "list" of local domain sockets, so we have * to walk the file list looking for them. :-/ */ mutex_enter(&filelist_lock); LIST_FOREACH_SAFE(fp, &filehead, f_list, np) { if (fp->f_count == 0 || fp->f_type != DTYPE_SOCKET || fp->f_socket == NULL) continue; so = fp->f_socket; if (so->so_type != type) continue; if (so->so_proto->pr_domain->dom_family != pf) continue; if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, KAUTH_REQ_NETWORK_SOCKET_CANSEE, so, NULL, NULL) != 0) continue; if (len >= elem_size && elem_count > 0) { mutex_enter(&fp->f_lock); /* * Do not add references, if the count reached 0. * Since the check above has been performed without * locking, it must be rechecked here as a concurrent * closef could have reduced it. */ if (fp->f_count == 0) { mutex_exit(&fp->f_lock); continue; } fp->f_count++; mutex_exit(&fp->f_lock); LIST_INSERT_AFTER(fp, dfp, f_list); mutex_exit(&filelist_lock); sysctl_dounpcb(&pcb, so); error = copyout(&pcb, dp, out_size); closef(fp); mutex_enter(&filelist_lock); np = LIST_NEXT(dfp, f_list); LIST_REMOVE(dfp, f_list); if (error) break; dp += elem_size; len -= elem_size; } needed += elem_size; if (elem_count > 0 && elem_count != INT_MAX) elem_count--; } mutex_exit(&filelist_lock); fputdummy(dfp); *oldlenp = needed; if (oldp == NULL) *oldlenp += PCB_SLOP * sizeof(struct kinfo_pcb); sysctl_relock(); return error; } static void sysctl_net_setup(void) { KASSERT(domain_sysctllog == NULL); sysctl_createv(&domain_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "local", SYSCTL_DESCR("PF_LOCAL related settings"), NULL, 0, NULL, 0, CTL_NET, PF_LOCAL, CTL_EOL); sysctl_createv(&domain_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "stream", SYSCTL_DESCR("SOCK_STREAM settings"), NULL, 0, NULL, 0, CTL_NET, PF_LOCAL, SOCK_STREAM, CTL_EOL); sysctl_createv(&domain_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "seqpacket", SYSCTL_DESCR("SOCK_SEQPACKET settings"), NULL, 0, NULL, 0, CTL_NET, PF_LOCAL, SOCK_SEQPACKET, CTL_EOL); sysctl_createv(&domain_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "dgram", SYSCTL_DESCR("SOCK_DGRAM settings"), NULL, 0, NULL, 0, CTL_NET, PF_LOCAL, SOCK_DGRAM, CTL_EOL); sysctl_createv(&domain_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_STRUCT, "pcblist", SYSCTL_DESCR("SOCK_STREAM protocol control block list"), sysctl_unpcblist, 0, NULL, 0, CTL_NET, PF_LOCAL, SOCK_STREAM, CTL_CREATE, CTL_EOL); sysctl_createv(&domain_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_STRUCT, "pcblist", SYSCTL_DESCR("SOCK_SEQPACKET protocol control " "block list"), sysctl_unpcblist, 0, NULL, 0, CTL_NET, PF_LOCAL, SOCK_SEQPACKET, CTL_CREATE, CTL_EOL); sysctl_createv(&domain_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_STRUCT, "pcblist", SYSCTL_DESCR("SOCK_DGRAM protocol control block list"), sysctl_unpcblist, 0, NULL, 0, CTL_NET, PF_LOCAL, SOCK_DGRAM, CTL_CREATE, CTL_EOL); } void pfctlinput(int cmd, const struct sockaddr *sa) { struct domain *dp; const struct protosw *pr; DOMAIN_FOREACH(dp) { for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { if (pr->pr_ctlinput != NULL) (*pr->pr_ctlinput)(cmd, sa, NULL); } } } void pfctlinput2(int cmd, const struct sockaddr *sa, void *ctlparam) { struct domain *dp; const struct protosw *pr; if (sa == NULL) return; DOMAIN_FOREACH(dp) { /* * the check must be made by xx_ctlinput() anyways, to * make sure we use data item pointed to by ctlparam in * correct way. the following check is made just for safety. */ if (dp->dom_family != sa->sa_family) continue; for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { if (pr->pr_ctlinput != NULL) (*pr->pr_ctlinput)(cmd, sa, ctlparam); } } } void pfslowtimo(void *arg) { struct domain *dp; const struct protosw *pr; pfslowtimo_now++; DOMAIN_FOREACH(dp) { for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_slowtimo) (*pr->pr_slowtimo)(); } callout_schedule(&pfslowtimo_ch, hz / PR_SLOWHZ); } void pffasttimo(void *arg) { struct domain *dp; const struct protosw *pr; pffasttimo_now++; DOMAIN_FOREACH(dp) { for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_fasttimo) (*pr->pr_fasttimo)(); } callout_schedule(&pffasttimo_ch, hz / PR_FASTHZ); }