/*-
 * Copyright (c) 2020 Mindaugas Rasiukevicius
 * Copyright (c) 2009-2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NPF packet handler.
 *
 * This is the main entry point to the NPF where packet processing happens.
 * There are some important synchronization rules:
 *
 * 1) Lookups into the connection database and configuration (ruleset,
 * tables, etc) are protected by Epoch-Based Reclamation (EBR);
 *
 * 2) The code in the critical path (protected by EBR) should generally
 * not block (that includes adaptive mutex acquisitions);
 *
 * 3) Where it will block, references should be acquired atomically,
 * while in the critical path, on the relevant objects.
 */

#ifdef _KERNEL
/*
 * NOTE(review): every #include directive inside this #ifdef is bare in
 * this copy of the file, i.e. the header names are missing.  They have
 * to be restored from the upstream source before this can compile.
 */
#include
__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.49 2020/05/30 14:16:56 rmind Exp $");
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#endif

#include "npf_impl.h"
#include "npf_conn.h"

/*
 * mbuf helpers.
 *
 * With _NPF_STANDALONE defined, m_freem() is routed through the mbufops
 * table of a variable literally named 'npf', so it may only be used where
 * such a variable is in scope, and m_clear_flag() expands to nothing.
 * Otherwise m_clear_flag() clears the given bits in the m_flags member.
 */
#if defined(_NPF_STANDALONE)
#define m_freem(m) npf->mbufops->free(m)
#define m_clear_flag(m,f)
#else
#define m_clear_flag(m,f) (m)->m_flags &= ~(f)
#endif

/*
 * Without INET6, ip6_reass_packet() is replaced by the constant ENOTSUP;
 * the macro arguments are not evaluated.
 */
#ifndef INET6
#define ip6_reass_packet(x, y) ENOTSUP
#endif

/*
 * npf_reassembly: pass an IP fragment to the IPv4 or IPv6 reassembly
 * routine, depending on what is cached in 'npc' and on the per-protocol
 * ip4_reassembly / ip6_reassembly settings of 'npf'.
 *
 * => *mff is always written: false on entry, true only when the
 *    reassembly call succeeded but left no packet (m == NULL).
 * => Returns 0 without touching the packet if neither branch applies
 *    (reassembly disabled for this protocol).
 * => On a reassembly error: counts NPF_STAT_REASSFAIL, calls m_freem()
 *    on the mbuf, zeroes the nbuf and returns the error.
 * => On completion: re-initializes the nbuf with the resulting mbuf,
 *    clears npc_info and re-runs npf_cache_all(); returns EINVAL if the
 *    result still carries NPC_IPFRAG or NPC_FMTERR, otherwise counts
 *    NPF_STAT_REASSEMBLY and returns 0.
 */
static int
npf_reassembly(npf_t *npf, npf_cache_t *npc, bool *mff)
{
	nbuf_t *nbuf = npc->npc_nbuf;
	int error = EINVAL;
	struct mbuf *m;

	*mff = false;
	m = nbuf_head_mbuf(nbuf);

	if (npf_iscached(npc, NPC_IP4) && npf->ip4_reassembly) {
		error = ip_reass_packet(&m);
	} else if (npf_iscached(npc, NPC_IP6) && npf->ip6_reassembly) {
		error = ip6_reass_packet(&m, npc->npc_hlen);
	} else {
		/*
		 * Reassembly is disabled: just pass the packet through
		 * the ruleset for inspection.
		 */
		return 0;
	}

	if (error) {
		/* Reassembly failed; free the mbuf, clear the nbuf. */
		npf_stats_inc(npf, NPF_STAT_REASSFAIL);
		m_freem(m);
		/*
		 * With the nbuf zeroed, the caller's later
		 * nbuf_head_mbuf() lookup is expected to yield NULL
		 * -- NOTE(review): confirm against nbuf_head_mbuf().
		 */
		memset(nbuf, 0, sizeof(nbuf_t));
		return error;
	}
	if (m == NULL) {
		/* More fragments should come. */
		npf_stats_inc(npf, NPF_STAT_FRAGMENTS);
		*mff = true;
		return 0;
	}

	/*
	 * Reassembly is complete, we have the final packet.
	 * Cache again, since layer 4 data is accessible now.
	 */
	nbuf_init(npf, nbuf, m, nbuf->nb_ifp);
	npc->npc_info = 0;

	/* A reassembled packet must be neither a fragment nor malformed. */
	if (npf_cache_all(npc) & (NPC_IPFRAG|NPC_FMTERR)) {
		return EINVAL;
	}
	npf_stats_inc(npf, NPF_STAT_REASSEMBLY);
	return 0;
}

/*
 * npf_packet_bypass_tag_p: return true if a tag is found on the nbuf
 * (nbuf_find_tag() returns 0) and it has the NPF_NTAG_PASS bit set.
 */
static inline bool
npf_packet_bypass_tag_p(nbuf_t *nbuf)
{
	uint32_t ntag;
	return nbuf_find_tag(nbuf, &ntag) == 0 && (ntag & NPF_NTAG_PASS) != 0;
}

/*
 * npfk_packet_handler: main packet handling routine for layer 3.
 *
 * Note: packet flow and inspection logic is in strict order.
 *
 * => npf: the NPF instance; stored as the cache context.
 * => mp: in/out mbuf pointer.  *mp is read once to initialize the nbuf
 *    and then set to NULL; on the regular exit path it is reloaded from
 *    the nbuf head.  It is left NULL on the early returns (more fragments
 *    pending, rule procedure returned false) and after the packet has
 *    been blocked.
 * => ifp: interface, must not be NULL (asserted).
 * => di: direction value; recorded in the match info and forwarded to
 *    the connection, ruleset and NAT routines.
 *
 * Returns 0 if the packet is passed, as well as on the two early returns
 * mentioned above.  Otherwise returns an error: the error that was hit,
 * ENETUNREACH if the packet was blocked without any other error, or
 * ENOMEM if there is no mbuf left and no error was recorded.
 */
__dso_public int
npfk_packet_handler(npf_t *npf, struct mbuf **mp, ifnet_t *ifp, int di)
{
	nbuf_t nbuf;
	npf_cache_t npc;
	npf_conn_t *con;
	npf_rule_t *rl;
	npf_rproc_t *rp;
	int error, decision, flags;
	npf_match_info_t mi;
	bool mff;

	KASSERT(ifp != NULL);

	/*
	 * Initialize packet information cache.
	 * Note: it is enough to clear the info bits.
	 */
	nbuf_init(npf, &nbuf, *mp, ifp);
	memset(&npc, 0, sizeof(npf_cache_t));
	npc.npc_ctx = npf;
	npc.npc_nbuf = &nbuf;

	mi.mi_di = di;
	mi.mi_rid = 0;
	mi.mi_retfl = 0;

	/*
	 * From here on the packet is tracked through the nbuf only; the
	 * caller's pointer is reloaded from it at the 'out' label.
	 * The default decision is to block.
	 */
	*mp = NULL;
	decision = NPF_DECISION_BLOCK;
	error = 0;
	rp = NULL;
	con = NULL;

	/* Cache everything. */
	flags = npf_cache_all(&npc);

	/* Malformed packet, leave quickly. */
	if (flags & NPC_FMTERR) {
		error = EINVAL;
		goto out;
	}

	/* Determine whether it is an IP fragment. */
	if (__predict_false(flags & NPC_IPFRAG)) {
		/* Pass to IPv4/IPv6 reassembly mechanism. */
		error = npf_reassembly(npf, &npc, &mff);
		if (error) {
			goto out;
		}
		if (mff) {
			/* More fragments should come. */
			return 0;
		}
	}

	/* Just pass-through if specially tagged. */
	if (npf_packet_bypass_tag_p(&nbuf)) {
		goto pass;
	}

	/* Inspect the list of connections (if found, acquires a reference). */
	con = npf_conn_inspect(&npc, di, &error);

	/* If "passing" connection found - skip the ruleset inspection. */
	if (con && npf_conn_pass(con, &mi, &rp)) {
		npf_stats_inc(npf, NPF_STAT_PASS_CONN);
		KASSERT(error == 0);
		goto pass;
	}
	if (__predict_false(error)) {
		/*
		 * ENETUNREACH takes the block path, so that a rule
		 * procedure (if any) is still executed; any other error
		 * goes straight to the cleanup.
		 */
		if (error == ENETUNREACH)
			goto block;
		goto out;
	}

	/* Acquire the lock, inspect the ruleset using this packet. */
	int slock = npf_config_read_enter(npf);
	npf_ruleset_t *rlset = npf_config_ruleset(npf);

	rl = npf_ruleset_inspect(&npc, rlset, di, NPF_LAYER_3);
	if (__predict_false(rl == NULL)) {
		/*
		 * No rule matched: fall back to the default policy.  It is
		 * read before the config read section is exited.
		 */
		const bool pass = npf_default_pass(npf);
		npf_config_read_exit(npf, slock);

		if (pass) {
			npf_stats_inc(npf, NPF_STAT_PASS_DEFAULT);
			goto pass;
		}
		npf_stats_inc(npf, NPF_STAT_BLOCK_DEFAULT);
		goto block;
	}

	/*
	 * Get the rule procedure (acquires a reference) for association
	 * with a connection (if any) and execution.
	 */
	KASSERT(rp == NULL);
	rp = npf_rule_getrproc(rl);

	/* Conclude with the rule and release the lock. */
	error = npf_rule_conclude(rl, &mi);
	npf_config_read_exit(npf, slock);

	if (error) {
		npf_stats_inc(npf, NPF_STAT_BLOCK_RULESET);
		goto block;
	}
	npf_stats_inc(npf, NPF_STAT_PASS_RULESET);

	/*
	 * Establish a "pass" connection, if required. Just proceed if
	 * connection creation fails (e.g. due to unsupported protocol).
	 */
	if ((mi.mi_retfl & NPF_RULE_STATEFUL) != 0 && !con) {
		con = npf_conn_establish(&npc, di,
		    (mi.mi_retfl & NPF_RULE_GSTATEFUL) == 0);
		if (con) {
			/*
			 * Note: the reference on the rule procedure is
			 * transferred to the connection. It will be
			 * released on connection destruction.
			 */
			npf_conn_setpass(con, &mi, rp);
		}
	}

pass:
	decision = NPF_DECISION_PASS;
	KASSERT(error == 0);

	/*
	 * Perform NAT.  A failure here is carried in 'error' and turns
	 * the pass decision into a block at the end of the function.
	 */
	error = npf_do_nat(&npc, con, di);

block:
	/*
	 * Execute the rule procedure, if any is associated.
	 * It may reverse the decision from pass to block.
	 */
	if (rp && !npf_rproc_run(&npc, rp, &mi, &decision)) {
		/*
		 * The rule procedure returned false: drop both references
		 * and return success; *mp is still NULL at this point.
		 */
		if (con) {
			npf_conn_release(con);
		}
		npf_rproc_release(rp);
		/* mbuf already freed */
		return 0;
	}

out:
	/*
	 * Release the reference on a connection. Release the reference
	 * on a rule procedure only if there was no association.
	 */
	if (con) {
		npf_conn_release(con);
	} else if (rp) {
		npf_rproc_release(rp);
	}

	/* Get the new mbuf pointer. */
	if ((*mp = nbuf_head_mbuf(&nbuf)) == NULL) {
		/* No packet left: report the error, or ENOMEM if none. */
		return error ? error : ENOMEM;
	}

	/* Pass the packet if decided and there is no error. */
	if (decision == NPF_DECISION_PASS && !error) {
		/*
		 * XXX: Disable for now, it will be set accordingly later,
		 * for optimisations (to reduce inspection).
		 */
		m_clear_flag(*mp, M_CANFASTFWD);
		return 0;
	}

	/*
	 * Block the packet. ENETUNREACH is used to indicate blocking.
	 * Depending on the flags and protocol, return TCP reset (RST) or
	 * ICMP destination unreachable.
	 *
	 * NOTE(review): when npf_return_block() returns non-zero the mbuf
	 * pointer is dropped without being freed here, so the mbuf is
	 * presumably consumed by that call -- confirm.
	 */
	if (mi.mi_retfl && npf_return_block(&npc, mi.mi_retfl)) {
		*mp = NULL;
	}

	if (!error) {
		error = ENETUNREACH;
	}

	if (*mp) {
		/* Free the mbuf chain. */
		m_freem(*mp);
		*mp = NULL;
	}
	return error;
}