From a839e463e84a02c0ea65ff61504b56a83e193078 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 13 Apr 2015 18:27:35 +0200 Subject: mac80211: Fix mac80211.h docbook comments A couple of enums in mac80211.h became structures recently, but the comments didn't follow suit, leading to errors like: Error(.//include/net/mac80211.h:367): Cannot parse enum! Documentation/DocBook/Makefile:93: recipe for target 'Documentation/DocBook/80211.xml' failed make[1]: *** [Documentation/DocBook/80211.xml] Error 1 Makefile:1361: recipe for target 'mandocs' failed make: *** [mandocs] Error 2 Fix the comments comments accordingly. Added a couple of other small comment fixes while I was there to silence other recently-added docbook warnings. Reported-by: Jim Davis Signed-off-by: Jonathan Corbet Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b4bef1152c05..38a5fd790366 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -354,7 +354,7 @@ enum ieee80211_rssi_event_data { }; /** - * enum ieee80211_rssi_event - data attached to an %RSSI_EVENT + * struct ieee80211_rssi_event - data attached to an %RSSI_EVENT * @data: See &enum ieee80211_rssi_event_data */ struct ieee80211_rssi_event { @@ -388,7 +388,7 @@ enum ieee80211_mlme_event_status { }; /** - * enum ieee80211_mlme_event - data attached to an %MLME_EVENT + * struct ieee80211_mlme_event - data attached to an %MLME_EVENT * @data: See &enum ieee80211_mlme_event_data * @status: See &enum ieee80211_mlme_event_status * @reason: the reason code if applicable @@ -401,9 +401,10 @@ struct ieee80211_mlme_event { /** * struct ieee80211_event - event to be sent to the driver - * @type The event itself. See &enum ieee80211_event_type. + * @type: The event itself. See &enum ieee80211_event_type. * @rssi: relevant if &type is %RSSI_EVENT * @mlme: relevant if &type is %AUTH_EVENT + * @u: union holding the above two fields */ struct ieee80211_event { enum ieee80211_event_type type; -- cgit v1.2.3 From 17c18bf880b2464851e5a2bca86521affc46c97e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 21 Mar 2015 15:25:43 +0100 Subject: mac80211: add TX fastpath In order to speed up mac80211's TX path, add the "fast-xmit" cache that will cache the data frame 802.11 header and other data to be able to build the frame more quickly. This cache is rebuilt when external triggers imply changes, but a lot of the checks done per packet today are simplified away to the check for the cache. There's also a more detailed description in the code. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 +- net/mac80211/cfg.c | 9 +- net/mac80211/chan.c | 6 + net/mac80211/ieee80211_i.h | 5 + net/mac80211/key.c | 2 + net/mac80211/rx.c | 2 + net/mac80211/sta_info.c | 6 + net/mac80211/sta_info.h | 27 +++ net/mac80211/tx.c | 420 ++++++++++++++++++++++++++++++++++++++++++++- 9 files changed, 480 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 38a5fd790366..9001bd685b1e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1796,6 +1796,10 @@ struct ieee80211_txq { * the driver returns 1. This also forces the driver to advertise its * supported cipher suites. * + * @IEEE80211_HW_SUPPORT_FAST_XMIT: The driver/hardware supports fast-xmit, + * this currently requires only the ability to calculate the duration + * for frames. + * * @IEEE80211_HW_QUEUE_CONTROL: The driver wants to control per-interface * queue mapping in order to use different queues (not just one per AC) * for different virtual interfaces. See the doc section on HW queue @@ -1844,7 +1848,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_WANT_MONITOR_VIF = 1<<14, IEEE80211_HW_NO_AUTO_VIF = 1<<15, IEEE80211_HW_SW_CRYPTO_CONTROL = 1<<16, - /* free slots */ + IEEE80211_HW_SUPPORT_FAST_XMIT = 1<<17, IEEE80211_HW_REPORTS_TX_ACK_STATUS = 1<<18, IEEE80211_HW_CONNECTION_MONITOR = 1<<19, IEEE80211_HW_QUEUE_CONTROL = 1<<20, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 265e42721a66..4aa5e893cbaa 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -137,6 +137,9 @@ static int ieee80211_set_noack_map(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); sdata->noack_map = noack_map; + + ieee80211_check_fast_xmit_iface(sdata); + return 0; } @@ -2099,10 +2102,14 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) int err; if (changed & WIPHY_PARAM_FRAG_THRESHOLD) { + ieee80211_check_fast_xmit_all(local); + err = drv_set_frag_threshold(local, wiphy->frag_threshold); - if (err) + if (err) { + ieee80211_check_fast_xmit_all(local); return err; + } } if ((changed & WIPHY_PARAM_COVERAGE_CLASS) || diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 5bcd4e5589d3..7e9b62475400 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -664,6 +664,8 @@ out: ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); + ieee80211_check_fast_xmit_iface(sdata); + return ret; } @@ -1030,6 +1032,8 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata) if (sdata->vif.type == NL80211_IFTYPE_AP) __ieee80211_vif_copy_chanctx_to_vlans(sdata, false); + ieee80211_check_fast_xmit_iface(sdata); + if (ieee80211_chanctx_refcount(local, old_ctx) == 0) ieee80211_free_chanctx(local, old_ctx); @@ -1376,6 +1380,8 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) __ieee80211_vif_copy_chanctx_to_vlans(sdata, false); + ieee80211_check_fast_xmit_iface(sdata); + sdata->radar_required = sdata->reserved_radar_required; if (sdata->vif.bss_conf.chandef.width != diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ab46ab4a7249..556051f68ad7 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1651,6 +1651,11 @@ struct sk_buff * ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 info_flags); +void ieee80211_check_fast_xmit(struct sta_info *sta); +void ieee80211_check_fast_xmit_all(struct ieee80211_local *local); +void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata); +void ieee80211_clear_fast_xmit(struct sta_info *sta); + /* HT */ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_ht_cap *ht_cap); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 2291cd730091..3e0b814f4db3 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -229,6 +229,7 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, if (uni) { rcu_assign_pointer(sdata->default_unicast_key, key); + ieee80211_check_fast_xmit_iface(sdata); drv_set_default_unicast_key(sdata->local, sdata, idx); } @@ -298,6 +299,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, if (pairwise) { rcu_assign_pointer(sta->ptk[idx], new); sta->ptk_idx = idx; + ieee80211_check_fast_xmit(sta); } else { rcu_assign_pointer(sta->gtk[idx], new); sta->gtk_idx = idx; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 260eed45b6d2..6e3b564b6dea 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1200,6 +1200,8 @@ static void sta_ps_start(struct sta_info *sta) ps_dbg(sdata, "STA %pM aid %d enters power save mode\n", sta->sta.addr, sta->sta.aid); + ieee80211_clear_fast_xmit(sta); + if (!sta->sta.txq[0]) return; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 0800e02cce05..737730abba6d 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1201,6 +1201,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) ps_dbg(sdata, "STA %pM aid %d sending %d filtered/%d PS frames since STA not sleeping anymore\n", sta->sta.addr, sta->sta.aid, filtered, buffered); + + ieee80211_check_fast_xmit(sta); } static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, @@ -1599,6 +1601,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, if (block) { set_sta_flag(sta, WLAN_STA_PS_DRIVER); + ieee80211_clear_fast_xmit(sta); return; } @@ -1616,6 +1619,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, ieee80211_queue_work(hw, &sta->drv_deliver_wk); } else { clear_sta_flag(sta, WLAN_STA_PS_DRIVER); + ieee80211_check_fast_xmit(sta); } } EXPORT_SYMBOL(ieee80211_sta_block_awake); @@ -1720,6 +1724,7 @@ int sta_info_move_state(struct sta_info *sta, !sta->sdata->u.vlan.sta)) atomic_dec(&sta->sdata->bss->num_mcast_sta); clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); + ieee80211_clear_fast_xmit(sta); } break; case IEEE80211_STA_AUTHORIZED: @@ -1729,6 +1734,7 @@ int sta_info_move_state(struct sta_info *sta, !sta->sdata->u.vlan.sta)) atomic_inc(&sta->sdata->bss->num_mcast_sta); set_bit(WLAN_STA_AUTHORIZED, &sta->_flags); + ieee80211_check_fast_xmit(sta); } break; default: diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index e4c4f871ffee..0602363ff63b 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -241,6 +241,30 @@ struct sta_ampdu_mlme { /* Value to indicate no TID reservation */ #define IEEE80211_TID_UNRESERVED 0xff +/** + * struct ieee80211_fast_tx - TX fastpath information + * @key: key to use for hw crypto + * @hdr: the 802.11 header to put with the frame + * @hdr_len: actual 802.11 header length + * @sa_offs: offset of the SA + * @da_offs: offset of the DA + * @pn_offs: offset where to put PN for crypto (or 0 if not needed) + * @band: band this will be transmitted on, for tx_info + * @rcu_head: RCU head to free this struct + * + * Try to keep this struct small so it fits into a single cacheline. + */ +struct ieee80211_fast_tx { + struct ieee80211_key *key; + u8 hdr[30 + 2 + IEEE80211_CCMP_HDR_LEN + + sizeof(rfc1042_header)]; + u8 hdr_len; + u8 sa_offs, da_offs, pn_offs; + u8 band; + + struct rcu_head rcu_head; +}; + /** * struct sta_info - STA information * @@ -339,6 +363,7 @@ struct sta_ampdu_mlme { * using IEEE80211_NUM_TID entry for non-QoS frames * @rx_msdu: MSDUs received from this station, using IEEE80211_NUM_TID * entry for non-QoS frames + * @fast_tx: TX fastpath information */ struct sta_info { /* General information, mostly static */ @@ -356,6 +381,8 @@ struct sta_info { spinlock_t rate_ctrl_lock; spinlock_t lock; + struct ieee80211_fast_tx __rcu *fast_tx; + struct work_struct drv_deliver_wk; u16 listen_interval; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 667111ee6a20..160e1927323d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1600,7 +1600,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, if (skb_cloned(skb) && (!(local->hw.flags & IEEE80211_HW_SUPPORTS_CLONED_SKBS) || !skb_clone_writable(skb, ETH_HLEN) || - sdata->crypto_tx_tailroom_needed_cnt)) + (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt))) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else if (head_need || tail_need) I802_DEBUG_INC(local->tx_expand_skb_head); @@ -2387,6 +2387,414 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, return ERR_PTR(ret); } +/* + * fast-xmit overview + * + * The core idea of this fast-xmit is to remove per-packet checks by checking + * them out of band. ieee80211_check_fast_xmit() implements the out-of-band + * checks that are needed to get the sta->fast_tx pointer assigned, after which + * much less work can be done per packet. For example, fragmentation must be + * disabled or the fast_tx pointer will not be set. All the conditions are seen + * in the code here. + * + * Once assigned, the fast_tx data structure also caches the per-packet 802.11 + * header and other data to aid packet processing in ieee80211_xmit_fast(). + * + * The most difficult part of this is that when any of these assumptions + * change, an external trigger (i.e. a call to ieee80211_clear_fast_xmit(), + * ieee80211_check_fast_xmit() or friends) is required to reset the data, + * since the per-packet code no longer checks the conditions. This is reflected + * by the calls to these functions throughout the rest of the code, and must be + * maintained if any of the TX path checks change. + */ + +void ieee80211_check_fast_xmit(struct sta_info *sta) +{ + struct ieee80211_fast_tx build = {}, *fast_tx = NULL, *old; + struct ieee80211_local *local = sta->local; + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_hdr *hdr = (void *)build.hdr; + struct ieee80211_chanctx_conf *chanctx_conf; + __le16 fc; + + if (!(local->hw.flags & IEEE80211_HW_SUPPORT_FAST_XMIT)) + return; + + /* Locking here protects both the pointer itself, and against concurrent + * invocations winning data access races to, e.g., the key pointer that + * is used. + * Without it, the invocation of this function right after the key + * pointer changes wouldn't be sufficient, as another CPU could access + * the pointer, then stall, and then do the cache update after the CPU + * that invalidated the key. + * With the locking, such scenarios cannot happen as the check for the + * key and the fast-tx assignment are done atomically, so the CPU that + * modifies the key will either wait or other one will see the key + * cleared/changed already. + */ + spin_lock_bh(&sta->lock); + if (local->hw.flags & IEEE80211_HW_SUPPORTS_PS && + !(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) && + sdata->vif.type == NL80211_IFTYPE_STATION) + goto out; + + if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + goto out; + + if (test_sta_flag(sta, WLAN_STA_PS_STA) || + test_sta_flag(sta, WLAN_STA_PS_DRIVER) || + test_sta_flag(sta, WLAN_STA_PS_DELIVER)) + goto out; + + if (sdata->noack_map) + goto out; + + /* fast-xmit doesn't handle fragmentation at all */ + if (local->hw.wiphy->frag_threshold != (u32)-1) + goto out; + + rcu_read_lock(); + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + if (!chanctx_conf) { + rcu_read_unlock(); + goto out; + } + build.band = chanctx_conf->def.chan->band; + rcu_read_unlock(); + + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); + + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + /* DA SA BSSID */ + build.da_offs = offsetof(struct ieee80211_hdr, addr1); + build.sa_offs = offsetof(struct ieee80211_hdr, addr2); + memcpy(hdr->addr3, sdata->u.mgd.bssid, ETH_ALEN); + build.hdr_len = 24; + break; + } + + if (sdata->u.mgd.use_4addr) { + /* non-regular ethertype cannot use the fastpath */ + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | + IEEE80211_FCTL_TODS); + /* RA TA DA SA */ + memcpy(hdr->addr1, sdata->u.mgd.bssid, ETH_ALEN); + memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + build.da_offs = offsetof(struct ieee80211_hdr, addr3); + build.sa_offs = offsetof(struct ieee80211_hdr, addr4); + build.hdr_len = 30; + break; + } + fc |= cpu_to_le16(IEEE80211_FCTL_TODS); + /* BSSID SA DA */ + memcpy(hdr->addr1, sdata->u.mgd.bssid, ETH_ALEN); + build.da_offs = offsetof(struct ieee80211_hdr, addr3); + build.sa_offs = offsetof(struct ieee80211_hdr, addr2); + build.hdr_len = 24; + break; + case NL80211_IFTYPE_AP_VLAN: + if (sdata->wdev.use_4addr) { + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | + IEEE80211_FCTL_TODS); + /* RA TA DA SA */ + memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN); + memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + build.da_offs = offsetof(struct ieee80211_hdr, addr3); + build.sa_offs = offsetof(struct ieee80211_hdr, addr4); + build.hdr_len = 30; + break; + } + /* fall through */ + case NL80211_IFTYPE_AP: + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); + /* DA BSSID SA */ + build.da_offs = offsetof(struct ieee80211_hdr, addr1); + memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + build.sa_offs = offsetof(struct ieee80211_hdr, addr3); + build.hdr_len = 24; + break; + default: + /* not handled on fast-xmit */ + goto out; + } + + if (sta->sta.wme) { + build.hdr_len += 2; + fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA); + } + + /* We store the key here so there's no point in using rcu_dereference() + * but that's fine because the code that changes the pointers will call + * this function after doing so. For a single CPU that would be enough, + * for multiple see the comment above. + */ + build.key = rcu_access_pointer(sta->ptk[sta->ptk_idx]); + if (!build.key) + build.key = rcu_access_pointer(sdata->default_unicast_key); + if (build.key) { + bool gen_iv, iv_spc; + + gen_iv = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV; + iv_spc = build.key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE; + + /* don't handle software crypto */ + if (!(build.key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) + goto out; + + switch (build.key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + /* add fixed key ID */ + if (gen_iv) { + (build.hdr + build.hdr_len)[3] = + 0x20 | (build.key->conf.keyidx << 6); + build.pn_offs = build.hdr_len; + } + if (gen_iv || iv_spc) + build.hdr_len += IEEE80211_CCMP_HDR_LEN; + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + /* add fixed key ID */ + if (gen_iv) { + (build.hdr + build.hdr_len)[3] = + 0x20 | (build.key->conf.keyidx << 6); + build.pn_offs = build.hdr_len; + } + if (gen_iv || iv_spc) + build.hdr_len += IEEE80211_GCMP_HDR_LEN; + break; + default: + /* don't do fast-xmit for these ciphers (yet) */ + goto out; + } + + fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + } + + hdr->frame_control = fc; + + memcpy(build.hdr + build.hdr_len, + rfc1042_header, sizeof(rfc1042_header)); + build.hdr_len += sizeof(rfc1042_header); + + fast_tx = kmemdup(&build, sizeof(build), GFP_ATOMIC); + /* if the kmemdup fails, continue w/o fast_tx */ + if (!fast_tx) + goto out; + + out: + /* we might have raced against another call to this function */ + old = rcu_dereference_protected(sta->fast_tx, + lockdep_is_held(&sta->lock)); + rcu_assign_pointer(sta->fast_tx, fast_tx); + if (old) + kfree_rcu(old, rcu_head); + spin_unlock_bh(&sta->lock); +} + +void ieee80211_check_fast_xmit_all(struct ieee80211_local *local) +{ + struct sta_info *sta; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &local->sta_list, list) + ieee80211_check_fast_xmit(sta); + rcu_read_unlock(); +} + +void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + rcu_read_lock(); + + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sdata != sta->sdata && + (!sta->sdata->bss || sta->sdata->bss != sdata->bss)) + continue; + ieee80211_check_fast_xmit(sta); + } + + rcu_read_unlock(); +} + +void ieee80211_clear_fast_xmit(struct sta_info *sta) +{ + struct ieee80211_fast_tx *fast_tx; + + spin_lock_bh(&sta->lock); + fast_tx = rcu_dereference_protected(sta->fast_tx, + lockdep_is_held(&sta->lock)); + RCU_INIT_POINTER(sta->fast_tx, NULL); + spin_unlock_bh(&sta->lock); + + if (fast_tx) + kfree_rcu(fast_tx, rcu_head); +} + +static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, + struct net_device *dev, struct sta_info *sta, + struct ieee80211_fast_tx *fast_tx, + struct sk_buff *skb) +{ + struct ieee80211_local *local = sdata->local; + u16 ethertype = (skb->data[12] << 8) | skb->data[13]; + int extra_head = fast_tx->hdr_len - (ETH_HLEN - 2); + int hw_headroom = sdata->local->hw.extra_tx_headroom; + struct ethhdr eth; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *)fast_tx->hdr; + struct ieee80211_tx_data tx; + ieee80211_tx_result r; + struct tid_ampdu_tx *tid_tx = NULL; + u8 tid = IEEE80211_NUM_TIDS; + + /* control port protocol needs a lot of special handling */ + if (cpu_to_be16(ethertype) == sdata->control_port_protocol) + return false; + + /* only RFC 1042 SNAP */ + if (ethertype < ETH_P_802_3_MIN) + return false; + + /* don't handle TX status request here either */ + if (skb->sk && skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) + return false; + + if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { + tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); + if (tid_tx && + !test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) + return false; + } + + /* after this point (skb is modified) we cannot return false */ + + if (skb_shared(skb)) { + struct sk_buff *tmp_skb = skb; + + skb = skb_clone(skb, GFP_ATOMIC); + kfree_skb(tmp_skb); + + if (!skb) + return true; + } + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len + extra_head; + dev->trans_start = jiffies; + + /* will not be crypto-handled beyond what we do here, so use false + * as the may-encrypt argument for the resize to not account for + * more room than we already have in 'extra_head' + */ + if (unlikely(ieee80211_skb_resize(sdata, skb, + max_t(int, extra_head + hw_headroom - + skb_headroom(skb), 0), + false))) { + kfree_skb(skb); + return true; + } + + memcpy(ð, skb->data, ETH_HLEN - 2); + hdr = (void *)skb_push(skb, extra_head); + memcpy(skb->data, fast_tx->hdr, fast_tx->hdr_len); + memcpy(skb->data + fast_tx->da_offs, eth.h_dest, ETH_ALEN); + memcpy(skb->data + fast_tx->sa_offs, eth.h_source, ETH_ALEN); + + memset(info, 0, sizeof(*info)); + info->band = fast_tx->band; + info->control.vif = &sdata->vif; + info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT | + IEEE80211_TX_CTL_DONTFRAG | + (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0); + + if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { + *ieee80211_get_qos_ctl(hdr) = tid; + hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); + } else { + info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; + hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number); + sdata->sequence_number += 0x10; + } + + sta->tx_msdu[tid]++; + + info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; + + __skb_queue_head_init(&tx.skbs); + + tx.flags = IEEE80211_TX_UNICAST; + tx.local = local; + tx.sdata = sdata; + tx.sta = sta; + tx.key = fast_tx->key; + + if (fast_tx->key) + info->control.hw_key = &fast_tx->key->conf; + + if (!(local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) { + tx.skb = skb; + r = ieee80211_tx_h_rate_ctrl(&tx); + skb = tx.skb; + tx.skb = NULL; + + if (r != TX_CONTINUE) { + if (r != TX_QUEUED) + kfree_skb(skb); + return true; + } + } + + /* statistics normally done by ieee80211_tx_h_stats (but that + * has to consider fragmentation, so is more complex) + */ + sta->tx_fragments++; + sta->tx_bytes[skb_get_queue_mapping(skb)] += skb->len; + sta->tx_packets[skb_get_queue_mapping(skb)]++; + + if (fast_tx->pn_offs) { + u64 pn; + u8 *crypto_hdr = skb->data + fast_tx->pn_offs; + + switch (fast_tx->key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + pn = atomic64_inc_return(&fast_tx->key->u.ccmp.tx_pn); + crypto_hdr[0] = pn; + crypto_hdr[1] = pn >> 8; + crypto_hdr[4] = pn >> 16; + crypto_hdr[5] = pn >> 24; + crypto_hdr[6] = pn >> 32; + crypto_hdr[7] = pn >> 40; + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn = atomic64_inc_return(&fast_tx->key->u.gcmp.tx_pn); + crypto_hdr[0] = pn; + crypto_hdr[1] = pn >> 8; + crypto_hdr[4] = pn >> 16; + crypto_hdr[5] = pn >> 24; + crypto_hdr[6] = pn >> 32; + crypto_hdr[7] = pn >> 40; + break; + } + } + + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, u.ap); + + __skb_queue_tail(&tx.skbs, skb); + ieee80211_tx_frags(local, &sdata->vif, &sta->sta, &tx.skbs, false); + return true; +} + void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 info_flags) @@ -2406,6 +2814,16 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, goto out; } + if (!IS_ERR_OR_NULL(sta)) { + struct ieee80211_fast_tx *fast_tx; + + fast_tx = rcu_dereference(sta->fast_tx); + + if (fast_tx && + ieee80211_xmit_fast(sdata, dev, sta, fast_tx, skb)) + goto out; + } + skb = ieee80211_build_hdr(sdata, skb, info_flags, sta); if (IS_ERR(skb)) goto out; -- cgit v1.2.3 From 680a0daba74fed0bf30530c9b3e7e706cf29855f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 13 Apr 2015 16:58:25 +0200 Subject: mac80211: allow drivers to support S/G If drivers want to support S/G (really just gather DMA on TX) then we can now easily support this on the fast-xmit path since it just needs to write to the ethernet header (and already has a check for that being possible.) However, disallow this on the regular TX path (which has to handle fragmentation, software crypto, etc.) by calling skb_linearize(). Also allow the related HIGHDMA since that's not interesting to the code in mac80211 at all anyway. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++-- net/mac80211/main.c | 2 +- net/mac80211/tx.c | 6 ++++++ 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9001bd685b1e..0af7464ef57b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1942,8 +1942,8 @@ enum ieee80211_hw_flags { * Use the %IEEE80211_RADIOTAP_VHT_KNOWN_* values. * * @netdev_features: netdev features to be set in each netdev created - * from this HW. Note only HW checksum features are currently - * compatible with mac80211. Other feature bits will be rejected. + * from this HW. Note that not all features are usable with mac80211, + * other features will be rejected during HW registration. * * @uapsd_queues: This bitmap is included in (re)association frame to indicate * for each access category if it is uAPSD trigger-enabled and delivery- diff --git a/net/mac80211/main.c b/net/mac80211/main.c index df3051d96aff..6dd6dd47a9df 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -840,7 +840,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) /* Only HW csum features are currently compatible with mac80211 */ feature_whitelist = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_HW_CSUM; + NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA; if (WARN_ON(hw->netdev_features & ~feature_whitelist)) return -EINVAL; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 20a90b16eb4d..3a421a04cbec 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2864,6 +2864,12 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, goto out; } + /* we cannot process non-linear frames on this path */ + if (skb_linearize(skb)) { + kfree_skb(skb); + goto out; + } + /* the frame could be fragmented, software-encrypted, and other things * so we cannot really handle checksum offload with it - fix it up in * software before we handle anything else. -- cgit v1.2.3 From df1404650ccbfeb76a84f301f22316be0d00a864 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 22 Apr 2015 14:40:58 +0200 Subject: mac80211: remove support for IFF_PROMISC This support is essentially useless as typically networks are encrypted, frames will be filtered by hardware, and rate scaling will be done with the intended recipient in mind. For real monitoring of the network, the monitor mode support should be used instead. Removing it removes a lot of corner cases. Signed-off-by: Johannes Berg --- drivers/net/wireless/adm8211.c | 7 +---- drivers/net/wireless/at76c50x-usb.h | 2 +- drivers/net/wireless/ath/ar5523/ar5523.c | 3 +- drivers/net/wireless/ath/ath10k/mac.c | 4 +-- drivers/net/wireless/ath/ath5k/ath5k.h | 1 - drivers/net/wireless/ath/ath5k/mac80211-ops.c | 15 ++-------- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 3 +- drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 9 +----- drivers/net/wireless/ath/ath9k/main.c | 3 +- drivers/net/wireless/ath/ath9k/recv.c | 5 ---- drivers/net/wireless/ath/carl9170/fw.c | 3 +- drivers/net/wireless/ath/carl9170/main.c | 7 ++--- drivers/net/wireless/b43/main.c | 8 ++---- drivers/net/wireless/b43legacy/main.c | 8 ++---- .../net/wireless/brcm80211/brcmsmac/mac80211_if.c | 5 +--- drivers/net/wireless/brcm80211/brcmsmac/main.c | 2 +- drivers/net/wireless/cw1200/sta.c | 10 ++----- drivers/net/wireless/iwlegacy/3945-mac.c | 4 +-- drivers/net/wireless/iwlegacy/4965-mac.c | 4 +-- drivers/net/wireless/iwlwifi/dvm/mac80211.c | 4 +-- drivers/net/wireless/libertas_tf/main.c | 7 ++--- drivers/net/wireless/mac80211_hwsim.c | 2 -- drivers/net/wireless/mwl8k.c | 2 +- drivers/net/wireless/p54/fwio.c | 3 +- drivers/net/wireless/p54/main.c | 6 ++-- drivers/net/wireless/rt2x00/rt2400pci.c | 4 +-- drivers/net/wireless/rt2x00/rt2500pci.c | 4 +-- drivers/net/wireless/rt2x00/rt2500usb.c | 4 +-- drivers/net/wireless/rt2x00/rt2800lib.c | 3 +- drivers/net/wireless/rt2x00/rt2x00mac.c | 6 +--- drivers/net/wireless/rt2x00/rt61pci.c | 4 +-- drivers/net/wireless/rt2x00/rt73usb.c | 4 +-- drivers/net/wireless/rtlwifi/core.h | 3 +- drivers/net/wireless/ti/wl1251/main.c | 9 ++---- drivers/net/wireless/ti/wlcore/main.c | 3 +- drivers/net/wireless/zd1211rw/zd_mac.c | 4 +-- drivers/staging/vt6655/device_main.c | 11 +------- drivers/staging/vt6656/main_usb.c | 11 +------- include/net/mac80211.h | 9 ++---- net/mac80211/ieee80211_i.h | 6 ++-- net/mac80211/iface.c | 19 ++----------- net/mac80211/main.c | 3 -- net/mac80211/rx.c | 33 ++++++---------------- 43 files changed, 63 insertions(+), 204 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c index f07a61899545..413528295d72 100644 --- a/drivers/net/wireless/adm8211.c +++ b/drivers/net/wireless/adm8211.c @@ -1353,12 +1353,7 @@ static void adm8211_configure_filter(struct ieee80211_hw *dev, new_flags = 0; - if (*total_flags & FIF_PROMISC_IN_BSS) { - new_flags |= FIF_PROMISC_IN_BSS; - priv->nar |= ADM8211_NAR_PR; - priv->nar &= ~ADM8211_NAR_MM; - mc_filter[1] = mc_filter[0] = ~0; - } else if (*total_flags & FIF_ALLMULTI || multicast == ~(0ULL)) { + if (*total_flags & FIF_ALLMULTI || multicast == ~(0ULL)) { new_flags |= FIF_ALLMULTI; priv->nar &= ~ADM8211_NAR_PR; priv->nar |= ADM8211_NAR_MM; diff --git a/drivers/net/wireless/at76c50x-usb.h b/drivers/net/wireless/at76c50x-usb.h index 55090a38ac95..ae03271f878e 100644 --- a/drivers/net/wireless/at76c50x-usb.h +++ b/drivers/net/wireless/at76c50x-usb.h @@ -447,7 +447,7 @@ struct at76_priv { int mac80211_registered; }; -#define AT76_SUPPORTED_FILTERS FIF_PROMISC_IN_BSS +#define AT76_SUPPORTED_FILTERS 0 #define SCAN_POLL_INTERVAL (HZ / 4) diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index 5147ebe4cd05..14937cbeca56 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -1319,8 +1319,7 @@ out_unlock: } -#define AR5523_SUPPORTED_FILTERS (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ +#define AR5523_SUPPORTED_FILTERS (FIF_ALLMULTI | \ FIF_FCSFAIL | \ FIF_OTHER_BSS) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 4120fe5fc7bc..fcd08b2f8d26 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -773,7 +773,6 @@ static int ath10k_monitor_recalc(struct ath10k *ar) lockdep_assert_held(&ar->conf_mutex); should_start = ar->monitor || - ar->filter_flags & FIF_PROMISC_IN_BSS || test_bit(ATH10K_CAC_RUNNING, &ar->dev_flags); ath10k_dbg(ar, ATH10K_DBG_MAC, @@ -3493,8 +3492,7 @@ static void ath10k_remove_interface(struct ieee80211_hw *hw, * FIXME: Has to be verified. */ #define SUPPORTED_FILTERS \ - (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ + (FIF_ALLMULTI | \ FIF_CONTROL | \ FIF_PSPOLL | \ FIF_OTHER_BSS | \ diff --git a/drivers/net/wireless/ath/ath5k/ath5k.h b/drivers/net/wireless/ath/ath5k/ath5k.h index 7ca0d6f930fd..e22b0e778927 100644 --- a/drivers/net/wireless/ath/ath5k/ath5k.h +++ b/drivers/net/wireless/ath/ath5k/ath5k.h @@ -1280,7 +1280,6 @@ struct ath5k_hw { DECLARE_BITMAP(status, 4); #define ATH_STAT_INVALID 0 /* disable hardware accesses */ -#define ATH_STAT_PROMISC 1 #define ATH_STAT_LEDSOFT 2 /* enable LED gpio status */ #define ATH_STAT_STARTED 3 /* opened & irqs enabled */ #define ATH_STAT_RESET 4 /* hw reset */ diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c index 3b4a6463d87a..0210630972d6 100644 --- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c +++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c @@ -369,7 +369,7 @@ ath5k_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *new_flags, u64 multicast) { #define SUPPORTED_FIF_FLAGS \ - (FIF_PROMISC_IN_BSS | FIF_ALLMULTI | FIF_FCSFAIL | \ + (FIF_ALLMULTI | FIF_FCSFAIL | \ FIF_PLCPFAIL | FIF_CONTROL | FIF_OTHER_BSS | \ FIF_BCN_PRBRESP_PROMISC) @@ -393,16 +393,6 @@ ath5k_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, (AR5K_RX_FILTER_UCAST | AR5K_RX_FILTER_BCAST | AR5K_RX_FILTER_MCAST); - if (changed_flags & (FIF_PROMISC_IN_BSS | FIF_OTHER_BSS)) { - if (*new_flags & FIF_PROMISC_IN_BSS) - __set_bit(ATH_STAT_PROMISC, ah->status); - else - __clear_bit(ATH_STAT_PROMISC, ah->status); - } - - if (test_bit(ATH_STAT_PROMISC, ah->status)) - rfilt |= AR5K_RX_FILTER_PROM; - /* Note, AR5K_RX_FILTER_MCAST is already enabled */ if (*new_flags & FIF_ALLMULTI) { mfilt[0] = ~0; @@ -418,8 +408,7 @@ ath5k_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, if ((*new_flags & FIF_BCN_PRBRESP_PROMISC) || (ah->nvifs > 1)) rfilt |= AR5K_RX_FILTER_BEACON; - /* FIF_CONTROL doc says that if FIF_PROMISC_IN_BSS is not - * set we should only pass on control frames for this + /* FIF_CONTROL doc says we should only pass on control frames for this * station. This needs testing. I believe right now this * enables *all* control frames, which is OK.. but * but we should see if we can improve on granularity */ diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index 564923c0df87..b71f3072fd9a 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -1238,8 +1238,7 @@ out: } #define SUPPORTED_FILTERS \ - (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ + (FIF_ALLMULTI | \ FIF_CONTROL | \ FIF_PSPOLL | \ FIF_OTHER_BSS | \ diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index a0f58e2aa553..cc9648f844ae 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -872,14 +872,7 @@ u32 ath9k_htc_calcrxfilter(struct ath9k_htc_priv *priv) if (priv->rxfilter & FIF_PROBE_REQ) rfilt |= ATH9K_RX_FILTER_PROBEREQ; - /* - * Set promiscuous mode when FIF_PROMISC_IN_BSS is enabled for station - * mode interface or when in monitor mode. AP mode does not need this - * since it receives all in-BSS frames anyway. - */ - if (((ah->opmode != NL80211_IFTYPE_AP) && - (priv->rxfilter & FIF_PROMISC_IN_BSS)) || - ah->is_monitoring) + if (ah->is_monitoring) rfilt |= ATH9K_RX_FILTER_PROM; if (priv->rxfilter & FIF_CONTROL) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index b0badef71ce7..d285e3a89853 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1442,8 +1442,7 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) } #define SUPPORTED_FILTERS \ - (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ + (FIF_ALLMULTI | \ FIF_CONTROL | \ FIF_PSPOLL | \ FIF_OTHER_BSS | \ diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c index 6fb40ef86fd6..6c75fb1ab77d 100644 --- a/drivers/net/wireless/ath/ath9k/recv.c +++ b/drivers/net/wireless/ath/ath9k/recv.c @@ -392,11 +392,6 @@ u32 ath_calcrxfilter(struct ath_softc *sc) if (sc->cur_chan->rxfilter & FIF_PROBE_REQ) rfilt |= ATH9K_RX_FILTER_PROBEREQ; - /* - * Set promiscuous mode when FIF_PROMISC_IN_BSS is enabled for station - * mode interface or when in monitor mode. AP mode does not need this - * since it receives all in-BSS frames anyway. - */ if (sc->sc_ah->is_monitoring) rfilt |= ATH9K_RX_FILTER_PROM; diff --git a/drivers/net/wireless/ath/carl9170/fw.c b/drivers/net/wireless/ath/carl9170/fw.c index 47d5c2e910ad..020cd46471f5 100644 --- a/drivers/net/wireless/ath/carl9170/fw.c +++ b/drivers/net/wireless/ath/carl9170/fw.c @@ -310,8 +310,7 @@ static int carl9170_fw(struct ar9170 *ar, const __u8 *data, size_t len) if (SUPP(CARL9170FW_RX_FILTER)) { ar->fw.rx_filter = true; ar->rx_filter_caps = FIF_FCSFAIL | FIF_PLCPFAIL | - FIF_CONTROL | FIF_PSPOLL | FIF_OTHER_BSS | - FIF_PROMISC_IN_BSS; + FIF_CONTROL | FIF_PSPOLL | FIF_OTHER_BSS; } if (SUPP(CARL9170FW_HW_COUNTERS)) diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index f1455a04cb62..59db6732d4e3 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1011,9 +1011,8 @@ static void carl9170_op_configure_filter(struct ieee80211_hw *hw, if (multicast != ar->cur_mc_hash) WARN_ON(carl9170_update_multicast(ar, multicast)); - if (changed_flags & (FIF_OTHER_BSS | FIF_PROMISC_IN_BSS)) { - ar->sniffer_enabled = !!(*new_flags & - (FIF_OTHER_BSS | FIF_PROMISC_IN_BSS)); + if (changed_flags & FIF_OTHER_BSS) { + ar->sniffer_enabled = !!(*new_flags & FIF_OTHER_BSS); WARN_ON(carl9170_set_operating_mode(ar)); } @@ -1033,7 +1032,7 @@ static void carl9170_op_configure_filter(struct ieee80211_hw *hw, if (!(*new_flags & FIF_PSPOLL)) rx_filter |= CARL9170_RX_FILTER_CTL_PSPOLL; - if (!(*new_flags & (FIF_OTHER_BSS | FIF_PROMISC_IN_BSS))) { + if (!(*new_flags & FIF_OTHER_BSS)) { rx_filter |= CARL9170_RX_FILTER_OTHER_RA; rx_filter |= CARL9170_RX_FILTER_DECRY_FAIL; } diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index b2f9521fe551..f40992969b4a 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3131,8 +3131,6 @@ static void b43_adjust_opmode(struct b43_wldev *dev) ctl |= B43_MACCTL_KEEP_BAD; if (wl->filter_flags & FIF_PLCPFAIL) ctl |= B43_MACCTL_KEEP_BADPLCP; - if (wl->filter_flags & FIF_PROMISC_IN_BSS) - ctl |= B43_MACCTL_PROMISC; if (wl->filter_flags & FIF_BCN_PRBRESP_PROMISC) ctl |= B43_MACCTL_BEACPROMISC; @@ -4310,16 +4308,14 @@ static void b43_op_configure_filter(struct ieee80211_hw *hw, goto out_unlock; } - *fflags &= FIF_PROMISC_IN_BSS | - FIF_ALLMULTI | + *fflags &= FIF_ALLMULTI | FIF_FCSFAIL | FIF_PLCPFAIL | FIF_CONTROL | FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC; - changed &= FIF_PROMISC_IN_BSS | - FIF_ALLMULTI | + changed &= FIF_ALLMULTI | FIF_FCSFAIL | FIF_PLCPFAIL | FIF_CONTROL | diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index c77b7f59505c..39d49d6cd07f 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -2055,8 +2055,6 @@ static void b43legacy_adjust_opmode(struct b43legacy_wldev *dev) ctl |= B43legacy_MACCTL_KEEP_BAD; if (wl->filter_flags & FIF_PLCPFAIL) ctl |= B43legacy_MACCTL_KEEP_BADPLCP; - if (wl->filter_flags & FIF_PROMISC_IN_BSS) - ctl |= B43legacy_MACCTL_PROMISC; if (wl->filter_flags & FIF_BCN_PRBRESP_PROMISC) ctl |= B43legacy_MACCTL_BEACPROMISC; @@ -2922,16 +2920,14 @@ static void b43legacy_op_configure_filter(struct ieee80211_hw *hw, } spin_lock_irqsave(&wl->irq_lock, flags); - *fflags &= FIF_PROMISC_IN_BSS | - FIF_ALLMULTI | + *fflags &= FIF_ALLMULTI | FIF_FCSFAIL | FIF_PLCPFAIL | FIF_CONTROL | FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC; - changed &= FIF_PROMISC_IN_BSS | - FIF_ALLMULTI | + changed &= FIF_ALLMULTI | FIF_FCSFAIL | FIF_PLCPFAIL | FIF_CONTROL | diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c index 48135063347e..b46cab250615 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c @@ -41,8 +41,7 @@ #define BRCMS_FLUSH_TIMEOUT 500 /* msec */ /* Flags we support */ -#define MAC_FILTERS (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ +#define MAC_FILTERS (FIF_ALLMULTI | \ FIF_FCSFAIL | \ FIF_CONTROL | \ FIF_OTHER_BSS | \ @@ -743,8 +742,6 @@ brcms_ops_configure_filter(struct ieee80211_hw *hw, changed_flags &= MAC_FILTERS; *total_flags &= MAC_FILTERS; - if (changed_flags & FIF_PROMISC_IN_BSS) - brcms_dbg_info(core, "FIF_PROMISC_IN_BSS\n"); if (changed_flags & FIF_ALLMULTI) brcms_dbg_info(core, "FIF_ALLMULTI\n"); if (changed_flags & FIF_FCSFAIL) diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c index 369527e27689..9728be0e704b 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/main.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c @@ -3571,7 +3571,7 @@ void brcms_c_mac_promisc(struct brcms_c_info *wlc, uint filter_flags) wlc->filter_flags = filter_flags; - if (filter_flags & (FIF_PROMISC_IN_BSS | FIF_OTHER_BSS)) + if (filter_flags & FIF_OTHER_BSS) promisc_bits |= MCTL_PROMISC; if (filter_flags & FIF_BCN_PRBRESP_PROMISC) diff --git a/drivers/net/wireless/cw1200/sta.c b/drivers/net/wireless/cw1200/sta.c index b0f65fa09428..b86500b4418f 100644 --- a/drivers/net/wireless/cw1200/sta.c +++ b/drivers/net/wireless/cw1200/sta.c @@ -578,13 +578,11 @@ void cw1200_configure_filter(struct ieee80211_hw *dev, { struct cw1200_common *priv = dev->priv; bool listening = !!(*total_flags & - (FIF_PROMISC_IN_BSS | - FIF_OTHER_BSS | + (FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC | FIF_PROBE_REQ)); - *total_flags &= FIF_PROMISC_IN_BSS | - FIF_OTHER_BSS | + *total_flags &= FIF_OTHER_BSS | FIF_FCSFAIL | FIF_BCN_PRBRESP_PROMISC | FIF_PROBE_REQ; @@ -592,14 +590,12 @@ void cw1200_configure_filter(struct ieee80211_hw *dev, down(&priv->scan.lock); mutex_lock(&priv->conf_mutex); - priv->rx_filter.promiscuous = (*total_flags & FIF_PROMISC_IN_BSS) - ? 1 : 0; + priv->rx_filter.promiscuous = 0; priv->rx_filter.bssid = (*total_flags & (FIF_OTHER_BSS | FIF_PROBE_REQ)) ? 1 : 0; priv->rx_filter.fcs = (*total_flags & FIF_FCSFAIL) ? 1 : 0; priv->disable_beacon_filter = !(*total_flags & (FIF_BCN_PRBRESP_PROMISC | - FIF_PROMISC_IN_BSS | FIF_PROBE_REQ)); if (priv->listening != listening) { priv->listening = listening; diff --git a/drivers/net/wireless/iwlegacy/3945-mac.c b/drivers/net/wireless/iwlegacy/3945-mac.c index e5665804d986..189cdf58084b 100644 --- a/drivers/net/wireless/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/iwlegacy/3945-mac.c @@ -3048,7 +3048,7 @@ il3945_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, D_MAC80211("Enter: changed: 0x%x, total: 0x%x\n", changed_flags, *total_flags); - CHK(FIF_OTHER_BSS | FIF_PROMISC_IN_BSS, RXON_FILTER_PROMISC_MSK); + CHK(FIF_OTHER_BSS, RXON_FILTER_PROMISC_MSK); CHK(FIF_CONTROL, RXON_FILTER_CTL2HOST_MSK); CHK(FIF_BCN_PRBRESP_PROMISC, RXON_FILTER_BCON_AWARE_MSK); @@ -3074,7 +3074,7 @@ il3945_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, * filters into the device. */ *total_flags &= - FIF_OTHER_BSS | FIF_ALLMULTI | FIF_PROMISC_IN_BSS | + FIF_OTHER_BSS | FIF_ALLMULTI | FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL; } diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c index 976f65fe9c38..e4b175cbeefd 100644 --- a/drivers/net/wireless/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/iwlegacy/4965-mac.c @@ -6166,7 +6166,7 @@ il4965_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, D_MAC80211("Enter: changed: 0x%x, total: 0x%x\n", changed_flags, *total_flags); - CHK(FIF_OTHER_BSS | FIF_PROMISC_IN_BSS, RXON_FILTER_PROMISC_MSK); + CHK(FIF_OTHER_BSS, RXON_FILTER_PROMISC_MSK); /* Setting _just_ RXON_FILTER_CTL2HOST_MSK causes FH errors */ CHK(FIF_CONTROL, RXON_FILTER_CTL2HOST_MSK | RXON_FILTER_PROMISC_MSK); CHK(FIF_BCN_PRBRESP_PROMISC, RXON_FILTER_BCON_AWARE_MSK); @@ -6192,7 +6192,7 @@ il4965_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, * filters into the device. */ *total_flags &= - FIF_OTHER_BSS | FIF_ALLMULTI | FIF_PROMISC_IN_BSS | + FIF_OTHER_BSS | FIF_ALLMULTI | FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL; } diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index 5abd62ed8cb4..c7bc01d92f2f 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -1061,7 +1061,7 @@ static void iwlagn_configure_filter(struct ieee80211_hw *hw, IWL_DEBUG_MAC80211(priv, "Enter: changed: 0x%x, total: 0x%x\n", changed_flags, *total_flags); - CHK(FIF_OTHER_BSS | FIF_PROMISC_IN_BSS, RXON_FILTER_PROMISC_MSK); + CHK(FIF_OTHER_BSS, RXON_FILTER_PROMISC_MSK); /* Setting _just_ RXON_FILTER_CTL2HOST_MSK causes FH errors */ CHK(FIF_CONTROL, RXON_FILTER_CTL2HOST_MSK | RXON_FILTER_PROMISC_MSK); CHK(FIF_BCN_PRBRESP_PROMISC, RXON_FILTER_BCON_AWARE_MSK); @@ -1088,7 +1088,7 @@ static void iwlagn_configure_filter(struct ieee80211_hw *hw, * since we currently do not support programming multicast * filters into the device. */ - *total_flags &= FIF_OTHER_BSS | FIF_ALLMULTI | FIF_PROMISC_IN_BSS | + *total_flags &= FIF_OTHER_BSS | FIF_ALLMULTI | FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL; } diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c index ed02e4bf2c26..1bdf18674fb8 100644 --- a/drivers/net/wireless/libertas_tf/main.c +++ b/drivers/net/wireless/libertas_tf/main.c @@ -439,7 +439,7 @@ static u64 lbtf_op_prepare_multicast(struct ieee80211_hw *hw, return mc_count; } -#define SUPPORTED_FIF_FLAGS (FIF_PROMISC_IN_BSS | FIF_ALLMULTI) +#define SUPPORTED_FIF_FLAGS FIF_ALLMULTI static void lbtf_op_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *new_flags, @@ -458,10 +458,7 @@ static void lbtf_op_configure_filter(struct ieee80211_hw *hw, return; } - if (*new_flags & (FIF_PROMISC_IN_BSS)) - priv->mac_control |= CMD_ACT_MAC_PROMISCUOUS_ENABLE; - else - priv->mac_control &= ~CMD_ACT_MAC_PROMISCUOUS_ENABLE; + priv->mac_control &= ~CMD_ACT_MAC_PROMISCUOUS_ENABLE; if (*new_flags & (FIF_ALLMULTI) || multicast > MRVDRV_MAX_MULTICAST_LIST_SIZE) { priv->mac_control |= CMD_ACT_MAC_ALL_MULTICAST_ENABLE; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 07626cc21d6e..4994269f4ac1 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1554,8 +1554,6 @@ static void mac80211_hwsim_configure_filter(struct ieee80211_hw *hw, wiphy_debug(hw->wiphy, "%s\n", __func__); data->rx_filter = 0; - if (*total_flags & FIF_PROMISC_IN_BSS) - data->rx_filter |= FIF_PROMISC_IN_BSS; if (*total_flags & FIF_ALLMULTI) data->rx_filter |= FIF_ALLMULTI; diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index 95921167b53f..b71fc74d14ab 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -5192,7 +5192,7 @@ mwl8k_configure_filter_sniffer(struct ieee80211_hw *hw, priv->sniffer_enabled = true; } - *total_flags &= FIF_PROMISC_IN_BSS | FIF_ALLMULTI | + *total_flags &= FIF_ALLMULTI | FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL | FIF_OTHER_BSS; diff --git a/drivers/net/wireless/p54/fwio.c b/drivers/net/wireless/p54/fwio.c index 275408eaf95e..257a9eadd595 100644 --- a/drivers/net/wireless/p54/fwio.c +++ b/drivers/net/wireless/p54/fwio.c @@ -351,8 +351,7 @@ int p54_setup_mac(struct p54_common *priv) * "TRANSPARENT and PROMISCUOUS are mutually exclusive" * STSW45X0C LMAC API - page 12 */ - if (((priv->filter_flags & FIF_PROMISC_IN_BSS) || - (priv->filter_flags & FIF_OTHER_BSS)) && + if (priv->filter_flags & FIF_OTHER_BSS && (mode != P54_FILTER_TYPE_PROMISCUOUS)) mode |= P54_FILTER_TYPE_TRANSPARENT; } else { diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index e79674f73dc5..2947ad21053c 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -395,13 +395,11 @@ static void p54_configure_filter(struct ieee80211_hw *dev, { struct p54_common *priv = dev->priv; - *total_flags &= FIF_PROMISC_IN_BSS | - FIF_ALLMULTI | - FIF_OTHER_BSS; + *total_flags &= FIF_ALLMULTI | FIF_OTHER_BSS; priv->filter_flags = *total_flags; - if (changed_flags & (FIF_PROMISC_IN_BSS | FIF_OTHER_BSS)) + if (changed_flags & FIF_OTHER_BSS) p54_setup_mac(priv); if (changed_flags & FIF_ALLMULTI || multicast) diff --git a/drivers/net/wireless/rt2x00/rt2400pci.c b/drivers/net/wireless/rt2x00/rt2400pci.c index bdf5590ba304..7da138892026 100644 --- a/drivers/net/wireless/rt2x00/rt2400pci.c +++ b/drivers/net/wireless/rt2x00/rt2400pci.c @@ -273,10 +273,8 @@ static void rt2400pci_config_filter(struct rt2x00_dev *rt2x00dev, !(filter_flags & FIF_PLCPFAIL)); rt2x00_set_field32(®, RXCSR0_DROP_CONTROL, !(filter_flags & FIF_CONTROL)); - rt2x00_set_field32(®, RXCSR0_DROP_NOT_TO_ME, - !(filter_flags & FIF_PROMISC_IN_BSS)); + rt2x00_set_field32(®, RXCSR0_DROP_NOT_TO_ME, 1); rt2x00_set_field32(®, RXCSR0_DROP_TODS, - !(filter_flags & FIF_PROMISC_IN_BSS) && !rt2x00dev->intf_ap_count); rt2x00_set_field32(®, RXCSR0_DROP_VERSION_ERROR, 1); rt2x00mmio_register_write(rt2x00dev, RXCSR0, reg); diff --git a/drivers/net/wireless/rt2x00/rt2500pci.c b/drivers/net/wireless/rt2x00/rt2500pci.c index 79f4fe65a119..4ea53aa9ede3 100644 --- a/drivers/net/wireless/rt2x00/rt2500pci.c +++ b/drivers/net/wireless/rt2x00/rt2500pci.c @@ -274,10 +274,8 @@ static void rt2500pci_config_filter(struct rt2x00_dev *rt2x00dev, !(filter_flags & FIF_PLCPFAIL)); rt2x00_set_field32(®, RXCSR0_DROP_CONTROL, !(filter_flags & FIF_CONTROL)); - rt2x00_set_field32(®, RXCSR0_DROP_NOT_TO_ME, - !(filter_flags & FIF_PROMISC_IN_BSS)); + rt2x00_set_field32(®, RXCSR0_DROP_NOT_TO_ME, 1); rt2x00_set_field32(®, RXCSR0_DROP_TODS, - !(filter_flags & FIF_PROMISC_IN_BSS) && !rt2x00dev->intf_ap_count); rt2x00_set_field32(®, RXCSR0_DROP_VERSION_ERROR, 1); rt2x00_set_field32(®, RXCSR0_DROP_MCAST, diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c index 05c64597838d..237bbb54c7a8 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/rt2x00/rt2500usb.c @@ -434,10 +434,8 @@ static void rt2500usb_config_filter(struct rt2x00_dev *rt2x00dev, !(filter_flags & FIF_PLCPFAIL)); rt2x00_set_field16(®, TXRX_CSR2_DROP_CONTROL, !(filter_flags & FIF_CONTROL)); - rt2x00_set_field16(®, TXRX_CSR2_DROP_NOT_TO_ME, - !(filter_flags & FIF_PROMISC_IN_BSS)); + rt2x00_set_field16(®, TXRX_CSR2_DROP_NOT_TO_ME, 1); rt2x00_set_field16(®, TXRX_CSR2_DROP_TODS, - !(filter_flags & FIF_PROMISC_IN_BSS) && !rt2x00dev->intf_ap_count); rt2x00_set_field16(®, TXRX_CSR2_DROP_VERSION_ERROR, 1); rt2x00_set_field16(®, TXRX_CSR2_DROP_MULTICAST, diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index be2d54f257b1..09135192c516 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -1513,8 +1513,7 @@ void rt2800_config_filter(struct rt2x00_dev *rt2x00dev, !(filter_flags & FIF_FCSFAIL)); rt2x00_set_field32(®, RX_FILTER_CFG_DROP_PHY_ERROR, !(filter_flags & FIF_PLCPFAIL)); - rt2x00_set_field32(®, RX_FILTER_CFG_DROP_NOT_TO_ME, - !(filter_flags & FIF_PROMISC_IN_BSS)); + rt2x00_set_field32(®, RX_FILTER_CFG_DROP_NOT_TO_ME, 1); rt2x00_set_field32(®, RX_FILTER_CFG_DROP_NOT_MY_BSSD, 0); rt2x00_set_field32(®, RX_FILTER_CFG_DROP_VER_ERROR, 1); rt2x00_set_field32(®, RX_FILTER_CFG_DROP_MULTICAST, diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index 300876df056f..1b8a459a412b 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -359,8 +359,7 @@ void rt2x00mac_configure_filter(struct ieee80211_hw *hw, FIF_PLCPFAIL | FIF_CONTROL | FIF_PSPOLL | - FIF_OTHER_BSS | - FIF_PROMISC_IN_BSS; + FIF_OTHER_BSS; /* * Apply some rules to the filters: @@ -369,9 +368,6 @@ void rt2x00mac_configure_filter(struct ieee80211_hw *hw, * - Multicast filter seems to kill broadcast traffic so never use it. */ *total_flags |= FIF_ALLMULTI; - if (*total_flags & FIF_OTHER_BSS || - *total_flags & FIF_PROMISC_IN_BSS) - *total_flags |= FIF_PROMISC_IN_BSS | FIF_OTHER_BSS; /* * If the device has a single filter for all control frames, diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c index 819455009fe4..c8a967247a9a 100644 --- a/drivers/net/wireless/rt2x00/rt61pci.c +++ b/drivers/net/wireless/rt2x00/rt61pci.c @@ -530,10 +530,8 @@ static void rt61pci_config_filter(struct rt2x00_dev *rt2x00dev, !(filter_flags & FIF_PLCPFAIL)); rt2x00_set_field32(®, TXRX_CSR0_DROP_CONTROL, !(filter_flags & (FIF_CONTROL | FIF_PSPOLL))); - rt2x00_set_field32(®, TXRX_CSR0_DROP_NOT_TO_ME, - !(filter_flags & FIF_PROMISC_IN_BSS)); + rt2x00_set_field32(®, TXRX_CSR0_DROP_NOT_TO_ME, 1); rt2x00_set_field32(®, TXRX_CSR0_DROP_TO_DS, - !(filter_flags & FIF_PROMISC_IN_BSS) && !rt2x00dev->intf_ap_count); rt2x00_set_field32(®, TXRX_CSR0_DROP_VERSION_ERROR, 1); rt2x00_set_field32(®, TXRX_CSR0_DROP_MULTICAST, diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c index a5458cf01fb2..65ce3afb888a 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.c +++ b/drivers/net/wireless/rt2x00/rt73usb.c @@ -480,10 +480,8 @@ static void rt73usb_config_filter(struct rt2x00_dev *rt2x00dev, !(filter_flags & FIF_PLCPFAIL)); rt2x00_set_field32(®, TXRX_CSR0_DROP_CONTROL, !(filter_flags & (FIF_CONTROL | FIF_PSPOLL))); - rt2x00_set_field32(®, TXRX_CSR0_DROP_NOT_TO_ME, - !(filter_flags & FIF_PROMISC_IN_BSS)); + rt2x00_set_field32(®, TXRX_CSR0_DROP_NOT_TO_ME, 1); rt2x00_set_field32(®, TXRX_CSR0_DROP_TO_DS, - !(filter_flags & FIF_PROMISC_IN_BSS) && !rt2x00dev->intf_ap_count); rt2x00_set_field32(®, TXRX_CSR0_DROP_VERSION_ERROR, 1); rt2x00_set_field32(®, TXRX_CSR0_DROP_MULTICAST, diff --git a/drivers/net/wireless/rtlwifi/core.h b/drivers/net/wireless/rtlwifi/core.h index 82733c6b8c46..782ac2fc4b28 100644 --- a/drivers/net/wireless/rtlwifi/core.h +++ b/drivers/net/wireless/rtlwifi/core.h @@ -27,8 +27,7 @@ #define __RTL_CORE_H__ #define RTL_SUPPORTED_FILTERS \ - (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | FIF_CONTROL | \ + (FIF_ALLMULTI | FIF_CONTROL | \ FIF_OTHER_BSS | \ FIF_FCSFAIL | \ FIF_BCN_PRBRESP_PROMISC) diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c index 5d54d16a59e7..f238ee54226c 100644 --- a/drivers/net/wireless/ti/wl1251/main.c +++ b/drivers/net/wireless/ti/wl1251/main.c @@ -763,8 +763,7 @@ static u64 wl1251_op_prepare_multicast(struct ieee80211_hw *hw, return (u64)(unsigned long)fp; } -#define WL1251_SUPPORTED_FILTERS (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ +#define WL1251_SUPPORTED_FILTERS (FIF_ALLMULTI | \ FIF_FCSFAIL | \ FIF_BCN_PRBRESP_PROMISC | \ FIF_CONTROL | \ @@ -795,10 +794,6 @@ static void wl1251_op_configure_filter(struct ieee80211_hw *hw, wl->rx_config = WL1251_DEFAULT_RX_CONFIG; wl->rx_filter = WL1251_DEFAULT_RX_FILTER; - if (*total & FIF_PROMISC_IN_BSS) { - wl->rx_config |= CFG_BSSID_FILTER_EN; - wl->rx_config |= CFG_RX_ALL_GOOD; - } if (*total & FIF_ALLMULTI) /* * CFG_MC_FILTER_EN in rx_config needs to be 0 to receive @@ -825,7 +820,7 @@ static void wl1251_op_configure_filter(struct ieee80211_hw *hw, if (ret < 0) goto out; - if (*total & FIF_ALLMULTI || *total & FIF_PROMISC_IN_BSS) + if (*total & FIF_ALLMULTI) ret = wl1251_acx_group_address_tbl(wl, false, NULL, 0); else if (fp) ret = wl1251_acx_group_address_tbl(wl, fp->enabled, diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 34cef10aefc5..257b9d5821a6 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -3175,8 +3175,7 @@ static u64 wl1271_op_prepare_multicast(struct ieee80211_hw *hw, return (u64)(unsigned long)fp; } -#define WL1271_SUPPORTED_FILTERS (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ +#define WL1271_SUPPORTED_FILTERS (FIF_ALLMULTI | \ FIF_FCSFAIL | \ FIF_BCN_PRBRESP_PROMISC | \ FIF_CONTROL | \ diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index e7af261e9198..89b6f69f09c8 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -1230,7 +1230,7 @@ static u64 zd_op_prepare_multicast(struct ieee80211_hw *hw, } #define SUPPORTED_FIF_FLAGS \ - (FIF_PROMISC_IN_BSS | FIF_ALLMULTI | FIF_FCSFAIL | FIF_CONTROL | \ + (FIF_ALLMULTI | FIF_FCSFAIL | FIF_CONTROL | \ FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC) static void zd_op_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, @@ -1256,7 +1256,7 @@ static void zd_op_configure_filter(struct ieee80211_hw *hw, * we will have some issue with IPv6 which uses multicast for link * layer address resolution. */ - if (*new_flags & (FIF_PROMISC_IN_BSS | FIF_ALLMULTI)) + if (*new_flags & FIF_ALLMULTI) zd_mc_add_all(&hash); spin_lock_irqsave(&mac->lock, flags); diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index 4bb4f8ee4132..6b2f813afb52 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -1516,21 +1516,12 @@ static void vnt_configure(struct ieee80211_hw *hw, struct vnt_private *priv = hw->priv; u8 rx_mode = 0; - *total_flags &= FIF_ALLMULTI | FIF_OTHER_BSS | FIF_PROMISC_IN_BSS | - FIF_BCN_PRBRESP_PROMISC; + *total_flags &= FIF_ALLMULTI | FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC; VNSvInPortB(priv->PortOffset + MAC_REG_RCR, &rx_mode); dev_dbg(&priv->pcid->dev, "rx mode in = %x\n", rx_mode); - if (changed_flags & FIF_PROMISC_IN_BSS) { - /* unconditionally log net taps */ - if (*total_flags & FIF_PROMISC_IN_BSS) - rx_mode |= RCR_UNICAST; - else - rx_mode &= ~RCR_UNICAST; - } - if (changed_flags & FIF_ALLMULTI) { if (*total_flags & FIF_ALLMULTI) { unsigned long flags; diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index ab3ab84cb0a7..0d97b6457ead 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -785,8 +785,7 @@ static void vnt_configure(struct ieee80211_hw *hw, u8 rx_mode = 0; int rc; - *total_flags &= FIF_ALLMULTI | FIF_OTHER_BSS | FIF_PROMISC_IN_BSS | - FIF_BCN_PRBRESP_PROMISC; + *total_flags &= FIF_ALLMULTI | FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC; rc = vnt_control_in(priv, MESSAGE_TYPE_READ, MAC_REG_RCR, MESSAGE_REQUEST_MACREG, sizeof(u8), &rx_mode); @@ -796,14 +795,6 @@ static void vnt_configure(struct ieee80211_hw *hw, dev_dbg(&priv->usb->dev, "rx mode in = %x\n", rx_mode); - if (changed_flags & FIF_PROMISC_IN_BSS) { - /* unconditionally log net taps */ - if (*total_flags & FIF_PROMISC_IN_BSS) - rx_mode |= RCR_UNICAST; - else - rx_mode &= ~RCR_UNICAST; - } - if (changed_flags & FIF_ALLMULTI) { if (*total_flags & FIF_ALLMULTI) { if (priv->mc_list_count > 2) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0af7464ef57b..4feb74da5eaf 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2507,10 +2507,6 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * stack. It is always safe to pass more frames than requested, * but this has negative impact on power consumption. * - * @FIF_PROMISC_IN_BSS: promiscuous mode within your BSS, - * think of the BSS as your network segment and then this corresponds - * to the regular ethernet device promiscuous mode. - * * @FIF_ALLMULTI: pass all multicast frames, this is used if requested * by the user or if the hardware is not capable of filtering by * multicast address. @@ -2527,8 +2523,8 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * mac80211 needs to do and the amount of CPU wakeups, so you should * honour this flag if possible. * - * @FIF_CONTROL: pass control frames (except for PS Poll), if PROMISC_IN_BSS - * is not set then only those addressed to this station. + * @FIF_CONTROL: pass control frames (except for PS Poll) addressed to this + * station * * @FIF_OTHER_BSS: pass frames destined to other BSSes * @@ -2538,7 +2534,6 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * @FIF_PROBE_REQ: pass probe request frames */ enum ieee80211_filter_flags { - FIF_PROMISC_IN_BSS = 1<<0, FIF_ALLMULTI = 1<<1, FIF_FCSFAIL = 1<<2, FIF_PLCPFAIL = 1<<3, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 556051f68ad7..7d12ba5a4a36 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -725,7 +725,6 @@ struct ieee80211_if_mesh { * enum ieee80211_sub_if_data_flags - virtual interface flags * * @IEEE80211_SDATA_ALLMULTI: interface wants all multicast packets - * @IEEE80211_SDATA_PROMISC: interface is promisc * @IEEE80211_SDATA_OPERATING_GMODE: operating in G-only mode * @IEEE80211_SDATA_DONT_BRIDGE_PACKETS: bridge packets between * associated stations and deliver multicast frames both @@ -735,7 +734,6 @@ struct ieee80211_if_mesh { */ enum ieee80211_sub_if_data_flags { IEEE80211_SDATA_ALLMULTI = BIT(0), - IEEE80211_SDATA_PROMISC = BIT(1), IEEE80211_SDATA_OPERATING_GMODE = BIT(2), IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3), IEEE80211_SDATA_DISCONNECT_RESUME = BIT(4), @@ -1211,8 +1209,8 @@ struct ieee80211_local { atomic_t agg_queue_stop[IEEE80211_MAX_QUEUES]; - /* number of interfaces with corresponding IFF_ flags */ - atomic_t iff_allmultis, iff_promiscs; + /* number of interfaces with allmulti RX */ + atomic_t iff_allmultis; struct rate_control_ref *rate_ctrl; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b4ac596a7cb7..7791a08a560a 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -697,9 +697,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) if (sdata->flags & IEEE80211_SDATA_ALLMULTI) atomic_inc(&local->iff_allmultis); - if (sdata->flags & IEEE80211_SDATA_PROMISC) - atomic_inc(&local->iff_promiscs); - if (coming_up) local->open_count++; @@ -827,13 +824,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, WARN_ON_ONCE((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) || (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1)); - /* don't count this interface for promisc/allmulti while it is down */ + /* don't count this interface for allmulti while it is down */ if (sdata->flags & IEEE80211_SDATA_ALLMULTI) atomic_dec(&local->iff_allmultis); - if (sdata->flags & IEEE80211_SDATA_PROMISC) - atomic_dec(&local->iff_promiscs); - if (sdata->vif.type == NL80211_IFTYPE_AP) { local->fif_pspoll--; local->fif_probe_req--; @@ -1047,12 +1041,10 @@ static void ieee80211_set_multicast_list(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - int allmulti, promisc, sdata_allmulti, sdata_promisc; + int allmulti, sdata_allmulti; allmulti = !!(dev->flags & IFF_ALLMULTI); - promisc = !!(dev->flags & IFF_PROMISC); sdata_allmulti = !!(sdata->flags & IEEE80211_SDATA_ALLMULTI); - sdata_promisc = !!(sdata->flags & IEEE80211_SDATA_PROMISC); if (allmulti != sdata_allmulti) { if (dev->flags & IFF_ALLMULTI) @@ -1062,13 +1054,6 @@ static void ieee80211_set_multicast_list(struct net_device *dev) sdata->flags ^= IEEE80211_SDATA_ALLMULTI; } - if (promisc != sdata_promisc) { - if (dev->flags & IFF_PROMISC) - atomic_inc(&local->iff_promiscs); - else - atomic_dec(&local->iff_promiscs); - sdata->flags ^= IEEE80211_SDATA_PROMISC; - } spin_lock_bh(&local->filter_lock); __hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len); spin_unlock_bh(&local->filter_lock); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ea31f119234b..b144de971366 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -41,9 +41,6 @@ void ieee80211_configure_filter(struct ieee80211_local *local) unsigned int changed_flags; unsigned int new_flags = 0; - if (atomic_read(&local->iff_promiscs)) - new_flags |= FIF_PROMISC_IN_BSS; - if (atomic_read(&local->iff_allmultis)) new_flags |= FIF_ALLMULTI; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6e3b564b6dea..99fb5d80e60a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2331,11 +2331,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_frames); ieee80211_add_pending_skb(local, fwd_skb); out: - if (is_multicast_ether_addr(hdr->addr1) || - sdata->dev->flags & IFF_PROMISC) + if (is_multicast_ether_addr(hdr->addr1)) return RX_CONTINUE; - else - return RX_DROP_MONITOR; + return RX_DROP_MONITOR; } #endif @@ -3266,12 +3264,8 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx, if (!bssid && !sdata->u.mgd.use_4addr) return false; if (!multicast && - !ether_addr_equal(sdata->vif.addr, hdr->addr1)) { - if (!(sdata->dev->flags & IFF_PROMISC) || - sdata->u.mgd.use_4addr) - return false; - status->rx_flags &= ~IEEE80211_RX_RA_MATCH; - } + !ether_addr_equal(sdata->vif.addr, hdr->addr1)) + return false; break; case NL80211_IFTYPE_ADHOC: if (!bssid) @@ -3285,9 +3279,7 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx, return false; } else if (!multicast && !ether_addr_equal(sdata->vif.addr, hdr->addr1)) { - if (!(sdata->dev->flags & IFF_PROMISC)) - return false; - status->rx_flags &= ~IEEE80211_RX_RA_MATCH; + return false; } else if (!rx->sta) { int rate_idx; if (status->flag & (RX_FLAG_HT | RX_FLAG_VHT)) @@ -3309,12 +3301,7 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx, } else if (!multicast && !ether_addr_equal(sdata->dev->dev_addr, hdr->addr1)) { - /* if we are in promisc mode we also accept - * packets not destined for us - */ - if (!(sdata->dev->flags & IFF_PROMISC)) - return false; - rx->flags &= ~IEEE80211_RX_RA_MATCH; + return false; } else if (!rx->sta) { int rate_idx; if (status->flag & RX_FLAG_HT) @@ -3327,12 +3314,8 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx, break; case NL80211_IFTYPE_MESH_POINT: if (!multicast && - !ether_addr_equal(sdata->vif.addr, hdr->addr1)) { - if (!(sdata->dev->flags & IFF_PROMISC)) - return false; - - status->rx_flags &= ~IEEE80211_RX_RA_MATCH; - } + !ether_addr_equal(sdata->vif.addr, hdr->addr1)) + return false; break; case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_AP: -- cgit v1.2.3 From 6382246e895fa0ae5162de7c1e5566b9719bdd26 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 20 Apr 2015 22:53:37 +0300 Subject: mac80211: notify the driver upon BAR Rx When we receive a BAR, this typically means that our peer doesn't hear our Block-Acks or that we can't hear its frames. Either way, it is a good indication that the link is in a bad condition. This is why it can serve as a probe to the driver. Use the event_callback callback for this. Since more events with the same data will be added in the feature, the structure that describes the data attached to the event is called in a generic name: ieee80211_ba_event. This also means that from now on, the event_callback can't sleep. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/mac80211.h | 20 ++++++++++++++++++-- net/mac80211/rx.c | 8 ++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 4feb74da5eaf..0c3983b74b3e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -337,10 +337,12 @@ enum ieee80211_bss_change { * enum ieee80211_event_type - event to be notified to the low level driver * @RSSI_EVENT: AP's rssi crossed the a threshold set by the driver. * @MLME_EVENT: event related to MLME + * @BAR_RX_EVENT: a BAR was received */ enum ieee80211_event_type { RSSI_EVENT, MLME_EVENT, + BAR_RX_EVENT, }; /** @@ -399,18 +401,32 @@ struct ieee80211_mlme_event { u16 reason; }; +/** + * struct ieee80211_ba_event - data attached for BlockAck related events + * @sta: pointer to the &ieee80211_sta to which this event relates + * @tid: the tid + * @ssn: the starting sequence number + */ +struct ieee80211_ba_event { + struct ieee80211_sta *sta; + u16 tid; + u16 ssn; +}; + /** * struct ieee80211_event - event to be sent to the driver * @type: The event itself. See &enum ieee80211_event_type. * @rssi: relevant if &type is %RSSI_EVENT * @mlme: relevant if &type is %AUTH_EVENT - * @u: union holding the above two fields + * @ba: relevant if &type is %BAR_RX_EVENT + * @u:union holding the fields above */ struct ieee80211_event { enum ieee80211_event_type type; union { struct ieee80211_rssi_event rssi; struct ieee80211_mlme_event mlme; + struct ieee80211_ba_event ba; } u; }; @@ -3001,7 +3017,7 @@ enum ieee80211_reconfig_type { * The callback can sleep. * @event_callback: Notify driver about any event in mac80211. See * &enum ieee80211_event_type for the different types. - * The callback can sleep. + * The callback must be atomic. * * @release_buffered_frames: Release buffered frames according to the given * parameters. In the case where the driver buffers some frames for diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b69a071d7679..ac6bfa999416 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2427,6 +2427,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) struct { __le16 control, start_seq_num; } __packed bar_data; + struct ieee80211_event event = { + .type = BAR_RX_EVENT, + }; if (!rx->sta) return RX_DROP_MONITOR; @@ -2442,6 +2445,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) return RX_DROP_MONITOR; start_seq_num = le16_to_cpu(bar_data.start_seq_num) >> 4; + event.u.ba.tid = tid; + event.u.ba.ssn = start_seq_num; + event.u.ba.sta = &rx->sta->sta; /* reset session timer */ if (tid_agg_rx->timeout) @@ -2454,6 +2460,8 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) start_seq_num, frames); spin_unlock(&tid_agg_rx->reorder_lock); + drv_event_callback(rx->local, rx->sdata, &event); + kfree_skb(skb); return RX_QUEUED; } -- cgit v1.2.3 From b497de63ad5dcdae999c14444c4e7f53fd60119c Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 20 Apr 2015 22:53:38 +0300 Subject: mac80211: notify the driver on reordering buffer timeout When frames time out in the reordering buffer, it is a good indication that something went wrong and the driver may want to know about that to take action or trigger debug flows. It is pointless to notify the driver about each frame that is released. Notify each time the timer fires. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/mac80211.h | 8 ++++++-- net/mac80211/rx.c | 9 +++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0c3983b74b3e..331429898ea1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -338,11 +338,15 @@ enum ieee80211_bss_change { * @RSSI_EVENT: AP's rssi crossed the a threshold set by the driver. * @MLME_EVENT: event related to MLME * @BAR_RX_EVENT: a BAR was received + * @BA_FRAME_TIMEOUT: Frames were released from the reordering buffer because + * they timed out. This won't be called for each frame released, but only + * once each time the timeout triggers. */ enum ieee80211_event_type { RSSI_EVENT, MLME_EVENT, BAR_RX_EVENT, + BA_FRAME_TIMEOUT, }; /** @@ -405,7 +409,7 @@ struct ieee80211_mlme_event { * struct ieee80211_ba_event - data attached for BlockAck related events * @sta: pointer to the &ieee80211_sta to which this event relates * @tid: the tid - * @ssn: the starting sequence number + * @ssn: the starting sequence number (for %BAR_RX_EVENT) */ struct ieee80211_ba_event { struct ieee80211_sta *sta; @@ -418,7 +422,7 @@ struct ieee80211_ba_event { * @type: The event itself. See &enum ieee80211_event_type. * @rssi: relevant if &type is %RSSI_EVENT * @mlme: relevant if &type is %AUTH_EVENT - * @ba: relevant if &type is %BAR_RX_EVENT + * @ba: relevant if &type is %BAR_RX_EVENT or %BA_FRAME_TIMEOUT * @u:union holding the fields above */ struct ieee80211_event { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ac6bfa999416..e08253547f42 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3229,6 +3229,15 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) ieee80211_sta_reorder_release(sta->sdata, tid_agg_rx, &frames); spin_unlock(&tid_agg_rx->reorder_lock); + if (!skb_queue_empty(&frames)) { + struct ieee80211_event event = { + .type = BA_FRAME_TIMEOUT, + .u.ba.tid = tid, + .u.ba.sta = &sta->sta, + }; + drv_event_callback(rx.local, rx.sdata, &event); + } + ieee80211_rx_handlers(&rx, &frames); } -- cgit v1.2.3 From 4b32b5ad31a68a661f761c76dfd0d076636d3ae9 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 28 Apr 2015 13:03:06 -0700 Subject: ipv6: Stop rt6_info from using inet_peer's metrics inet_peer is indexed by the dst address alone. However, the fib6 tree could have multiple routing entries (rt6_info) for the same dst. For example, 1. A /128 dst via multiple gateways. 2. A RTF_CACHE route cloned from a /128 route. In the above cases, all of them will share the same metrics and step on each other. This patch will steer away from inet_peer's metrics and use dst_cow_metrics_generic() for everything. Change Highlights: 1. Remove rt6_cow_metrics() which currently acquires metrics from inet_peer for DST_HOST route (i.e. /128 route). 2. Add rt6i_pmtu to take care of the pmtu update to avoid creating a full size metrics just to override the RTAX_MTU. 3. After (2), the RTF_CACHE route can also share the metrics with its dst.from route, by: dst_init_metrics(&cache_rt->dst, dst_metrics_ptr(cache_rt->dst.from), true); 4. Stop creating RTF_CACHE route by cloning another RTF_CACHE route. Instead, directly clone from rt->dst. [ Currently, cloning from another RTF_CACHE is only possible during rt6_do_redirect(). Also, the old clone is removed from the tree immediately after the new clone is added. ] In case of cloning from an older redirect RTF_CACHE, it should work as before. In case of cloning from an older pmtu RTF_CACHE, this patch will forget the pmtu and re-learn it (if there is any) from the redirected route. The _rt6i_peer and DST_METRICS_FORCE_OVERWRITE will be removed in the next cleanup patch. Signed-off-by: Martin KaFai Lau Reviewed-by: Hannes Frederic Sowa Cc: Steffen Klassert Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 10 +---- net/ipv6/route.c | 102 +++++++++++++++++++++++++++++--------------------- 2 files changed, 60 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 20e80fa7bbdd..7383a8cc8f84 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -124,6 +124,7 @@ struct rt6_info { unsigned long _rt6i_peer; u32 rt6i_metric; + u32 rt6i_pmtu; /* more non-fragment space at head required */ unsigned short rt6i_nfheader_len; u8 rt6i_protocol; @@ -189,15 +190,6 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) rt0->rt6i_flags |= RTF_EXPIRES; } -static inline void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) -{ - struct dst_entry *new = (struct dst_entry *) from; - - rt->rt6i_flags &= ~RTF_EXPIRES; - dst_hold(new); - rt->dst.from = new; -} - static inline void ip6_rt_put(struct rt6_info *rt) { /* dst_release() accepts a NULL parameter. diff --git a/net/ipv6/route.c b/net/ipv6/route.c index aa4cfdd13f28..4d6eb5d90cb4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -92,6 +92,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); +static void rt6_dst_from_metrics_check(struct rt6_info *rt); static int rt6_score_route(struct rt6_info *rt, int oif, int strict); #ifdef CONFIG_IPV6_ROUTE_INFO @@ -136,33 +137,12 @@ static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt) static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) { - struct rt6_info *rt = (struct rt6_info *) dst; - struct inet_peer *peer; - u32 *p = NULL; + struct rt6_info *rt = (struct rt6_info *)dst; - if (!(rt->dst.flags & DST_HOST)) + if (rt->rt6i_flags & RTF_CACHE) + return NULL; + else return dst_cow_metrics_generic(dst, old); - - peer = rt6_get_peer_create(rt); - if (peer) { - u32 *old_p = __DST_METRICS_PTR(old); - unsigned long prev, new; - - p = peer->metrics; - if (inet_metrics_new(peer) || - (old & DST_METRICS_FORCE_OVERWRITE)) - memcpy(p, old_p, sizeof(u32) * RTAX_MAX); - - new = (unsigned long) p; - prev = cmpxchg(&dst->_metrics, old, new); - - if (prev != old) { - p = __DST_METRICS_PTR(prev); - if (prev & DST_METRICS_READ_ONLY) - p = NULL; - } - } - return p; } static inline const void *choose_neigh_daddr(struct rt6_info *rt, @@ -323,8 +303,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) struct inet6_dev *idev = rt->rt6i_idev; struct dst_entry *from = dst->from; - if (!(rt->dst.flags & DST_HOST)) - dst_destroy_metrics_generic(dst); + dst_destroy_metrics_generic(dst); if (idev) { rt->rt6i_idev = NULL; @@ -333,11 +312,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) dst->from = NULL; dst_release(from); - - if (rt6_has_peer(rt)) { - struct inet_peer *peer = rt6_peer_ptr(rt); - inet_putpeer(peer); - } } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -1003,6 +977,7 @@ redo_rt6_select: goto redo_fib6_lookup_lock; out2: + rt6_dst_from_metrics_check(rt); rt->dst.lastuse = jiffies; rt->dst.__use++; @@ -1111,6 +1086,13 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori * Destination cache support functions */ +static void rt6_dst_from_metrics_check(struct rt6_info *rt) +{ + if (rt->dst.from && + dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from)) + dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true); +} + static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) { struct rt6_info *rt; @@ -1127,6 +1109,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) if (rt6_check_expired(rt)) return NULL; + rt6_dst_from_metrics_check(rt); + return dst; } @@ -1179,7 +1163,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - dst_metric_set(dst, RTAX_MTU, mtu); + rt6->rt6i_pmtu = mtu; rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); } } @@ -1359,9 +1343,14 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) static unsigned int ip6_mtu(const struct dst_entry *dst) { + const struct rt6_info *rt = (const struct rt6_info *)dst; + unsigned int mtu = rt->rt6i_pmtu; struct inet6_dev *idev; - unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + if (mtu) + goto out; + + mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) goto out; @@ -1947,12 +1936,27 @@ out: * Misc support functions */ +static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) +{ + BUG_ON(from->dst.from); + + rt->rt6i_flags &= ~RTF_EXPIRES; + dst_hold(&from->dst); + rt->dst.from = &from->dst; + dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true); +} + static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, const struct in6_addr *dest) { struct net *net = dev_net(ort->dst.dev); - struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0, - ort->rt6i_table); + struct rt6_info *rt; + + if (ort->rt6i_flags & RTF_CACHE) + ort = (struct rt6_info *)ort->dst.from; + + rt = ip6_dst_alloc(net, ort->dst.dev, 0, + ort->rt6i_table); if (rt) { rt->dst.input = ort->dst.input; @@ -1961,7 +1965,6 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, rt->rt6i_dst.addr = *dest; rt->rt6i_dst.plen = 128; - dst_copy_metrics(&rt->dst, &ort->dst); rt->dst.error = ort->dst.error; rt->rt6i_idev = ort->rt6i_idev; if (rt->rt6i_idev) @@ -2393,11 +2396,20 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) PMTU discouvery. */ if (rt->dst.dev == arg->dev && - !dst_metric_locked(&rt->dst, RTAX_MTU) && - (dst_mtu(&rt->dst) >= arg->mtu || - (dst_mtu(&rt->dst) < arg->mtu && - dst_mtu(&rt->dst) == idev->cnf.mtu6))) { - dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); + !dst_metric_locked(&rt->dst, RTAX_MTU)) { + if (rt->rt6i_flags & RTF_CACHE) { + /* For RTF_CACHE with rt6i_pmtu == 0 + * (i.e. a redirected route), + * the metrics of its rt->dst.from has already + * been updated. + */ + if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu) + rt->rt6i_pmtu = arg->mtu; + } else if (dst_mtu(&rt->dst) >= arg->mtu || + (dst_mtu(&rt->dst) < arg->mtu && + dst_mtu(&rt->dst) == idev->cnf.mtu6)) { + dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); + } } return 0; } @@ -2627,6 +2639,7 @@ static int rt6_fill_node(struct net *net, int iif, int type, u32 portid, u32 seq, int prefix, int nowait, unsigned int flags) { + u32 metrics[RTAX_MAX]; struct rtmsg *rtm; struct nlmsghdr *nlh; long expires; @@ -2740,7 +2753,10 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; } - if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) + memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); + if (rt->rt6i_pmtu) + metrics[RTAX_MTU - 1] = rt->rt6i_pmtu; + if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; if (rt->rt6i_flags & RTF_GATEWAY) { -- cgit v1.2.3 From afc4eef80c92b199357db3570d3c9c7631d699ff Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 28 Apr 2015 13:03:07 -0700 Subject: ipv6: Remove DST_METRICS_FORCE_OVERWRITE and _rt6i_peer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _rt6i_peer is no longer needed after the last patch, 'ipv6: Stop rt6_info from using inet_peer's metrics'. DST_METRICS_FORCE_OVERWRITE is added by commit e5fd387ad5b3 ("ipv6: do not overwrite inetpeer metrics prematurely"). Since inetpeer is no longer used for metrics, this bit is also not needed. Signed-off-by: Martin KaFai Lau Reviewed-by: Hannes Frederic Sowa Cc: Michal Kubeček Cc: Steffen Klassert Signed-off-by: David S. Miller --- include/net/dst.h | 6 ------ include/net/ip6_fib.h | 31 ------------------------------- net/ipv6/route.c | 36 +----------------------------------- net/ipv6/xfrm6_policy.c | 14 -------------- 4 files changed, 1 insertion(+), 86 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 0fb99a26e973..22aa93f165f1 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -109,7 +109,6 @@ u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); extern const u32 dst_default_metrics[]; #define DST_METRICS_READ_ONLY 0x1UL -#define DST_METRICS_FORCE_OVERWRITE 0x2UL #define DST_METRICS_FLAGS 0x3UL #define __DST_METRICS_PTR(Y) \ ((u32 *)((Y) & ~DST_METRICS_FLAGS)) @@ -120,11 +119,6 @@ static inline bool dst_metrics_read_only(const struct dst_entry *dst) return dst->_metrics & DST_METRICS_READ_ONLY; } -static inline void dst_metrics_set_force_overwrite(struct dst_entry *dst) -{ - dst->_metrics |= DST_METRICS_FORCE_OVERWRITE; -} - void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old); static inline void dst_destroy_metrics_generic(struct dst_entry *dst) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 7383a8cc8f84..e00018011028 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -121,7 +121,6 @@ struct rt6_info { struct rt6key rt6i_prefsrc; struct inet6_dev *rt6i_idev; - unsigned long _rt6i_peer; u32 rt6i_metric; u32 rt6i_pmtu; @@ -130,36 +129,6 @@ struct rt6_info { u8 rt6i_protocol; }; -static inline struct inet_peer *rt6_peer_ptr(struct rt6_info *rt) -{ - return inetpeer_ptr(rt->_rt6i_peer); -} - -static inline bool rt6_has_peer(struct rt6_info *rt) -{ - return inetpeer_ptr_is_peer(rt->_rt6i_peer); -} - -static inline void __rt6_set_peer(struct rt6_info *rt, struct inet_peer *peer) -{ - __inetpeer_ptr_set_peer(&rt->_rt6i_peer, peer); -} - -static inline bool rt6_set_peer(struct rt6_info *rt, struct inet_peer *peer) -{ - return inetpeer_ptr_set_peer(&rt->_rt6i_peer, peer); -} - -static inline void rt6_init_peer(struct rt6_info *rt, struct inet_peer_base *base) -{ - inetpeer_init_ptr(&rt->_rt6i_peer, base); -} - -static inline void rt6_transfer_peer(struct rt6_info *rt, struct rt6_info *ort) -{ - inetpeer_transfer_peer(&rt->_rt6i_peer, &ort->_rt6i_peer); -} - static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) { return ((struct rt6_info *)dst)->rt6i_idev; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4d6eb5d90cb4..352271154ddb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -105,36 +105,6 @@ static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *gwaddr, int ifindex); #endif -static void rt6_bind_peer(struct rt6_info *rt, int create) -{ - struct inet_peer_base *base; - struct inet_peer *peer; - - base = inetpeer_base_ptr(rt->_rt6i_peer); - if (!base) - return; - - peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); - if (peer) { - if (!rt6_set_peer(rt, peer)) - inet_putpeer(peer); - } -} - -static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) -{ - if (rt6_has_peer(rt)) - return rt6_peer_ptr(rt); - - rt6_bind_peer(rt, create); - return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL); -} - -static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt) -{ - return __rt6_get_peer(rt, 1); -} - static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) { struct rt6_info *rt = (struct rt6_info *)dst; @@ -291,7 +261,6 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, struct dst_entry *dst = &rt->dst; memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); - rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); INIT_LIST_HEAD(&rt->rt6i_siblings); } return rt; @@ -1052,7 +1021,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori new = &rt->dst; memset(new + 1, 0, sizeof(*rt) - sizeof(*new)); - rt6_init_peer(rt, net->ipv6.peers); new->__use = 1; new->input = dst_discard; @@ -1597,10 +1565,8 @@ int ip6_route_add(struct fib6_config *cfg) ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; - if (rt->rt6i_dst.plen == 128) { + if (rt->rt6i_dst.plen == 128) rt->dst.flags |= DST_HOST; - dst_metrics_set_force_overwrite(&rt->dst); - } #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index f337a908a76a..6ae256b4c55a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -71,13 +71,6 @@ static int xfrm6_get_tos(const struct flowi *fl) return 0; } -static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst) -{ - struct rt6_info *rt = (struct rt6_info *)xdst; - - rt6_init_peer(rt, net->ipv6.peers); -} - static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { @@ -106,8 +99,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, return -ENODEV; } - rt6_transfer_peer(&xdst->u.rt6, rt); - /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | @@ -255,10 +246,6 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); dst_destroy_metrics_generic(dst); - if (rt6_has_peer(&xdst->u.rt6)) { - struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6); - inet_putpeer(peer); - } xfrm_dst_destroy(xdst); } @@ -308,7 +295,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .get_saddr = xfrm6_get_saddr, .decode_session = _decode_session6, .get_tos = xfrm6_get_tos, - .init_dst = xfrm6_init_dst, .init_path = xfrm6_init_path, .fill_dst = xfrm6_fill_dst, .blackhole_route = ip6_blackhole_route, -- cgit v1.2.3 From 4749c3ef854e3a5d3dd3cc0ccd2dcb7e05d583bd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 30 Apr 2015 12:12:00 +0200 Subject: net: sched: remove TC_MUNGED bits Not used. pedit sets TC_MUNGED when packet content was altered, but all the core does is unset MUNGED again and then set OK2MUNGE. And the latter isn't tested anywhere. So lets remove both TC_MUNGED and TC_OK2MUNGE. Signed-off-by: Florian Westphal Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- Documentation/networking/tc-actions-env-rules.txt | 2 -- include/net/sch_generic.h | 2 -- include/uapi/linux/pkt_cls.h | 3 +++ net/sched/act_api.c | 5 ----- net/sched/act_pedit.c | 5 +---- 5 files changed, 4 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/Documentation/networking/tc-actions-env-rules.txt b/Documentation/networking/tc-actions-env-rules.txt index 70d6cf608251..95c71716b2e2 100644 --- a/Documentation/networking/tc-actions-env-rules.txt +++ b/Documentation/networking/tc-actions-env-rules.txt @@ -14,8 +14,6 @@ resets them for you, so invoke skb_act_clone() rather than skb_clone(). 2) If you munge any packet thou shalt call pskb_expand_head in the case someone else is referencing the skb. After that you "own" the skb. -You must also tell us if it is ok to munge the packet (TC_OK2MUNGE), -this way any action downstream can stomp on the packet. 3) Dropping packets you don't own is a no-no. You simply return TC_ACT_SHOT to the caller and they will drop it. diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 6d778efcfdfd..994b5a092f33 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -755,8 +755,6 @@ static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask, if (n) { n->tc_verd = SET_TC_VERD(n->tc_verd, 0); - n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); - n->tc_verd = CLR_TC_MUNGED(n->tc_verd); } return n; } diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index bf08e76bf505..6810ca43a80a 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -35,6 +35,8 @@ bits 9,10,11: redirect counter - redirect TTL. Loop avoidance * * */ +#ifndef __KERNEL__ +/* backwards compat for userspace only */ #define TC_MUNGED _TC_MAKEMASK1(0) #define SET_TC_MUNGED(v) ( TC_MUNGED | (v & ~TC_MUNGED)) #define CLR_TC_MUNGED(v) ( v & ~TC_MUNGED) @@ -42,6 +44,7 @@ bits 9,10,11: redirect counter - redirect TTL. Loop avoidance #define TC_OK2MUNGE _TC_MAKEMASK1(1) #define SET_TC_OK2MUNGE(v) ( TC_OK2MUNGE | (v & ~TC_OK2MUNGE)) #define CLR_TC_OK2MUNGE(v) ( v & ~TC_OK2MUNGE) +#endif #define S_TC_VERD _TC_MAKE32(2) #define M_TC_VERD _TC_MAKEMASK(4,S_TC_VERD) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 3d43e4979f27..af427a3dbcba 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -392,11 +392,6 @@ int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions, list_for_each_entry(a, actions, list) { repeat: ret = a->ops->act(skb, a, res); - if (TC_MUNGED & skb->tc_verd) { - /* copied already, allow trampling */ - skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); - skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd); - } if (ret == TC_ACT_REPEAT) goto repeat; /* we need a ttl - JHS */ if (ret != TC_ACT_PIPE) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 59649d588d79..17e6d6669c7f 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -108,7 +108,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = a->priv; - int i, munged = 0; + int i; unsigned int off; if (skb_unclone(skb, GFP_ATOMIC)) @@ -156,11 +156,8 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, *ptr = ((*ptr & tkey->mask) ^ tkey->val); if (ptr == &_data) skb_store_bits(skb, off + offset, ptr, 4); - munged++; } - if (munged) - skb->tc_verd = SET_TC_MUNGED(skb->tc_verd); goto done; } else WARN(1, "pedit BUG: index %d\n", p->tcf_index); -- cgit v1.2.3 From 82a584b7cd366511a22e37675b029cf2fb58e291 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 29 Apr 2015 15:33:21 -0700 Subject: ipv6: Flow label state ranges This patch divides the IPv6 flow label space into two ranges: 0-7ffff is reserved for flow label manager, 80000-fffff will be used for creating auto flow labels (per RFC6438). This only affects how labels are set on transmit, it does not affect receive. This range split can be disbaled by systcl. Background: IPv6 flow labels have been an unmitigated disappointment thus far in the lifetime of IPv6. Support in HW devices to use them for ECMP is lacking, and OSes don't turn them on by default. If we had these we could get much better hashing in IPv6 networks without resorting to DPI, possibly eliminating some of the motivations to to define new encaps in UDP just for getting ECMP. Unfortunately, the initial specfications of IPv6 did not clarify how they are to be used. There has always been a vague concept that these can be used for ECMP, flow hashing, etc. and we do now have a good standard how to this in RFC6438. The problem is that flow labels can be either stateful or stateless (as in RFC6438), and we are presented with the possibility that a stateless label may collide with a stateful one. Attempts to split the flow label space were rejected in IETF. When we added support in Linux for RFC6438, we could not turn on flow labels by default due to this conflict. This patch splits the flow label space and should give us a path to enabling auto flow labels by default for all IPv6 packets. This is an API change so we need to consider compatibility with existing deployment. The stateful range is chosen to be the lower values in hopes that most uses would have chosen small numbers. Once we resolve the stateless/stateful issue, we can proceed to look at enabling RFC6438 flow labels by default (starting with scaled testing). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 8 ++++++++ include/net/ipv6.h | 9 +++++++-- include/net/netns/ipv6.h | 1 + net/ipv6/af_inet6.c | 1 + net/ipv6/ip6_flowlabel.c | 4 ++++ net/ipv6/sysctl_net_ipv6.c | 8 ++++++++ 6 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 071fb18dc57c..5095c63e50ed 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1213,6 +1213,14 @@ auto_flowlabels - BOOLEAN FALSE: disabled Default: false +flowlabel_state_ranges - BOOLEAN + Split the flow label number space into two ranges. 0-0x7FFFF is + reserved for the IPv6 flow manager facility, 0x80000-0xFFFFF + is reserved for stateless flow labels as described in RFC6437. + TRUE: enabled + FALSE: disabled + Default: true + anycast_src_echo_reply - BOOLEAN Controls the use of anycast addresses as source addresses for ICMPv6 echo reply diff --git a/include/net/ipv6.h b/include/net/ipv6.h index eec8ad3c9843..53d25ef1699a 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -239,8 +239,10 @@ struct ip6_flowlabel { struct net *fl_net; }; -#define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) -#define IPV6_FLOWLABEL_MASK cpu_to_be32(0x000FFFFF) +#define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) +#define IPV6_FLOWLABEL_MASK cpu_to_be32(0x000FFFFF) +#define IPV6_FLOWLABEL_STATELESS_FLAG cpu_to_be32(0x00080000) + #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) #define IPV6_TCLASS_SHIFT 20 @@ -719,6 +721,9 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, hash ^= hash >> 12; flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; + + if (net->ipv6.sysctl.flowlabel_state_ranges) + flowlabel |= IPV6_FLOWLABEL_STATELESS_FLAG; } return flowlabel; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index d2527bf81142..8d93544a2d2b 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -34,6 +34,7 @@ struct netns_sysctl_ipv6 { int fwmark_reflect; int idgen_retries; int idgen_delay; + int flowlabel_state_ranges; }; struct netns_ipv6 { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index eef63b394c5a..4632afa57e05 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -768,6 +768,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.auto_flowlabels = 0; net->ipv6.sysctl.idgen_retries = 3; net->ipv6.sysctl.idgen_delay = 1 * HZ; + net->ipv6.sysctl.flowlabel_state_ranges = 1; atomic_set(&net->ipv6.fib6_sernum, 1); err = ipv6_init_mibs(net); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index d491125011c4..1f9ebe3cbb4a 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -595,6 +595,10 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (freq.flr_label & ~IPV6_FLOWLABEL_MASK) return -EINVAL; + if (net->ipv6.sysctl.flowlabel_state_ranges && + (freq.flr_label & IPV6_FLOWLABEL_STATELESS_FLAG)) + return -ERANGE; + fl = fl_create(net, sk, &freq, optval, optlen, &err); if (!fl) return err; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index abcc79f649b3..4e705add4f18 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -68,6 +68,13 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "flowlabel_state_ranges", + .data = &init_net.ipv6.sysctl.flowlabel_state_ranges, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; @@ -109,6 +116,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect; ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries; ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay; + ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) -- cgit v1.2.3 From d54385ce68cd18ab002b46f61246ad197cec92de Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 30 Apr 2015 14:53:54 -0700 Subject: etherdev: Process is_multicast_ether_addr at same size as other operations This change makes it so that we process the address in is_multicast_ether_addr at the same size as the other calls. This allows us to avoid duplicate reads when used with other calls such as is_zero_ether_addr or eth_addr_copy. In addition I have added a 64 bit version of the function so in eth_type_trans we can process the destination address as a 64 bit value throughout. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 24 +++++++++++++++++++++++- net/ethernet/eth.c | 2 +- 2 files changed, 24 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 606563ef8a72..c4a10f991fe0 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -110,7 +110,29 @@ static inline bool is_zero_ether_addr(const u8 *addr) */ static inline bool is_multicast_ether_addr(const u8 *addr) { - return 0x01 & addr[0]; +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + u32 a = *(const u32 *)addr; +#else + u16 a = *(const u16 *)addr; +#endif +#ifdef __BIG_ENDIAN + return 0x01 & (a >> ((sizeof(a) * 8) - 8)); +#else + return 0x01 & a; +#endif +} + +static inline bool is_multicast_ether_addr_64bits(const u8 addr[6+2]) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 +#ifdef __BIG_ENDIAN + return 0x01 & ((*(const u64 *)addr) >> 56); +#else + return 0x01 & (*(const u64 *)addr); +#endif +#else + return is_multicast_ether_addr(addr); +#endif } /** diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 60069318d5d1..21c211e9fd5a 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -159,7 +159,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) skb_pull_inline(skb, ETH_HLEN); eth = eth_hdr(skb); - if (unlikely(is_multicast_ether_addr(eth->h_dest))) { + if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) skb->pkt_type = PACKET_BROADCAST; else -- cgit v1.2.3 From 50fb799289501c2eab9f43fc9af513027e1e994f Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 1 May 2015 11:30:12 -0700 Subject: net: Add skb_get_hash_perturb This calls flow_disect and __skb_get_hash to procure a hash for a packet. Input includes a key to initialize jhash. This function does not set skb->hash. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ net/core/flow_dissector.c | 38 ++++++++++++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 66e374d62f64..acb83e249e3f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -927,6 +927,8 @@ static inline __u32 skb_get_hash(struct sk_buff *skb) return skb->hash; } +__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb); + static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) { return skb->hash; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2c35c02a931e..b1df995ef06c 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -267,13 +267,12 @@ static __always_inline void __flow_hash_secret_init(void) net_get_random_once(&hashrnd, sizeof(hashrnd)); } -static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c) +static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c, u32 keyval) { - __flow_hash_secret_init(); - return jhash_3words(a, b, c, hashrnd); + return jhash_3words(a, b, c, keyval); } -static inline u32 __flow_hash_from_keys(struct flow_keys *keys) +static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) { u32 hash; @@ -287,7 +286,8 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys) hash = __flow_hash_3words((__force u32)keys->dst, (__force u32)keys->src, - (__force u32)keys->ports); + (__force u32)keys->ports, + keyval); if (!hash) hash = 1; @@ -296,10 +296,20 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys) u32 flow_hash_from_keys(struct flow_keys *keys) { - return __flow_hash_from_keys(keys); + __flow_hash_secret_init(); + return __flow_hash_from_keys(keys, hashrnd); } EXPORT_SYMBOL(flow_hash_from_keys); +static inline u32 ___skb_get_hash(const struct sk_buff *skb, + struct flow_keys *keys, u32 keyval) +{ + if (!skb_flow_dissect(skb, keys)) + return 0; + + return __flow_hash_from_keys(keys, keyval); +} + /* * __skb_get_hash: calculate a flow hash based on src/dst addresses * and src/dst port numbers. Sets hash in skb to non-zero hash value @@ -309,8 +319,12 @@ EXPORT_SYMBOL(flow_hash_from_keys); void __skb_get_hash(struct sk_buff *skb) { struct flow_keys keys; + u32 hash; - if (!skb_flow_dissect(skb, &keys)) + __flow_hash_secret_init(); + + hash = ___skb_get_hash(skb, &keys, hashrnd); + if (!hash) return; if (keys.ports) @@ -318,10 +332,18 @@ void __skb_get_hash(struct sk_buff *skb) skb->sw_hash = 1; - skb->hash = __flow_hash_from_keys(&keys); + skb->hash = hash; } EXPORT_SYMBOL(__skb_get_hash); +__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) +{ + struct flow_keys keys; + + return ___skb_get_hash(skb, &keys, perturb); +} +EXPORT_SYMBOL(skb_get_hash_perturb); + /* * Returns a Tx hash based on the given packet descriptor a Tx queues' number * to be used as a distribution range. -- cgit v1.2.3 From 2f59e1ebaa7f762c8825871b5486b5f5b4fa952f Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 1 May 2015 11:30:17 -0700 Subject: net: Add flow_keys digest Some users of flow keys (well just sch_choke now) need to pass flow_keys in skbuff cb, and use them for exact comparisons of flows so that skb->hash is not sufficient. In order to increase size of the flow_keys structure, we introduce another structure for the purpose of passing flow keys in skbuff cb. We limit this structure to sixteen bytes, and we will technically treat this as a digest of flow_keys struct hence its name flow_keys_digest. In the first incaranation we just copy the flow_keys structure up to 16 bytes-- this is the same information previously passed in the cb. In the future, we'll adapt this for larger flow_keys and could use something like SHA-1 over the whole flow_keys to improve the quality of the digest. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/flow_keys.h | 16 ++++++++++++++++ net/core/flow_dissector.c | 27 +++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) (limited to 'include') diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h index dc8fd81412bf..6d6ef626811a 100644 --- a/include/net/flow_keys.h +++ b/include/net/flow_keys.h @@ -42,4 +42,20 @@ static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 u32 flow_hash_from_keys(struct flow_keys *keys); unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len, __be16 protocol); + +/* struct flow_keys_digest: + * + * This structure is used to hold a digest of the full flow keys. This is a + * larger "hash" of a flow to allow definitively matching specific flows where + * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so + * that it can by used in CB of skb (see sch_choke for an example). + */ +#define FLOW_KEYS_DIGEST_LEN 16 +struct flow_keys_digest { + u8 data[FLOW_KEYS_DIGEST_LEN]; +}; + +void make_flow_keys_digest(struct flow_keys_digest *digest, + const struct flow_keys *flow); + #endif diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index b1df995ef06c..d3acc4dff4ae 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -310,6 +310,33 @@ static inline u32 ___skb_get_hash(const struct sk_buff *skb, return __flow_hash_from_keys(keys, keyval); } +struct _flow_keys_digest_data { + __be16 n_proto; + u8 ip_proto; + u8 padding; + __be32 ports; + __be32 src; + __be32 dst; +}; + +void make_flow_keys_digest(struct flow_keys_digest *digest, + const struct flow_keys *flow) +{ + struct _flow_keys_digest_data *data = + (struct _flow_keys_digest_data *)digest; + + BUILD_BUG_ON(sizeof(*data) > sizeof(*digest)); + + memset(digest, 0, sizeof(*digest)); + + data->n_proto = flow->n_proto; + data->ip_proto = flow->ip_proto; + data->ports = flow->ports; + data->src = flow->src; + data->dst = flow->dst; +} +EXPORT_SYMBOL(make_flow_keys_digest); + /* * __skb_get_hash: calculate a flow hash based on src/dst addresses * and src/dst port numbers. Sets hash in skb to non-zero hash value -- cgit v1.2.3 From c19ae86a510cf4332af64caab04718bc853d3184 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Fri, 1 May 2015 22:19:43 -0700 Subject: tc: remove unused redirect ttl improves ingress+u32 performance from 22.4 Mpps to 22.9 Mpps Signed-off-by: Jamal Hadi Salim Signed-off-by: Alexei Starovoitov Acked-by: Florian Westphal Acked-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 2 ++ net/core/dev.c | 9 --------- 2 files changed, 2 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 6810ca43a80a..596ffa0c7084 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -65,11 +65,13 @@ bits 9,10,11: redirect counter - redirect TTL. Loop avoidance #define SET_TC_NCLS(v) ( TC_NCLS | (v & ~TC_NCLS)) #define CLR_TC_NCLS(v) ( v & ~TC_NCLS) +#ifndef __KERNEL__ #define S_TC_RTTL _TC_MAKE32(9) #define M_TC_RTTL _TC_MAKEMASK(3,S_TC_RTTL) #define G_TC_RTTL(x) _TC_GETVALUE(x,S_TC_RTTL,M_TC_RTTL) #define V_TC_RTTL(x) _TC_MAKEVALUE(x,S_TC_RTTL) #define SET_TC_RTTL(v,n) ((V_TC_RTTL(n)) | (v & ~M_TC_RTTL)) +#endif #define S_TC_AT _TC_MAKE32(12) #define M_TC_AT _TC_MAKEMASK(2,S_TC_AT) diff --git a/net/core/dev.c b/net/core/dev.c index 74a5b62f7568..862875ec8f2f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3531,18 +3531,9 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); */ static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) { - struct net_device *dev = skb->dev; - u32 ttl = G_TC_RTTL(skb->tc_verd); int result = TC_ACT_OK; struct Qdisc *q; - if (unlikely(MAX_RED_LOOP < ttl++)) { - net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n", - skb->skb_iif, dev->ifindex); - return TC_ACT_SHOT; - } - - skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); q = rcu_dereference(rxq->qdisc); -- cgit v1.2.3 From 9afd85c9e4552b276e2f4cfefd622bdeeffbbf26 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Sat, 2 May 2015 14:01:07 +0200 Subject: net: Export IGMP/MLD message validation code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With this patch, the IGMP and MLD message validation functions are moved from the bridge code to IPv4/IPv6 multicast files. Some small refactoring was done to enhance readibility and to iron out some differences in behaviour between the IGMP and MLD parsing code (e.g. the skb-cloning of MLD messages is now only done if necessary, just like the IGMP part always did). Finally, these IGMP and MLD message validation functions are exported so that not only the bridge can use it but batman-adv later, too. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- include/linux/igmp.h | 1 + include/linux/skbuff.h | 3 + include/net/addrconf.h | 1 + net/bridge/br_multicast.c | 218 +++++++--------------------------------------- net/core/skbuff.c | 87 ++++++++++++++++++ net/ipv4/igmp.c | 162 ++++++++++++++++++++++++++++++++++ net/ipv6/Makefile | 1 + net/ipv6/mcast_snoop.c | 213 ++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 498 insertions(+), 188 deletions(-) create mode 100644 net/ipv6/mcast_snoop.c (limited to 'include') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 2c677afeea47..193ad488d3e2 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -130,5 +130,6 @@ extern void ip_mc_unmap(struct in_device *); extern void ip_mc_remap(struct in_device *); extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr); extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr); +int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed); #endif diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index acb83e249e3f..9c2f793573fa 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3419,6 +3419,9 @@ static inline void skb_checksum_none_assert(const struct sk_buff *skb) bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); int skb_checksum_setup(struct sk_buff *skb, bool recalculate); +struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, + unsigned int transport_len, + __sum16(*skb_chkf)(struct sk_buff *skb)); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 80456f72d70a..def59d3a34d5 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -142,6 +142,7 @@ void ipv6_mc_unmap(struct inet6_dev *idev); void ipv6_mc_remap(struct inet6_dev *idev); void ipv6_mc_init_dev(struct inet6_dev *idev); void ipv6_mc_destroy_dev(struct inet6_dev *idev); +int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed); void addrconf_dad_failure(struct inet6_ifaddr *ifp); bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index b52f4cb8aee9..2d69d5cab52f 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -975,9 +975,6 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, int err = 0; __be32 group; - if (!pskb_may_pull(skb, sizeof(*ih))) - return -EINVAL; - ih = igmpv3_report_hdr(skb); num = ntohs(ih->ngrec); len = sizeof(*ih); @@ -1248,25 +1245,14 @@ static int br_ip4_multicast_query(struct net_bridge *br, max_delay = 10 * HZ; group = 0; } - } else { - if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) { - err = -EINVAL; - goto out; - } - + } else if (skb->len >= sizeof(*ih3)) { ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) goto out; max_delay = ih3->code ? IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; - } - - /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer - * all-systems destination addresses (224.0.0.1) for general queries - */ - if (!group && iph->daddr != htonl(INADDR_ALLHOSTS_GROUP)) { - err = -EINVAL; + } else { goto out; } @@ -1329,12 +1315,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ - if (!(ipv6_addr_type(&ip6h->saddr) & IPV6_ADDR_LINKLOCAL)) { - err = -EINVAL; - goto out; - } - if (skb->len == sizeof(*mld)) { if (!pskb_may_pull(skb, sizeof(*mld))) { err = -EINVAL; @@ -1358,14 +1338,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, is_general_query = group && ipv6_addr_any(group); - /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer - * all-nodes destination address (ff02::1) for general queries - */ - if (is_general_query && !ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) { - err = -EINVAL; - goto out; - } - if (is_general_query) { saddr.proto = htons(ETH_P_IPV6); saddr.u.ip6 = ip6h->saddr; @@ -1557,66 +1529,22 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, struct sk_buff *skb, u16 vid) { - struct sk_buff *skb2 = skb; - const struct iphdr *iph; + struct sk_buff *skb_trimmed = NULL; struct igmphdr *ih; - unsigned int len; - unsigned int offset; int err; - /* We treat OOM as packet loss for now. */ - if (!pskb_may_pull(skb, sizeof(*iph))) - return -EINVAL; - - iph = ip_hdr(skb); - - if (iph->ihl < 5 || iph->version != 4) - return -EINVAL; - - if (!pskb_may_pull(skb, ip_hdrlen(skb))) - return -EINVAL; - - iph = ip_hdr(skb); + err = ip_mc_check_igmp(skb, &skb_trimmed); - if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) - return -EINVAL; - - if (iph->protocol != IPPROTO_IGMP) { - if (!ipv4_is_local_multicast(iph->daddr)) + if (err == -ENOMSG) { + if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; + } else if (err < 0) { + return err; } - len = ntohs(iph->tot_len); - if (skb->len < len || len < ip_hdrlen(skb)) - return -EINVAL; - - if (skb->len > len) { - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) - return -ENOMEM; - - err = pskb_trim_rcsum(skb2, len); - if (err) - goto err_out; - } - - len -= ip_hdrlen(skb2); - offset = skb_network_offset(skb2) + ip_hdrlen(skb2); - __skb_pull(skb2, offset); - skb_reset_transport_header(skb2); - - err = -EINVAL; - if (!pskb_may_pull(skb2, sizeof(*ih))) - goto out; - - if (skb_checksum_simple_validate(skb2)) - goto out; - - err = 0; - BR_INPUT_SKB_CB(skb)->igmp = 1; - ih = igmp_hdr(skb2); + ih = igmp_hdr(skb); switch (ih->type) { case IGMP_HOST_MEMBERSHIP_REPORT: @@ -1625,21 +1553,19 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = br_ip4_multicast_add_group(br, port, ih->group, vid); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: - err = br_ip4_multicast_igmp3_report(br, port, skb2, vid); + err = br_ip4_multicast_igmp3_report(br, port, skb_trimmed, vid); break; case IGMP_HOST_MEMBERSHIP_QUERY: - err = br_ip4_multicast_query(br, port, skb2, vid); + err = br_ip4_multicast_query(br, port, skb_trimmed, vid); break; case IGMP_HOST_LEAVE_MESSAGE: br_ip4_multicast_leave_group(br, port, ih->group, vid); break; } -out: - __skb_push(skb2, offset); -err_out: - if (skb2 != skb) - kfree_skb(skb2); + if (skb_trimmed) + kfree_skb(skb_trimmed); + return err; } @@ -1649,126 +1575,42 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, struct sk_buff *skb, u16 vid) { - struct sk_buff *skb2; - const struct ipv6hdr *ip6h; - u8 icmp6_type; - u8 nexthdr; - __be16 frag_off; - unsigned int len; - int offset; + struct sk_buff *skb_trimmed = NULL; + struct mld_msg *mld; int err; - if (!pskb_may_pull(skb, sizeof(*ip6h))) - return -EINVAL; + err = ipv6_mc_check_mld(skb, &skb_trimmed); - ip6h = ipv6_hdr(skb); - - /* - * We're interested in MLD messages only. - * - Version is 6 - * - MLD has always Router Alert hop-by-hop option - * - But we do not support jumbrograms. - */ - if (ip6h->version != 6) - return 0; - - /* Prevent flooding this packet if there is no listener present */ - if (!ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) - BR_INPUT_SKB_CB(skb)->mrouters_only = 1; - - if (ip6h->nexthdr != IPPROTO_HOPOPTS || - ip6h->payload_len == 0) - return 0; - - len = ntohs(ip6h->payload_len) + sizeof(*ip6h); - if (skb->len < len) - return -EINVAL; - - nexthdr = ip6h->nexthdr; - offset = ipv6_skip_exthdr(skb, sizeof(*ip6h), &nexthdr, &frag_off); - - if (offset < 0 || nexthdr != IPPROTO_ICMPV6) + if (err == -ENOMSG) { + if (!ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr)) + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; - - /* Okay, we found ICMPv6 header */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) - return -ENOMEM; - - err = -EINVAL; - if (!pskb_may_pull(skb2, offset + sizeof(struct icmp6hdr))) - goto out; - - len -= offset - skb_network_offset(skb2); - - __skb_pull(skb2, offset); - skb_reset_transport_header(skb2); - skb_postpull_rcsum(skb2, skb_network_header(skb2), - skb_network_header_len(skb2)); - - icmp6_type = icmp6_hdr(skb2)->icmp6_type; - - switch (icmp6_type) { - case ICMPV6_MGM_QUERY: - case ICMPV6_MGM_REPORT: - case ICMPV6_MGM_REDUCTION: - case ICMPV6_MLD2_REPORT: - break; - default: - err = 0; - goto out; - } - - /* Okay, we found MLD message. Check further. */ - if (skb2->len > len) { - err = pskb_trim_rcsum(skb2, len); - if (err) - goto out; - err = -EINVAL; + } else if (err < 0) { + return err; } - ip6h = ipv6_hdr(skb2); - - if (skb_checksum_validate(skb2, IPPROTO_ICMPV6, ip6_compute_pseudo)) - goto out; - - err = 0; - BR_INPUT_SKB_CB(skb)->igmp = 1; + mld = (struct mld_msg *)skb_transport_header(skb); - switch (icmp6_type) { + switch (mld->mld_type) { case ICMPV6_MGM_REPORT: - { - struct mld_msg *mld; - if (!pskb_may_pull(skb2, sizeof(*mld))) { - err = -EINVAL; - goto out; - } - mld = (struct mld_msg *)skb_transport_header(skb2); BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid); break; - } case ICMPV6_MLD2_REPORT: - err = br_ip6_multicast_mld2_report(br, port, skb2, vid); + err = br_ip6_multicast_mld2_report(br, port, skb_trimmed, vid); break; case ICMPV6_MGM_QUERY: - err = br_ip6_multicast_query(br, port, skb2, vid); + err = br_ip6_multicast_query(br, port, skb_trimmed, vid); break; case ICMPV6_MGM_REDUCTION: - { - struct mld_msg *mld; - if (!pskb_may_pull(skb2, sizeof(*mld))) { - err = -EINVAL; - goto out; - } - mld = (struct mld_msg *)skb_transport_header(skb2); br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid); - } + break; } -out: - kfree_skb(skb2); + if (skb_trimmed) + kfree_skb(skb_trimmed); + return err; } #endif diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3cfff2a3d651..1e4278a4dd7e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4030,6 +4030,93 @@ int skb_checksum_setup(struct sk_buff *skb, bool recalculate) } EXPORT_SYMBOL(skb_checksum_setup); +/** + * skb_checksum_maybe_trim - maybe trims the given skb + * @skb: the skb to check + * @transport_len: the data length beyond the network header + * + * Checks whether the given skb has data beyond the given transport length. + * If so, returns a cloned skb trimmed to this transport length. + * Otherwise returns the provided skb. Returns NULL in error cases + * (e.g. transport_len exceeds skb length or out-of-memory). + * + * Caller needs to set the skb transport header and release the returned skb. + * Provided skb is consumed. + */ +static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb, + unsigned int transport_len) +{ + struct sk_buff *skb_chk; + unsigned int len = skb_transport_offset(skb) + transport_len; + int ret; + + if (skb->len < len) { + kfree_skb(skb); + return NULL; + } else if (skb->len == len) { + return skb; + } + + skb_chk = skb_clone(skb, GFP_ATOMIC); + kfree_skb(skb); + + if (!skb_chk) + return NULL; + + ret = pskb_trim_rcsum(skb_chk, len); + if (ret) { + kfree_skb(skb_chk); + return NULL; + } + + return skb_chk; +} + +/** + * skb_checksum_trimmed - validate checksum of an skb + * @skb: the skb to check + * @transport_len: the data length beyond the network header + * @skb_chkf: checksum function to use + * + * Applies the given checksum function skb_chkf to the provided skb. + * Returns a checked and maybe trimmed skb. Returns NULL on error. + * + * If the skb has data beyond the given transport length, then a + * trimmed & cloned skb is checked and returned. + * + * Caller needs to set the skb transport header and release the returned skb. + * Provided skb is consumed. + */ +struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, + unsigned int transport_len, + __sum16(*skb_chkf)(struct sk_buff *skb)) +{ + struct sk_buff *skb_chk; + unsigned int offset = skb_transport_offset(skb); + int ret; + + skb_chk = skb_checksum_maybe_trim(skb, transport_len); + if (!skb_chk) + return NULL; + + if (!pskb_may_pull(skb_chk, offset)) { + kfree_skb(skb_chk); + return NULL; + } + + __skb_pull(skb_chk, offset); + ret = skb_chkf(skb_chk); + __skb_push(skb_chk, offset); + + if (ret) { + kfree_skb(skb_chk); + return NULL; + } + + return skb_chk; +} +EXPORT_SYMBOL(skb_checksum_trimmed); + void __skb_warn_lro_forwarding(const struct sk_buff *skb) { net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n", diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a3a697f5ffba..651cdf648ec4 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1339,6 +1339,168 @@ out: } EXPORT_SYMBOL(ip_mc_inc_group); +static int ip_mc_check_iphdr(struct sk_buff *skb) +{ + const struct iphdr *iph; + unsigned int len; + unsigned int offset = skb_network_offset(skb) + sizeof(*iph); + + if (!pskb_may_pull(skb, offset)) + return -EINVAL; + + iph = ip_hdr(skb); + + if (iph->version != 4 || ip_hdrlen(skb) < sizeof(*iph)) + return -EINVAL; + + offset += ip_hdrlen(skb) - sizeof(*iph); + + if (!pskb_may_pull(skb, offset)) + return -EINVAL; + + iph = ip_hdr(skb); + + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + return -EINVAL; + + len = skb_network_offset(skb) + ntohs(iph->tot_len); + if (skb->len < len || len < offset) + return -EINVAL; + + skb_set_transport_header(skb, offset); + + return 0; +} + +static int ip_mc_check_igmp_reportv3(struct sk_buff *skb) +{ + unsigned int len = skb_transport_offset(skb); + + len += sizeof(struct igmpv3_report); + + return pskb_may_pull(skb, len) ? 0 : -EINVAL; +} + +static int ip_mc_check_igmp_query(struct sk_buff *skb) +{ + unsigned int len = skb_transport_offset(skb); + + len += sizeof(struct igmphdr); + if (skb->len < len) + return -EINVAL; + + /* IGMPv{1,2}? */ + if (skb->len != len) { + /* or IGMPv3? */ + len += sizeof(struct igmpv3_query) - sizeof(struct igmphdr); + if (skb->len < len || !pskb_may_pull(skb, len)) + return -EINVAL; + } + + /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer + * all-systems destination addresses (224.0.0.1) for general queries + */ + if (!igmp_hdr(skb)->group && + ip_hdr(skb)->daddr != htonl(INADDR_ALLHOSTS_GROUP)) + return -EINVAL; + + return 0; +} + +static int ip_mc_check_igmp_msg(struct sk_buff *skb) +{ + switch (igmp_hdr(skb)->type) { + case IGMP_HOST_LEAVE_MESSAGE: + case IGMP_HOST_MEMBERSHIP_REPORT: + case IGMPV2_HOST_MEMBERSHIP_REPORT: + /* fall through */ + return 0; + case IGMPV3_HOST_MEMBERSHIP_REPORT: + return ip_mc_check_igmp_reportv3(skb); + case IGMP_HOST_MEMBERSHIP_QUERY: + return ip_mc_check_igmp_query(skb); + default: + return -ENOMSG; + } +} + +static inline __sum16 ip_mc_validate_checksum(struct sk_buff *skb) +{ + return skb_checksum_simple_validate(skb); +} + +static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) + +{ + struct sk_buff *skb_chk; + unsigned int transport_len; + unsigned int len = skb_transport_offset(skb) + sizeof(struct igmphdr); + int ret; + + transport_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb); + + skb_get(skb); + skb_chk = skb_checksum_trimmed(skb, transport_len, + ip_mc_validate_checksum); + if (!skb_chk) + return -EINVAL; + + if (!pskb_may_pull(skb_chk, len)) { + kfree_skb(skb_chk); + return -EINVAL; + } + + ret = ip_mc_check_igmp_msg(skb_chk); + if (ret) { + kfree_skb(skb_chk); + return ret; + } + + if (skb_trimmed) + *skb_trimmed = skb_chk; + else + kfree_skb(skb_chk); + + return 0; +} + +/** + * ip_mc_check_igmp - checks whether this is a sane IGMP packet + * @skb: the skb to validate + * @skb_trimmed: to store an skb pointer trimmed to IPv4 packet tail (optional) + * + * Checks whether an IPv4 packet is a valid IGMP packet. If so sets + * skb network and transport headers accordingly and returns zero. + * + * -EINVAL: A broken packet was detected, i.e. it violates some internet + * standard + * -ENOMSG: IP header validation succeeded but it is not an IGMP packet. + * -ENOMEM: A memory allocation failure happened. + * + * Optionally, an skb pointer might be provided via skb_trimmed (or set it + * to NULL): After parsing an IGMP packet successfully it will point to + * an skb which has its tail aligned to the IP packet end. This might + * either be the originally provided skb or a trimmed, cloned version if + * the skb frame had data beyond the IP packet. A cloned skb allows us + * to leave the original skb and its full frame unchanged (which might be + * desirable for layer 2 frame jugglers). + * + * The caller needs to release a reference count from any returned skb_trimmed. + */ +int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) +{ + int ret = ip_mc_check_iphdr(skb); + + if (ret < 0) + return ret; + + if (ip_hdr(skb)->protocol != IPPROTO_IGMP) + return -ENOMSG; + + return __ip_mc_check_igmp(skb, skb_trimmed); +} +EXPORT_SYMBOL(ip_mc_check_igmp); + /* * Resend IGMP JOIN report; used by netdev notifier. */ diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 2e8c06108ab9..0f3f1999719a 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -48,4 +48,5 @@ obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o ifneq ($(CONFIG_IPV6),) obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o +obj-y += mcast_snoop.o endif diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c new file mode 100644 index 000000000000..1a2cbc13a7d3 --- /dev/null +++ b/net/ipv6/mcast_snoop.c @@ -0,0 +1,213 @@ +/* Copyright (C) 2010: YOSHIFUJI Hideaki + * Copyright (C) 2015: Linus Lüssing + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * + * + * Based on the MLD support added to br_multicast.c by YOSHIFUJI Hideaki. + */ + +#include +#include +#include +#include +#include + +static int ipv6_mc_check_ip6hdr(struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h; + unsigned int len; + unsigned int offset = skb_network_offset(skb) + sizeof(*ip6h); + + if (!pskb_may_pull(skb, offset)) + return -EINVAL; + + ip6h = ipv6_hdr(skb); + + if (ip6h->version != 6) + return -EINVAL; + + len = offset + ntohs(ip6h->payload_len); + if (skb->len < len || len <= offset) + return -EINVAL; + + return 0; +} + +static int ipv6_mc_check_exthdrs(struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h; + unsigned int offset; + u8 nexthdr; + __be16 frag_off; + + ip6h = ipv6_hdr(skb); + + if (ip6h->nexthdr != IPPROTO_HOPOPTS) + return -ENOMSG; + + nexthdr = ip6h->nexthdr; + offset = skb_network_offset(skb) + sizeof(*ip6h); + offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); + + if (offset < 0) + return -EINVAL; + + if (nexthdr != IPPROTO_ICMPV6) + return -ENOMSG; + + skb_set_transport_header(skb, offset); + + return 0; +} + +static int ipv6_mc_check_mld_reportv2(struct sk_buff *skb) +{ + unsigned int len = skb_transport_offset(skb); + + len += sizeof(struct mld2_report); + + return pskb_may_pull(skb, len) ? 0 : -EINVAL; +} + +static int ipv6_mc_check_mld_query(struct sk_buff *skb) +{ + struct mld_msg *mld; + unsigned int len = skb_transport_offset(skb); + + /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ + if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) + return -EINVAL; + + len += sizeof(struct mld_msg); + if (skb->len < len) + return -EINVAL; + + /* MLDv1? */ + if (skb->len != len) { + /* or MLDv2? */ + len += sizeof(struct mld2_query) - sizeof(struct mld_msg); + if (skb->len < len || !pskb_may_pull(skb, len)) + return -EINVAL; + } + + mld = (struct mld_msg *)skb_transport_header(skb); + + /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer + * all-nodes destination address (ff02::1) for general queries + */ + if (ipv6_addr_any(&mld->mld_mca) && + !ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr)) + return -EINVAL; + + return 0; +} + +static int ipv6_mc_check_mld_msg(struct sk_buff *skb) +{ + struct mld_msg *mld = (struct mld_msg *)skb_transport_header(skb); + + switch (mld->mld_type) { + case ICMPV6_MGM_REDUCTION: + case ICMPV6_MGM_REPORT: + /* fall through */ + return 0; + case ICMPV6_MLD2_REPORT: + return ipv6_mc_check_mld_reportv2(skb); + case ICMPV6_MGM_QUERY: + return ipv6_mc_check_mld_query(skb); + default: + return -ENOMSG; + } +} + +static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb) +{ + return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo); +} + +static int __ipv6_mc_check_mld(struct sk_buff *skb, + struct sk_buff **skb_trimmed) + +{ + struct sk_buff *skb_chk = NULL; + unsigned int transport_len; + unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg); + int ret; + + transport_len = ntohs(ipv6_hdr(skb)->payload_len); + transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr); + + skb_get(skb); + skb_chk = skb_checksum_trimmed(skb, transport_len, + ipv6_mc_validate_checksum); + if (!skb_chk) + return -EINVAL; + + if (!pskb_may_pull(skb_chk, len)) { + kfree_skb(skb_chk); + return -EINVAL; + } + + ret = ipv6_mc_check_mld_msg(skb_chk); + if (ret) { + kfree_skb(skb_chk); + return ret; + } + + if (skb_trimmed) + *skb_trimmed = skb_chk; + else + kfree_skb(skb_chk); + + return 0; +} + +/** + * ipv6_mc_check_mld - checks whether this is a sane MLD packet + * @skb: the skb to validate + * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional) + * + * Checks whether an IPv6 packet is a valid MLD packet. If so sets + * skb network and transport headers accordingly and returns zero. + * + * -EINVAL: A broken packet was detected, i.e. it violates some internet + * standard + * -ENOMSG: IP header validation succeeded but it is not an MLD packet. + * -ENOMEM: A memory allocation failure happened. + * + * Optionally, an skb pointer might be provided via skb_trimmed (or set it + * to NULL): After parsing an MLD packet successfully it will point to + * an skb which has its tail aligned to the IP packet end. This might + * either be the originally provided skb or a trimmed, cloned version if + * the skb frame had data beyond the IP packet. A cloned skb allows us + * to leave the original skb and its full frame unchanged (which might be + * desirable for layer 2 frame jugglers). + * + * The caller needs to release a reference count from any returned skb_trimmed. + */ +int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed) +{ + int ret; + + ret = ipv6_mc_check_ip6hdr(skb); + if (ret < 0) + return ret; + + ret = ipv6_mc_check_exthdrs(skb); + if (ret < 0) + return ret; + + return __ipv6_mc_check_mld(skb, skb_trimmed); +} +EXPORT_SYMBOL(ipv6_mc_check_mld); -- cgit v1.2.3 From f5c4ae07992ca64d8628a11439c184baf5595e4b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 Apr 2015 12:09:01 +0200 Subject: mac80211: make LED trigger names const This is just a code cleanup, make the LED trigger names const as they're not expected to be modified by drivers. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath5k/led.c | 2 +- drivers/net/wireless/ath/carl9170/led.c | 2 +- drivers/net/wireless/p54/led.c | 2 +- include/net/mac80211.h | 27 ++++++++++++++------------- net/mac80211/led.c | 17 +++++++++-------- 5 files changed, 26 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath5k/led.c b/drivers/net/wireless/ath/ath5k/led.c index ca4b7ccd697f..803030fd17d3 100644 --- a/drivers/net/wireless/ath/ath5k/led.c +++ b/drivers/net/wireless/ath/ath5k/led.c @@ -124,7 +124,7 @@ ath5k_led_brightness_set(struct led_classdev *led_dev, static int ath5k_register_led(struct ath5k_hw *ah, struct ath5k_led *led, - const char *name, char *trigger) + const char *name, const char *trigger) { int err; diff --git a/drivers/net/wireless/ath/carl9170/led.c b/drivers/net/wireless/ath/carl9170/led.c index 78dadc797558..2c74425f5059 100644 --- a/drivers/net/wireless/ath/carl9170/led.c +++ b/drivers/net/wireless/ath/carl9170/led.c @@ -122,7 +122,7 @@ static void carl9170_led_set_brightness(struct led_classdev *led, } static int carl9170_led_register_led(struct ar9170 *ar, int i, char *name, - char *trigger) + const char *trigger) { int err; diff --git a/drivers/net/wireless/p54/led.c b/drivers/net/wireless/p54/led.c index 1f6fd5ff5531..9a8fedd3c0f5 100644 --- a/drivers/net/wireless/p54/led.c +++ b/drivers/net/wireless/p54/led.c @@ -83,7 +83,7 @@ static void p54_led_brightness_set(struct led_classdev *led_dev, static int p54_register_led(struct p54_common *priv, unsigned int led_index, - char *name, char *trigger) + char *name, const char *trigger) { struct p54_led_dev *led = &priv->leds[led_index]; int err; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 331429898ea1..8a3a7d7cda26 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3486,14 +3486,15 @@ enum ieee80211_tpt_led_trigger_flags { }; #ifdef CONFIG_MAC80211_LEDS -char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw); -char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw); -char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw); -char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw); -char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, - unsigned int flags, - const struct ieee80211_tpt_blink *blink_table, - unsigned int blink_table_len); +const char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw); +const char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw); +const char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw); +const char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw); +const char * +__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, + unsigned int flags, + const struct ieee80211_tpt_blink *blink_table, + unsigned int blink_table_len); #endif /** * ieee80211_get_tx_led_name - get name of TX LED @@ -3507,7 +3508,7 @@ char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, * * Return: The name of the LED trigger. %NULL if not configured for LEDs. */ -static inline char *ieee80211_get_tx_led_name(struct ieee80211_hw *hw) +static inline const char *ieee80211_get_tx_led_name(struct ieee80211_hw *hw) { #ifdef CONFIG_MAC80211_LEDS return __ieee80211_get_tx_led_name(hw); @@ -3528,7 +3529,7 @@ static inline char *ieee80211_get_tx_led_name(struct ieee80211_hw *hw) * * Return: The name of the LED trigger. %NULL if not configured for LEDs. */ -static inline char *ieee80211_get_rx_led_name(struct ieee80211_hw *hw) +static inline const char *ieee80211_get_rx_led_name(struct ieee80211_hw *hw) { #ifdef CONFIG_MAC80211_LEDS return __ieee80211_get_rx_led_name(hw); @@ -3549,7 +3550,7 @@ static inline char *ieee80211_get_rx_led_name(struct ieee80211_hw *hw) * * Return: The name of the LED trigger. %NULL if not configured for LEDs. */ -static inline char *ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) +static inline const char *ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) { #ifdef CONFIG_MAC80211_LEDS return __ieee80211_get_assoc_led_name(hw); @@ -3570,7 +3571,7 @@ static inline char *ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) * * Return: The name of the LED trigger. %NULL if not configured for LEDs. */ -static inline char *ieee80211_get_radio_led_name(struct ieee80211_hw *hw) +static inline const char *ieee80211_get_radio_led_name(struct ieee80211_hw *hw) { #ifdef CONFIG_MAC80211_LEDS return __ieee80211_get_radio_led_name(hw); @@ -3591,7 +3592,7 @@ static inline char *ieee80211_get_radio_led_name(struct ieee80211_hw *hw) * * Note: This function must be called before ieee80211_register_hw(). */ -static inline char * +static inline const char * ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, unsigned int flags, const struct ieee80211_tpt_blink *blink_table, unsigned int blink_table_len) diff --git a/net/mac80211/led.c b/net/mac80211/led.c index e2b836446af3..3e13eb86f85d 100644 --- a/net/mac80211/led.c +++ b/net/mac80211/led.c @@ -133,7 +133,7 @@ void ieee80211_led_exit(struct ieee80211_local *local) } } -char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw) +const char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); @@ -141,7 +141,7 @@ char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw) } EXPORT_SYMBOL(__ieee80211_get_radio_led_name); -char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) +const char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); @@ -149,7 +149,7 @@ char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) } EXPORT_SYMBOL(__ieee80211_get_assoc_led_name); -char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw) +const char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); @@ -157,7 +157,7 @@ char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw) } EXPORT_SYMBOL(__ieee80211_get_tx_led_name); -char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw) +const char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); @@ -211,10 +211,11 @@ static void tpt_trig_timer(unsigned long data) read_unlock(&tpt_trig->trig.leddev_list_lock); } -char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, - unsigned int flags, - const struct ieee80211_tpt_blink *blink_table, - unsigned int blink_table_len) +const char * +__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, + unsigned int flags, + const struct ieee80211_tpt_blink *blink_table, + unsigned int blink_table_len) { struct ieee80211_local *local = hw_to_local(hw); struct tpt_led_trigger *tpt_trig; -- cgit v1.2.3 From cd8ae85299d54155702a56811b2e035e63064d3d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 3 May 2015 21:34:46 -0700 Subject: tcp: provide SYN headers for passive connections This patch allows a server application to get the TCP SYN headers for its passive connections. This is useful if the server is doing fingerprinting of clients based on SYN packet contents. Two socket options are added: TCP_SAVE_SYN and TCP_SAVED_SYN. The first is used on a socket to enable saving the SYN headers for child connections. This can be set before or after the listen() call. The latter is used to retrieve the SYN headers for passive connections, if the parent listener has enabled TCP_SAVE_SYN. TCP_SAVED_SYN is read once, it frees the saved SYN headers. The data returned in TCP_SAVED_SYN are network (IPv4/IPv6) and TCP headers. Original patch was written by Tom Herbert, I changed it to not hold a full skb (and associated dst and conntracking reference). We have used such patch for about 3 years at Google. Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Tested-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 8 ++++++++ include/net/request_sock.h | 4 +++- include/uapi/linux/tcp.h | 2 ++ net/ipv4/tcp.c | 35 +++++++++++++++++++++++++++++++++++ net/ipv4/tcp_input.c | 18 ++++++++++++++++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_minisocks.c | 3 +++ 7 files changed, 70 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3b2911502a8c..e6fb5df22db1 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -199,6 +199,7 @@ struct tcp_sock { syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ + save_syn:1, /* Save headers of SYN packet */ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ @@ -326,6 +327,7 @@ struct tcp_sock { * socket. Used to retransmit SYNACKs etc. */ struct request_sock *fastopen_rsk; + u32 *saved_syn; }; enum tsq_flags { @@ -393,4 +395,10 @@ static inline int fastopen_init_queue(struct sock *sk, int backlog) return 0; } +static inline void tcp_saved_syn_free(struct tcp_sock *tp) +{ + kfree(tp->saved_syn); + tp->saved_syn = NULL; +} + #endif /* _LINUX_TCP_H */ diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 9f4265ce8892..87935cad2f7b 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -64,6 +64,7 @@ struct request_sock { struct timer_list rsk_timer; const struct request_sock_ops *rsk_ops; struct sock *sk; + u32 *saved_syn; u32 secid; u32 peer_secid; }; @@ -77,7 +78,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener) req->rsk_ops = ops; sock_hold(sk_listener); req->rsk_listener = sk_listener; - + req->saved_syn = NULL; /* Following is temporary. It is coupled with debugging * helpers in reqsk_put() & reqsk_free() */ @@ -104,6 +105,7 @@ static inline void reqsk_free(struct request_sock *req) req->rsk_ops->destructor(req); if (req->rsk_listener) sock_put(req->rsk_listener); + kfree(req->saved_syn); kmem_cache_free(req->rsk_ops->slab, req); } diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index faa72f4fa547..51ebedba577f 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -113,6 +113,8 @@ enum { #define TCP_TIMESTAMP 24 #define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */ #define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ +#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ +#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ struct tcp_repair_opt { __u32 opt_code; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 46efa03d2b11..ecccfdc50d76 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2482,6 +2482,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level, icsk->icsk_syn_retries = val; break; + case TCP_SAVE_SYN: + if (val < 0 || val > 1) + err = -EINVAL; + else + tp->save_syn = val; + break; + case TCP_LINGER2: if (val < 0) tp->linger2 = -1; @@ -2818,6 +2825,34 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_NOTSENT_LOWAT: val = tp->notsent_lowat; break; + case TCP_SAVE_SYN: + val = tp->save_syn; + break; + case TCP_SAVED_SYN: { + if (get_user(len, optlen)) + return -EFAULT; + + lock_sock(sk); + if (tp->saved_syn) { + len = min_t(unsigned int, tp->saved_syn[0], len); + if (put_user(len, optlen)) { + release_sock(sk); + return -EFAULT; + } + if (copy_to_user(optval, tp->saved_syn + 1, len)) { + release_sock(sk); + return -EFAULT; + } + tcp_saved_syn_free(tp); + release_sock(sk); + } else { + release_sock(sk); + len = 0; + if (put_user(len, optlen)) + return -EFAULT; + } + return 0; + } default: return -ENOPROTOOPT; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 09bdc4abfcbb..df2ca615cd0c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6060,6 +6060,23 @@ static bool tcp_syn_flood_action(struct sock *sk, return want_cookie; } +static void tcp_reqsk_record_syn(const struct sock *sk, + struct request_sock *req, + const struct sk_buff *skb) +{ + if (tcp_sk(sk)->save_syn) { + u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb); + u32 *copy; + + copy = kmalloc(len + sizeof(u32), GFP_ATOMIC); + if (copy) { + copy[0] = len; + memcpy(©[1], skb_network_header(skb), len); + req->saved_syn = copy; + } + } +} + int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) @@ -6192,6 +6209,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->tfo_listener = false; af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); } + tcp_reqsk_record_syn(sk, req, skb); return 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fc1c658ec6c1..91cb4768a860 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1802,6 +1802,7 @@ void tcp_v4_destroy_sock(struct sock *sk) /* If socket is aborted during connect operation */ tcp_free_fastopen_req(tp); + tcp_saved_syn_free(tp); sk_sockets_allocated_dec(sk); sock_release_memcg(sk); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index e5d7649136fc..ebe2ab2596ed 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -536,6 +536,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->fastopen_rsk = NULL; newtp->syn_data_acked = 0; + newtp->saved_syn = req->saved_syn; + req->saved_syn = NULL; + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); } return newsk; -- cgit v1.2.3 From 2c7a88c252bf3381958cf716f31b6b2e0f2f3fa7 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 4 May 2015 14:33:48 -0700 Subject: etherdev: Fix sparse error, make test usable by other functions This change does two things. First it fixes a sparse error for the fact that the __be16 degrades to an integer. Since that is actually what I am kind of doing I am simply working around that by forcing both sides of the comparison to u16. Also I realized on some compilers I was generating another instruction for big endian systems such as PowerPC since it was masking the value before doing the comparison. So to resolve that I have simply pulled the mask out and wrapped it in an #ifndef __BIG_ENDIAN. Lastly I pulled this all out into its own function. I notices there are similar checks in a number of other places so this function can be reused there to help reduce overhead in these paths as well. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 18 ++++++++++++++++++ net/ethernet/eth.c | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index c4a10f991fe0..9012f8775208 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -190,6 +190,24 @@ static inline bool is_valid_ether_addr(const u8 *addr) return !is_multicast_ether_addr(addr) && !is_zero_ether_addr(addr); } +/** + * eth_proto_is_802_3 - Determine if a given Ethertype/length is a protocol + * @proto: Ethertype/length value to be tested + * + * Check that the value from the Ethertype/length field is a valid Ethertype. + * + * Return true if the valid is an 802.3 supported Ethertype. + */ +static inline bool eth_proto_is_802_3(__be16 proto) +{ +#ifndef __BIG_ENDIAN + /* if CPU is little endian mask off bits representing LSB */ + proto &= htons(0xFF00); +#endif + /* cast both to u16 and compare since LSB can be ignored */ + return (__force u16)proto >= (__force u16)htons(ETH_P_802_3_MIN); +} + /** * eth_random_addr - Generate software assigned random Ethernet address * @addr: Pointer to a six-byte array containing the Ethernet address diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 314e4c5a5a5e..9045e2a1108f 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -179,7 +179,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) if (unlikely(netdev_uses_dsa(dev))) return htons(ETH_P_XDSA); - if (likely((eth->h_proto & htons(0xFF00)) >= htons(ETH_P_802_3_MIN))) + if (likely(eth_proto_is_802_3(eth->h_proto))) return eth->h_proto; /* -- cgit v1.2.3 From 9545b22da647cf6fbbac9c5a48c50fd72d892b11 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 4 May 2015 14:34:10 -0700 Subject: vlan: Use eth_proto_is_802_3 Replace "ntohs(proto) >= ETH_P_802_3_MIN" w/ eth_proto_is_802_3(proto). Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 920e4457ce6e..b9ab677c0c0a 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -539,7 +539,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, */ proto = vhdr->h_vlan_encapsulated_proto; - if (ntohs(proto) >= ETH_P_802_3_MIN) { + if (eth_proto_is_802_3(proto)) { skb->protocol = proto; return; } -- cgit v1.2.3 From a2f11835994ed5bcd6d66c7205947cc482231b08 Mon Sep 17 00:00:00 2001 From: Shawn Landden Date: Tue, 5 May 2015 09:07:16 -0700 Subject: can.h: make padding given by gcc explicit The current definition of struct can_frame has a 16-byte size, with 8-byte alignment, but the 3 bytes of padding are not explicit like the similar 2 bytes of padding of struct canfd_frame. Make it explicit so it is easier to read. Signed-off-by: Shawn Landden Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- Documentation/networking/can.txt | 3 +++ include/uapi/linux/can.h | 6 ++++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt index 5abad1e921ca..b48d4a149411 100644 --- a/Documentation/networking/can.txt +++ b/Documentation/networking/can.txt @@ -268,6 +268,9 @@ solution for a couple of reasons: struct can_frame { canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */ __u8 can_dlc; /* frame payload length in byte (0 .. 8) */ + __u8 __pad; /* padding */ + __u8 __res0; /* reserved / padding */ + __u8 __res1; /* reserved / padding */ __u8 data[8] __attribute__((aligned(8))); }; diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 41892f720057..9692cda5f8fc 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -95,11 +95,17 @@ typedef __u32 can_err_mask_t; * @can_dlc: frame payload length in byte (0 .. 8) aka data length code * N.B. the DLC field from ISO 11898-1 Chapter 8.4.2.3 has a 1:1 * mapping of the 'data length code' to the real payload length + * @__pad: padding + * @__res0: reserved / padding + * @__res1: reserved / padding * @data: CAN frame payload (up to 8 byte) */ struct can_frame { canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */ __u8 can_dlc; /* frame payload length in byte (0 .. CAN_MAX_DLEN) */ + __u8 __pad; /* padding */ + __u8 __res0; /* reserved / padding */ + __u8 __res1; /* reserved / padding */ __u8 data[CAN_MAX_DLEN] __attribute__((aligned(8))); }; -- cgit v1.2.3 From 9352c19f639354f093cb5457315c01bcb94aa82a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 20 Apr 2015 18:12:41 +0200 Subject: mac80211: extend get_tkip_seq to all keys Extend the function to read the TKIP IV32/IV16 to read the IV/PN for all ciphers in order to allow drivers with full hardware crypto to properly support this. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath5k/mac80211-ops.c | 1 - drivers/net/wireless/rt2x00/rt2800lib.c | 16 +++-- drivers/net/wireless/rt2x00/rt2800lib.h | 5 +- drivers/net/wireless/rt2x00/rt2800pci.c | 2 +- drivers/net/wireless/rt2x00/rt2800soc.c | 2 +- drivers/net/wireless/rt2x00/rt2800usb.c | 2 +- include/net/mac80211.h | 79 +++++++++++------------ net/mac80211/cfg.c | 91 +++++++++++++++++---------- net/mac80211/driver-ops.h | 11 ++-- net/mac80211/trace.h | 42 +++++++------ 10 files changed, 143 insertions(+), 108 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c index 0210630972d6..dc44cfef7517 100644 --- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c +++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c @@ -798,7 +798,6 @@ const struct ieee80211_ops ath5k_hw_ops = { .sw_scan_start = ath5k_sw_scan_start, .sw_scan_complete = ath5k_sw_scan_complete, .get_stats = ath5k_get_stats, - /* .get_tkip_seq = not implemented */ /* .set_frag_threshold = not implemented */ /* .set_rts_threshold = not implemented */ /* .sta_add = not implemented */ diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index 09135192c516..dfeca8355b22 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -7817,21 +7817,25 @@ EXPORT_SYMBOL_GPL(rt2800_probe_hw); /* * IEEE80211 stack callback functions. */ -void rt2800_get_tkip_seq(struct ieee80211_hw *hw, u8 hw_key_idx, u32 *iv32, - u16 *iv16) +void rt2800_get_key_seq(struct ieee80211_hw *hw, + struct ieee80211_key_conf *key, + struct ieee80211_key_seq *seq) { struct rt2x00_dev *rt2x00dev = hw->priv; struct mac_iveiv_entry iveiv_entry; u32 offset; - offset = MAC_IVEIV_ENTRY(hw_key_idx); + if (key->cipher != WLAN_CIPHER_SUITE_TKIP) + return; + + offset = MAC_IVEIV_ENTRY(key->hw_key_idx); rt2800_register_multiread(rt2x00dev, offset, &iveiv_entry, sizeof(iveiv_entry)); - memcpy(iv16, &iveiv_entry.iv[0], sizeof(*iv16)); - memcpy(iv32, &iveiv_entry.iv[4], sizeof(*iv32)); + memcpy(&seq->tkip.iv16, &iveiv_entry.iv[0], 2); + memcpy(&seq->tkip.iv32, &iveiv_entry.iv[4], 4); } -EXPORT_SYMBOL_GPL(rt2800_get_tkip_seq); +EXPORT_SYMBOL_GPL(rt2800_get_key_seq); int rt2800_set_rts_threshold(struct ieee80211_hw *hw, u32 value) { diff --git a/drivers/net/wireless/rt2x00/rt2800lib.h b/drivers/net/wireless/rt2x00/rt2800lib.h index 3019db637a4b..1609b8a7f7eb 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.h +++ b/drivers/net/wireless/rt2x00/rt2800lib.h @@ -209,8 +209,9 @@ int rt2800_read_eeprom_efuse(struct rt2x00_dev *rt2x00dev); int rt2800_probe_hw(struct rt2x00_dev *rt2x00dev); -void rt2800_get_tkip_seq(struct ieee80211_hw *hw, u8 hw_key_idx, u32 *iv32, - u16 *iv16); +void rt2800_get_key_seq(struct ieee80211_hw *hw, + struct ieee80211_key_conf *key, + struct ieee80211_key_seq *seq); int rt2800_set_rts_threshold(struct ieee80211_hw *hw, u32 value); int rt2800_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u16 queue_idx, diff --git a/drivers/net/wireless/rt2x00/rt2800pci.c b/drivers/net/wireless/rt2x00/rt2800pci.c index cc1b3cc73c5a..0af22573a2eb 100644 --- a/drivers/net/wireless/rt2x00/rt2800pci.c +++ b/drivers/net/wireless/rt2x00/rt2800pci.c @@ -309,7 +309,7 @@ static const struct ieee80211_ops rt2800pci_mac80211_ops = { .sw_scan_start = rt2x00mac_sw_scan_start, .sw_scan_complete = rt2x00mac_sw_scan_complete, .get_stats = rt2x00mac_get_stats, - .get_tkip_seq = rt2800_get_tkip_seq, + .get_key_seq = rt2800_get_key_seq, .set_rts_threshold = rt2800_set_rts_threshold, .sta_add = rt2x00mac_sta_add, .sta_remove = rt2x00mac_sta_remove, diff --git a/drivers/net/wireless/rt2x00/rt2800soc.c b/drivers/net/wireless/rt2x00/rt2800soc.c index aaa7aa4cad9d..a985a5a7945e 100644 --- a/drivers/net/wireless/rt2x00/rt2800soc.c +++ b/drivers/net/wireless/rt2x00/rt2800soc.c @@ -148,7 +148,7 @@ static const struct ieee80211_ops rt2800soc_mac80211_ops = { .sw_scan_start = rt2x00mac_sw_scan_start, .sw_scan_complete = rt2x00mac_sw_scan_complete, .get_stats = rt2x00mac_get_stats, - .get_tkip_seq = rt2800_get_tkip_seq, + .get_key_seq = rt2800_get_key_seq, .set_rts_threshold = rt2800_set_rts_threshold, .sta_add = rt2x00mac_sta_add, .sta_remove = rt2x00mac_sta_remove, diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c index 6ec2466b52b6..5932306084fd 100644 --- a/drivers/net/wireless/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/rt2x00/rt2800usb.c @@ -835,7 +835,7 @@ static const struct ieee80211_ops rt2800usb_mac80211_ops = { .sw_scan_start = rt2x00mac_sw_scan_start, .sw_scan_complete = rt2x00mac_sw_scan_complete, .get_stats = rt2x00mac_get_stats, - .get_tkip_seq = rt2800_get_tkip_seq, + .get_key_seq = rt2800_get_key_seq, .set_rts_threshold = rt2800_set_rts_threshold, .sta_add = rt2x00mac_sta_add, .sta_remove = rt2x00mac_sta_remove, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8a3a7d7cda26..47b39c26fc05 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1501,6 +1501,40 @@ struct ieee80211_key_conf { u8 key[0]; }; +/** + * struct ieee80211_key_seq - key sequence counter + * + * @tkip: TKIP data, containing IV32 and IV16 in host byte order + * @ccmp: PN data, most significant byte first (big endian, + * reverse order than in packet) + * @aes_cmac: PN data, most significant byte first (big endian, + * reverse order than in packet) + * @aes_gmac: PN data, most significant byte first (big endian, + * reverse order than in packet) + * @gcmp: PN data, most significant byte first (big endian, + * reverse order than in packet) + */ +struct ieee80211_key_seq { + union { + struct { + u32 iv32; + u16 iv16; + } tkip; + struct { + u8 pn[6]; + } ccmp; + struct { + u8 pn[6]; + } aes_cmac; + struct { + u8 pn[6]; + } aes_gmac; + struct { + u8 pn[6]; + } gcmp; + }; +}; + /** * struct ieee80211_cipher_scheme - cipher scheme * @@ -2836,9 +2870,9 @@ enum ieee80211_reconfig_type { * Returns zero if statistics are available. * The callback can sleep. * - * @get_tkip_seq: If your device implements TKIP encryption in hardware this - * callback should be provided to read the TKIP transmit IVs (both IV32 - * and IV16) for the given key from hardware. + * @get_key_seq: If your device implements encryption in hardware and does + * IV/PN assignment then this callback should be provided to read the + * IV/PN for the given key from hardware. * The callback must be atomic. * * @set_frag_threshold: Configuration of fragmentation threshold. Assign this @@ -3237,8 +3271,9 @@ struct ieee80211_ops { struct ieee80211_vif *vif); int (*get_stats)(struct ieee80211_hw *hw, struct ieee80211_low_level_stats *stats); - void (*get_tkip_seq)(struct ieee80211_hw *hw, u8 hw_key_idx, - u32 *iv32, u16 *iv16); + void (*get_key_seq)(struct ieee80211_hw *hw, + struct ieee80211_key_conf *key, + struct ieee80211_key_seq *seq); int (*set_frag_threshold)(struct ieee80211_hw *hw, u32 value); int (*set_rts_threshold)(struct ieee80211_hw *hw, u32 value); int (*sta_add)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, @@ -4272,40 +4307,6 @@ void ieee80211_get_tkip_p2k(struct ieee80211_key_conf *keyconf, void ieee80211_aes_cmac_calculate_k1_k2(struct ieee80211_key_conf *keyconf, u8 *k1, u8 *k2); -/** - * struct ieee80211_key_seq - key sequence counter - * - * @tkip: TKIP data, containing IV32 and IV16 in host byte order - * @ccmp: PN data, most significant byte first (big endian, - * reverse order than in packet) - * @aes_cmac: PN data, most significant byte first (big endian, - * reverse order than in packet) - * @aes_gmac: PN data, most significant byte first (big endian, - * reverse order than in packet) - * @gcmp: PN data, most significant byte first (big endian, - * reverse order than in packet) - */ -struct ieee80211_key_seq { - union { - struct { - u32 iv32; - u16 iv16; - } tkip; - struct { - u8 pn[6]; - } ccmp; - struct { - u8 pn[6]; - } aes_cmac; - struct { - u8 pn[6]; - } aes_gmac; - struct { - u8 pn[6]; - } gcmp; - }; -}; - /** * ieee80211_get_key_tx_seq - get key TX sequence counter * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 72a0178af737..dd7014b09396 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -312,6 +312,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, u32 iv32; u16 iv16; int err = -ENOENT; + struct ieee80211_key_seq kseq = {}; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -342,10 +343,12 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, iv32 = key->u.tkip.tx.iv32; iv16 = key->u.tkip.tx.iv16; - if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) - drv_get_tkip_seq(sdata->local, - key->conf.hw_key_idx, - &iv32, &iv16); + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && + !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + drv_get_key_seq(sdata->local, key, &kseq); + iv32 = kseq.tkip.iv32; + iv16 = kseq.tkip.iv16; + } seq[0] = iv16 & 0xff; seq[1] = (iv16 >> 8) & 0xff; @@ -358,49 +361,73 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: - pn64 = atomic64_read(&key->u.ccmp.tx_pn); - seq[0] = pn64; - seq[1] = pn64 >> 8; - seq[2] = pn64 >> 16; - seq[3] = pn64 >> 24; - seq[4] = pn64 >> 32; - seq[5] = pn64 >> 40; + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && + !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + drv_get_key_seq(sdata->local, key, &kseq); + memcpy(seq, kseq.ccmp.pn, 6); + } else { + pn64 = atomic64_read(&key->u.ccmp.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + } params.seq = seq; params.seq_len = 6; break; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: - pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); - seq[0] = pn64; - seq[1] = pn64 >> 8; - seq[2] = pn64 >> 16; - seq[3] = pn64 >> 24; - seq[4] = pn64 >> 32; - seq[5] = pn64 >> 40; + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && + !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + drv_get_key_seq(sdata->local, key, &kseq); + memcpy(seq, kseq.aes_cmac.pn, 6); + } else { + pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + } params.seq = seq; params.seq_len = 6; break; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: - pn64 = atomic64_read(&key->u.aes_gmac.tx_pn); - seq[0] = pn64; - seq[1] = pn64 >> 8; - seq[2] = pn64 >> 16; - seq[3] = pn64 >> 24; - seq[4] = pn64 >> 32; - seq[5] = pn64 >> 40; + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && + !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + drv_get_key_seq(sdata->local, key, &kseq); + memcpy(seq, kseq.aes_gmac.pn, 6); + } else { + pn64 = atomic64_read(&key->u.aes_gmac.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + } params.seq = seq; params.seq_len = 6; break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: - pn64 = atomic64_read(&key->u.gcmp.tx_pn); - seq[0] = pn64; - seq[1] = pn64 >> 8; - seq[2] = pn64 >> 16; - seq[3] = pn64 >> 24; - seq[4] = pn64 >> 32; - seq[5] = pn64 >> 40; + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && + !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + drv_get_key_seq(sdata->local, key, &kseq); + memcpy(seq, kseq.gcmp.pn, 6); + } else { + pn64 = atomic64_read(&key->u.gcmp.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + } params.seq = seq; params.seq_len = 6; break; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 26e1ca8a474a..c01e681b90fb 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -417,12 +417,13 @@ static inline int drv_get_stats(struct ieee80211_local *local, return ret; } -static inline void drv_get_tkip_seq(struct ieee80211_local *local, - u8 hw_key_idx, u32 *iv32, u16 *iv16) +static inline void drv_get_key_seq(struct ieee80211_local *local, + struct ieee80211_key *key, + struct ieee80211_key_seq *seq) { - if (local->ops->get_tkip_seq) - local->ops->get_tkip_seq(&local->hw, hw_key_idx, iv32, iv16); - trace_drv_get_tkip_seq(local, hw_key_idx, iv32, iv16); + if (local->ops->get_key_seq) + local->ops->get_key_seq(&local->hw, &key->conf, seq); + trace_drv_get_key_seq(local, &key->conf); } static inline int drv_set_frag_threshold(struct ieee80211_local *local, diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 4c2e7690226a..6f14591d8ca9 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -69,6 +69,17 @@ #define CHANCTX_PR_ARG CHANDEF_PR_ARG, MIN_CHANDEF_PR_ARG, \ __entry->rx_chains_static, __entry->rx_chains_dynamic +#define KEY_ENTRY __field(u32, cipher) \ + __field(u8, hw_key_idx) \ + __field(u8, flags) \ + __field(s8, keyidx) +#define KEY_ASSIGN(k) __entry->cipher = (k)->cipher; \ + __entry->flags = (k)->flags; \ + __entry->keyidx = (k)->keyidx; \ + __entry->hw_key_idx = (k)->hw_key_idx; +#define KEY_PR_FMT " cipher:0x%x, flags=%#x, keyidx=%d, hw_key_idx=%d" +#define KEY_PR_ARG __entry->cipher, __entry->flags, __entry->keyidx, __entry->hw_key_idx + /* @@ -522,25 +533,19 @@ TRACE_EVENT(drv_set_key, LOCAL_ENTRY VIF_ENTRY STA_ENTRY - __field(u32, cipher) - __field(u8, hw_key_idx) - __field(u8, flags) - __field(s8, keyidx) + KEY_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; - __entry->cipher = key->cipher; - __entry->flags = key->flags; - __entry->keyidx = key->keyidx; - __entry->hw_key_idx = key->hw_key_idx; + KEY_ASSIGN(key); ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT, - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT KEY_PR_FMT, + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, KEY_PR_ARG ) ); @@ -656,28 +661,25 @@ TRACE_EVENT(drv_get_stats, ) ); -TRACE_EVENT(drv_get_tkip_seq, +TRACE_EVENT(drv_get_key_seq, TP_PROTO(struct ieee80211_local *local, - u8 hw_key_idx, u32 *iv32, u16 *iv16), + struct ieee80211_key_conf *key), - TP_ARGS(local, hw_key_idx, iv32, iv16), + TP_ARGS(local, key), TP_STRUCT__entry( LOCAL_ENTRY - __field(u8, hw_key_idx) - __field(u32, iv32) - __field(u16, iv16) + KEY_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; - __entry->hw_key_idx = hw_key_idx; - __entry->iv32 = *iv32; - __entry->iv16 = *iv16; + KEY_ASSIGN(key); ), TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG + LOCAL_PR_FMT KEY_PR_FMT, + LOCAL_PR_ARG, KEY_PR_ARG ) ); -- cgit v1.2.3 From a31cf1c69e89e0c2d5515b04aca313f1014a714d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 20 Apr 2015 18:21:58 +0200 Subject: mac80211: extend get_key() to return PN for all ciphers For ciphers not supported by mac80211, the function currently doesn't return any PN data. Fix this by extending the driver's get_key_seq() a little more to allow moving arbitrary PN data. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 +++++++ net/mac80211/cfg.c | 9 +++++++++ net/mac80211/key.c | 4 ++-- net/mac80211/key.h | 3 +-- 4 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 47b39c26fc05..67e0df14ba0f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1501,6 +1501,8 @@ struct ieee80211_key_conf { u8 key[0]; }; +#define IEEE80211_MAX_PN_LEN 16 + /** * struct ieee80211_key_seq - key sequence counter * @@ -1513,6 +1515,7 @@ struct ieee80211_key_conf { * reverse order than in packet) * @gcmp: PN data, most significant byte first (big endian, * reverse order than in packet) + * @hw: data for HW-only (e.g. cipher scheme) keys */ struct ieee80211_key_seq { union { @@ -1532,6 +1535,10 @@ struct ieee80211_key_seq { struct { u8 pn[6]; } gcmp; + struct { + u8 seq[IEEE80211_MAX_PN_LEN]; + u8 seq_len; + } hw; }; }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index dd7014b09396..3469bbdc891c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -431,6 +431,15 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq = seq; params.seq_len = 6; break; + default: + if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) + break; + if (WARN_ON(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) + break; + drv_get_key_seq(sdata->local, key, &kseq); + params.seq = kseq.hw.seq; + params.seq_len = kseq.hw.seq_len; + break; } params.key = key->conf.key; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 3e0b814f4db3..0a5d5c5ad30f 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -485,8 +485,8 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, break; default: if (cs) { - size_t len = (seq_len > MAX_PN_LEN) ? - MAX_PN_LEN : seq_len; + size_t len = (seq_len > IEEE80211_MAX_PN_LEN) ? + IEEE80211_MAX_PN_LEN : seq_len; key->conf.iv_len = cs->hdr_len; key->conf.icv_len = cs->mic_len; diff --git a/net/mac80211/key.h b/net/mac80211/key.h index c5a31835be0e..df430a618764 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -18,7 +18,6 @@ #define NUM_DEFAULT_KEYS 4 #define NUM_DEFAULT_MGMT_KEYS 2 -#define MAX_PN_LEN 16 struct ieee80211_local; struct ieee80211_sub_if_data; @@ -116,7 +115,7 @@ struct ieee80211_key { } gcmp; struct { /* generic cipher scheme */ - u8 rx_pn[IEEE80211_NUM_TIDS + 1][MAX_PN_LEN]; + u8 rx_pn[IEEE80211_NUM_TIDS + 1][IEEE80211_MAX_PN_LEN]; } gen; } u; -- cgit v1.2.3 From 06f207fc541862ba8902ceda0ddeade6ea6bce72 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Wed, 6 May 2015 16:28:31 +0300 Subject: cfg80211: change GO_CONCURRENT to IR_CONCURRENT for STA The GO_CONCURRENT regulatory definition can be extended to station interfaces requesting to IR as part of TDLS off-channel operations. Rename the GO_CONCURRENT flag to IR_CONCURRENT and allow the added use-case. Change internal users of GO_CONCURRENT to use the new definition. Signed-off-by: Arik Nemtsov Reviewed-by: Johannes Berg Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlwifi/iwl-nvm-parse.c | 2 +- include/net/cfg80211.h | 4 +-- include/uapi/linux/nl80211.h | 28 +++++++++++--------- net/wireless/chan.c | 38 ++++++++++++++++------------ net/wireless/nl80211.c | 4 +-- net/wireless/reg.c | 4 +-- 6 files changed, 45 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c index 83903a5025c2..0b5a81d52a3e 100644 --- a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c @@ -248,7 +248,7 @@ static u32 iwl_get_channel_flags(u8 ch_num, int ch_idx, bool is_5ghz, */ if ((nvm_flags & NVM_CHANNEL_GO_CONCURRENT) && (flags & IEEE80211_CHAN_NO_IR)) - flags |= IEEE80211_CHAN_GO_CONCURRENT; + flags |= IEEE80211_CHAN_IR_CONCURRENT; return flags; } diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f8d6813cd5b2..d63ecec73090 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -111,7 +111,7 @@ enum ieee80211_band { * This may be due to the driver or due to regulatory bandwidth * restrictions. * @IEEE80211_CHAN_INDOOR_ONLY: see %NL80211_FREQUENCY_ATTR_INDOOR_ONLY - * @IEEE80211_CHAN_GO_CONCURRENT: see %NL80211_FREQUENCY_ATTR_GO_CONCURRENT + * @IEEE80211_CHAN_IR_CONCURRENT: see %NL80211_FREQUENCY_ATTR_IR_CONCURRENT * @IEEE80211_CHAN_NO_20MHZ: 20 MHz bandwidth is not permitted * on this channel. * @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted @@ -129,7 +129,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_80MHZ = 1<<7, IEEE80211_CHAN_NO_160MHZ = 1<<8, IEEE80211_CHAN_INDOOR_ONLY = 1<<9, - IEEE80211_CHAN_GO_CONCURRENT = 1<<10, + IEEE80211_CHAN_IR_CONCURRENT = 1<<10, IEEE80211_CHAN_NO_20MHZ = 1<<11, IEEE80211_CHAN_NO_10MHZ = 1<<12, }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 241220c43e86..c0ab6b0a3919 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2620,16 +2620,17 @@ enum nl80211_band_attr { * an indoor surroundings, i.e., it is connected to AC power (and not * through portable DC inverters) or is under the control of a master * that is acting as an AP and is connected to AC power. - * @NL80211_FREQUENCY_ATTR_GO_CONCURRENT: GO operation is allowed on this + * @NL80211_FREQUENCY_ATTR_IR_CONCURRENT: IR operation is allowed on this * channel if it's connected concurrently to a BSS on the same channel on * the 2 GHz band or to a channel in the same UNII band (on the 5 GHz - * band), and IEEE80211_CHAN_RADAR is not set. Instantiating a GO on a - * channel that has the GO_CONCURRENT attribute set can be done when there - * is a clear assessment that the device is operating under the guidance of - * an authorized master, i.e., setting up a GO while the device is also - * connected to an AP with DFS and radar detection on the UNII band (it is - * up to user-space, i.e., wpa_supplicant to perform the required - * verifications) + * band), and IEEE80211_CHAN_RADAR is not set. Instantiating a GO or TDLS + * off-channel on a channel that has the IR_CONCURRENT attribute set can be + * done when there is a clear assessment that the device is operating under + * the guidance of an authorized master, i.e., setting up a GO or TDLS + * off-channel while the device is also connected to an AP with DFS and + * radar detection on the UNII band (it is up to user-space, i.e., + * wpa_supplicant to perform the required verifications). Using this + * attribute for IR is disallowed for master interfaces (IBSS, AP). * @NL80211_FREQUENCY_ATTR_NO_20MHZ: 20 MHz operation is not allowed * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_NO_10MHZ: 10 MHz operation is not allowed @@ -2641,7 +2642,7 @@ enum nl80211_band_attr { * See https://apps.fcc.gov/eas/comments/GetPublishedDocument.html?id=327&tn=528122 * for more information on the FCC description of the relaxations allowed * by NL80211_FREQUENCY_ATTR_INDOOR_ONLY and - * NL80211_FREQUENCY_ATTR_GO_CONCURRENT. + * NL80211_FREQUENCY_ATTR_IR_CONCURRENT. */ enum nl80211_frequency_attr { __NL80211_FREQUENCY_ATTR_INVALID, @@ -2659,7 +2660,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_160MHZ, NL80211_FREQUENCY_ATTR_DFS_CAC_TIME, NL80211_FREQUENCY_ATTR_INDOOR_ONLY, - NL80211_FREQUENCY_ATTR_GO_CONCURRENT, + NL80211_FREQUENCY_ATTR_IR_CONCURRENT, NL80211_FREQUENCY_ATTR_NO_20MHZ, NL80211_FREQUENCY_ATTR_NO_10MHZ, @@ -2672,6 +2673,8 @@ enum nl80211_frequency_attr { #define NL80211_FREQUENCY_ATTR_PASSIVE_SCAN NL80211_FREQUENCY_ATTR_NO_IR #define NL80211_FREQUENCY_ATTR_NO_IBSS NL80211_FREQUENCY_ATTR_NO_IR #define NL80211_FREQUENCY_ATTR_NO_IR NL80211_FREQUENCY_ATTR_NO_IR +#define NL80211_FREQUENCY_ATTR_GO_CONCURRENT \ + NL80211_FREQUENCY_ATTR_IR_CONCURRENT /** * enum nl80211_bitrate_attr - bitrate attributes @@ -2830,7 +2833,7 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_AUTO_BW: maximum available bandwidth should be calculated * base on contiguous rules and wider channels will be allowed to cross * multiple contiguous/overlapping frequency ranges. - * @NL80211_RRF_GO_CONCURRENT: See &NL80211_FREQUENCY_ATTR_GO_CONCURRENT + * @NL80211_RRF_IR_CONCURRENT: See &NL80211_FREQUENCY_ATTR_IR_CONCURRENT * @NL80211_RRF_NO_HT40MINUS: channels can't be used in HT40- operation * @NL80211_RRF_NO_HT40PLUS: channels can't be used in HT40+ operation * @NL80211_RRF_NO_80MHZ: 80MHz operation not allowed @@ -2847,7 +2850,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_IR = 1<<7, __NL80211_RRF_NO_IBSS = 1<<8, NL80211_RRF_AUTO_BW = 1<<11, - NL80211_RRF_GO_CONCURRENT = 1<<12, + NL80211_RRF_IR_CONCURRENT = 1<<12, NL80211_RRF_NO_HT40MINUS = 1<<13, NL80211_RRF_NO_HT40PLUS = 1<<14, NL80211_RRF_NO_80MHZ = 1<<15, @@ -2859,6 +2862,7 @@ enum nl80211_reg_rule_flags { #define NL80211_RRF_NO_IR NL80211_RRF_NO_IR #define NL80211_RRF_NO_HT40 (NL80211_RRF_NO_HT40MINUS |\ NL80211_RRF_NO_HT40PLUS) +#define NL80211_RRF_GO_CONCURRENT NL80211_RRF_IR_CONCURRENT /* For backport compatibility with older userspace */ #define NL80211_RRF_NO_IR_ALL (NL80211_RRF_NO_IR | __NL80211_RRF_NO_IBSS) diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 5bcffdbf3e88..915b328b9ac5 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -698,19 +698,20 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, EXPORT_SYMBOL(cfg80211_chandef_usable); /* - * For GO only, check if the channel can be used under permissive conditions - * mandated by the some regulatory bodies, i.e., the channel is marked with - * IEEE80211_CHAN_GO_CONCURRENT and there is an additional station interface + * Check if the channel can be used under permissive conditions mandated by + * some regulatory bodies, i.e., the channel is marked with + * IEEE80211_CHAN_IR_CONCURRENT and there is an additional station interface * associated to an AP on the same channel or on the same UNII band * (assuming that the AP is an authorized master). - * In addition allow the GO to operate on a channel on which indoor operation is + * In addition allow operation on a channel on which indoor operation is * allowed, iff we are currently operating in an indoor environment. */ -static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev, +static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, + enum nl80211_iftype iftype, struct ieee80211_channel *chan) { struct wireless_dev *wdev; - struct wiphy *wiphy = wiphy_idx_to_wiphy(rdev->wiphy_idx); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); ASSERT_RTNL(); @@ -718,16 +719,22 @@ static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev, !(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR)) return false; + /* only valid for GO and TDLS off-channel (station/p2p-CL) */ + if (iftype != NL80211_IFTYPE_P2P_GO && + iftype != NL80211_IFTYPE_STATION && + iftype != NL80211_IFTYPE_P2P_CLIENT) + return false; + if (regulatory_indoor_allowed() && (chan->flags & IEEE80211_CHAN_INDOOR_ONLY)) return true; - if (!(chan->flags & IEEE80211_CHAN_GO_CONCURRENT)) + if (!(chan->flags & IEEE80211_CHAN_IR_CONCURRENT)) return false; /* * Generally, it is possible to rely on another device/driver to allow - * the GO concurrent relaxation, however, since the device can further + * the IR concurrent relaxation, however, since the device can further * enforce the relaxation (by doing a similar verifications as this), * and thus fail the GO instantiation, consider only the interfaces of * the current registered device. @@ -748,7 +755,8 @@ static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev, * GO_CONCURRENT is disconnected now. But then we must make sure * we're not outdoor on an indoor-only channel. */ - if (wdev->iftype == NL80211_IFTYPE_P2P_GO && + if (iftype == NL80211_IFTYPE_P2P_GO && + wdev->iftype == NL80211_IFTYPE_P2P_GO && wdev->beacon_interval && !(chan->flags & IEEE80211_CHAN_INDOOR_ONLY)) other_chan = wdev->chandef.chan; @@ -793,7 +801,6 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype) { - struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); bool res; u32 prohibited_flags = IEEE80211_CHAN_DISABLED | IEEE80211_CHAN_RADAR; @@ -801,13 +808,12 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype); /* - * Under certain conditions suggested by the some regulatory bodies - * a GO can operate on channels marked with IEEE80211_NO_IR - * so set this flag only if such relaxations are not enabled and - * the conditions are not met. + * Under certain conditions suggested by some regulatory bodies a + * GO/STA can IR on channels marked with IEEE80211_NO_IR. Set this flag + * only if such relaxations are not enabled and the conditions are not + * met. */ - if (iftype != NL80211_IFTYPE_P2P_GO || - !cfg80211_go_permissive_chan(rdev, chandef->chan)) + if (!cfg80211_ir_permissive_chan(wiphy, iftype, chandef->chan)) prohibited_flags |= IEEE80211_CHAN_NO_IR; if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 && diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8a33bbae9ec5..c264effd00a6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -639,8 +639,8 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if ((chan->flags & IEEE80211_CHAN_INDOOR_ONLY) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_INDOOR_ONLY)) goto nla_put_failure; - if ((chan->flags & IEEE80211_CHAN_GO_CONCURRENT) && - nla_put_flag(msg, NL80211_FREQUENCY_ATTR_GO_CONCURRENT)) + if ((chan->flags & IEEE80211_CHAN_IR_CONCURRENT) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_IR_CONCURRENT)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_NO_20MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_20MHZ)) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0e347f888fe9..d359e0610198 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -989,8 +989,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_NO_OFDM; if (rd_flags & NL80211_RRF_NO_OUTDOOR) channel_flags |= IEEE80211_CHAN_INDOOR_ONLY; - if (rd_flags & NL80211_RRF_GO_CONCURRENT) - channel_flags |= IEEE80211_CHAN_GO_CONCURRENT; + if (rd_flags & NL80211_RRF_IR_CONCURRENT) + channel_flags |= IEEE80211_CHAN_IR_CONCURRENT; if (rd_flags & NL80211_RRF_NO_HT40MINUS) channel_flags |= IEEE80211_CHAN_NO_HT40MINUS; if (rd_flags & NL80211_RRF_NO_HT40PLUS) -- cgit v1.2.3 From 21c8fe9915276d923f8c1e43434fd6d37a3b9aef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 May 2015 14:26:24 -0700 Subject: tcp: adjust window probe timers to safer values With the advent of small rto timers in datacenter TCP, (ip route ... rto_min x), the following can happen : 1) Qdisc is full, transmit fails. TCP sets a timer based on icsk_rto to retry the transmit, without exponential backoff. With low icsk_rto, and lot of sockets, all cpus are servicing timer interrupts like crazy. Intent of the code was to retry with a timer between 200 (TCP_RTO_MIN) and 500ms (TCP_RESOURCE_PROBE_INTERVAL) 2) Receivers can send zero windows if they don't drain their receive queue. TCP sends zero window probes, based on icsk_rto current value, with exponential backoff. With /proc/sys/net/ipv4/tcp_retries2 being 15 (or even smaller in some cases), sender can abort in less than one or two minutes ! If receiver stops the sender, it obviously doesn't care of very tight rto. Probability of dropping the ACK reopening the window is not worth the risk. Lets change the base timer to be at least 200ms (TCP_RTO_MIN) for these events (but not normal RTO based retransmits) A followup patch adds a new SNMP counter, as it would have helped a lot diagnosing this issue. Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 27 ++++++++++++++++++++++----- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_output.c | 2 +- 3 files changed, 24 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 6d204f3f9df8..7a2248a35b13 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1043,14 +1043,31 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk) return tp->is_cwnd_limited; } -static inline void tcp_check_probe_timer(struct sock *sk) +/* Something is really bad, we could not queue an additional packet, + * because qdisc is full or receiver sent a 0 window. + * We do not want to add fuel to the fire, or abort too early, + * so make sure the timer we arm now is at least 200ms in the future, + * regardless of current icsk_rto value (as it could be ~2ms) + */ +static inline unsigned long tcp_probe0_base(const struct sock *sk) { - const struct tcp_sock *tp = tcp_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); + return max_t(unsigned long, inet_csk(sk)->icsk_rto, TCP_RTO_MIN); +} - if (!tp->packets_out && !icsk->icsk_pending) +/* Variant of inet_csk_rto_backoff() used for zero window probes */ +static inline unsigned long tcp_probe0_when(const struct sock *sk, + unsigned long max_when) +{ + u64 when = (u64)tcp_probe0_base(sk) << inet_csk(sk)->icsk_backoff; + + return (unsigned long)min_t(u64, when, max_when); +} + +static inline void tcp_check_probe_timer(struct sock *sk) +{ + if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending) inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - icsk->icsk_rto, TCP_RTO_MAX); + tcp_probe0_base(sk), TCP_RTO_MAX); } static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index df2ca615cd0c..cf8b20ff6658 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3233,7 +3233,7 @@ static void tcp_ack_probe(struct sock *sk) * This function is not for random using! */ } else { - unsigned long when = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); + unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX); inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a369e8a70b2c..b76c719e1979 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3490,7 +3490,7 @@ void tcp_send_probe0(struct sock *sk) probe_max = TCP_RESOURCE_PROBE_INTERVAL; } inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - inet_csk_rto_backoff(icsk, probe_max), + tcp_probe0_when(sk, probe_max), TCP_RTO_MAX); } -- cgit v1.2.3 From e520af48c7e5acae5f17f82a79ba7ab7cf156f3b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 May 2015 14:26:25 -0700 Subject: tcp: add TCPWinProbe and TCPKeepAlive SNMP counters Diagnosing problems related to Window Probes has been hard because we lack a counter. TCPWinProbe counts the number of ACK packets a sender has to send at regular intervals to make sure a reverse ACK packet opening back a window had not been lost. TCPKeepAlive counts the number of ACK packets sent to keep TCP flows alive (SO_KEEPALIVE) Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Nandita Dukkipati Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- include/uapi/linux/snmp.h | 2 ++ net/ipv4/proc.c | 2 ++ net/ipv4/tcp_output.c | 13 +++++++------ net/ipv4/tcp_timer.c | 2 +- 5 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 7a2248a35b13..b8ea12880fd9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -527,7 +527,7 @@ int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t); void tcp_send_probe0(struct sock *); void tcp_send_partial(struct sock *); -int tcp_write_wakeup(struct sock *); +int tcp_write_wakeup(struct sock *, int mib); void tcp_send_fin(struct sock *sk); void tcp_send_active_reset(struct sock *sk, gfp_t priority); int tcp_send_synack(struct sock *); diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 6a6fb747c78d..eee8968407f0 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -276,6 +276,8 @@ enum LINUX_MIB_TCPACKSKIPPEDFINWAIT2, /* TCPACKSkippedFinWait2 */ LINUX_MIB_TCPACKSKIPPEDTIMEWAIT, /* TCPACKSkippedTimeWait */ LINUX_MIB_TCPACKSKIPPEDCHALLENGE, /* TCPACKSkippedChallenge */ + LINUX_MIB_TCPWINPROBE, /* TCPWinProbe */ + LINUX_MIB_TCPKEEPALIVE, /* TCPKeepAlive */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index e1f3b911dd1e..da5d483e236a 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -298,6 +298,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2), SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT), SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE), + SNMP_MIB_ITEM("TCPWinProbe", LINUX_MIB_TCPWINPROBE), + SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b76c719e1979..7386d32cd670 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3382,7 +3382,7 @@ EXPORT_SYMBOL_GPL(tcp_send_ack); * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is * out-of-date with SND.UNA-1 to probe window. */ -static int tcp_xmit_probe_skb(struct sock *sk, int urgent) +static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -3400,6 +3400,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) */ tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); skb_mstamp_get(&skb->skb_mstamp); + NET_INC_STATS_BH(sock_net(sk), mib); return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); } @@ -3407,12 +3408,12 @@ void tcp_send_window_probe(struct sock *sk) { if (sk->sk_state == TCP_ESTABLISHED) { tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; - tcp_xmit_probe_skb(sk, 0); + tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE); } } /* Initiate keepalive or window probe from timer. */ -int tcp_write_wakeup(struct sock *sk) +int tcp_write_wakeup(struct sock *sk, int mib) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -3449,8 +3450,8 @@ int tcp_write_wakeup(struct sock *sk) return err; } else { if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF)) - tcp_xmit_probe_skb(sk, 1); - return tcp_xmit_probe_skb(sk, 0); + tcp_xmit_probe_skb(sk, 1, mib); + return tcp_xmit_probe_skb(sk, 0, mib); } } @@ -3464,7 +3465,7 @@ void tcp_send_probe0(struct sock *sk) unsigned long probe_max; int err; - err = tcp_write_wakeup(sk); + err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE); if (tp->packets_out || !tcp_send_head(sk)) { /* Cancel probe timer, if it is not required. */ diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8c65dc147d8b..65bf670e8714 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -616,7 +616,7 @@ static void tcp_keepalive_timer (unsigned long data) tcp_write_err(sk); goto out; } - if (tcp_write_wakeup(sk) <= 0) { + if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) { icsk->icsk_probes_out++; elapsed = keepalive_intvl_when(tp); } else { -- cgit v1.2.3 From 4ae92bc77ac8e620f7c8d59b5882a4cb0d1c4ef1 Mon Sep 17 00:00:00 2001 From: Nicolas Schichan Date: Wed, 6 May 2015 16:12:27 +0200 Subject: net: filter: add a callback to allow classic post-verifier transformations This is in preparation for use by the seccomp code, the rationale is not to duplicate additional code within the seccomp layer, but instead, have it abstracted and hidden within the classic BPF API. As an interim step, this now also makes bpf_prepare_filter() visible (not as exported symbol though), so that seccomp can reuse that code path instead of reimplementing it. Joint work with Daniel Borkmann. Signed-off-by: Nicolas Schichan Signed-off-by: Daniel Borkmann Cc: Alexei Starovoitov Cc: Kees Cook Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 6 ++++++ net/core/filter.c | 18 +++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index fa11b3a367be..91996247cb55 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -384,7 +384,13 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); +typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, + unsigned int flen); + int bpf_check_classic(const struct sock_filter *filter, unsigned int flen); +struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, + bpf_aux_classic_check_t trans); + int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); diff --git a/net/core/filter.c b/net/core/filter.c index bf831a85c315..e670494f1d83 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -988,7 +988,8 @@ out_err: return ERR_PTR(err); } -static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp) +struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, + bpf_aux_classic_check_t trans) { int err; @@ -1001,6 +1002,17 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp) return ERR_PTR(err); } + /* There might be additional checks and transformations + * needed on classic filters, f.e. in case of seccomp. + */ + if (trans) { + err = trans(fp->insns, fp->len); + if (err) { + __bpf_prog_release(fp); + return ERR_PTR(err); + } + } + /* Probe if we can JIT compile the filter and if so, do * the compilation of the filter. */ @@ -1050,7 +1062,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - fp = bpf_prepare_filter(fp); + fp = bpf_prepare_filter(fp, NULL); if (IS_ERR(fp)) return PTR_ERR(fp); @@ -1135,7 +1147,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - prog = bpf_prepare_filter(prog); + prog = bpf_prepare_filter(prog, NULL); if (IS_ERR(prog)) return PTR_ERR(prog); -- cgit v1.2.3 From d9e12f42e58da475379b9080708b94f2095904af Mon Sep 17 00:00:00 2001 From: Nicolas Schichan Date: Wed, 6 May 2015 16:12:28 +0200 Subject: seccomp: simplify seccomp_prepare_filter and reuse bpf_prepare_filter Remove the calls to bpf_check_classic(), bpf_convert_filter() and bpf_migrate_runtime() and let bpf_prepare_filter() take care of that instead. seccomp_check_filter() is passed to bpf_prepare_filter() so that it gets called from there, after bpf_check_classic(). We can now remove exposure of two internal classic BPF functions previously used by seccomp. The export of bpf_check_classic() symbol, previously known as sk_chk_filter(), was there since pre git times, and no in-tree module was using it, therefore remove it. Joint work with Daniel Borkmann. Signed-off-by: Nicolas Schichan Signed-off-by: Daniel Borkmann Cc: Alexei Starovoitov Cc: Kees Cook Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 4 --- kernel/seccomp.c | 68 ++++++++++++++++---------------------------------- net/core/filter.c | 8 +++--- 3 files changed, 26 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 91996247cb55..0dcb44bcfc5f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -363,9 +363,6 @@ int sk_filter(struct sock *sk, struct sk_buff *skb); void bpf_prog_select_runtime(struct bpf_prog *fp); void bpf_prog_free(struct bpf_prog *fp); -int bpf_convert_filter(struct sock_filter *prog, int len, - struct bpf_insn *new_prog, int *new_len); - struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags); @@ -387,7 +384,6 @@ int sk_detach_filter(struct sock *sk); typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, unsigned int flen); -int bpf_check_classic(const struct sock_filter *filter, unsigned int flen); struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, bpf_aux_classic_check_t trans); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 4f44028943e6..93d40f7f3683 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -347,15 +347,14 @@ static inline void seccomp_sync_threads(void) static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) { struct seccomp_filter *filter; - unsigned long fp_size; - struct sock_filter *fp; - int new_len; - long ret; + struct bpf_prog *prog; + unsigned long fsize; if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) return ERR_PTR(-EINVAL); + BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter)); - fp_size = fprog->len * sizeof(struct sock_filter); + fsize = bpf_classic_proglen(fprog); /* * Installing a seccomp filter requires that the task has @@ -368,60 +367,37 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) CAP_SYS_ADMIN) != 0) return ERR_PTR(-EACCES); - fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN); - if (!fp) + prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); + if (!prog) return ERR_PTR(-ENOMEM); /* Copy the instructions from fprog. */ - ret = -EFAULT; - if (copy_from_user(fp, fprog->filter, fp_size)) - goto free_prog; - - /* Check and rewrite the fprog via the skb checker */ - ret = bpf_check_classic(fp, fprog->len); - if (ret) - goto free_prog; + if (copy_from_user(prog->insns, fprog->filter, fsize)) { + __bpf_prog_free(prog); + return ERR_PTR(-EFAULT); + } - /* Check and rewrite the fprog for seccomp use */ - ret = seccomp_check_filter(fp, fprog->len); - if (ret) - goto free_prog; + prog->len = fprog->len; - /* Convert 'sock_filter' insns to 'bpf_insn' insns */ - ret = bpf_convert_filter(fp, fprog->len, NULL, &new_len); - if (ret) - goto free_prog; + /* bpf_prepare_filter() already takes care of freeing + * memory in case something goes wrong. + */ + prog = bpf_prepare_filter(prog, seccomp_check_filter); + if (IS_ERR(prog)) + return ERR_CAST(prog); /* Allocate a new seccomp_filter */ - ret = -ENOMEM; filter = kzalloc(sizeof(struct seccomp_filter), GFP_KERNEL|__GFP_NOWARN); - if (!filter) - goto free_prog; - - filter->prog = bpf_prog_alloc(bpf_prog_size(new_len), __GFP_NOWARN); - if (!filter->prog) - goto free_filter; - - ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len); - if (ret) - goto free_filter_prog; + if (!filter) { + bpf_prog_destroy(prog); + return ERR_PTR(-ENOMEM); + } - kfree(fp); + filter->prog = prog; atomic_set(&filter->usage, 1); - filter->prog->len = new_len; - - bpf_prog_select_runtime(filter->prog); return filter; - -free_filter_prog: - __bpf_prog_free(filter->prog); -free_filter: - kfree(filter); -free_prog: - kfree(fp); - return ERR_PTR(ret); } /** diff --git a/net/core/filter.c b/net/core/filter.c index e670494f1d83..f887084740cd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -355,8 +355,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * for socket filters: ctx == 'struct sk_buff *', for seccomp: * ctx == 'struct seccomp_data *'. */ -int bpf_convert_filter(struct sock_filter *prog, int len, - struct bpf_insn *new_prog, int *new_len) +static int bpf_convert_filter(struct sock_filter *prog, int len, + struct bpf_insn *new_prog, int *new_len) { int new_flen = 0, pass = 0, target, i; struct bpf_insn *new_insn; @@ -751,7 +751,8 @@ static bool chk_code_allowed(u16 code_to_probe) * * Returns 0 if the rule set is legal or -EINVAL if not. */ -int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) +static int bpf_check_classic(const struct sock_filter *filter, + unsigned int flen) { bool anc_found; int pc; @@ -825,7 +826,6 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) return -EINVAL; } -EXPORT_SYMBOL(bpf_check_classic); static int bpf_prog_store_orig_filter(struct bpf_prog *fp, const struct sock_fprog *fprog) -- cgit v1.2.3 From ac67eb2c5347bd9976308c0e0cf1d9e7ca690342 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 6 May 2015 16:12:30 +0200 Subject: seccomp, filter: add and use bpf_prog_create_from_user from seccomp Seccomp has always been a special candidate when it comes to preparation of its filters in seccomp_prepare_filter(). Due to the extra checks and filter rewrite it partially duplicates code and has BPF internals exposed. This patch adds a generic API inside the BPF code code that seccomp can use and thus keep it's filter preparation code minimal and better maintainable. The other side-effect is that now classic JITs can add seccomp support as well by only providing a BPF_LDX | BPF_W | BPF_ABS translation. Tested with seccomp and BPF test suites. Signed-off-by: Daniel Borkmann Cc: Nicolas Schichan Cc: Alexei Starovoitov Cc: Kees Cook Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 12 +++++------- kernel/seccomp.c | 42 ++++++++++++----------------------------- net/core/filter.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 66 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 0dcb44bcfc5f..3c03a6085b82 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -374,19 +374,17 @@ static inline void bpf_prog_unlock_free(struct bpf_prog *fp) __bpf_prog_free(fp); } +typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, + unsigned int flen); + int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); +int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, + bpf_aux_classic_check_t trans); void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); - -typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, - unsigned int flen); - -struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, - bpf_aux_classic_check_t trans); - int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 93d40f7f3683..245df6b32b81 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -346,15 +346,13 @@ static inline void seccomp_sync_threads(void) */ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) { - struct seccomp_filter *filter; - struct bpf_prog *prog; - unsigned long fsize; + struct seccomp_filter *sfilter; + int ret; if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) return ERR_PTR(-EINVAL); BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter)); - fsize = bpf_classic_proglen(fprog); /* * Installing a seccomp filter requires that the task has @@ -367,37 +365,21 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) CAP_SYS_ADMIN) != 0) return ERR_PTR(-EACCES); - prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); - if (!prog) - return ERR_PTR(-ENOMEM); - - /* Copy the instructions from fprog. */ - if (copy_from_user(prog->insns, fprog->filter, fsize)) { - __bpf_prog_free(prog); - return ERR_PTR(-EFAULT); - } - - prog->len = fprog->len; - - /* bpf_prepare_filter() already takes care of freeing - * memory in case something goes wrong. - */ - prog = bpf_prepare_filter(prog, seccomp_check_filter); - if (IS_ERR(prog)) - return ERR_CAST(prog); - /* Allocate a new seccomp_filter */ - filter = kzalloc(sizeof(struct seccomp_filter), - GFP_KERNEL|__GFP_NOWARN); - if (!filter) { - bpf_prog_destroy(prog); + sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN); + if (!sfilter) return ERR_PTR(-ENOMEM); + + ret = bpf_prog_create_from_user(&sfilter->prog, fprog, + seccomp_check_filter); + if (ret < 0) { + kfree(sfilter); + return ERR_PTR(ret); } - filter->prog = prog; - atomic_set(&filter->usage, 1); + atomic_set(&sfilter->usage, 1); - return filter; + return sfilter; } /** diff --git a/net/core/filter.c b/net/core/filter.c index 45c015d6b974..a831f193e2c7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -991,8 +991,8 @@ out_err: return ERR_PTR(err); } -struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, - bpf_aux_classic_check_t trans) +static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, + bpf_aux_classic_check_t trans) { int err; @@ -1074,6 +1074,53 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) } EXPORT_SYMBOL_GPL(bpf_prog_create); +/** + * bpf_prog_create_from_user - create an unattached filter from user buffer + * @pfp: the unattached filter that is created + * @fprog: the filter program + * @trans: post-classic verifier transformation handler + * + * This function effectively does the same as bpf_prog_create(), only + * that it builds up its insns buffer from user space provided buffer. + * It also allows for passing a bpf_aux_classic_check_t handler. + */ +int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, + bpf_aux_classic_check_t trans) +{ + unsigned int fsize = bpf_classic_proglen(fprog); + struct bpf_prog *fp; + + /* Make sure new filter is there and in the right amounts. */ + if (fprog->filter == NULL) + return -EINVAL; + + fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); + if (!fp) + return -ENOMEM; + + if (copy_from_user(fp->insns, fprog->filter, fsize)) { + __bpf_prog_free(fp); + return -EFAULT; + } + + fp->len = fprog->len; + /* Since unattached filters are not copied back to user + * space through sk_get_filter(), we do not need to hold + * a copy here, and can spare us the work. + */ + fp->orig_prog = NULL; + + /* bpf_prepare_filter() already takes care of freeing + * memory in case something goes wrong. + */ + fp = bpf_prepare_filter(fp, trans); + if (IS_ERR(fp)) + return PTR_ERR(fp); + + *pfp = fp; + return 0; +} + void bpf_prog_destroy(struct bpf_prog *fp) { __bpf_prog_release(fp); -- cgit v1.2.3 From 7a0877d4b438886b72be61632eaa774d13262f70 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 7 May 2015 11:02:49 +0200 Subject: netns: rename peernet2id() to peernet2id_alloc() In a following commit, a new function will be introduced to only lookup for a nsid (no allocation if the nsid doesn't exist). To avoid confusion, the existing function is renamed. Signed-off-by: Nicolas Dichtel Acked-by: Thomas Graf Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 2 +- include/net/net_namespace.h | 2 +- net/core/net_namespace.c | 4 ++-- net/core/rtnetlink.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 3517ab0aa803..48341ae49012 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -336,7 +336,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, if (!net_eq(dev_net(vxlan->dev), vxlan->net) && nla_put_s32(skb, NDA_LINK_NETNSID, - peernet2id(dev_net(vxlan->dev), vxlan->net))) + peernet2id_alloc(dev_net(vxlan->dev), vxlan->net))) goto nla_put_failure; if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index f733656404de..6d1e2eae32fb 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -271,7 +271,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #define __net_initconst __initconst #endif -int peernet2id(struct net *net, struct net *peer); +int peernet2id_alloc(struct net *net, struct net *peer); struct net *get_net_ns_by_id(struct net *net, int id); struct pernet_operations { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 37c68bb72db3..9c806ac569f9 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -202,13 +202,13 @@ static int __peernet2id(struct net *net, struct net *peer, bool alloc) /* This function returns the id of a peer netns. If no id is assigned, one will * be allocated and returned. */ -int peernet2id(struct net *net, struct net *peer) +int peernet2id_alloc(struct net *net, struct net *peer) { bool alloc = atomic_read(&peer->count) == 0 ? false : true; return __peernet2id(net, peer, alloc); } -EXPORT_SYMBOL(peernet2id); +EXPORT_SYMBOL(peernet2id_alloc); struct net *get_net_ns_by_id(struct net *net, int id) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 666e0928ba40..83e08323fdcd 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1204,7 +1204,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); if (!net_eq(dev_net(dev), link_net)) { - int id = peernet2id(dev_net(dev), link_net); + int id = peernet2id_alloc(dev_net(dev), link_net); if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) goto nla_put_failure; -- cgit v1.2.3 From 59324cf35aba5336b611074028777838a963d03b Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 7 May 2015 11:02:53 +0200 Subject: netlink: allow to listen "all" netns More accurately, listen all netns that have a nsid assigned into the netns where the netlink socket is opened. For this purpose, a netlink socket option is added: NETLINK_LISTEN_ALL_NSID. When this option is set on a netlink socket, this socket will receive netlink notifications from all netns that have a nsid assigned into the netns where the socket has been opened. The nsid is sent to userland via an anscillary data. With this patch, a daemon needs only one socket to listen many netns. This is useful when the number of netns is high. Because 0 is a valid value for a nsid, the field nsid_is_set indicates if the field nsid is valid or not. skb->cb is initialized to 0 on skb allocation, thus we are sure that we will never send a nsid 0 by error to the userland. Signed-off-by: Nicolas Dichtel Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 ++ include/net/net_namespace.h | 2 ++ include/uapi/linux/netlink.h | 1 + net/core/net_namespace.c | 10 ++++++++- net/netlink/af_netlink.c | 52 +++++++++++++++++++++++++++++++++++++++----- 5 files changed, 61 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 6835c1279df7..9120edb650a0 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -28,6 +28,8 @@ struct netlink_skb_parms { __u32 dst_group; __u32 flags; struct sock *sk; + bool nsid_is_set; + int nsid; }; #define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb)) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 6d1e2eae32fb..3f850acc844e 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -272,6 +272,8 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #endif int peernet2id_alloc(struct net *net, struct net *peer); +int peernet2id(struct net *net, struct net *peer); +bool peernet_has_id(struct net *net, struct net *peer); struct net *get_net_ns_by_id(struct net *net, int id); struct pernet_operations { diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 1a85940f8ab7..3e34b7d702f8 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -108,6 +108,7 @@ struct nlmsgerr { #define NETLINK_NO_ENOBUFS 5 #define NETLINK_RX_RING 6 #define NETLINK_TX_RING 7 +#define NETLINK_LISTEN_ALL_NSID 8 struct nl_pktinfo { __u32 group; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ae5008b097de..a665bf490c88 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -229,7 +229,7 @@ int peernet2id_alloc(struct net *net, struct net *peer) EXPORT_SYMBOL(peernet2id_alloc); /* This function returns, if assigned, the id of a peer netns. */ -static int peernet2id(struct net *net, struct net *peer) +int peernet2id(struct net *net, struct net *peer) { unsigned long flags; int id; @@ -240,6 +240,14 @@ static int peernet2id(struct net *net, struct net *peer) return id; } +/* This function returns true is the peer netns has an id assigned into the + * current netns. + */ +bool peernet_has_id(struct net *net, struct net *peer) +{ + return peernet2id(net, peer) >= 0; +} + struct net *get_net_ns_by_id(struct net *net, int id) { unsigned long flags; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index bf7f56d7a9aa..a5fff75accf8 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -83,6 +83,7 @@ struct listeners { #define NETLINK_F_RECV_PKTINFO 0x2 #define NETLINK_F_BROADCAST_SEND_ERROR 0x4 #define NETLINK_F_RECV_NO_ENOBUFS 0x8 +#define NETLINK_F_LISTEN_ALL_NSID 0x10 static inline int netlink_is_kernel(struct sock *sk) { @@ -1932,8 +1933,17 @@ static void do_one_broadcast(struct sock *sk, !test_bit(p->group - 1, nlk->groups)) return; - if (!net_eq(sock_net(sk), p->net)) - return; + if (!net_eq(sock_net(sk), p->net)) { + if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID)) + return; + + if (!peernet_has_id(sock_net(sk), p->net)) + return; + + if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns, + CAP_NET_BROADCAST)) + return; + } if (p->failure) { netlink_overrun(sk); @@ -1959,13 +1969,22 @@ static void do_one_broadcast(struct sock *sk, p->failure = 1; if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR) p->delivery_failure = 1; - } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { + goto out; + } + if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { kfree_skb(p->skb2); p->skb2 = NULL; - } else if (sk_filter(sk, p->skb2)) { + goto out; + } + if (sk_filter(sk, p->skb2)) { kfree_skb(p->skb2); p->skb2 = NULL; - } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { + goto out; + } + NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net); + NETLINK_CB(p->skb2).nsid_is_set = true; + val = netlink_broadcast_deliver(sk, p->skb2); + if (val < 0) { netlink_overrun(sk); if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR) p->delivery_failure = 1; @@ -1974,6 +1993,7 @@ static void do_one_broadcast(struct sock *sk, p->delivered = 1; p->skb2 = NULL; } +out: sock_put(sk); } @@ -2202,6 +2222,16 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, break; } #endif /* CONFIG_NETLINK_MMAP */ + case NETLINK_LISTEN_ALL_NSID: + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST)) + return -EPERM; + + if (val) + nlk->flags |= NETLINK_F_LISTEN_ALL_NSID; + else + nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID; + err = 0; + break; default: err = -ENOPROTOOPT; } @@ -2268,6 +2298,16 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); } +static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) +{ + if (!NETLINK_CB(skb).nsid_is_set) + return; + + put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int), + &NETLINK_CB(skb).nsid); +} + static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; @@ -2421,6 +2461,8 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (nlk->flags & NETLINK_F_RECV_PKTINFO) netlink_cmsg_recv_pktinfo(msg, skb); + if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID) + netlink_cmsg_listen_all_nsid(sk, msg, skb); memset(&scm, 0, sizeof(scm)); scm.creds = *NETLINK_CREDS(skb); -- cgit v1.2.3 From 80ba92fa1a92dea128283f69f55b02242e213650 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 May 2015 15:05:12 -0700 Subject: codel: add ce_threshold attribute For DCTCP or similar ECN based deployments on fabrics with shallow buffers, hosts are responsible for a good part of the buffering. This patch adds an optional ce_threshold to codel & fq_codel qdiscs, so that DCTCP can have feedback from queuing in the host. A DCTCP enabled egress port simply have a queue occupancy threshold above which ECT packets get CE mark. In codel language this translates to a sojourn time, so that one doesn't have to worry about bytes or bandwidth but delays. This makes the host an active participant in the health of the whole network. This also helps experimenting DCTCP in a setup without DCTCP compliant fabric. On following example, ce_threshold is set to 1ms, and we can see from 'ldelay xxx us' that TCP is not trying to go around the 5ms codel target. Queue has more capacity to absorb inelastic bursts (say from UDP traffic), as queues are maintained to an optimal level. lpaa23:~# ./tc -s -d qd sh dev eth1 qdisc mq 1: dev eth1 root Sent 87910654696 bytes 58065331 pkt (dropped 0, overlimits 0 requeues 42961) backlog 3108242b 364p requeues 42961 qdisc codel 8063: dev eth1 parent 1:1 limit 1000p target 5.0ms ce_threshold 1.0ms interval 100.0ms Sent 7363778701 bytes 4863809 pkt (dropped 0, overlimits 0 requeues 5503) rate 2348Mbit 193919pps backlog 255866b 46p requeues 5503 count 0 lastcount 0 ldelay 1.0ms drop_next 0us maxpacket 68130 ecn_mark 0 drop_overlimit 0 ce_mark 72384 qdisc codel 8064: dev eth1 parent 1:2 limit 1000p target 5.0ms ce_threshold 1.0ms interval 100.0ms Sent 7636486190 bytes 5043942 pkt (dropped 0, overlimits 0 requeues 5186) rate 2319Mbit 191538pps backlog 207418b 64p requeues 5186 count 0 lastcount 0 ldelay 694us drop_next 0us maxpacket 68130 ecn_mark 0 drop_overlimit 0 ce_mark 69873 qdisc codel 8065: dev eth1 parent 1:3 limit 1000p target 5.0ms ce_threshold 1.0ms interval 100.0ms Sent 11569360142 bytes 7641602 pkt (dropped 0, overlimits 0 requeues 5554) rate 3041Mbit 251096pps backlog 210446b 59p requeues 5554 count 0 lastcount 0 ldelay 889us drop_next 0us maxpacket 68130 ecn_mark 0 drop_overlimit 0 ce_mark 37780 ... Signed-off-by: Eric Dumazet Cc: Florian Westphal Cc: Daniel Borkmann Cc: Glenn Judd Cc: Nandita Dukkipati Cc: Neal Cardwell Cc: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/codel.h | 12 +++++++++++- include/uapi/linux/pkt_sched.h | 4 ++++ net/sched/sch_codel.c | 15 +++++++++++++-- net/sched/sch_fq_codel.c | 15 ++++++++++++++- 4 files changed, 42 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/codel.h b/include/net/codel.h index aeee28081245..8c0f78f209e8 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -7,7 +7,7 @@ * Copyright (C) 2011-2012 Kathleen Nichols * Copyright (C) 2011-2012 Van Jacobson * Copyright (C) 2012 Michael D. Taht - * Copyright (C) 2012 Eric Dumazet + * Copyright (C) 2012,2015 Eric Dumazet * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -119,11 +119,13 @@ static inline u32 codel_time_to_us(codel_time_t val) /** * struct codel_params - contains codel parameters * @target: target queue size (in time units) + * @ce_threshold: threshold for marking packets with ECN CE * @interval: width of moving time window * @ecn: is Explicit Congestion Notification enabled */ struct codel_params { codel_time_t target; + codel_time_t ce_threshold; codel_time_t interval; bool ecn; }; @@ -159,17 +161,22 @@ struct codel_vars { * @maxpacket: largest packet we've seen so far * @drop_count: temp count of dropped packets in dequeue() * ecn_mark: number of packets we ECN marked instead of dropping + * ce_mark: number of packets CE marked because sojourn time was above ce_threshold */ struct codel_stats { u32 maxpacket; u32 drop_count; u32 ecn_mark; + u32 ce_mark; }; +#define CODEL_DISABLED_THRESHOLD INT_MAX + static void codel_params_init(struct codel_params *params) { params->interval = MS2TIME(100); params->target = MS2TIME(5); + params->ce_threshold = CODEL_DISABLED_THRESHOLD; params->ecn = false; } @@ -350,6 +357,9 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, vars->rec_inv_sqrt); } end: + if (skb && codel_time_after(vars->ldelay, params->ce_threshold) && + INET_ECN_set_ce(skb)) + stats->ce_mark++; return skb; } #endif diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 534b84710745..69d88b309cc7 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -679,6 +679,7 @@ enum { TCA_CODEL_LIMIT, TCA_CODEL_INTERVAL, TCA_CODEL_ECN, + TCA_CODEL_CE_THRESHOLD, __TCA_CODEL_MAX }; @@ -695,6 +696,7 @@ struct tc_codel_xstats { __u32 drop_overlimit; /* number of time max qdisc packet limit was hit */ __u32 ecn_mark; /* number of packets we ECN marked instead of dropped */ __u32 dropping; /* are we in dropping state ? */ + __u32 ce_mark; /* number of CE marked packets because of ce_threshold */ }; /* FQ_CODEL */ @@ -707,6 +709,7 @@ enum { TCA_FQ_CODEL_ECN, TCA_FQ_CODEL_FLOWS, TCA_FQ_CODEL_QUANTUM, + TCA_FQ_CODEL_CE_THRESHOLD, __TCA_FQ_CODEL_MAX }; @@ -730,6 +733,7 @@ struct tc_fq_codel_qd_stats { */ __u32 new_flows_len; /* count of flows in new list */ __u32 old_flows_len; /* count of flows in old list */ + __u32 ce_mark; /* packets above ce_threshold */ }; struct tc_fq_codel_cl_stats { diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index de28f8e968e8..1474b6560fac 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -6,7 +6,7 @@ * * Implemented on linux by : * Copyright (C) 2012 Michael D. Taht - * Copyright (C) 2012 Eric Dumazet + * Copyright (C) 2012,2015 Eric Dumazet * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -109,6 +109,7 @@ static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = { [TCA_CODEL_LIMIT] = { .type = NLA_U32 }, [TCA_CODEL_INTERVAL] = { .type = NLA_U32 }, [TCA_CODEL_ECN] = { .type = NLA_U32 }, + [TCA_CODEL_CE_THRESHOLD]= { .type = NLA_U32 }, }; static int codel_change(struct Qdisc *sch, struct nlattr *opt) @@ -133,6 +134,12 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT; } + if (tb[TCA_CODEL_CE_THRESHOLD]) { + u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]); + + q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT; + } + if (tb[TCA_CODEL_INTERVAL]) { u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]); @@ -201,7 +208,10 @@ static int codel_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_CODEL_ECN, q->params.ecn)) goto nla_put_failure; - + if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD && + nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD, + codel_time_to_us(q->params.ce_threshold))) + goto nla_put_failure; return nla_nest_end(skb, opts); nla_put_failure: @@ -220,6 +230,7 @@ static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d) .ldelay = codel_time_to_us(q->vars.ldelay), .dropping = q->vars.dropping, .ecn_mark = q->stats.ecn_mark, + .ce_mark = q->stats.ce_mark, }; if (q->vars.dropping) { diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index a6fc53d69513..778739786b32 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -6,7 +6,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Copyright (C) 2012 Eric Dumazet + * Copyright (C) 2012,2015 Eric Dumazet */ #include @@ -292,6 +292,7 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = { [TCA_FQ_CODEL_ECN] = { .type = NLA_U32 }, [TCA_FQ_CODEL_FLOWS] = { .type = NLA_U32 }, [TCA_FQ_CODEL_QUANTUM] = { .type = NLA_U32 }, + [TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 }, }; static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) @@ -322,6 +323,12 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT; } + if (tb[TCA_FQ_CODEL_CE_THRESHOLD]) { + u64 val = nla_get_u32(tb[TCA_FQ_CODEL_CE_THRESHOLD]); + + q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT; + } + if (tb[TCA_FQ_CODEL_INTERVAL]) { u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]); @@ -441,6 +448,11 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb) q->flows_cnt)) goto nla_put_failure; + if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD && + nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD, + codel_time_to_us(q->cparams.ce_threshold))) + goto nla_put_failure; + return nla_nest_end(skb, opts); nla_put_failure: @@ -459,6 +471,7 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.qdisc_stats.drop_overlimit = q->drop_overlimit; st.qdisc_stats.ecn_mark = q->cstats.ecn_mark; st.qdisc_stats.new_flow_count = q->new_flow_count; + st.qdisc_stats.ce_mark = q->cstats.ce_mark; list_for_each(pos, &q->new_flows) st.qdisc_stats.new_flows_len++; -- cgit v1.2.3 From 140e807da12988e2a925fe029336e7bb67a8d4de Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 21:07:08 -0500 Subject: tun: Utilize the normal socket network namespace refcounting. There is no need for tun to do the weird network namespace refcounting. The existing network namespace refcounting in tfile has almost exactly the same lifetime. So rewrite the code to use the struct sock network namespace refcounting and remove the unnecessary hand rolled network namespace refcounting and the unncesary tfile->net. This change allows the tun code to directly call sock_put bypassing sock_release and making SOCK_EXTERNALLY_ALLOCATED unnecessary. Remove the now unncessary tun_release so that if anything tries to use the sock_release code path the kernel will oops, and let us know about the bug. The macvtap code already uses it's internal socket this way. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- drivers/net/tun.c | 24 ++++-------------------- include/linux/net.h | 1 - net/socket.c | 3 --- 3 files changed, 4 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e470ae59d405..3262f3e2b8b2 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -146,7 +146,6 @@ struct tun_file { struct socket socket; struct socket_wq wq; struct tun_struct __rcu *tun; - struct net *net; struct fasync_struct *fasync; /* only used for fasnyc */ unsigned int flags; @@ -493,10 +492,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } - - BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED, - &tfile->socket.flags)); - sk_release_kernel(&tfile->sk); + sock_put(&tfile->sk); } } @@ -1492,18 +1488,10 @@ out: return ret; } -static int tun_release(struct socket *sock) -{ - if (sock->sk) - sock_put(sock->sk); - return 0; -} - /* Ops structure to mimic raw sockets with tun */ static const struct proto_ops tun_socket_ops = { .sendmsg = tun_sendmsg, .recvmsg = tun_recvmsg, - .release = tun_release, }; static struct proto tun_proto = { @@ -1865,7 +1853,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, if (cmd == TUNSETIFF && !tun) { ifr.ifr_name[IFNAMSIZ-1] = '\0'; - ret = tun_set_iff(tfile->net, file, &ifr); + ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr); if (ret) goto unlock; @@ -2154,16 +2142,16 @@ out: static int tun_chr_open(struct inode *inode, struct file * file) { + struct net *net = current->nsproxy->net_ns; struct tun_file *tfile; DBG1(KERN_INFO, "tunX: tun_chr_open\n"); - tfile = (struct tun_file *)sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL, + tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &tun_proto); if (!tfile) return -ENOMEM; RCU_INIT_POINTER(tfile->tun, NULL); - tfile->net = get_net(current->nsproxy->net_ns); tfile->flags = 0; tfile->ifindex = 0; @@ -2174,13 +2162,11 @@ static int tun_chr_open(struct inode *inode, struct file * file) tfile->socket.ops = &tun_socket_ops; sock_init_data(&tfile->socket, &tfile->sk); - sk_change_net(&tfile->sk, tfile->net); tfile->sk.sk_write_space = tun_sock_write_space; tfile->sk.sk_sndbuf = INT_MAX; file->private_data = tfile; - set_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags); INIT_LIST_HEAD(&tfile->next); sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); @@ -2191,10 +2177,8 @@ static int tun_chr_open(struct inode *inode, struct file * file) static int tun_chr_close(struct inode *inode, struct file *file) { struct tun_file *tfile = file->private_data; - struct net *net = tfile->net; tun_detach(tfile, true); - put_net(net); return 0; } diff --git a/include/linux/net.h b/include/linux/net.h index 738ea48be889..8a5e81d2bdf7 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -38,7 +38,6 @@ struct net; #define SOCK_NOSPACE 2 #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 -#define SOCK_EXTERNALLY_ALLOCATED 5 #ifndef ARCH_HAS_SOCKET_TYPES /** diff --git a/net/socket.c b/net/socket.c index 884e32997698..b5f1f43ed8f4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -576,9 +576,6 @@ void sock_release(struct socket *sock) if (rcu_dereference_protected(sock->wq, 1)->fasync_list) pr_err("%s: fasync list not empty!\n", __func__); - if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags)) - return; - this_cpu_sub(sockets_in_use, 1); if (!sock->file) { iput(SOCK_INODE(sock)); -- cgit v1.2.3 From eeb1bd5c40edb0e2fd925c8535e2fdebdbc5cef2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 21:08:05 -0500 Subject: net: Add a struct net parameter to sock_create_kern This is long overdue, and is part of cleaning up how we allocate kernel sockets that don't reference count struct net. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- drivers/block/drbd/drbd_receiver.c | 4 ++-- fs/afs/rxrpc.c | 2 +- fs/dlm/lowcomms.c | 16 ++++++++-------- include/linux/net.h | 2 +- net/bluetooth/rfcomm/core.c | 2 +- net/ceph/messenger.c | 4 ++-- net/ipv4/af_inet.c | 2 +- net/ipv4/udp_tunnel.c | 2 +- net/ipv6/ip6_udp_tunnel.c | 2 +- net/l2tp/l2tp_core.c | 4 ++-- net/netfilter/ipvs/ip_vs_sync.c | 4 ++-- net/rxrpc/ar-local.c | 4 ++-- net/socket.c | 4 ++-- 13 files changed, 26 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index cee20354ac37..c097909c589c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -598,7 +598,7 @@ static struct socket *drbd_try_connect(struct drbd_connection *connection) memcpy(&peer_in6, &connection->peer_addr, peer_addr_len); what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family, + err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family, SOCK_STREAM, IPPROTO_TCP, &sock); if (err < 0) { sock = NULL; @@ -693,7 +693,7 @@ static int prepare_listen_socket(struct drbd_connection *connection, struct acce memcpy(&my_addr, &connection->my_addr, my_addr_len); what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family, + err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family, SOCK_STREAM, IPPROTO_TCP, &s_listen); if (err) { s_listen = NULL; diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 3a57a1b0fb51..b50642870a43 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -85,7 +85,7 @@ int afs_open_socket(void) return -ENOMEM; } - ret = sock_create_kern(AF_RXRPC, SOCK_DGRAM, PF_INET, &socket); + ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET, &socket); if (ret < 0) { destroy_workqueue(afs_async_calls); _leave(" = %d [socket]", ret); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index d08e079ea5d3..754fd6c0b747 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -921,8 +921,8 @@ static int tcp_accept_from_sock(struct connection *con) mutex_unlock(&connections_lock); memset(&peeraddr, 0, sizeof(peeraddr)); - result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, - IPPROTO_TCP, &newsock); + result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + SOCK_STREAM, IPPROTO_TCP, &newsock); if (result < 0) return -ENOMEM; @@ -1173,8 +1173,8 @@ static void tcp_connect_to_sock(struct connection *con) goto out; /* Create a socket to communicate with */ - result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, - IPPROTO_TCP, &sock); + result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + SOCK_STREAM, IPPROTO_TCP, &sock); if (result < 0) goto out_err; @@ -1258,8 +1258,8 @@ static struct socket *tcp_create_listen_sock(struct connection *con, addr_len = sizeof(struct sockaddr_in6); /* Create a socket to communicate with */ - result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, - IPPROTO_TCP, &sock); + result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + SOCK_STREAM, IPPROTO_TCP, &sock); if (result < 0) { log_print("Can't create listening comms socket"); goto create_out; @@ -1365,8 +1365,8 @@ static int sctp_listen_for_all(void) log_print("Using SCTP for communications"); - result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_SEQPACKET, - IPPROTO_SCTP, &sock); + result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + SOCK_SEQPACKET, IPPROTO_SCTP, &sock); if (result < 0) { log_print("Can't create comms socket, check SCTP is loaded"); goto out; diff --git a/include/linux/net.h b/include/linux/net.h index 8a5e81d2bdf7..04aa06852771 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -207,7 +207,7 @@ void sock_unregister(int family); int __sock_create(struct net *net, int family, int type, int proto, struct socket **res, int kern); int sock_create(int family, int type, int proto, struct socket **res); -int sock_create_kern(int family, int type, int proto, struct socket **res); +int sock_create_kern(struct net *net, int family, int type, int proto, struct socket **res); int sock_create_lite(int family, int type, int proto, struct socket **res); void sock_release(struct socket *sock); int sock_sendmsg(struct socket *sock, struct msghdr *msg); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 4fea24275b17..29709fbfd1f5 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -200,7 +200,7 @@ static int rfcomm_l2sock_create(struct socket **sock) BT_DBG(""); - err = sock_create_kern(PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP, sock); + err = sock_create_kern(&init_net, PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP, sock); if (!err) { struct sock *sk = (*sock)->sk; sk->sk_data_ready = rfcomm_l2data_ready; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 967080a9f043..073262fea6dd 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -480,8 +480,8 @@ static int ceph_tcp_connect(struct ceph_connection *con) int ret; BUG_ON(con->sock); - ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM, - IPPROTO_TCP, &sock); + ret = sock_create_kern(&init_net, con->peer_addr.in_addr.ss_family, + SOCK_STREAM, IPPROTO_TCP, &sock); if (ret) return ret; sock->sk->sk_allocation = GFP_NOFS; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8b47a4d79d04..09f4d024dfe5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1430,7 +1430,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, struct net *net) { struct socket *sock; - int rc = sock_create_kern(family, type, protocol, &sock); + int rc = sock_create_kern(&init_net, family, type, protocol, &sock); if (rc == 0) { *sk = sock->sk; diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 6bb98cc193c9..4e2837476967 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -15,7 +15,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, struct socket *sock = NULL; struct sockaddr_in udp_addr; - err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock); + err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &sock); if (err < 0) goto error; diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index bba8903e871f..478576b61214 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -19,7 +19,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, int err; struct socket *sock = NULL; - err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock); + err = sock_create_kern(&init_net, AF_INET6, SOCK_DGRAM, 0, &sock); if (err < 0) goto error; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index a29a504492af..ae513a2fe7f3 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1399,7 +1399,7 @@ static int l2tp_tunnel_sock_create(struct net *net, if (cfg->local_ip6 && cfg->peer_ip6) { struct sockaddr_l2tpip6 ip6_addr = {0}; - err = sock_create_kern(AF_INET6, SOCK_DGRAM, + err = sock_create_kern(&init_net, AF_INET6, SOCK_DGRAM, IPPROTO_L2TP, &sock); if (err < 0) goto out; @@ -1429,7 +1429,7 @@ static int l2tp_tunnel_sock_create(struct net *net, { struct sockaddr_l2tpip ip_addr = {0}; - err = sock_create_kern(AF_INET, SOCK_DGRAM, + err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, IPPROTO_L2TP, &sock); if (err < 0) goto out; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 19b9cce6c210..2e9a5b5d1239 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1458,7 +1458,7 @@ static struct socket *make_send_sock(struct net *net, int id) int result; /* First create a socket move it to right name space later */ - result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + result = sock_create_kern(&init_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); @@ -1518,7 +1518,7 @@ static struct socket *make_receive_sock(struct net *net, int id) int result; /* First create a socket */ - result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + result = sock_create_kern(&init_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c index ca904ed5400a..78483b4602bf 100644 --- a/net/rxrpc/ar-local.c +++ b/net/rxrpc/ar-local.c @@ -73,8 +73,8 @@ static int rxrpc_create_local(struct rxrpc_local *local) _enter("%p{%d}", local, local->srx.transport_type); /* create a socket to represent the local endpoint */ - ret = sock_create_kern(PF_INET, local->srx.transport_type, IPPROTO_UDP, - &local->socket); + ret = sock_create_kern(&init_net, PF_INET, local->srx.transport_type, + IPPROTO_UDP, &local->socket); if (ret < 0) { _leave(" = %d [socket]", ret); return ret; diff --git a/net/socket.c b/net/socket.c index b5f1f43ed8f4..9963a0b53a64 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1210,9 +1210,9 @@ int sock_create(int family, int type, int protocol, struct socket **res) } EXPORT_SYMBOL(sock_create); -int sock_create_kern(int family, int type, int protocol, struct socket **res) +int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res) { - return __sock_create(&init_net, family, type, protocol, res, 1); + return __sock_create(net, family, type, protocol, res, 1); } EXPORT_SYMBOL(sock_create_kern); -- cgit v1.2.3 From 11aa9c28b4209242a9de0a661a7b3405adb568a0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 21:09:13 -0500 Subject: net: Pass kern from net_proto_family.create to sk_alloc In preparation for changing how struct net is refcounted on kernel sockets pass the knowledge that we are creating a kernel socket from sock_create_kern through to sk_alloc. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- crypto/af_alg.c | 4 ++-- drivers/isdn/mISDN/socket.c | 12 ++++++------ drivers/net/macvtap.c | 2 +- drivers/net/ppp/pppoe.c | 4 ++-- drivers/net/ppp/pppox.c | 2 +- drivers/net/ppp/pptp.c | 4 ++-- drivers/net/tun.c | 2 +- include/linux/if_pppox.h | 2 +- include/net/af_vsock.h | 2 +- include/net/llc_conn.h | 2 +- include/net/sock.h | 2 +- net/appletalk/ddp.c | 2 +- net/atm/common.c | 4 ++-- net/atm/common.h | 2 +- net/atm/pvc.c | 2 +- net/atm/svc.c | 2 +- net/ax25/af_ax25.c | 4 ++-- net/bluetooth/bnep/sock.c | 2 +- net/bluetooth/cmtp/sock.c | 2 +- net/bluetooth/hci_sock.c | 2 +- net/bluetooth/hidp/sock.c | 2 +- net/bluetooth/l2cap_sock.c | 10 +++++----- net/bluetooth/rfcomm/sock.c | 8 ++++---- net/bluetooth/sco.c | 8 ++++---- net/caif/caif_socket.c | 2 +- net/can/af_can.c | 2 +- net/core/sock.c | 3 ++- net/decnet/af_decnet.c | 8 ++++---- net/ieee802154/socket.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv6/af_inet6.c | 2 +- net/ipx/af_ipx.c | 2 +- net/irda/af_irda.c | 2 +- net/iucv/af_iucv.c | 10 +++++----- net/key/af_key.c | 2 +- net/l2tp/l2tp_ppp.c | 4 ++-- net/llc/af_llc.c | 2 +- net/llc/llc_conn.c | 6 +++--- net/netlink/af_netlink.c | 11 +++++------ net/netrom/af_netrom.c | 4 ++-- net/nfc/af_nfc.c | 2 +- net/nfc/llcp.h | 2 +- net/nfc/llcp_core.c | 2 +- net/nfc/llcp_sock.c | 8 ++++---- net/nfc/nfc.h | 2 +- net/nfc/rawsock.c | 4 ++-- net/packet/af_packet.c | 2 +- net/phonet/af_phonet.c | 2 +- net/phonet/pep.c | 2 +- net/rds/af_rds.c | 2 +- net/rose/af_rose.c | 4 ++-- net/rxrpc/af_rxrpc.c | 2 +- net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 2 +- net/tipc/socket.c | 2 +- net/unix/af_unix.c | 8 ++++---- net/vmw_vsock/af_vsock.c | 7 ++++--- net/vmw_vsock/vmci_transport.c | 2 +- net/x25/af_x25.c | 8 ++++---- 59 files changed, 109 insertions(+), 108 deletions(-) (limited to 'include') diff --git a/crypto/af_alg.c b/crypto/af_alg.c index f22cc56fd1b3..5ad0d5354535 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -244,7 +244,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) if (!type) goto unlock; - sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto); + sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, 0); err = -ENOMEM; if (!sk2) goto unlock; @@ -324,7 +324,7 @@ static int alg_create(struct net *net, struct socket *sock, int protocol, return -EPROTONOSUPPORT; err = -ENOMEM; - sk = sk_alloc(net, PF_ALG, GFP_KERNEL, &alg_proto); + sk = sk_alloc(net, PF_ALG, GFP_KERNEL, &alg_proto, kern); if (!sk) goto out; diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 8dc7290089bb..0d29b5a6356d 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -601,14 +601,14 @@ static const struct proto_ops data_sock_ops = { }; static int -data_sock_create(struct net *net, struct socket *sock, int protocol) +data_sock_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; if (sock->type != SOCK_DGRAM) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto); + sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto, kern); if (!sk) return -ENOMEM; @@ -756,14 +756,14 @@ static const struct proto_ops base_sock_ops = { static int -base_sock_create(struct net *net, struct socket *sock, int protocol) +base_sock_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto); + sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto, kern); if (!sk) return -ENOMEM; @@ -785,7 +785,7 @@ mISDN_sock_create(struct net *net, struct socket *sock, int proto, int kern) switch (proto) { case ISDN_P_BASE: - err = base_sock_create(net, sock, proto); + err = base_sock_create(net, sock, proto, kern); break; case ISDN_P_TE_S0: case ISDN_P_NT_S0: @@ -799,7 +799,7 @@ mISDN_sock_create(struct net *net, struct socket *sock, int proto, int kern) case ISDN_P_B_L2DTMF: case ISDN_P_B_L2DSP: case ISDN_P_B_L2DSPHDLC: - err = data_sock_create(net, sock, proto); + err = data_sock_create(net, sock, proto, kern); break; default: return err; diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 8c350c5d54ad..0398631a3c24 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -476,7 +476,7 @@ static int macvtap_open(struct inode *inode, struct file *file) err = -ENOMEM; q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, - &macvtap_proto); + &macvtap_proto, 0); if (!q) goto out; diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index aa1dd926623a..f86c5ab334aa 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -546,11 +546,11 @@ static struct proto pppoe_sk_proto __read_mostly = { * Initialize a new struct sock. * **********************************************************************/ -static int pppoe_create(struct net *net, struct socket *sock) +static int pppoe_create(struct net *net, struct socket *sock, int kern) { struct sock *sk; - sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, kern); if (!sk) return -ENOMEM; diff --git a/drivers/net/ppp/pppox.c b/drivers/net/ppp/pppox.c index 2940e9fe351b..0e1b30622477 100644 --- a/drivers/net/ppp/pppox.c +++ b/drivers/net/ppp/pppox.c @@ -118,7 +118,7 @@ static int pppox_create(struct net *net, struct socket *sock, int protocol, !try_module_get(pppox_protos[protocol]->owner)) goto out; - rc = pppox_protos[protocol]->create(net, sock); + rc = pppox_protos[protocol]->create(net, sock, kern); module_put(pppox_protos[protocol]->owner); out: diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index e3bfbd4d0136..14839bc0aaf5 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -561,14 +561,14 @@ static void pptp_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); } -static int pptp_create(struct net *net, struct socket *sock) +static int pptp_create(struct net *net, struct socket *sock, int kern) { int error = -ENOMEM; struct sock *sk; struct pppox_sock *po; struct pptp_opt *opt; - sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pptp_sk_proto); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pptp_sk_proto, kern); if (!sk) goto out; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3262f3e2b8b2..1a1c4f7b3ec5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2148,7 +2148,7 @@ static int tun_chr_open(struct inode *inode, struct file * file) DBG1(KERN_INFO, "tunX: tun_chr_open\n"); tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, - &tun_proto); + &tun_proto, 0); if (!tfile) return -ENOMEM; RCU_INIT_POINTER(tfile->tun, NULL); diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 66a7d7600f43..b49cf923becc 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -74,7 +74,7 @@ static inline struct sock *sk_pppox(struct pppox_sock *po) struct module; struct pppox_proto { - int (*create)(struct net *net, struct socket *sock); + int (*create)(struct net *net, struct socket *sock, int kern); int (*ioctl)(struct socket *sock, unsigned int cmd, unsigned long arg); struct module *owner; diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 172632dd9930..db639a4c5ab8 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -74,7 +74,7 @@ void vsock_pending_work(struct work_struct *work); struct sock *__vsock_create(struct net *net, struct socket *sock, struct sock *parent, - gfp_t priority, unsigned short type); + gfp_t priority, unsigned short type, int kern); /**** TRANSPORT ****/ diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index 0134681acc4c..fe994d2e5286 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -96,7 +96,7 @@ static __inline__ char llc_backlog_type(struct sk_buff *skb) } struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, - struct proto *prot); + struct proto *prot, int kern); void llc_sk_free(struct sock *sk); void llc_sk_reset(struct sock *sk); diff --git a/include/net/sock.h b/include/net/sock.h index 3a4898ec8c67..d8dcf91732b0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1514,7 +1514,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) struct sock *sk_alloc(struct net *net, int family, gfp_t priority, - struct proto *prot); + struct proto *prot, int kern); void sk_free(struct sock *sk); void sk_release_kernel(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 3b7ad43c7dad..d5871ac493eb 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1030,7 +1030,7 @@ static int atalk_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) goto out; rc = -ENOMEM; - sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto); + sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, kern); if (!sk) goto out; rc = 0; diff --git a/net/atm/common.c b/net/atm/common.c index ed0466637e13..49a872db7e42 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -141,7 +141,7 @@ static struct proto vcc_proto = { .release_cb = vcc_release_cb, }; -int vcc_create(struct net *net, struct socket *sock, int protocol, int family) +int vcc_create(struct net *net, struct socket *sock, int protocol, int family, int kern) { struct sock *sk; struct atm_vcc *vcc; @@ -149,7 +149,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family) sock->sk = NULL; if (sock->type == SOCK_STREAM) return -EINVAL; - sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto); + sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto, kern); if (!sk) return -ENOMEM; sock_init_data(sock, sk); diff --git a/net/atm/common.h b/net/atm/common.h index 4d6f5b2068ac..959436b87182 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -10,7 +10,7 @@ #include /* for poll_table */ -int vcc_create(struct net *net, struct socket *sock, int protocol, int family); +int vcc_create(struct net *net, struct socket *sock, int protocol, int family, int kern); int vcc_release(struct socket *sock); int vcc_connect(struct socket *sock, int itf, short vpi, int vci); int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, diff --git a/net/atm/pvc.c b/net/atm/pvc.c index ae0324021407..040207ec399f 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -136,7 +136,7 @@ static int pvc_create(struct net *net, struct socket *sock, int protocol, return -EAFNOSUPPORT; sock->ops = &pvc_proto_ops; - return vcc_create(net, sock, protocol, PF_ATMPVC); + return vcc_create(net, sock, protocol, PF_ATMPVC, kern); } static const struct net_proto_family pvc_family_ops = { diff --git a/net/atm/svc.c b/net/atm/svc.c index 1ba23f5018e7..3fa0a9ee98d1 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -660,7 +660,7 @@ static int svc_create(struct net *net, struct socket *sock, int protocol, return -EAFNOSUPPORT; sock->ops = &svc_proto_ops; - error = vcc_create(net, sock, protocol, AF_ATMSVC); + error = vcc_create(net, sock, protocol, AF_ATMSVC, kern); if (error) return error; ATM_SD(sock)->local.sas_family = AF_ATMSVC; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 330c1f4a5a0b..4273533d22b1 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -855,7 +855,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol, return -ESOCKTNOSUPPORT; } - sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto); + sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, kern); if (sk == NULL) return -ENOMEM; @@ -881,7 +881,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) struct sock *sk; ax25_cb *ax25, *oax25; - sk = sk_alloc(sock_net(osk), PF_AX25, GFP_ATOMIC, osk->sk_prot); + sk = sk_alloc(sock_net(osk), PF_AX25, GFP_ATOMIC, osk->sk_prot, 0); if (sk == NULL) return NULL; diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index bde2bdd9e929..b5116fa9835e 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -202,7 +202,7 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index d82787d417bd..ce86a7bae844 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -205,7 +205,7 @@ static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 56f9edbf3d05..5b14dcafcd08 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1377,7 +1377,7 @@ static int hci_sock_create(struct net *net, struct socket *sock, int protocol, sock->ops = &hci_sock_ops; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index cb3fdde1968a..008ba439bd62 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -235,7 +235,7 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a7278f05eafb..244287706f91 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -43,7 +43,7 @@ static struct bt_sock_list l2cap_sk_list = { static const struct proto_ops l2cap_sock_ops; static void l2cap_sock_init(struct sock *sk, struct sock *parent); static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, - int proto, gfp_t prio); + int proto, gfp_t prio, int kern); bool l2cap_is_socket(struct socket *sock) { @@ -1193,7 +1193,7 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) } sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, - GFP_ATOMIC); + GFP_ATOMIC, 0); if (!sk) { release_sock(parent); return NULL; @@ -1523,12 +1523,12 @@ static struct proto l2cap_proto = { }; static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, - int proto, gfp_t prio) + int proto, gfp_t prio, int kern) { struct sock *sk; struct l2cap_chan *chan; - sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, kern); if (!sk) return NULL; @@ -1574,7 +1574,7 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol, sock->ops = &l2cap_sock_ops; - sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC); + sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 825e8fb5114b..b2338e971b33 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -269,12 +269,12 @@ static struct proto rfcomm_proto = { .obj_size = sizeof(struct rfcomm_pinfo) }; -static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) +static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern) { struct rfcomm_dlc *d; struct sock *sk; - sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, kern); if (!sk) return NULL; @@ -324,7 +324,7 @@ static int rfcomm_sock_create(struct net *net, struct socket *sock, sock->ops = &rfcomm_sock_ops; - sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC); + sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern); if (!sk) return -ENOMEM; @@ -969,7 +969,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * goto done; } - sk = rfcomm_sock_alloc(sock_net(parent), NULL, BTPROTO_RFCOMM, GFP_ATOMIC); + sk = rfcomm_sock_alloc(sock_net(parent), NULL, BTPROTO_RFCOMM, GFP_ATOMIC, 0); if (!sk) goto done; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 4322c833e748..6b6e59dc54cf 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -460,11 +460,11 @@ static struct proto sco_proto = { .obj_size = sizeof(struct sco_pinfo) }; -static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) +static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern) { struct sock *sk; - sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, kern); if (!sk) return NULL; @@ -501,7 +501,7 @@ static int sco_sock_create(struct net *net, struct socket *sock, int protocol, sock->ops = &sco_sock_ops; - sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC); + sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern); if (!sk) return -ENOMEM; @@ -1026,7 +1026,7 @@ static void sco_conn_ready(struct sco_conn *conn) bh_lock_sock(parent); sk = sco_sock_alloc(sock_net(parent), NULL, - BTPROTO_SCO, GFP_ATOMIC); + BTPROTO_SCO, GFP_ATOMIC, 0); if (!sk) { bh_unlock_sock(parent); sco_conn_unlock(conn); diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 4ec0c803aef1..78a04ebb113c 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1047,7 +1047,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol, * is really not used at all in the net/core or socket.c but the * initialization makes sure that sock->state is not uninitialized. */ - sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot); + sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot, kern); if (!sk) return -ENOMEM; diff --git a/net/can/af_can.c b/net/can/af_can.c index 32d710eaf1fc..d4d404bdfc9a 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -179,7 +179,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol, sock->ops = cp->ops; - sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot); + sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot, kern); if (!sk) { err = -ENOMEM; goto errout; diff --git a/net/core/sock.c b/net/core/sock.c index e891bcf325ca..cbc3789b830c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1396,9 +1396,10 @@ EXPORT_SYMBOL_GPL(sock_update_netprioidx); * @family: protocol family * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) * @prot: struct proto associated with this new sock instance + * @kern: is this to be a kernel socket? */ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, - struct proto *prot) + struct proto *prot, int kern) { struct sock *sk; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 754484b3cd0e..675cf94e04f8 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -468,10 +468,10 @@ static struct proto dn_proto = { .obj_size = sizeof(struct dn_sock), }; -static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp) +static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp, int kern) { struct dn_scp *scp; - struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto); + struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, kern); if (!sk) goto out; @@ -693,7 +693,7 @@ static int dn_create(struct net *net, struct socket *sock, int protocol, } - if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL)) == NULL) + if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL, kern)) == NULL) return -ENOBUFS; sk->sk_protocol = protocol; @@ -1096,7 +1096,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags) cb = DN_SKB_CB(skb); sk->sk_ack_backlog--; - newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation); + newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, 0); if (newsk == NULL) { release_sock(sk); kfree_skb(skb); diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index b60c65f70346..7aaaf967df58 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -1014,7 +1014,7 @@ static int ieee802154_create(struct net *net, struct socket *sock, } rc = -ENOMEM; - sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto); + sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto, kern); if (!sk) goto out; rc = 0; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 09f4d024dfe5..e2dd9cb99d61 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -317,7 +317,7 @@ lookup_protocol: WARN_ON(!answer_prot->slab); err = -ENOBUFS; - sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot); + sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern); if (!sk) goto out; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4632afa57e05..f3866c0b6cfe 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -167,7 +167,7 @@ lookup_protocol: WARN_ON(!answer_prot->slab); err = -ENOBUFS; - sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot); + sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern); if (!sk) goto out; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 4ea5d7497b5f..48d0dc89b58d 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1347,7 +1347,7 @@ static int ipx_create(struct net *net, struct socket *sock, int protocol, goto out; rc = -ENOMEM; - sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto); + sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, kern); if (!sk) goto out; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index ee0ea25c8e7a..fae6822cc367 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1100,7 +1100,7 @@ static int irda_create(struct net *net, struct socket *sock, int protocol, } /* Allocate networking socket */ - sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto); + sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto, kern); if (sk == NULL) return -ENOMEM; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 6daa52a18d40..918151c11348 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -535,12 +535,12 @@ static void iucv_sock_init(struct sock *sk, struct sock *parent) sk->sk_type = parent->sk_type; } -static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) +static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio, int kern) { struct sock *sk; struct iucv_sock *iucv; - sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto); + sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto, kern); if (!sk) return NULL; iucv = iucv_sk(sk); @@ -602,7 +602,7 @@ static int iucv_sock_create(struct net *net, struct socket *sock, int protocol, return -ESOCKTNOSUPPORT; } - sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL); + sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL, kern); if (!sk) return -ENOMEM; @@ -1723,7 +1723,7 @@ static int iucv_callback_connreq(struct iucv_path *path, } /* Create the new socket */ - nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); + nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0); if (!nsk) { err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); @@ -1933,7 +1933,7 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb) goto out; } - nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); + nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0); bh_lock_sock(sk); if ((sk->sk_state != IUCV_LISTEN) || sk_acceptq_is_full(sk) || diff --git a/net/key/af_key.c b/net/key/af_key.c index f0d52d721b3a..9e834ec475a9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -149,7 +149,7 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol, return -EPROTONOSUPPORT; err = -ENOMEM; - sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto); + sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, kern); if (sk == NULL) goto out; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index e9b0dec56b8e..f56c9f69e9f2 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -542,12 +542,12 @@ static int pppol2tp_backlog_recv(struct sock *sk, struct sk_buff *skb) /* socket() handler. Initialize a new struct sock. */ -static int pppol2tp_create(struct net *net, struct socket *sock) +static int pppol2tp_create(struct net *net, struct socket *sock, int kern) { int error = -ENOMEM; struct sock *sk; - sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto, kern); if (!sk) goto out; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 17a8dff06090..8fd9febaa5ba 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -168,7 +168,7 @@ static int llc_ui_create(struct net *net, struct socket *sock, int protocol, if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) { rc = -ENOMEM; - sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto); + sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto, kern); if (sk) { rc = 0; llc_ui_sk_init(sock, sk); diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 81a61fce3afb..3e821daf9dd4 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -768,7 +768,7 @@ static struct sock *llc_create_incoming_sock(struct sock *sk, struct llc_addr *daddr) { struct sock *newsk = llc_sk_alloc(sock_net(sk), sk->sk_family, GFP_ATOMIC, - sk->sk_prot); + sk->sk_prot, 0); struct llc_sock *newllc, *llc = llc_sk(sk); if (!newsk) @@ -931,9 +931,9 @@ static void llc_sk_init(struct sock *sk) * Allocates a LLC sock and initializes it. Returns the new LLC sock * or %NULL if there's no memory available for one */ -struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot) +struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern) { - struct sock *sk = sk_alloc(net, family, priority, prot); + struct sock *sk = sk_alloc(net, family, priority, prot, kern); if (!sk) goto out; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a5fff75accf8..be6665ab7f40 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1119,14 +1119,15 @@ static struct proto netlink_proto = { }; static int __netlink_create(struct net *net, struct socket *sock, - struct mutex *cb_mutex, int protocol) + struct mutex *cb_mutex, int protocol, + int kern) { struct sock *sk; struct netlink_sock *nlk; sock->ops = &netlink_ops; - sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto); + sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern); if (!sk) return -ENOMEM; @@ -1188,7 +1189,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, if (err < 0) goto out; - err = __netlink_create(net, sock, cb_mutex, protocol); + err = __netlink_create(net, sock, cb_mutex, protocol, kern); if (err < 0) goto out_module; @@ -2515,14 +2516,12 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - /* * We have to just have a reference on the net from sk, but don't * get_net it. Besides, we cannot get and then put the net here. * So we create one inside init_net and the move it to net. */ - - if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0) + if (__netlink_create(&init_net, sock, cb_mutex, unit, 0) < 0) goto out_sock_release_nosk; sk = sock->sk; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index b987fd56c3c5..ed212ffc1d9d 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -433,7 +433,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto); + sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, kern); if (sk == NULL) return -ENOMEM; @@ -476,7 +476,7 @@ static struct sock *nr_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) return NULL; - sk = sk_alloc(sock_net(osk), PF_NETROM, GFP_ATOMIC, osk->sk_prot); + sk = sk_alloc(sock_net(osk), PF_NETROM, GFP_ATOMIC, osk->sk_prot, 0); if (sk == NULL) return NULL; diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c index 2277276f52bc..54e40fa47822 100644 --- a/net/nfc/af_nfc.c +++ b/net/nfc/af_nfc.c @@ -40,7 +40,7 @@ static int nfc_sock_create(struct net *net, struct socket *sock, int proto, read_lock(&proto_tab_lock); if (proto_tab[proto] && try_module_get(proto_tab[proto]->owner)) { - rc = proto_tab[proto]->create(net, sock, proto_tab[proto]); + rc = proto_tab[proto]->create(net, sock, proto_tab[proto], kern); module_put(proto_tab[proto]->owner); } read_unlock(&proto_tab_lock); diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index de1789e3cc82..1f68724d44d3 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -225,7 +225,7 @@ void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, struct sk_buff *skb, u8 direction); /* Sock API */ -struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp); +struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp, int kern); void nfc_llcp_sock_free(struct nfc_llcp_sock *sock); void nfc_llcp_accept_unlink(struct sock *sk); void nfc_llcp_accept_enqueue(struct sock *parent, struct sock *sk); diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index b18f07ccb504..98876274a1ee 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -934,7 +934,7 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, sock->ssap = ssap; } - new_sk = nfc_llcp_sock_alloc(NULL, parent->sk_type, GFP_ATOMIC); + new_sk = nfc_llcp_sock_alloc(NULL, parent->sk_type, GFP_ATOMIC, 0); if (new_sk == NULL) { reason = LLCP_DM_REJ; release_sock(&sock->sk); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 9578bd6a4f3e..b7de0da46acd 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -942,12 +942,12 @@ static void llcp_sock_destruct(struct sock *sk) } } -struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp) +struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp, int kern) { struct sock *sk; struct nfc_llcp_sock *llcp_sock; - sk = sk_alloc(&init_net, PF_NFC, gfp, &llcp_sock_proto); + sk = sk_alloc(&init_net, PF_NFC, gfp, &llcp_sock_proto, kern); if (!sk) return NULL; @@ -993,7 +993,7 @@ void nfc_llcp_sock_free(struct nfc_llcp_sock *sock) } static int llcp_sock_create(struct net *net, struct socket *sock, - const struct nfc_protocol *nfc_proto) + const struct nfc_protocol *nfc_proto, int kern) { struct sock *sk; @@ -1009,7 +1009,7 @@ static int llcp_sock_create(struct net *net, struct socket *sock, else sock->ops = &llcp_sock_ops; - sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC); + sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC, kern); if (sk == NULL) return -ENOMEM; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index a8ce80b47720..5c93e8412a26 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -30,7 +30,7 @@ struct nfc_protocol { struct proto *proto; struct module *owner; int (*create)(struct net *net, struct socket *sock, - const struct nfc_protocol *nfc_proto); + const struct nfc_protocol *nfc_proto, int kern); }; struct nfc_rawsock { diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 82b4e8024778..e9a91488fe3d 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -334,7 +334,7 @@ static void rawsock_destruct(struct sock *sk) } static int rawsock_create(struct net *net, struct socket *sock, - const struct nfc_protocol *nfc_proto) + const struct nfc_protocol *nfc_proto, int kern) { struct sock *sk; @@ -348,7 +348,7 @@ static int rawsock_create(struct net *net, struct socket *sock, else sock->ops = &rawsock_ops; - sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto); + sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto, kern); if (!sk) return -ENOMEM; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 5102c3cc4eec..94713276a1d9 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2832,7 +2832,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, sock->state = SS_UNCONNECTED; err = -ENOBUFS; - sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto); + sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern); if (sk == NULL) goto out; diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 32ab87d34828..10d42f3220ab 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -97,7 +97,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol, goto out; } - sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot); + sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot, kern); if (sk == NULL) { err = -ENOMEM; goto out; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 6de2aeb98a1f..850a86cde0b3 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -845,7 +845,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) } /* Create a new to-be-accepted sock */ - newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot); + newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, 0); if (!newsk) { pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL); err = -ENOBUFS; diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 10443377fb9d..3d83641f2861 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -440,7 +440,7 @@ static int rds_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_SEQPACKET || protocol) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto); + sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 8ae603069a1a..36dbc2da3661 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -520,7 +520,7 @@ static int rose_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto); + sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, kern); if (sk == NULL) return -ENOMEM; @@ -559,7 +559,7 @@ static struct sock *rose_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) return NULL; - sk = sk_alloc(sock_net(osk), PF_ROSE, GFP_ATOMIC, &rose_proto); + sk = sk_alloc(sock_net(osk), PF_ROSE, GFP_ATOMIC, &rose_proto, 0); if (sk == NULL) return NULL; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 0095b9a0b779..25d60ed15284 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -632,7 +632,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, sock->ops = &rxrpc_rpc_ops; sock->state = SS_UNCONNECTED; - sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto); + sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 0e4198ee2370..e703ff7fed40 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -635,7 +635,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; - newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot); + newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0); if (!newsk) goto out; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 53b7acde9aa3..59e80356672b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -550,7 +550,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, struct sctp_association *asoc) { struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL, - sk->sk_prot); + sk->sk_prot, 0); struct inet_sock *newinet; if (!newsk) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9074b5cede38..8f3c8e2cef8e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -342,7 +342,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, } /* Allocate socket's protocol area */ - sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern); if (sk == NULL) return -ENOMEM; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5266ea7b922b..941b3d26e3bf 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -620,7 +620,7 @@ static struct proto unix_proto = { */ static struct lock_class_key af_unix_sk_receive_queue_lock_key; -static struct sock *unix_create1(struct net *net, struct socket *sock) +static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) { struct sock *sk = NULL; struct unix_sock *u; @@ -629,7 +629,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) goto out; - sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto); + sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern); if (!sk) goto out; @@ -688,7 +688,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol, return -ESOCKTNOSUPPORT; } - return unix_create1(net, sock) ? 0 : -ENOMEM; + return unix_create1(net, sock, kern) ? 0 : -ENOMEM; } static int unix_release(struct socket *sock) @@ -1088,7 +1088,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, err = -ENOMEM; /* create new sock for complete connection */ - newsk = unix_create1(sock_net(sk), NULL); + newsk = unix_create1(sock_net(sk), NULL, 0); if (newsk == NULL) goto out; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 2ec86e652a19..df5fc6b340f1 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -581,13 +581,14 @@ struct sock *__vsock_create(struct net *net, struct socket *sock, struct sock *parent, gfp_t priority, - unsigned short type) + unsigned short type, + int kern) { struct sock *sk; struct vsock_sock *psk; struct vsock_sock *vsk; - sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto); + sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto, kern); if (!sk) return NULL; @@ -1866,7 +1867,7 @@ static int vsock_create(struct net *net, struct socket *sock, sock->state = SS_UNCONNECTED; - return __vsock_create(net, sock, NULL, GFP_KERNEL, 0) ? 0 : -ENOMEM; + return __vsock_create(net, sock, NULL, GFP_KERNEL, 0, kern) ? 0 : -ENOMEM; } static const struct net_proto_family vsock_family_ops = { diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index c294da095461..1f63daff3965 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1022,7 +1022,7 @@ static int vmci_transport_recv_listen(struct sock *sk, } pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, - sk->sk_type); + sk->sk_type, 0); if (!pending) { vmci_transport_send_reset(sk, pkt); return -ENOMEM; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index c3ab230e4493..a750f330b8dd 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -515,10 +515,10 @@ static struct proto x25_proto = { .obj_size = sizeof(struct x25_sock), }; -static struct sock *x25_alloc_socket(struct net *net) +static struct sock *x25_alloc_socket(struct net *net, int kern) { struct x25_sock *x25; - struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto); + struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto, kern); if (!sk) goto out; @@ -553,7 +553,7 @@ static int x25_create(struct net *net, struct socket *sock, int protocol, goto out; rc = -ENOBUFS; - if ((sk = x25_alloc_socket(net)) == NULL) + if ((sk = x25_alloc_socket(net, kern)) == NULL) goto out; x25 = x25_sk(sk); @@ -602,7 +602,7 @@ static struct sock *x25_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) goto out; - if ((sk = x25_alloc_socket(sock_net(osk))) == NULL) + if ((sk = x25_alloc_socket(sock_net(osk), 0)) == NULL) goto out; x25 = x25_sk(sk); -- cgit v1.2.3 From 26abe14379f8e2fa3fd1bcf97c9a7ad9364886fe Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 21:10:31 -0500 Subject: net: Modify sk_alloc to not reference count the netns of kernel sockets. Now that sk_alloc knows when a kernel socket is being allocated modify it to not reference count the network namespace of kernel sockets. Keep track of if a socket needs reference counting by adding a flag to struct sock called sk_net_refcnt. Update all of the callers of sock_create_kern to stop using sk_change_net and sk_release_kernel as those hacks are no longer needed, to avoid reference counting a kernel socket. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/net/inet_common.h | 2 +- include/net/sock.h | 2 ++ net/core/sock.c | 8 ++++++-- net/ipv4/af_inet.c | 4 +--- net/ipv4/udp_tunnel.c | 8 +++----- net/ipv6/ip6_udp_tunnel.c | 6 ++---- net/l2tp/l2tp_core.c | 15 ++++++--------- net/netfilter/ipvs/ip_vs_sync.c | 30 +++++++++--------------------- 8 files changed, 30 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 4a92423eefa5..279f83591971 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -41,7 +41,7 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, static inline void inet_ctl_sock_destroy(struct sock *sk) { - sk_release_kernel(sk); + sock_release(sk->sk_socket); } #endif diff --git a/include/net/sock.h b/include/net/sock.h index d8dcf91732b0..9e6b2c0b4741 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -184,6 +184,7 @@ struct sock_common { unsigned char skc_reuse:4; unsigned char skc_reuseport:1; unsigned char skc_ipv6only:1; + unsigned char skc_net_refcnt:1; int skc_bound_dev_if; union { struct hlist_node skc_bind_node; @@ -323,6 +324,7 @@ struct sock { #define sk_reuse __sk_common.skc_reuse #define sk_reuseport __sk_common.skc_reuseport #define sk_ipv6only __sk_common.skc_ipv6only +#define sk_net_refcnt __sk_common.skc_net_refcnt #define sk_bound_dev_if __sk_common.skc_bound_dev_if #define sk_bind_node __sk_common.skc_bind_node #define sk_prot __sk_common.skc_prot diff --git a/net/core/sock.c b/net/core/sock.c index cbc3789b830c..9e8968e9ebeb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1412,7 +1412,10 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, */ sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); - sock_net_set(sk, get_net(net)); + sk->sk_net_refcnt = kern ? 0 : 1; + if (likely(sk->sk_net_refcnt)) + get_net(net); + sock_net_set(sk, net); atomic_set(&sk->sk_wmem_alloc, 1); sock_update_classid(sk); @@ -1446,7 +1449,8 @@ static void __sk_free(struct sock *sk) if (sk->sk_peer_cred) put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); - put_net(sock_net(sk)); + if (likely(sk->sk_net_refcnt)) + put_net(sock_net(sk)); sk_prot_free(sk->sk_prot_creator, sk); } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e2dd9cb99d61..235d36afece3 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1430,7 +1430,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, struct net *net) { struct socket *sock; - int rc = sock_create_kern(&init_net, family, type, protocol, &sock); + int rc = sock_create_kern(net, family, type, protocol, &sock); if (rc == 0) { *sk = sock->sk; @@ -1440,8 +1440,6 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, * we do not wish this socket to see incoming packets. */ (*sk)->sk_prot->unhash(*sk); - - sk_change_net(*sk, net); } return rc; } diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 4e2837476967..933ea903f7b8 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -15,12 +15,10 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, struct socket *sock = NULL; struct sockaddr_in udp_addr; - err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &sock); + err = sock_create_kern(net, AF_INET, SOCK_DGRAM, 0, &sock); if (err < 0) goto error; - sk_change_net(sock->sk, net); - udp_addr.sin_family = AF_INET; udp_addr.sin_addr = cfg->local_ip; udp_addr.sin_port = cfg->local_udp_port; @@ -47,7 +45,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, error: if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sock->sk); + sock_release(sock); } *sockp = NULL; return err; @@ -101,7 +99,7 @@ void udp_tunnel_sock_release(struct socket *sock) { rcu_assign_sk_user_data(sock->sk, NULL); kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sock->sk); + sock_release(sock); } EXPORT_SYMBOL_GPL(udp_tunnel_sock_release); diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 478576b61214..e1a1136bda7c 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -19,12 +19,10 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, int err; struct socket *sock = NULL; - err = sock_create_kern(&init_net, AF_INET6, SOCK_DGRAM, 0, &sock); + err = sock_create_kern(net, AF_INET6, SOCK_DGRAM, 0, &sock); if (err < 0) goto error; - sk_change_net(sock->sk, net); - udp6_addr.sin6_family = AF_INET6; memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6, sizeof(udp6_addr.sin6_addr)); @@ -55,7 +53,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, error: if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sock->sk); + sock_release(sock); } *sockp = NULL; return err; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index ae513a2fe7f3..f6b090df3930 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1334,9 +1334,10 @@ static void l2tp_tunnel_del_work(struct work_struct *work) if (sock) inet_shutdown(sock, 2); } else { - if (sock) + if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sk); + sock_release(sock); + } } l2tp_tunnel_sock_put(sk); @@ -1399,13 +1400,11 @@ static int l2tp_tunnel_sock_create(struct net *net, if (cfg->local_ip6 && cfg->peer_ip6) { struct sockaddr_l2tpip6 ip6_addr = {0}; - err = sock_create_kern(&init_net, AF_INET6, SOCK_DGRAM, + err = sock_create_kern(net, AF_INET6, SOCK_DGRAM, IPPROTO_L2TP, &sock); if (err < 0) goto out; - sk_change_net(sock->sk, net); - ip6_addr.l2tp_family = AF_INET6; memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6, sizeof(ip6_addr.l2tp_addr)); @@ -1429,13 +1428,11 @@ static int l2tp_tunnel_sock_create(struct net *net, { struct sockaddr_l2tpip ip_addr = {0}; - err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, + err = sock_create_kern(net, AF_INET, SOCK_DGRAM, IPPROTO_L2TP, &sock); if (err < 0) goto out; - sk_change_net(sock->sk, net); - ip_addr.l2tp_family = AF_INET; ip_addr.l2tp_addr = cfg->local_ip; ip_addr.l2tp_conn_id = tunnel_id; @@ -1462,7 +1459,7 @@ out: *sockp = sock; if ((err < 0) && sock) { kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sock->sk); + sock_release(sock); *sockp = NULL; } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 2e9a5b5d1239..b08ba9538d12 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1457,18 +1457,12 @@ static struct socket *make_send_sock(struct net *net, int id) struct socket *sock; int result; - /* First create a socket move it to right name space later */ - result = sock_create_kern(&init_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + /* First create a socket */ + result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); } - /* - * Kernel sockets that are a part of a namespace, should not - * hold a reference to a namespace in order to allow to stop it. - * After sk_change_net should be released using sk_release_kernel. - */ - sk_change_net(sock->sk, net); result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); if (result < 0) { pr_err("Error setting outbound mcast interface\n"); @@ -1497,7 +1491,7 @@ static struct socket *make_send_sock(struct net *net, int id) return sock; error: - sk_release_kernel(sock->sk); + sock_release(sock); return ERR_PTR(result); } @@ -1518,17 +1512,11 @@ static struct socket *make_receive_sock(struct net *net, int id) int result; /* First create a socket */ - result = sock_create_kern(&init_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); } - /* - * Kernel sockets that are a part of a namespace, should not - * hold a reference to a namespace in order to allow to stop it. - * After sk_change_net should be released using sk_release_kernel. - */ - sk_change_net(sock->sk, net); /* it is equivalent to the REUSEADDR option in user-space */ sock->sk->sk_reuse = SK_CAN_REUSE; result = sysctl_sync_sock_size(ipvs); @@ -1554,7 +1542,7 @@ static struct socket *make_receive_sock(struct net *net, int id) return sock; error: - sk_release_kernel(sock->sk); + sock_release(sock); return ERR_PTR(result); } @@ -1692,7 +1680,7 @@ done: ip_vs_sync_buff_release(sb); /* release the sending multicast socket */ - sk_release_kernel(tinfo->sock->sk); + sock_release(tinfo->sock); kfree(tinfo); return 0; @@ -1729,7 +1717,7 @@ static int sync_thread_backup(void *data) } /* release the sending multicast socket */ - sk_release_kernel(tinfo->sock->sk); + sock_release(tinfo->sock); kfree(tinfo->buf); kfree(tinfo); @@ -1854,11 +1842,11 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) return 0; outsocket: - sk_release_kernel(sock->sk); + sock_release(sock); outtinfo: if (tinfo) { - sk_release_kernel(tinfo->sock->sk); + sock_release(tinfo->sock); kfree(tinfo->buf); kfree(tinfo); } -- cgit v1.2.3 From affb9792f1d99e1e4d64411e147b648d65f2576e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 21:12:13 -0500 Subject: net: kill sk_change_net and sk_release_kernel These functions are no longer needed and no longer used kill them. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/net/sock.h | 17 ----------------- net/core/sock.c | 19 ------------------- 2 files changed, 36 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 9e6b2c0b4741..d882f4c8e438 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1518,7 +1518,6 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern); void sk_free(struct sock *sk); -void sk_release_kernel(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, @@ -2194,22 +2193,6 @@ void sock_net_set(struct sock *sk, struct net *net) write_pnet(&sk->sk_net, net); } -/* - * Kernel sockets, f.e. rtnl or icmp_socket, are a part of a namespace. - * They should not hold a reference to a namespace in order to allow - * to stop it. - * Sockets after sk_change_net should be released using sk_release_kernel - */ -static inline void sk_change_net(struct sock *sk, struct net *net) -{ - struct net *current_net = sock_net(sk); - - if (!net_eq(current_net, net)) { - put_net(current_net); - sock_net_set(sk, net); - } -} - static inline struct sock *skb_steal_sock(struct sk_buff *skb) { if (skb->sk) { diff --git a/net/core/sock.c b/net/core/sock.c index 9e8968e9ebeb..c18738a795b0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1466,25 +1466,6 @@ void sk_free(struct sock *sk) } EXPORT_SYMBOL(sk_free); -/* - * Last sock_put should drop reference to sk->sk_net. It has already - * been dropped in sk_change_net. Taking reference to stopping namespace - * is not an option. - * Take reference to a socket to remove it from hash _alive_ and after that - * destroy it in the context of init_net. - */ -void sk_release_kernel(struct sock *sk) -{ - if (sk == NULL || sk->sk_socket == NULL) - return; - - sock_hold(sk); - sock_net_set(sk, get_net(&init_net)); - sock_release(sk->sk_socket); - sock_put(sk); -} -EXPORT_SYMBOL(sk_release_kernel); - static void sk_update_clone(const struct sock *sk, struct sock *newsk) { if (mem_cgroup_sockets_enabled && sk->sk_cgrp) -- cgit v1.2.3 From 6791e4661c4bd3e9f193a84247f2c389578a4336 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Sat, 9 May 2015 00:01:55 -0700 Subject: bonding: Allow userspace to set actors' system_priority in AD system This patch allows user to randomize the system-priority in an ad-system. The allowed range is 1 - 0xFFFF while default value is 0xFFFF. If user does not specify this value, the system defaults to 0xFFFF, which is what it was before this patch. Following example code could set the value - # modprobe bonding mode=4 # sys_prio=$(( 1 + RANDOM + RANDOM )) # echo $sys_prio > /sys/class/net/bond0/bonding/ad_actor_sys_prio # echo +eth1 > /sys/class/net/bond0/bonding/slaves ... # ip link set bond0 up Signed-off-by: Mahesh Bandewar Reviewed-by: Nikolay Aleksandrov [jt: * fixed up style issues reported by checkpatch * changed how the default value is set in bond_check_params(), this makes the default consistent between what gets set for a new bond and what the default is claimed to be in the bonding options.] Signed-off-by: Jonathan Toppins Signed-off-by: David S. Miller --- Documentation/networking/bonding.txt | 9 +++++++++ drivers/net/bonding/bond_3ad.c | 5 ++++- drivers/net/bonding/bond_main.c | 12 ++++++++++++ drivers/net/bonding/bond_options.c | 28 +++++++++++++++++++++++++++- drivers/net/bonding/bond_procfs.c | 2 ++ drivers/net/bonding/bond_sysfs.c | 15 +++++++++++++++ include/net/bond_options.h | 1 + include/net/bonding.h | 1 + 8 files changed, 71 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index 83bf4986baea..34946115acec 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -178,6 +178,15 @@ active_slave active slave, or the empty string if there is no active slave or the current mode does not use an active slave. +ad_actor_sys_prio + + In an AD system, this specifies the system priority. The allowed range + is 1 - 65535. If the value is not specified, it takes 65535 as the + default value. + + This parameter has effect only in 802.3ad mode and is available through + SysFs interface. + ad_select Specifies the 802.3ad aggregation selection logic to use. The diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index fbd54f0e32e8..4c003bc87d4b 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -1908,7 +1908,8 @@ void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution) BOND_AD_INFO(bond).aggregator_identifier = 0; - BOND_AD_INFO(bond).system.sys_priority = 0xFFFF; + BOND_AD_INFO(bond).system.sys_priority = + bond->params.ad_actor_sys_prio; BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->dev->dev_addr); /* initialize how many times this module is called in one @@ -1959,6 +1960,8 @@ void bond_3ad_bind_slave(struct slave *slave) port->sm_vars &= ~AD_PORT_LACP_ENABLED; /* actor system is the bond's system */ port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr; + port->actor_system_priority = + BOND_AD_INFO(bond).system.sys_priority; /* tx timer(to verify that no more than MAX_TX_IN_SECOND * lacpdu's are sent in one second) */ diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index d5fe5d5f490f..5f2f28f0e927 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4140,6 +4140,7 @@ static int bond_check_params(struct bond_params *params) struct bond_opt_value newval; const struct bond_opt_value *valptr; int arp_all_targets_value; + u16 ad_actor_sys_prio = 0; /* Convert string parameters. */ if (mode) { @@ -4434,6 +4435,16 @@ static int bond_check_params(struct bond_params *params) fail_over_mac_value = BOND_FOM_NONE; } + bond_opt_initstr(&newval, "default"); + valptr = bond_opt_parse( + bond_opt_get(BOND_OPT_AD_ACTOR_SYS_PRIO), + &newval); + if (!valptr) { + pr_err("Error: No ad_actor_sys_prio default value"); + return -EINVAL; + } + ad_actor_sys_prio = valptr->value; + if (lp_interval == 0) { pr_warn("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n", INT_MAX, BOND_ALB_DEFAULT_LP_INTERVAL); @@ -4462,6 +4473,7 @@ static int bond_check_params(struct bond_params *params) params->lp_interval = lp_interval; params->packets_per_slave = packets_per_slave; params->tlb_dynamic_lb = 1; /* Default value */ + params->ad_actor_sys_prio = ad_actor_sys_prio; if (packets_per_slave > 0) { params->reciprocal_packets_per_slave = reciprocal_value(packets_per_slave); diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 4df28943d222..d2b47e5e99f7 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -70,6 +70,8 @@ static int bond_option_slaves_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, const struct bond_opt_value *newval); +static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, + const struct bond_opt_value *newval); static const struct bond_opt_value bond_mode_tbl[] = { @@ -186,6 +188,12 @@ static const struct bond_opt_value bond_tlb_dynamic_lb_tbl[] = { { NULL, -1, 0} }; +static const struct bond_opt_value bond_ad_actor_sys_prio_tbl[] = { + { "minval", 1, BOND_VALFLAG_MIN}, + { "maxval", 65535, BOND_VALFLAG_MAX | BOND_VALFLAG_DEFAULT}, + { NULL, -1, 0}, +}; + static const struct bond_option bond_opts[BOND_OPT_LAST] = { [BOND_OPT_MODE] = { .id = BOND_OPT_MODE, @@ -379,7 +387,15 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .values = bond_tlb_dynamic_lb_tbl, .flags = BOND_OPTFLAG_IFDOWN, .set = bond_option_tlb_dynamic_lb_set, - } + }, + [BOND_OPT_AD_ACTOR_SYS_PRIO] = { + .id = BOND_OPT_AD_ACTOR_SYS_PRIO, + .name = "ad_actor_sys_prio", + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), + .flags = BOND_OPTFLAG_IFDOWN, + .values = bond_ad_actor_sys_prio_tbl, + .set = bond_option_ad_actor_sys_prio_set, + }, }; /* Searches for an option by name */ @@ -1349,3 +1365,13 @@ static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, return 0; } + +static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + netdev_info(bond->dev, "Setting ad_actor_sys_prio to (%llu)\n", + newval->value); + + bond->params.ad_actor_sys_prio = newval->value; + return 0; +} diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index b20b35acb47d..11369299e7e5 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -135,6 +135,8 @@ static void bond_info_show_master(struct seq_file *seq) bond->params.ad_select); seq_printf(seq, "Aggregator selection policy (ad_select): %s\n", optval->string); + seq_printf(seq, "System priority: %d\n", + BOND_AD_INFO(bond).system.sys_priority); if (__bond_3ad_get_active_agg_info(bond, &ad_info)) { seq_printf(seq, "bond %s has no active aggregator\n", diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 7e9e151d4d61..4a7626611ca6 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -692,6 +692,20 @@ static ssize_t bonding_show_packets_per_slave(struct device *d, static DEVICE_ATTR(packets_per_slave, S_IRUGO | S_IWUSR, bonding_show_packets_per_slave, bonding_sysfs_store_option); +static ssize_t bonding_show_ad_actor_sys_prio(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + + if (BOND_MODE(bond) == BOND_MODE_8023AD) + return sprintf(buf, "%hu\n", bond->params.ad_actor_sys_prio); + + return 0; +} +static DEVICE_ATTR(ad_actor_sys_prio, S_IRUGO | S_IWUSR, + bonding_show_ad_actor_sys_prio, bonding_sysfs_store_option); + static struct attribute *per_bond_attrs[] = { &dev_attr_slaves.attr, &dev_attr_mode.attr, @@ -725,6 +739,7 @@ static struct attribute *per_bond_attrs[] = { &dev_attr_lp_interval.attr, &dev_attr_packets_per_slave.attr, &dev_attr_tlb_dynamic_lb.attr, + &dev_attr_ad_actor_sys_prio.attr, NULL, }; diff --git a/include/net/bond_options.h b/include/net/bond_options.h index ea6546d2c946..894002a2620f 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -63,6 +63,7 @@ enum { BOND_OPT_LP_INTERVAL, BOND_OPT_SLAVES, BOND_OPT_TLB_DYNAMIC_LB, + BOND_OPT_AD_ACTOR_SYS_PRIO, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 78ed135e9dea..405cf87a450a 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -136,6 +136,7 @@ struct bond_params { int packets_per_slave; int tlb_dynamic_lb; struct reciprocal_value reciprocal_packets_per_slave; + u16 ad_actor_sys_prio; }; struct bond_parm_tbl { -- cgit v1.2.3 From 74514957552edd4661a4608618121f3c71d4e891 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Sat, 9 May 2015 00:01:56 -0700 Subject: bonding: Allow userspace to set actors' macaddr in an AD-system. In an AD system, the communication between actor and partner is the business between these two entities. In the current setup anyone on the same L2 can "guess" the LACPDU contents and then possibly send the spoofed LACPDUs and trick the partner causing connectivity issues for the AD system. This patch allows to use a random mac-address obscuring it's identity making it harder for someone in the L2 is do the same thing. This patch allows user-space to choose the mac-address for the AD-system. This mac-address can not be NULL or a Multicast. If the mac-address is set from user-space; kernel will honor it and will not overwrite it. In the absence (value from user space); the logic will default to using the masters' mac as the mac-address for the AD-system. It can be set using example code below - # modprobe bonding mode=4 # sys_mac_addr=$(printf '%02x:%02x:%02x:%02x:%02x:%02x' \ $(( (RANDOM & 0xFE) | 0x02 )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF ))) # echo $sys_mac_addr > /sys/class/net/bond0/bonding/ad_actor_system # echo +eth1 > /sys/class/net/bond0/bonding/slaves ... # ip link set bond0 up Signed-off-by: Mahesh Bandewar Reviewed-by: Nikolay Aleksandrov [jt: fixed up style issues reported by checkpatch] Signed-off-by: Jonathan Toppins Signed-off-by: David S. Miller --- Documentation/networking/bonding.txt | 12 ++++++++++++ drivers/net/bonding/bond_3ad.c | 7 ++++++- drivers/net/bonding/bond_main.c | 1 + drivers/net/bonding/bond_options.c | 27 +++++++++++++++++++++++++++ drivers/net/bonding/bond_procfs.c | 6 ++++++ drivers/net/bonding/bond_sysfs.c | 16 ++++++++++++++++ include/net/bond_options.h | 1 + include/net/bonding.h | 1 + 8 files changed, 70 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index 34946115acec..2c197b68baf0 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -187,6 +187,18 @@ ad_actor_sys_prio This parameter has effect only in 802.3ad mode and is available through SysFs interface. +ad_actor_system + + In an AD system, this specifies the mac-address for the actor in + protocol packet exchanges (LACPDUs). The value cannot be NULL or + multicast. It is preferred to have the local-admin bit set for this + mac but driver does not enforce it. If the value is not given then + system defaults to using the masters' mac address as actors' system + address. + + This parameter has effect only in 802.3ad mode and is available through + SysFs interface. + ad_select Specifies the 802.3ad aggregation selection logic to use. The diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 4c003bc87d4b..012f7bc22d91 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -1910,7 +1910,12 @@ void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution) BOND_AD_INFO(bond).system.sys_priority = bond->params.ad_actor_sys_prio; - BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->dev->dev_addr); + if (is_zero_ether_addr(bond->params.ad_actor_system)) + BOND_AD_INFO(bond).system.sys_mac_addr = + *((struct mac_addr *)bond->dev->dev_addr); + else + BOND_AD_INFO(bond).system.sys_mac_addr = + *((struct mac_addr *)bond->params.ad_actor_system); /* initialize how many times this module is called in one * second (should be about every 100ms) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5f2f28f0e927..a4e2f27ef683 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4474,6 +4474,7 @@ static int bond_check_params(struct bond_params *params) params->packets_per_slave = packets_per_slave; params->tlb_dynamic_lb = 1; /* Default value */ params->ad_actor_sys_prio = ad_actor_sys_prio; + eth_zero_addr(params->ad_actor_system); if (packets_per_slave > 0) { params->reciprocal_packets_per_slave = reciprocal_value(packets_per_slave); diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index d2b47e5e99f7..cdcef217ac84 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -72,6 +72,8 @@ static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, const struct bond_opt_value *newval); +static int bond_option_ad_actor_system_set(struct bonding *bond, + const struct bond_opt_value *newval); static const struct bond_opt_value bond_mode_tbl[] = { @@ -396,6 +398,13 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .values = bond_ad_actor_sys_prio_tbl, .set = bond_option_ad_actor_sys_prio_set, }, + [BOND_OPT_AD_ACTOR_SYSTEM] = { + .id = BOND_OPT_AD_ACTOR_SYSTEM, + .name = "ad_actor_system", + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), + .flags = BOND_OPTFLAG_RAWVAL | BOND_OPTFLAG_IFDOWN, + .set = bond_option_ad_actor_system_set, + }, }; /* Searches for an option by name */ @@ -1375,3 +1384,21 @@ static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, bond->params.ad_actor_sys_prio = newval->value; return 0; } + +static int bond_option_ad_actor_system_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + u8 macaddr[ETH_ALEN]; + int i; + + i = sscanf(newval->string, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + &macaddr[0], &macaddr[1], &macaddr[2], + &macaddr[3], &macaddr[4], &macaddr[5]); + if (i != ETH_ALEN || !is_valid_ether_addr(macaddr)) { + netdev_err(bond->dev, "Invalid MAC address.\n"); + return -EINVAL; + } + + ether_addr_copy(bond->params.ad_actor_system, macaddr); + return 0; +} diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index 11369299e7e5..e7f3047a26df 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -137,6 +137,8 @@ static void bond_info_show_master(struct seq_file *seq) optval->string); seq_printf(seq, "System priority: %d\n", BOND_AD_INFO(bond).system.sys_priority); + seq_printf(seq, "System MAC address: %pM\n", + &BOND_AD_INFO(bond).system.sys_mac_addr); if (__bond_3ad_get_active_agg_info(bond, &ad_info)) { seq_printf(seq, "bond %s has no active aggregator\n", @@ -200,6 +202,8 @@ static void bond_info_show_slave(struct seq_file *seq, seq_puts(seq, "details actor lacp pdu:\n"); seq_printf(seq, " system priority: %d\n", port->actor_system_priority); + seq_printf(seq, " system mac address: %pM\n", + &port->actor_system); seq_printf(seq, " port key: %d\n", port->actor_oper_port_key); seq_printf(seq, " port priority: %d\n", @@ -212,6 +216,8 @@ static void bond_info_show_slave(struct seq_file *seq, seq_puts(seq, "details partner lacp pdu:\n"); seq_printf(seq, " system priority: %d\n", port->partner_oper.system_priority); + seq_printf(seq, " system mac address: %pM\n", + &port->partner_oper.system); seq_printf(seq, " oper key: %d\n", port->partner_oper.key); seq_printf(seq, " port priority: %d\n", diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 4a7626611ca6..09fefa50d055 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -706,6 +706,21 @@ static ssize_t bonding_show_ad_actor_sys_prio(struct device *d, static DEVICE_ATTR(ad_actor_sys_prio, S_IRUGO | S_IWUSR, bonding_show_ad_actor_sys_prio, bonding_sysfs_store_option); +static ssize_t bonding_show_ad_actor_system(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + + if (BOND_MODE(bond) == BOND_MODE_8023AD) + return sprintf(buf, "%pM\n", bond->params.ad_actor_system); + + return 0; +} + +static DEVICE_ATTR(ad_actor_system, S_IRUGO | S_IWUSR, + bonding_show_ad_actor_system, bonding_sysfs_store_option); + static struct attribute *per_bond_attrs[] = { &dev_attr_slaves.attr, &dev_attr_mode.attr, @@ -740,6 +755,7 @@ static struct attribute *per_bond_attrs[] = { &dev_attr_packets_per_slave.attr, &dev_attr_tlb_dynamic_lb.attr, &dev_attr_ad_actor_sys_prio.attr, + &dev_attr_ad_actor_system.attr, NULL, }; diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 894002a2620f..eeeefa1d3cd8 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -64,6 +64,7 @@ enum { BOND_OPT_SLAVES, BOND_OPT_TLB_DYNAMIC_LB, BOND_OPT_AD_ACTOR_SYS_PRIO, + BOND_OPT_AD_ACTOR_SYSTEM, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 405cf87a450a..650f38693956 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -137,6 +137,7 @@ struct bond_params { int tlb_dynamic_lb; struct reciprocal_value reciprocal_packets_per_slave; u16 ad_actor_sys_prio; + u8 ad_actor_system[ETH_ALEN]; }; struct bond_parm_tbl { -- cgit v1.2.3 From d22a5fc0c32edcf5c3bb973ee8c9a2606ba500a8 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Sat, 9 May 2015 00:01:57 -0700 Subject: bonding: Implement user key part of port_key in an AD system. The port key has three components - user-key, speed-part, and duplex-part. The LSBit is for the duplex-part, next 5 bits are for the speed while the remaining 10 bits are the user defined key bits. Get these 10 bits from the user-space (through the SysFs interface) and use it to form the admin port-key. Allowed range for the user-key is 0 - 1023 (10 bits). If it is not provided then use zero for the user-key-bits (default). It can set using following example code - # modprobe bonding mode=4 # usr_port_key=$(( RANDOM & 0x3FF )) # echo $usr_port_key > /sys/class/net/bond0/bonding/ad_user_port_key # echo +eth1 > /sys/class/net/bond0/bonding/slaves ... # ip link set bond0 up Signed-off-by: Mahesh Bandewar Reviewed-by: Nikolay Aleksandrov [jt: * fixed up style issues reported by checkpatch * fixed up context from change in ad_actor_sys_prio patch] Signed-off-by: Jonathan Toppins Signed-off-by: David S. Miller --- Documentation/networking/bonding.txt | 63 ++++++++++++++++++++++++++++++++++++ drivers/net/bonding/bond_3ad.c | 14 ++++---- drivers/net/bonding/bond_main.c | 10 ++++++ drivers/net/bonding/bond_options.c | 26 +++++++++++++++ drivers/net/bonding/bond_sysfs.c | 15 +++++++++ include/net/bond_options.h | 1 + include/net/bonding.h | 1 + 7 files changed, 123 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index 2c197b68baf0..334b49ef02d1 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -51,6 +51,7 @@ Table of Contents 3.4 Configuring Bonding Manually via Sysfs 3.5 Configuration with Interfaces Support 3.6 Overriding Configuration for Special Cases +3.7 Configuring LACP for 802.3ad mode in a more secure way 4. Querying Bonding Configuration 4.1 Bonding Configuration @@ -241,6 +242,21 @@ ad_select This option was added in bonding version 3.4.0. +ad_user_port_key + + In an AD system, the port-key has three parts as shown below - + + Bits Use + 00 Duplex + 01-05 Speed + 06-15 User-defined + + This defines the upper 10 bits of the port key. The values can be + from 0 - 1023. If not given, the system defaults to 0. + + This parameter has effect only in 802.3ad mode and is available through + SysFs interface. + all_slaves_active Specifies that duplicate frames (received on inactive ports) should be @@ -1643,6 +1659,53 @@ output port selection. This feature first appeared in bonding driver version 3.7.0 and support for output slave selection was limited to round-robin and active-backup modes. +3.7 Configuring LACP for 802.3ad mode in a more secure way +---------------------------------------------------------- + +When using 802.3ad bonding mode, the Actor (host) and Partner (switch) +exchange LACPDUs. These LACPDUs cannot be sniffed, because they are +destined to link local mac addresses (which switches/bridges are not +supposed to forward). However, most of the values are easily predictable +or are simply the machine's MAC address (which is trivially known to all +other hosts in the same L2). This implies that other machines in the L2 +domain can spoof LACPDU packets from other hosts to the switch and potentially +cause mayhem by joining (from the point of view of the switch) another +machine's aggregate, thus receiving a portion of that hosts incoming +traffic and / or spoofing traffic from that machine themselves (potentially +even successfully terminating some portion of flows). Though this is not +a likely scenario, one could avoid this possibility by simply configuring +few bonding parameters: + + (a) ad_actor_system : You can set a random mac-address that can be used for + these LACPDU exchanges. The value can not be either NULL or Multicast. + Also it's preferable to set the local-admin bit. Following shell code + generates a random mac-address as described above. + + # sys_mac_addr=$(printf '%02x:%02x:%02x:%02x:%02x:%02x' \ + $(( (RANDOM & 0xFE) | 0x02 )) \ + $(( RANDOM & 0xFF )) \ + $(( RANDOM & 0xFF )) \ + $(( RANDOM & 0xFF )) \ + $(( RANDOM & 0xFF )) \ + $(( RANDOM & 0xFF ))) + # echo $sys_mac_addr > /sys/class/net/bond0/bonding/ad_actor_system + + (b) ad_actor_sys_prio : Randomize the system priority. The default value + is 65535, but system can take the value from 1 - 65535. Following shell + code generates random priority and sets it. + + # sys_prio=$(( 1 + RANDOM + RANDOM )) + # echo $sys_prio > /sys/class/net/bond0/bonding/ad_actor_sys_prio + + (c) ad_user_port_key : Use the user portion of the port-key. The default + keeps this empty. These are the upper 10 bits of the port-key and value + ranges from 0 - 1023. Following shell code generates these 10 bits and + sets it. + + # usr_port_key=$(( RANDOM & 0x3FF )) + # echo $usr_port_key > /sys/class/net/bond0/bonding/ad_user_port_key + + 4 Querying Bonding Configuration ================================= diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 012f7bc22d91..7fde4d5c2b28 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -75,10 +75,10 @@ /* Port Key definitions * key is determined according to the link speed, duplex and * user key (which is yet not supported) - * -------------------------------------------------------------- - * Port key : | User key | Speed | Duplex | - * -------------------------------------------------------------- - * 16 6 1 0 + * -------------------------------------------------------------- + * Port key | User key (10 bits) | Speed (5 bits) | Duplex| + * -------------------------------------------------------------- + * |15 6|5 1|0 */ #define AD_DUPLEX_KEY_MASKS 0x1 #define AD_SPEED_KEY_MASKS 0x3E @@ -1951,10 +1951,10 @@ void bond_3ad_bind_slave(struct slave *slave) port->slave = slave; port->actor_port_number = SLAVE_AD_INFO(slave)->id; - /* key is determined according to the link speed, duplex and user key(which - * is yet not supported) + /* key is determined according to the link speed, duplex and + * user key */ - port->actor_admin_port_key = 0; + port->actor_admin_port_key = bond->params.ad_user_port_key << 6; port->actor_admin_port_key |= __get_duplex(port); port->actor_admin_port_key |= (__get_link_speed(port) << 1); port->actor_oper_port_key = port->actor_admin_port_key; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a4e2f27ef683..2ee13be7551b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4141,6 +4141,7 @@ static int bond_check_params(struct bond_params *params) const struct bond_opt_value *valptr; int arp_all_targets_value; u16 ad_actor_sys_prio = 0; + u16 ad_user_port_key = 0; /* Convert string parameters. */ if (mode) { @@ -4445,6 +4446,14 @@ static int bond_check_params(struct bond_params *params) } ad_actor_sys_prio = valptr->value; + valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_USER_PORT_KEY), + &newval); + if (!valptr) { + pr_err("Error: No ad_user_port_key default value"); + return -EINVAL; + } + ad_user_port_key = valptr->value; + if (lp_interval == 0) { pr_warn("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n", INT_MAX, BOND_ALB_DEFAULT_LP_INTERVAL); @@ -4475,6 +4484,7 @@ static int bond_check_params(struct bond_params *params) params->tlb_dynamic_lb = 1; /* Default value */ params->ad_actor_sys_prio = ad_actor_sys_prio; eth_zero_addr(params->ad_actor_system); + params->ad_user_port_key = ad_user_port_key; if (packets_per_slave > 0) { params->reciprocal_packets_per_slave = reciprocal_value(packets_per_slave); diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index cdcef217ac84..c85da05721e6 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -74,6 +74,8 @@ static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_ad_actor_system_set(struct bonding *bond, const struct bond_opt_value *newval); +static int bond_option_ad_user_port_key_set(struct bonding *bond, + const struct bond_opt_value *newval); static const struct bond_opt_value bond_mode_tbl[] = { @@ -196,6 +198,12 @@ static const struct bond_opt_value bond_ad_actor_sys_prio_tbl[] = { { NULL, -1, 0}, }; +static const struct bond_opt_value bond_ad_user_port_key_tbl[] = { + { "minval", 0, BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT}, + { "maxval", 1023, BOND_VALFLAG_MAX}, + { NULL, -1, 0}, +}; + static const struct bond_option bond_opts[BOND_OPT_LAST] = { [BOND_OPT_MODE] = { .id = BOND_OPT_MODE, @@ -405,6 +413,14 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .flags = BOND_OPTFLAG_RAWVAL | BOND_OPTFLAG_IFDOWN, .set = bond_option_ad_actor_system_set, }, + [BOND_OPT_AD_USER_PORT_KEY] = { + .id = BOND_OPT_AD_USER_PORT_KEY, + .name = "ad_user_port_key", + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), + .flags = BOND_OPTFLAG_IFDOWN, + .values = bond_ad_user_port_key_tbl, + .set = bond_option_ad_user_port_key_set, + } }; /* Searches for an option by name */ @@ -1402,3 +1418,13 @@ static int bond_option_ad_actor_system_set(struct bonding *bond, ether_addr_copy(bond->params.ad_actor_system, macaddr); return 0; } + +static int bond_option_ad_user_port_key_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + netdev_info(bond->dev, "Setting ad_user_port_key to (%llu)\n", + newval->value); + + bond->params.ad_user_port_key = newval->value; + return 0; +} diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 09fefa50d055..143a2abd1c1c 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -721,6 +721,20 @@ static ssize_t bonding_show_ad_actor_system(struct device *d, static DEVICE_ATTR(ad_actor_system, S_IRUGO | S_IWUSR, bonding_show_ad_actor_system, bonding_sysfs_store_option); +static ssize_t bonding_show_ad_user_port_key(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + + if (BOND_MODE(bond) == BOND_MODE_8023AD) + return sprintf(buf, "%hu\n", bond->params.ad_user_port_key); + + return 0; +} +static DEVICE_ATTR(ad_user_port_key, S_IRUGO | S_IWUSR, + bonding_show_ad_user_port_key, bonding_sysfs_store_option); + static struct attribute *per_bond_attrs[] = { &dev_attr_slaves.attr, &dev_attr_mode.attr, @@ -756,6 +770,7 @@ static struct attribute *per_bond_attrs[] = { &dev_attr_tlb_dynamic_lb.attr, &dev_attr_ad_actor_sys_prio.attr, &dev_attr_ad_actor_system.attr, + &dev_attr_ad_user_port_key.attr, NULL, }; diff --git a/include/net/bond_options.h b/include/net/bond_options.h index eeeefa1d3cd8..c28aca25320e 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -65,6 +65,7 @@ enum { BOND_OPT_TLB_DYNAMIC_LB, BOND_OPT_AD_ACTOR_SYS_PRIO, BOND_OPT_AD_ACTOR_SYSTEM, + BOND_OPT_AD_USER_PORT_KEY, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 650f38693956..20defc0353d1 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -137,6 +137,7 @@ struct bond_params { int tlb_dynamic_lb; struct reciprocal_value reciprocal_packets_per_slave; u16 ad_actor_sys_prio; + u16 ad_user_port_key; u8 ad_actor_system[ETH_ALEN]; }; -- cgit v1.2.3 From 171a42c38c6e1a5a076d6276e94e55a0b5b7868c Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Sat, 9 May 2015 00:01:58 -0700 Subject: bonding: add netlink support for sys prio, actor sys mac, and port key Adds netlink support for the following bonding options: * BOND_OPT_AD_ACTOR_SYS_PRIO * BOND_OPT_AD_ACTOR_SYSTEM * BOND_OPT_AD_USER_PORT_KEY When setting the actor system mac address we assume the netlink message contains a binary mac and not a string representation of a mac. Signed-off-by: Andy Gospodarek [jt: completed the setting side of the netlink attributes] Signed-off-by: Jonathan Toppins Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_netlink.c | 50 ++++++++++++++++++++++++++++++++++++++ drivers/net/bonding/bond_options.c | 30 ++++++++++++++++------- include/uapi/linux/if_link.h | 3 +++ 3 files changed, 74 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index 7b1124366011..f7015eb4f8db 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -94,6 +94,10 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { [IFLA_BOND_AD_LACP_RATE] = { .type = NLA_U8 }, [IFLA_BOND_AD_SELECT] = { .type = NLA_U8 }, [IFLA_BOND_AD_INFO] = { .type = NLA_NESTED }, + [IFLA_BOND_AD_ACTOR_SYS_PRIO] = { .type = NLA_U16 }, + [IFLA_BOND_AD_USER_PORT_KEY] = { .type = NLA_U16 }, + [IFLA_BOND_AD_ACTOR_SYSTEM] = { .type = NLA_BINARY, + .len = ETH_ALEN }, }; static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { @@ -379,6 +383,36 @@ static int bond_changelink(struct net_device *bond_dev, if (err) return err; } + if (data[IFLA_BOND_AD_ACTOR_SYS_PRIO]) { + int actor_sys_prio = + nla_get_u16(data[IFLA_BOND_AD_ACTOR_SYS_PRIO]); + + bond_opt_initval(&newval, actor_sys_prio); + err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYS_PRIO, &newval); + if (err) + return err; + } + + if (data[IFLA_BOND_AD_USER_PORT_KEY]) { + int port_key = + nla_get_u16(data[IFLA_BOND_AD_USER_PORT_KEY]); + + bond_opt_initval(&newval, port_key); + err = __bond_opt_set(bond, BOND_OPT_AD_USER_PORT_KEY, &newval); + if (err) + return err; + } + + if (data[IFLA_BOND_AD_ACTOR_SYSTEM]) { + if (nla_len(data[IFLA_BOND_AD_ACTOR_SYSTEM]) != ETH_ALEN) + return -EINVAL; + + bond_opt_initval(&newval, + nla_get_be64(data[IFLA_BOND_AD_ACTOR_SYSTEM])); + err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYSTEM, &newval); + if (err) + return err; + } return 0; } @@ -426,6 +460,9 @@ static size_t bond_get_size(const struct net_device *bond_dev) nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_ACTOR_KEY */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_PARTNER_KEY*/ nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_INFO_PARTNER_MAC*/ + nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_ACTOR_SYS_PRIO */ + nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_USER_PORT_KEY */ + nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_ACTOR_SYSTEM */ 0; } @@ -551,6 +588,19 @@ static int bond_fill_info(struct sk_buff *skb, if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct ad_info info; + if (nla_put_u16(skb, IFLA_BOND_AD_ACTOR_SYS_PRIO, + bond->params.ad_actor_sys_prio)) + goto nla_put_failure; + + if (nla_put_u16(skb, IFLA_BOND_AD_USER_PORT_KEY, + bond->params.ad_user_port_key)) + goto nla_put_failure; + + if (nla_put(skb, IFLA_BOND_AD_ACTOR_SYSTEM, + sizeof(bond->params.ad_actor_system), + &bond->params.ad_actor_system)) + goto nla_put_failure; + if (!bond_3ad_get_active_agg_info(bond, &info)) { struct nlattr *nest; diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index c85da05721e6..9a32bbd7724e 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1394,7 +1394,7 @@ static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting ad_actor_sys_prio to (%llu)\n", + netdev_info(bond->dev, "Setting ad_actor_sys_prio to %llu\n", newval->value); bond->params.ad_actor_sys_prio = newval->value; @@ -1405,24 +1405,36 @@ static int bond_option_ad_actor_system_set(struct bonding *bond, const struct bond_opt_value *newval) { u8 macaddr[ETH_ALEN]; + u8 *mac; int i; - i = sscanf(newval->string, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", - &macaddr[0], &macaddr[1], &macaddr[2], - &macaddr[3], &macaddr[4], &macaddr[5]); - if (i != ETH_ALEN || !is_valid_ether_addr(macaddr)) { - netdev_err(bond->dev, "Invalid MAC address.\n"); - return -EINVAL; + if (newval->string) { + i = sscanf(newval->string, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + &macaddr[0], &macaddr[1], &macaddr[2], + &macaddr[3], &macaddr[4], &macaddr[5]); + if (i != ETH_ALEN) + goto err; + mac = macaddr; + } else { + mac = (u8 *)&newval->value; } - ether_addr_copy(bond->params.ad_actor_system, macaddr); + if (!is_valid_ether_addr(mac)) + goto err; + + netdev_info(bond->dev, "Setting ad_actor_system to %pM\n", mac); + ether_addr_copy(bond->params.ad_actor_system, mac); return 0; + +err: + netdev_err(bond->dev, "Invalid MAC address.\n"); + return -EINVAL; } static int bond_option_ad_user_port_key_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting ad_user_port_key to (%llu)\n", + netdev_info(bond->dev, "Setting ad_user_port_key to %llu\n", newval->value); bond->params.ad_user_port_key = newval->value; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d9cd19214b98..6d6e502e1051 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -417,6 +417,9 @@ enum { IFLA_BOND_AD_LACP_RATE, IFLA_BOND_AD_SELECT, IFLA_BOND_AD_INFO, + IFLA_BOND_AD_ACTOR_SYS_PRIO, + IFLA_BOND_AD_USER_PORT_KEY, + IFLA_BOND_AD_ACTOR_SYSTEM, __IFLA_BOND_MAX, }; -- cgit v1.2.3 From d2788d34885d4ce5ba17a8996fd95d28942e574e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 9 May 2015 22:51:32 +0200 Subject: net: sched: further simplify handle_ing Ingress qdisc has no other purpose than calling into tc_classify() that executes attached classifier(s) and action(s). It has a 1:1 relationship to dev->ingress_queue. After having commit 087c1a601ad7 ("net: sched: run ingress qdisc without locks") removed the central ingress lock, one major contention point is gone. The extra indirection layers however, are not necessary for calling into ingress qdisc. pktgen calling locally into netif_receive_skb() with a dummy u32, single CPU result on a Supermicro X10SLM-F, Xeon E3-1240: before ~21,1 Mpps, after patch ~22,9 Mpps. We can redirect the private classifier list to the netdev directly, without changing any classifier API bits (!) and execute on that from handle_ing() side. The __QDISC_STATE_DEACTIVATE test can be removed, ingress qdisc doesn't have a queue and thus dev_deactivate_queue() is also not applicable, ingress_cl_list provides similar behaviour. In other words, ingress qdisc acts like TCQ_F_BUILTIN qdisc. One next possible step is the removal of the dev's ingress (dummy) netdev_queue, and to only have the list member in the netdevice itself. Note, the filter chain is RCU protected and individual filter elements are being kfree'd by sched subsystem after RCU grace period. RCU read lock is being held by __netif_receive_skb_core(). Joint work with Alexei Starovoitov. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ net/core/dev.c | 30 ++++++++++++++---------- net/sched/sch_ingress.c | 58 ++++++++--------------------------------------- 3 files changed, 31 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1899c74a7127..c4e1caf6056f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1655,7 +1655,11 @@ struct net_device { rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; +#if CONFIG_NET_CLS_ACT + struct tcf_proto __rcu *ingress_cl_list; +#endif struct netdev_queue __rcu *ingress_queue; + unsigned char broadcast[MAX_ADDR_LEN]; #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rx_cpu_rmap; diff --git a/net/core/dev.c b/net/core/dev.c index 8a757464bfa2..e5f77c40bbd1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3525,31 +3525,37 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { - struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); - struct Qdisc *q; + struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list); + struct tcf_result cl_res; /* If there's at least one ingress present somewhere (so * we get here via enabled static key), remaining devices * that are not configured with an ingress qdisc will bail - * out w/o the rcu_dereference(). + * out here. */ - if (!rxq || (q = rcu_dereference(rxq->qdisc)) == &noop_qdisc) + if (!cl) return skb; - if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; } + qdisc_bstats_update_cpu(cl->q, skb); skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { - switch (qdisc_enqueue_root(skb, q)) { - case TC_ACT_SHOT: - case TC_ACT_STOLEN: - kfree_skb(skb); - return NULL; - } + switch (tc_classify(skb, cl, &cl_res)) { + case TC_ACT_OK: + case TC_ACT_RECLASSIFY: + skb->tc_index = TC_H_MIN(cl_res.classid); + break; + case TC_ACT_SHOT: + qdisc_qstats_drop_cpu(cl->q); + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + kfree_skb(skb); + return NULL; + default: + break; } return skb; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index a89cc3278bfb..e7c648fa9dc3 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -12,16 +12,10 @@ #include #include #include + #include #include - -struct ingress_qdisc_data { - struct tcf_proto __rcu *filter_list; -}; - -/* ------------------------- Class/flow operations ------------------------- */ - static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) { return NULL; @@ -49,45 +43,11 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker) static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch, unsigned long cl) { - struct ingress_qdisc_data *p = qdisc_priv(sch); - - return &p->filter_list; -} - -/* --------------------------- Qdisc operations ---------------------------- */ + struct net_device *dev = qdisc_dev(sch); -static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct ingress_qdisc_data *p = qdisc_priv(sch); - struct tcf_result res; - struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); - int result; - - result = tc_classify(skb, fl, &res); - - qdisc_bstats_update_cpu(sch, skb); - switch (result) { - case TC_ACT_SHOT: - result = TC_ACT_SHOT; - qdisc_qstats_drop_cpu(sch); - break; - case TC_ACT_STOLEN: - case TC_ACT_QUEUED: - result = TC_ACT_STOLEN; - break; - case TC_ACT_RECLASSIFY: - case TC_ACT_OK: - skb->tc_index = TC_H_MIN(res.classid); - default: - result = TC_ACT_OK; - break; - } - - return result; + return &dev->ingress_cl_list; } -/* ------------------------------------------------------------- */ - static int ingress_init(struct Qdisc *sch, struct nlattr *opt) { net_inc_ingress_queue(); @@ -98,9 +58,9 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt) static void ingress_destroy(struct Qdisc *sch) { - struct ingress_qdisc_data *p = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); - tcf_destroy_chain(&p->filter_list); + tcf_destroy_chain(&dev->ingress_cl_list); net_dec_ingress_queue(); } @@ -111,6 +71,7 @@ static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; + return nla_nest_end(skb, nest); nla_put_failure: @@ -131,8 +92,6 @@ static const struct Qdisc_class_ops ingress_class_ops = { static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .cl_ops = &ingress_class_ops, .id = "ingress", - .priv_size = sizeof(struct ingress_qdisc_data), - .enqueue = ingress_enqueue, .init = ingress_init, .destroy = ingress_destroy, .dump = ingress_dump, @@ -149,6 +108,7 @@ static void __exit ingress_module_exit(void) unregister_qdisc(&ingress_qdisc_ops); } -module_init(ingress_module_init) -module_exit(ingress_module_exit) +module_init(ingress_module_init); +module_exit(ingress_module_exit); + MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 4cda01e86f68dacc758b2daf6e8809f2ce915b85 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 May 2015 19:28:49 +0200 Subject: net: sched: fix typo in net_device ifdef This should have been #ifdef not #if. Reported-by: Fengguang Wu Fixes: d2788d34885d ("net: sched: further simplify handle_ing") Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c4e1caf6056f..a6d706b2a947 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1655,7 +1655,7 @@ struct net_device { rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; -#if CONFIG_NET_CLS_ACT +#ifdef CONFIG_NET_CLS_ACT struct tcf_proto __rcu *ingress_cl_list; #endif struct netdev_queue __rcu *ingress_queue; -- cgit v1.2.3 From b396cca6fafccf16206a5d041d59c9e6b65b6f5a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 May 2015 09:06:56 -0700 Subject: net: sched: deprecate enqueue_root() Only left enqueue_root() user is netem, and it looks not necessary : qdisc_skb_cb(skb)->pkt_len is preserved after one skb_clone() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sch_generic.h | 6 ------ net/sched/sch_netem.c | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 994b5a092f33..1b0a2e88ed2b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -501,12 +501,6 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) return sch->enqueue(skb, sch); } -static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch) -{ - qdisc_skb_cb(skb)->pkt_len = skb->len; - return qdisc_enqueue(skb, sch) & NET_XMIT_MASK; -} - static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) { return q->flags & TCQ_F_CPUSTATS; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 956ead2cab9a..5abd1d9de989 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -440,9 +440,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { struct Qdisc *rootq = qdisc_root(sch); u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ - q->duplicate = 0; - qdisc_enqueue_root(skb2, rootq); + q->duplicate = 0; + rootq->enqueue(skb2, rootq); q->duplicate = dupsave; } -- cgit v1.2.3 From 0e39250845c0f91acc64264709b25f7f9b85c2c3 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 6 May 2015 21:11:51 -0700 Subject: net: Store virtual address instead of page in netdev_alloc_cache This change makes it so that we store the virtual address of the page in the netdev_alloc_cache instead of the page pointer. The idea behind this is to avoid multiple calls to page_address since the virtual address is required for every access, but the page pointer is only needed at allocation or reset of the page. While I was at it I also reordered the netdev_alloc_cache structure a bit so that the size is always 16 bytes by dropping size in the case where PAGE_SIZE is greater than or equal to 32KB. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 ++--- net/core/skbuff.c | 55 +++++++++++++++++++++++++++++--------------------- 2 files changed, 34 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9c2f793573fa..8b9a2c35a9d7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2128,9 +2128,8 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } -#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) -#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) -#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE +#define NETDEV_FRAG_PAGE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) +#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(NETDEV_FRAG_PAGE_MAX_SIZE) void *netdev_alloc_frag(unsigned int fragsz); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d6851ca32598..a3062ec341c3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -348,7 +348,13 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) EXPORT_SYMBOL(build_skb); struct netdev_alloc_cache { - struct page_frag frag; + void * va; +#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE) + __u16 offset; + __u16 size; +#else + __u32 offset; +#endif /* we maintain a pagecount bias, so that we dont dirty cache line * containing page->_count every time we allocate a fragment. */ @@ -361,21 +367,20 @@ static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache); static struct page *__page_frag_refill(struct netdev_alloc_cache *nc, gfp_t gfp_mask) { - const unsigned int order = NETDEV_FRAG_PAGE_MAX_ORDER; struct page *page = NULL; gfp_t gfp = gfp_mask; - if (order) { - gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | - __GFP_NOMEMALLOC; - page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); - nc->frag.size = PAGE_SIZE << (page ? order : 0); - } - +#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE) + gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | + __GFP_NOMEMALLOC; + page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, + NETDEV_FRAG_PAGE_MAX_ORDER); + nc->size = page ? NETDEV_FRAG_PAGE_MAX_SIZE : PAGE_SIZE; +#endif if (unlikely(!page)) page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); - nc->frag.page = page; + nc->va = page ? page_address(page) : NULL; return page; } @@ -383,19 +388,20 @@ static struct page *__page_frag_refill(struct netdev_alloc_cache *nc, static void *__alloc_page_frag(struct netdev_alloc_cache *nc, unsigned int fragsz, gfp_t gfp_mask) { - struct page *page = nc->frag.page; - unsigned int size; + unsigned int size = PAGE_SIZE; + struct page *page; int offset; - if (unlikely(!page)) { + if (unlikely(!nc->va)) { refill: page = __page_frag_refill(nc, gfp_mask); if (!page) return NULL; - /* if size can vary use frag.size else just use PAGE_SIZE */ - size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE; - +#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE) + /* if size can vary use size else just use PAGE_SIZE */ + size = nc->size; +#endif /* Even if we own the page, we do not use atomic_set(). * This would break get_page_unless_zero() users. */ @@ -404,17 +410,20 @@ refill: /* reset page count bias and offset to start of new frag */ nc->pfmemalloc = page->pfmemalloc; nc->pagecnt_bias = size; - nc->frag.offset = size; + nc->offset = size; } - offset = nc->frag.offset - fragsz; + offset = nc->offset - fragsz; if (unlikely(offset < 0)) { + page = virt_to_page(nc->va); + if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count)) goto refill; - /* if size can vary use frag.size else just use PAGE_SIZE */ - size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE; - +#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE) + /* if size can vary use size else just use PAGE_SIZE */ + size = nc->size; +#endif /* OK, page count is 0, we can safely set it */ atomic_set(&page->_count, size); @@ -424,9 +433,9 @@ refill: } nc->pagecnt_bias--; - nc->frag.offset = offset; + nc->offset = offset; - return page_address(page) + offset; + return nc->va + offset; } static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) -- cgit v1.2.3 From b63ae8ca096dfdbfeef6a209c30a93a966518853 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 6 May 2015 21:11:57 -0700 Subject: mm/net: Rename and move page fragment handling from net/ to mm/ This change moves the __alloc_page_frag functionality out of the networking stack and into the page allocation portion of mm. The idea it so help make this maintainable by placing it with other page allocation functions. Since we are moving it from skbuff.c to page_alloc.c I have also renamed the basic defines and structure from netdev_alloc_cache to page_frag_cache to reflect that this is now part of a different kernel subsystem. I have also added a simple __free_page_frag function which can handle freeing the frags based on the skb->head pointer. The model for this is based off of __free_pages since we don't actually need to deal with all of the cases that put_page handles. I incorporated the virt_to_head_page call and compound_order into the function as it actually allows for a signficant size reduction by reducing code duplication. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/gfp.h | 5 +++ include/linux/mm_types.h | 18 +++++++++ include/linux/skbuff.h | 3 -- mm/page_alloc.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++ net/core/skbuff.c | 100 +++-------------------------------------------- 5 files changed, 127 insertions(+), 97 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 97a9373e61e8..70a7fee1efb3 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -366,6 +366,11 @@ extern void free_pages(unsigned long addr, unsigned int order); extern void free_hot_cold_page(struct page *page, bool cold); extern void free_hot_cold_page_list(struct list_head *list, bool cold); +struct page_frag_cache; +extern void *__alloc_page_frag(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask); +extern void __free_page_frag(void *addr); + extern void __free_kmem_pages(struct page *page, unsigned int order); extern void free_kmem_pages(unsigned long addr, unsigned int order); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 8d37e26a1007..0038ac7466fd 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -226,6 +226,24 @@ struct page_frag { #endif }; +#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) +#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) + +struct page_frag_cache { + void * va; +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + __u16 offset; + __u16 size; +#else + __u32 offset; +#endif + /* we maintain a pagecount bias, so that we dont dirty cache line + * containing page->_count every time we allocate a fragment. + */ + unsigned int pagecnt_bias; + bool pfmemalloc; +}; + typedef unsigned long __nocast vm_flags_t; /* diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8b9a2c35a9d7..0039fcc45b3b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2128,9 +2128,6 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } -#define NETDEV_FRAG_PAGE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) -#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(NETDEV_FRAG_PAGE_MAX_SIZE) - void *netdev_alloc_frag(unsigned int fragsz); struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ebffa0e4a9c0..2fd31aebef30 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2966,6 +2966,104 @@ void free_pages(unsigned long addr, unsigned int order) EXPORT_SYMBOL(free_pages); +/* + * Page Fragment: + * An arbitrary-length arbitrary-offset area of memory which resides + * within a 0 or higher order page. Multiple fragments within that page + * are individually refcounted, in the page's reference counter. + * + * The page_frag functions below provide a simple allocation framework for + * page fragments. This is used by the network stack and network device + * drivers to provide a backing region of memory for use as either an + * sk_buff->head, or to be used in the "frags" portion of skb_shared_info. + */ +static struct page *__page_frag_refill(struct page_frag_cache *nc, + gfp_t gfp_mask) +{ + struct page *page = NULL; + gfp_t gfp = gfp_mask; + +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | + __GFP_NOMEMALLOC; + page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, + PAGE_FRAG_CACHE_MAX_ORDER); + nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE; +#endif + if (unlikely(!page)) + page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); + + nc->va = page ? page_address(page) : NULL; + + return page; +} + +void *__alloc_page_frag(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask) +{ + unsigned int size = PAGE_SIZE; + struct page *page; + int offset; + + if (unlikely(!nc->va)) { +refill: + page = __page_frag_refill(nc, gfp_mask); + if (!page) + return NULL; + +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + /* if size can vary use size else just use PAGE_SIZE */ + size = nc->size; +#endif + /* Even if we own the page, we do not use atomic_set(). + * This would break get_page_unless_zero() users. + */ + atomic_add(size - 1, &page->_count); + + /* reset page count bias and offset to start of new frag */ + nc->pfmemalloc = page->pfmemalloc; + nc->pagecnt_bias = size; + nc->offset = size; + } + + offset = nc->offset - fragsz; + if (unlikely(offset < 0)) { + page = virt_to_page(nc->va); + + if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count)) + goto refill; + +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + /* if size can vary use size else just use PAGE_SIZE */ + size = nc->size; +#endif + /* OK, page count is 0, we can safely set it */ + atomic_set(&page->_count, size); + + /* reset page count bias and offset to start of new frag */ + nc->pagecnt_bias = size; + offset = size - fragsz; + } + + nc->pagecnt_bias--; + nc->offset = offset; + + return nc->va + offset; +} +EXPORT_SYMBOL(__alloc_page_frag); + +/* + * Frees a page fragment allocated out of either a compound or order 0 page. + */ +void __free_page_frag(void *addr) +{ + struct page *page = virt_to_head_page(addr); + + if (unlikely(put_page_testzero(page))) + __free_pages_ok(page, compound_order(page)); +} +EXPORT_SYMBOL(__free_page_frag); + /* * alloc_kmem_pages charges newly allocated pages to the kmem resource counter * of the current memory cgroup. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a3062ec341c3..dcc0e07abf47 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -347,100 +347,12 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) } EXPORT_SYMBOL(build_skb); -struct netdev_alloc_cache { - void * va; -#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE) - __u16 offset; - __u16 size; -#else - __u32 offset; -#endif - /* we maintain a pagecount bias, so that we dont dirty cache line - * containing page->_count every time we allocate a fragment. - */ - unsigned int pagecnt_bias; - bool pfmemalloc; -}; -static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); -static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache); - -static struct page *__page_frag_refill(struct netdev_alloc_cache *nc, - gfp_t gfp_mask) -{ - struct page *page = NULL; - gfp_t gfp = gfp_mask; - -#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE) - gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | - __GFP_NOMEMALLOC; - page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, - NETDEV_FRAG_PAGE_MAX_ORDER); - nc->size = page ? NETDEV_FRAG_PAGE_MAX_SIZE : PAGE_SIZE; -#endif - if (unlikely(!page)) - page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); - - nc->va = page ? page_address(page) : NULL; - - return page; -} - -static void *__alloc_page_frag(struct netdev_alloc_cache *nc, - unsigned int fragsz, gfp_t gfp_mask) -{ - unsigned int size = PAGE_SIZE; - struct page *page; - int offset; - - if (unlikely(!nc->va)) { -refill: - page = __page_frag_refill(nc, gfp_mask); - if (!page) - return NULL; - -#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE) - /* if size can vary use size else just use PAGE_SIZE */ - size = nc->size; -#endif - /* Even if we own the page, we do not use atomic_set(). - * This would break get_page_unless_zero() users. - */ - atomic_add(size - 1, &page->_count); - - /* reset page count bias and offset to start of new frag */ - nc->pfmemalloc = page->pfmemalloc; - nc->pagecnt_bias = size; - nc->offset = size; - } - - offset = nc->offset - fragsz; - if (unlikely(offset < 0)) { - page = virt_to_page(nc->va); - - if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count)) - goto refill; - -#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE) - /* if size can vary use size else just use PAGE_SIZE */ - size = nc->size; -#endif - /* OK, page count is 0, we can safely set it */ - atomic_set(&page->_count, size); - - /* reset page count bias and offset to start of new frag */ - nc->pagecnt_bias = size; - offset = size - fragsz; - } - - nc->pagecnt_bias--; - nc->offset = offset; - - return nc->va + offset; -} +static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); +static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache); static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { - struct netdev_alloc_cache *nc; + struct page_frag_cache *nc; unsigned long flags; void *data; @@ -466,7 +378,7 @@ EXPORT_SYMBOL(netdev_alloc_frag); static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { - struct netdev_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); + struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); return __alloc_page_frag(nc, fragsz, gfp_mask); } @@ -493,7 +405,7 @@ EXPORT_SYMBOL(napi_alloc_frag); struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, gfp_t gfp_mask) { - struct netdev_alloc_cache *nc; + struct page_frag_cache *nc; unsigned long flags; struct sk_buff *skb; bool pfmemalloc; @@ -556,7 +468,7 @@ EXPORT_SYMBOL(__netdev_alloc_skb); struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, gfp_t gfp_mask) { - struct netdev_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); + struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); struct sk_buff *skb; void *data; -- cgit v1.2.3 From 181edb2bfa22b50817684135ab6430ed2808abf0 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 6 May 2015 21:12:03 -0700 Subject: net: Add skb_free_frag to replace use of put_page in freeing skb->head This change adds a function called skb_free_frag which is meant to compliment the function netdev_alloc_frag. The general idea is to enable a more lightweight version of page freeing since we don't actually need all the overhead of a put_page, and we don't quite fit the model of __free_pages. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ net/core/skbuff.c | 10 ++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0039fcc45b3b..c0b574a414e7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2182,6 +2182,11 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); } +static inline void skb_free_frag(void *addr) +{ + __free_page_frag(addr); +} + void *napi_alloc_frag(unsigned int fragsz); struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int length, gfp_t gfp_mask); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index dcc0e07abf47..d67e612bf0ef 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -436,7 +436,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, skb = __build_skb(data, len); if (unlikely(!skb)) { - put_page(virt_to_head_page(data)); + skb_free_frag(data); return NULL; } @@ -490,7 +490,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, skb = __build_skb(data, len); if (unlikely(!skb)) { - put_page(virt_to_head_page(data)); + skb_free_frag(data); return NULL; } @@ -549,10 +549,12 @@ static void skb_clone_fraglist(struct sk_buff *skb) static void skb_free_head(struct sk_buff *skb) { + unsigned char *head = skb->head; + if (skb->head_frag) - put_page(virt_to_head_page(skb->head)); + skb_free_frag(head); else - kfree(skb->head); + kfree(head); } static void skb_release_data(struct sk_buff *skb) -- cgit v1.2.3 From a3eb95f891d6130b1fc03dd07a8b54cf0a5c8ab8 Mon Sep 17 00:00:00 2001 From: David Ward Date: Sat, 9 May 2015 22:01:46 -0400 Subject: net_sched: gred: add TCA_GRED_LIMIT attribute In a GRED qdisc, if the default "virtual queue" (VQ) does not have drop parameters configured, then packets for the default VQ are not subjected to RED and are only dropped if the queue is larger than the net_device's tx_queue_len. This behavior is useful for WRED mode, since these packets will still influence the calculated average queue length and (therefore) the drop probability for all of the other VQs. However, for some drivers tx_queue_len is zero. In other cases the user may wish to make the limit the same for all VQs (including the default VQ with no drop parameters). This change adds a TCA_GRED_LIMIT attribute to set the GRED queue limit, in bytes, during qdisc setup. (This limit is in bytes to be consistent with the drop parameters.) The default limit is the same as for a bfifo queue (tx_queue_len * psched_mtu). If the drop parameters of any VQ are configured with a smaller limit than the GRED queue limit, that VQ will still observe the smaller limit instead. Signed-off-by: David Ward Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 3 ++- net/sched/sch_gred.c | 28 ++++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 69d88b309cc7..8d2530daca9f 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -268,7 +268,8 @@ enum { TCA_GRED_STAB, TCA_GRED_DPS, TCA_GRED_MAX_P, - __TCA_GRED_MAX, + TCA_GRED_LIMIT, + __TCA_GRED_MAX, }; #define TCA_GRED_MAX (__TCA_GRED_MAX - 1) diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index a4ca4517cdc8..826e2994152b 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -165,7 +165,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch) * if no default DP has been configured. This * allows for DP flows to be left untouched. */ - if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len) + if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= + sch->limit)) return qdisc_enqueue_tail(skb, sch); else goto drop; @@ -397,7 +398,10 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp, q->DP = dp; q->prio = prio; - q->limit = ctl->limit; + if (ctl->limit > sch->limit) + q->limit = sch->limit; + else + q->limit = ctl->limit; if (q->backlog == 0) red_end_of_idle_period(&q->vars); @@ -414,6 +418,7 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = { [TCA_GRED_STAB] = { .len = 256 }, [TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) }, [TCA_GRED_MAX_P] = { .type = NLA_U32 }, + [TCA_GRED_LIMIT] = { .type = NLA_U32 }, }; static int gred_change(struct Qdisc *sch, struct nlattr *opt) @@ -433,11 +438,15 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt) if (err < 0) return err; - if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) + if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) { + if (tb[TCA_GRED_LIMIT] != NULL) + sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); return gred_change_table_def(sch, opt); + } if (tb[TCA_GRED_PARMS] == NULL || - tb[TCA_GRED_STAB] == NULL) + tb[TCA_GRED_STAB] == NULL || + tb[TCA_GRED_LIMIT] != NULL) return -EINVAL; max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0; @@ -501,6 +510,14 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB]) return -EINVAL; + if (tb[TCA_GRED_LIMIT]) + sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); + else { + u32 qlen = qdisc_dev(sch)->tx_queue_len ? : 1; + + sch->limit = qlen * psched_mtu(qdisc_dev(sch)); + } + return gred_change_table_def(sch, tb[TCA_GRED_DPS]); } @@ -531,6 +548,9 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p)) goto nla_put_failure; + if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit)) + goto nla_put_failure; + parms = nla_nest_start(skb, TCA_GRED_PARMS); if (parms == NULL) goto nla_put_failure; -- cgit v1.2.3 From ebb9a03a590e2325f747be43c8db450e92509501 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 10 May 2015 09:47:46 -0700 Subject: switchdev: s/netdev_switch_/switchdev_/ and s/NETDEV_SWITCH_/SWITCHDEV_/ Turned out that "switchdev" sticks. So just unify all related terms to use this prefix. Signed-off-by: Jiri Pirko Signed-off-by: Scott Feldman Acked-by: Roopa Prabhu Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 4 +- drivers/net/ethernet/rocker/rocker.c | 10 +-- drivers/net/team/team.c | 4 +- include/net/switchdev.h | 111 +++++++++++++++-------------- net/bridge/br.c | 22 +++--- net/bridge/br_netlink.c | 6 +- net/bridge/br_stp.c | 2 +- net/core/net-sysfs.c | 2 +- net/core/rtnetlink.c | 2 +- net/ipv4/fib_trie.c | 40 +++++------ net/switchdev/switchdev.c | 134 +++++++++++++++++------------------ 11 files changed, 166 insertions(+), 171 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2ee13be7551b..a475a0389cc7 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4039,8 +4039,8 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_add_slave = bond_enslave, .ndo_del_slave = bond_release, .ndo_fix_features = bond_fix_features, - .ndo_bridge_setlink = ndo_dflt_netdev_switch_port_bridge_setlink, - .ndo_bridge_dellink = ndo_dflt_netdev_switch_port_bridge_dellink, + .ndo_bridge_setlink = ndo_dflt_switchdev_port_bridge_setlink, + .ndo_bridge_dellink = ndo_dflt_switchdev_port_bridge_dellink, .ndo_features_check = passthru_features_check, }; diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 0c6f0a8b42dd..418f62b19545 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -3377,17 +3377,17 @@ static void rocker_port_fdb_learn_work(struct work_struct *work) container_of(work, struct rocker_fdb_learn_work, work); bool removing = (lw->flags & ROCKER_OP_FLAG_REMOVE); bool learned = (lw->flags & ROCKER_OP_FLAG_LEARNED); - struct netdev_switch_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info; info.addr = lw->addr; info.vid = lw->vid; if (learned && removing) - call_netdev_switch_notifiers(NETDEV_SWITCH_FDB_DEL, - lw->dev, &info.info); + call_switchdev_notifiers(SWITCHDEV_FDB_DEL, + lw->dev, &info.info); else if (learned && !removing) - call_netdev_switch_notifiers(NETDEV_SWITCH_FDB_ADD, - lw->dev, &info.info); + call_switchdev_notifiers(SWITCHDEV_FDB_ADD, + lw->dev, &info.info); kfree(work); } diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 6928448f6b7f..cfe84965afb6 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1977,8 +1977,8 @@ static const struct net_device_ops team_netdev_ops = { .ndo_del_slave = team_del_slave, .ndo_fix_features = team_fix_features, .ndo_change_carrier = team_change_carrier, - .ndo_bridge_setlink = ndo_dflt_netdev_switch_port_bridge_setlink, - .ndo_bridge_dellink = ndo_dflt_netdev_switch_port_bridge_dellink, + .ndo_bridge_setlink = ndo_dflt_switchdev_port_bridge_setlink, + .ndo_bridge_dellink = ndo_dflt_switchdev_port_bridge_dellink, .ndo_features_check = passthru_features_check, }; diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d2e69ee3019a..cd921fa0d63a 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -43,124 +43,125 @@ struct swdev_ops { u8 tos, u8 type, u32 tb_id); }; -enum netdev_switch_notifier_type { - NETDEV_SWITCH_FDB_ADD = 1, - NETDEV_SWITCH_FDB_DEL, +enum switchdev_notifier_type { + SWITCHDEV_FDB_ADD = 1, + SWITCHDEV_FDB_DEL, }; -struct netdev_switch_notifier_info { +struct switchdev_notifier_info { struct net_device *dev; }; -struct netdev_switch_notifier_fdb_info { - struct netdev_switch_notifier_info info; /* must be first */ +struct switchdev_notifier_fdb_info { + struct switchdev_notifier_info info; /* must be first */ const unsigned char *addr; u16 vid; }; static inline struct net_device * -netdev_switch_notifier_info_to_dev(const struct netdev_switch_notifier_info *info) +switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info) { return info->dev; } #ifdef CONFIG_NET_SWITCHDEV -int netdev_switch_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid); -int netdev_switch_port_stp_update(struct net_device *dev, u8 state); -int register_netdev_switch_notifier(struct notifier_block *nb); -int unregister_netdev_switch_notifier(struct notifier_block *nb); -int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, - struct netdev_switch_notifier_info *info); -int netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, u32 tb_id); -int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id); -void netdev_switch_fib_ipv4_abort(struct fib_info *fi); +int switchdev_parent_id_get(struct net_device *dev, + struct netdev_phys_item_id *psid); +int switchdev_port_stp_update(struct net_device *dev, u8 state); +int register_switchdev_notifier(struct notifier_block *nb); +int unregister_switchdev_notifier(struct notifier_block *nb); +int call_switchdev_notifiers(unsigned long val, struct net_device *dev, + struct switchdev_notifier_info *info); +int switchdev_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); +int switchdev_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); +int ndo_dflt_switchdev_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); +int ndo_dflt_switchdev_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); +int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 nlflags, u32 tb_id); +int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id); +void switchdev_fib_ipv4_abort(struct fib_info *fi); #else -static inline int netdev_switch_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid) +static inline int switchdev_parent_id_get(struct net_device *dev, + struct netdev_phys_item_id *psid) { return -EOPNOTSUPP; } -static inline int netdev_switch_port_stp_update(struct net_device *dev, - u8 state) +static inline int switchdev_port_stp_update(struct net_device *dev, + u8 state) { return -EOPNOTSUPP; } -static inline int register_netdev_switch_notifier(struct notifier_block *nb) +static inline int register_switchdev_notifier(struct notifier_block *nb) { return 0; } -static inline int unregister_netdev_switch_notifier(struct notifier_block *nb) +static inline int unregister_switchdev_notifier(struct notifier_block *nb) { return 0; } -static inline int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, - struct netdev_switch_notifier_info *info) +static inline int call_switchdev_notifiers(unsigned long val, + struct net_device *dev, + struct switchdev_notifier_info *info) { return NOTIFY_DONE; } -static inline int netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) +static inline int switchdev_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) { return -EOPNOTSUPP; } -static inline int netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) +static inline int switchdev_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) { return -EOPNOTSUPP; } -static inline int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) +static inline int ndo_dflt_switchdev_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) { return 0; } -static inline int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) +static inline int ndo_dflt_switchdev_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) { return 0; } -static inline int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 nlflags, u32 tb_id) +static inline int switchdev_fib_ipv4_add(u32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, + u32 nlflags, u32 tb_id) { return 0; } -static inline int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) +static inline int switchdev_fib_ipv4_del(u32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, u32 tb_id) { return 0; } -static inline void netdev_switch_fib_ipv4_abort(struct fib_info *fi) +static inline void switchdev_fib_ipv4_abort(struct fib_info *fi) { } diff --git a/net/bridge/br.c b/net/bridge/br.c index 02c24cf63c34..a1abe4936fe1 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -121,13 +121,13 @@ static struct notifier_block br_device_notifier = { .notifier_call = br_device_event }; -static int br_netdev_switch_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int br_switchdev_event(struct notifier_block *unused, + unsigned long event, void *ptr) { - struct net_device *dev = netdev_switch_notifier_info_to_dev(ptr); + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); struct net_bridge_port *p; struct net_bridge *br; - struct netdev_switch_notifier_fdb_info *fdb_info; + struct switchdev_notifier_fdb_info *fdb_info; int err = NOTIFY_DONE; rtnl_lock(); @@ -138,14 +138,14 @@ static int br_netdev_switch_event(struct notifier_block *unused, br = p->br; switch (event) { - case NETDEV_SWITCH_FDB_ADD: + case SWITCHDEV_FDB_ADD: fdb_info = ptr; err = br_fdb_external_learn_add(br, p, fdb_info->addr, fdb_info->vid); if (err) err = notifier_from_errno(err); break; - case NETDEV_SWITCH_FDB_DEL: + case SWITCHDEV_FDB_DEL: fdb_info = ptr; err = br_fdb_external_learn_del(br, p, fdb_info->addr, fdb_info->vid); @@ -159,8 +159,8 @@ out: return err; } -static struct notifier_block br_netdev_switch_notifier = { - .notifier_call = br_netdev_switch_event, +static struct notifier_block br_switchdev_notifier = { + .notifier_call = br_switchdev_event, }; static void __net_exit br_net_exit(struct net *net) @@ -214,7 +214,7 @@ static int __init br_init(void) if (err) goto err_out3; - err = register_netdev_switch_notifier(&br_netdev_switch_notifier); + err = register_switchdev_notifier(&br_switchdev_notifier); if (err) goto err_out4; @@ -235,7 +235,7 @@ static int __init br_init(void) return 0; err_out5: - unregister_netdev_switch_notifier(&br_netdev_switch_notifier); + unregister_switchdev_notifier(&br_switchdev_notifier); err_out4: unregister_netdevice_notifier(&br_device_notifier); err_out3: @@ -253,7 +253,7 @@ static void __exit br_deinit(void) { stp_proto_unregister(&br_stp_proto); br_netlink_fini(); - unregister_netdev_switch_notifier(&br_netdev_switch_notifier); + unregister_switchdev_notifier(&br_switchdev_notifier); unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); unregister_pernet_subsys(&br_net_ops); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 4b5c236998ff..dc234533e204 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -631,8 +631,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) if (p && !(flags & BRIDGE_FLAGS_SELF)) { /* set bridge attributes in hardware if supported */ - ret_offload = netdev_switch_port_bridge_setlink(dev, nlh, - flags); + ret_offload = switchdev_port_bridge_setlink(dev, nlh, flags); if (ret_offload && ret_offload != -EOPNOTSUPP) br_warn(p->br, "error setting attrs on port %u(%s)\n", (unsigned int)p->port_no, p->dev->name); @@ -671,8 +670,7 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) if (p && !(flags & BRIDGE_FLAGS_SELF)) { /* del bridge attributes in hardware */ - ret_offload = netdev_switch_port_bridge_dellink(dev, nlh, - flags); + ret_offload = switchdev_port_bridge_dellink(dev, nlh, flags); if (ret_offload && ret_offload != -EOPNOTSUPP) br_warn(p->br, "error deleting attrs on port %u (%s)\n", (unsigned int)p->port_no, p->dev->name); diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index fb3ebe615513..28e3f4bc01e0 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -42,7 +42,7 @@ void br_set_state(struct net_bridge_port *p, unsigned int state) int err; p->state = state; - err = netdev_switch_port_stp_update(p->dev, state); + err = switchdev_port_stp_update(p->dev, state); if (err && err != -EOPNOTSUPP) br_warn(p->br, "error setting offload STP state on port %u(%s)\n", (unsigned int) p->port_no, p->dev->name); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 4238d6da5c60..be86a7ce9282 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -460,7 +460,7 @@ static ssize_t phys_switch_id_show(struct device *dev, if (dev_isalive(netdev)) { struct netdev_phys_item_id ppid; - ret = netdev_switch_parent_id_get(netdev, &ppid); + ret = switchdev_parent_id_get(netdev, &ppid); if (!ret) ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 83e08323fdcd..fcd41fcd7e70 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1006,7 +1006,7 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) int err; struct netdev_phys_item_id psid; - err = netdev_switch_parent_id_get(dev, &psid); + err = switchdev_parent_id_get(dev, &psid); if (err) { if (err == -EOPNOTSUPP) return 0; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index e13fcc602da2..03444c6ae342 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1165,13 +1165,13 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_state = state & ~FA_S_ACCESSED; new_fa->fa_slen = fa->fa_slen; - err = netdev_switch_fib_ipv4_add(key, plen, fi, - new_fa->fa_tos, - cfg->fc_type, - cfg->fc_nlflags, - tb->tb_id); + err = switchdev_fib_ipv4_add(key, plen, fi, + new_fa->fa_tos, + cfg->fc_type, + cfg->fc_nlflags, + tb->tb_id); if (err) { - netdev_switch_fib_ipv4_abort(fi); + switchdev_fib_ipv4_abort(fi); kmem_cache_free(fn_alias_kmem, new_fa); goto out; } @@ -1215,12 +1215,10 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->tb_id = tb->tb_id; /* (Optionally) offload fib entry to switch hardware. */ - err = netdev_switch_fib_ipv4_add(key, plen, fi, tos, - cfg->fc_type, - cfg->fc_nlflags, - tb->tb_id); + err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type, + cfg->fc_nlflags, tb->tb_id); if (err) { - netdev_switch_fib_ipv4_abort(fi); + switchdev_fib_ipv4_abort(fi); goto out_free_new_fa; } @@ -1239,7 +1237,7 @@ succeeded: return 0; out_sw_fib_del: - netdev_switch_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id); + switchdev_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id); out_free_new_fa: kmem_cache_free(fn_alias_kmem, new_fa); out: @@ -1517,8 +1515,8 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) if (!fa_to_delete) return -ESRCH; - netdev_switch_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, - cfg->fc_type, tb->tb_id); + switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, + cfg->fc_type, tb->tb_id); rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -1767,10 +1765,9 @@ void fib_table_flush_external(struct fib_table *tb) if (!fi || !(fi->fib_flags & RTNH_F_EXTERNAL)) continue; - netdev_switch_fib_ipv4_del(n->key, - KEYLENGTH - fa->fa_slen, - fi, fa->fa_tos, - fa->fa_type, tb->tb_id); + switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, + fi, fa->fa_tos, fa->fa_type, + tb->tb_id); } /* update leaf slen */ @@ -1835,10 +1832,9 @@ int fib_table_flush(struct fib_table *tb) continue; } - netdev_switch_fib_ipv4_del(n->key, - KEYLENGTH - fa->fa_slen, - fi, fa->fa_tos, - fa->fa_type, tb->tb_id); + switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, + fi, fa->fa_tos, fa->fa_type, + tb->tb_id); hlist_del_rcu(&fa->fa_list); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 46568b85c333..52613ed49a8c 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -19,14 +19,14 @@ #include /** - * netdev_switch_parent_id_get - Get ID of a switch + * switchdev_parent_id_get - Get ID of a switch * @dev: port device * @psid: switch ID * * Get ID of a switch this port is part of. */ -int netdev_switch_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid) +int switchdev_parent_id_get(struct net_device *dev, + struct netdev_phys_item_id *psid) { const struct swdev_ops *ops = dev->swdev_ops; @@ -34,17 +34,17 @@ int netdev_switch_parent_id_get(struct net_device *dev, return -EOPNOTSUPP; return ops->swdev_parent_id_get(dev, psid); } -EXPORT_SYMBOL_GPL(netdev_switch_parent_id_get); +EXPORT_SYMBOL_GPL(switchdev_parent_id_get); /** - * netdev_switch_port_stp_update - Notify switch device port of STP + * switchdev_port_stp_update - Notify switch device port of STP * state change * @dev: port device * @state: port STP state * * Notify switch device port of bridge port STP state change. */ -int netdev_switch_port_stp_update(struct net_device *dev, u8 state) +int switchdev_port_stp_update(struct net_device *dev, u8 state) { const struct swdev_ops *ops = dev->swdev_ops; struct net_device *lower_dev; @@ -55,57 +55,57 @@ int netdev_switch_port_stp_update(struct net_device *dev, u8 state) return ops->swdev_port_stp_update(dev, state); netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = netdev_switch_port_stp_update(lower_dev, state); + err = switchdev_port_stp_update(lower_dev, state); if (err && err != -EOPNOTSUPP) return err; } return err; } -EXPORT_SYMBOL_GPL(netdev_switch_port_stp_update); +EXPORT_SYMBOL_GPL(switchdev_port_stp_update); -static DEFINE_MUTEX(netdev_switch_mutex); -static RAW_NOTIFIER_HEAD(netdev_switch_notif_chain); +static DEFINE_MUTEX(switchdev_mutex); +static RAW_NOTIFIER_HEAD(switchdev_notif_chain); /** - * register_netdev_switch_notifier - Register notifier + * register_switchdev_notifier - Register notifier * @nb: notifier_block * * Register switch device notifier. This should be used by code * which needs to monitor events happening in particular device. * Return values are same as for atomic_notifier_chain_register(). */ -int register_netdev_switch_notifier(struct notifier_block *nb) +int register_switchdev_notifier(struct notifier_block *nb) { int err; - mutex_lock(&netdev_switch_mutex); - err = raw_notifier_chain_register(&netdev_switch_notif_chain, nb); - mutex_unlock(&netdev_switch_mutex); + mutex_lock(&switchdev_mutex); + err = raw_notifier_chain_register(&switchdev_notif_chain, nb); + mutex_unlock(&switchdev_mutex); return err; } -EXPORT_SYMBOL_GPL(register_netdev_switch_notifier); +EXPORT_SYMBOL_GPL(register_switchdev_notifier); /** - * unregister_netdev_switch_notifier - Unregister notifier + * unregister_switchdev_notifier - Unregister notifier * @nb: notifier_block * * Unregister switch device notifier. * Return values are same as for atomic_notifier_chain_unregister(). */ -int unregister_netdev_switch_notifier(struct notifier_block *nb) +int unregister_switchdev_notifier(struct notifier_block *nb) { int err; - mutex_lock(&netdev_switch_mutex); - err = raw_notifier_chain_unregister(&netdev_switch_notif_chain, nb); - mutex_unlock(&netdev_switch_mutex); + mutex_lock(&switchdev_mutex); + err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb); + mutex_unlock(&switchdev_mutex); return err; } -EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier); +EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); /** - * call_netdev_switch_notifiers - Call notifiers + * call_switchdev_notifiers - Call notifiers * @val: value passed unmodified to notifier function * @dev: port device * @info: notifier information data @@ -114,21 +114,21 @@ EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier); * when it needs to propagate hardware event. * Return values are same as for atomic_notifier_call_chain(). */ -int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, - struct netdev_switch_notifier_info *info) +int call_switchdev_notifiers(unsigned long val, struct net_device *dev, + struct switchdev_notifier_info *info) { int err; info->dev = dev; - mutex_lock(&netdev_switch_mutex); - err = raw_notifier_call_chain(&netdev_switch_notif_chain, val, info); - mutex_unlock(&netdev_switch_mutex); + mutex_lock(&switchdev_mutex); + err = raw_notifier_call_chain(&switchdev_notif_chain, val, info); + mutex_unlock(&switchdev_mutex); return err; } -EXPORT_SYMBOL_GPL(call_netdev_switch_notifiers); +EXPORT_SYMBOL_GPL(call_switchdev_notifiers); /** - * netdev_switch_port_bridge_setlink - Notify switch device port of bridge + * switchdev_port_bridge_setlink - Notify switch device port of bridge * port attributes * * @dev: port device @@ -137,8 +137,8 @@ EXPORT_SYMBOL_GPL(call_netdev_switch_notifiers); * * Notify switch device port of bridge port attributes */ -int netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) +int switchdev_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) { const struct net_device_ops *ops = dev->netdev_ops; @@ -150,10 +150,10 @@ int netdev_switch_port_bridge_setlink(struct net_device *dev, return ops->ndo_bridge_setlink(dev, nlh, flags); } -EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_setlink); +EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink); /** - * netdev_switch_port_bridge_dellink - Notify switch device port of bridge + * switchdev_port_bridge_dellink - Notify switch device port of bridge * port attribute delete * * @dev: port device @@ -162,8 +162,8 @@ EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_setlink); * * Notify switch device port of bridge port attribute delete */ -int netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) +int switchdev_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) { const struct net_device_ops *ops = dev->netdev_ops; @@ -175,11 +175,11 @@ int netdev_switch_port_bridge_dellink(struct net_device *dev, return ops->ndo_bridge_dellink(dev, nlh, flags); } -EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_dellink); +EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink); /** - * ndo_dflt_netdev_switch_port_bridge_setlink - default ndo bridge setlink - * op for master devices + * ndo_dflt_switchdev_port_bridge_setlink - default ndo bridge setlink + * op for master devices * * @dev: port device * @nlh: netlink msg with bridge port attributes @@ -187,8 +187,8 @@ EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_dellink); * * Notify master device slaves of bridge port attributes */ -int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) +int ndo_dflt_switchdev_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) { struct net_device *lower_dev; struct list_head *iter; @@ -198,18 +198,18 @@ int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, return ret; netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = netdev_switch_port_bridge_setlink(lower_dev, nlh, flags); + err = switchdev_port_bridge_setlink(lower_dev, nlh, flags); if (err && err != -EOPNOTSUPP) ret = err; } return ret; } -EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_setlink); +EXPORT_SYMBOL_GPL(ndo_dflt_switchdev_port_bridge_setlink); /** - * ndo_dflt_netdev_switch_port_bridge_dellink - default ndo bridge dellink - * op for master devices + * ndo_dflt_switchdev_port_bridge_dellink - default ndo bridge dellink + * op for master devices * * @dev: port device * @nlh: netlink msg with bridge port attributes @@ -217,8 +217,8 @@ EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_setlink); * * Notify master device slaves of bridge port attribute deletes */ -int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) +int ndo_dflt_switchdev_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) { struct net_device *lower_dev; struct list_head *iter; @@ -228,16 +228,16 @@ int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, return ret; netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = netdev_switch_port_bridge_dellink(lower_dev, nlh, flags); + err = switchdev_port_bridge_dellink(lower_dev, nlh, flags); if (err && err != -EOPNOTSUPP) ret = err; } return ret; } -EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_dellink); +EXPORT_SYMBOL_GPL(ndo_dflt_switchdev_port_bridge_dellink); -static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev) +static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) { const struct swdev_ops *ops = dev->swdev_ops; struct net_device *lower_dev; @@ -253,7 +253,7 @@ static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev) return dev; netdev_for_each_lower_dev(dev, lower_dev, iter) { - port_dev = netdev_switch_get_lowest_dev(lower_dev); + port_dev = switchdev_get_lowest_dev(lower_dev); if (port_dev) return port_dev; } @@ -261,7 +261,7 @@ static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev) return NULL; } -static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi) +static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) { struct netdev_phys_item_id psid; struct netdev_phys_item_id prev_psid; @@ -276,11 +276,11 @@ static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi) if (!nh->nh_dev) return NULL; - dev = netdev_switch_get_lowest_dev(nh->nh_dev); + dev = switchdev_get_lowest_dev(nh->nh_dev); if (!dev) return NULL; - if (netdev_switch_parent_id_get(dev, &psid)) + if (switchdev_parent_id_get(dev, &psid)) return NULL; if (nhsel > 0) { @@ -297,7 +297,7 @@ static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi) } /** - * netdev_switch_fib_ipv4_add - Add IPv4 route entry to switch + * switchdev_fib_ipv4_add - Add IPv4 route entry to switch * * @dst: route's IPv4 destination address * @dst_len: destination address length (prefix length) @@ -309,8 +309,8 @@ static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi) * * Add IPv4 route entry to switch device. */ -int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, u32 tb_id) +int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 nlflags, u32 tb_id) { struct net_device *dev; const struct swdev_ops *ops; @@ -328,7 +328,7 @@ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, if (fi->fib_net->ipv4.fib_offload_disabled) return 0; - dev = netdev_switch_get_dev_by_nhs(fi); + dev = switchdev_get_dev_by_nhs(fi); if (!dev) return 0; ops = dev->swdev_ops; @@ -343,10 +343,10 @@ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, return err; } -EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_add); +EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add); /** - * netdev_switch_fib_ipv4_del - Delete IPv4 route entry from switch + * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch * * @dst: route's IPv4 destination address * @dst_len: destination address length (prefix length) @@ -357,8 +357,8 @@ EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_add); * * Delete IPv4 route entry from switch device. */ -int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) +int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id) { struct net_device *dev; const struct swdev_ops *ops; @@ -367,7 +367,7 @@ int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, if (!(fi->fib_flags & RTNH_F_EXTERNAL)) return 0; - dev = netdev_switch_get_dev_by_nhs(fi); + dev = switchdev_get_dev_by_nhs(fi); if (!dev) return 0; ops = dev->swdev_ops; @@ -381,14 +381,14 @@ int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, return err; } -EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_del); +EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del); /** - * netdev_switch_fib_ipv4_abort - Abort an IPv4 FIB operation + * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation * * @fi: route FIB info structure */ -void netdev_switch_fib_ipv4_abort(struct fib_info *fi) +void switchdev_fib_ipv4_abort(struct fib_info *fi) { /* There was a problem installing this route to the offload * device. For now, until we come up with more refined @@ -401,4 +401,4 @@ void netdev_switch_fib_ipv4_abort(struct fib_info *fi) fib_flush_external(fi->fib_net); fi->fib_net->ipv4.fib_offload_disabled = true; } -EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_abort); +EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); -- cgit v1.2.3 From 9d47c0a2d958e06322c88245749278633d333cca Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 10 May 2015 09:47:47 -0700 Subject: switchdev: s/swdev_/switchdev_/ Turned out that "switchdev" sticks. So just unify all related terms to use this prefix. Signed-off-by: Jiri Pirko Signed-off-by: Scott Feldman Acked-by: Roopa Prabhu Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.c | 37 +++++++++++++++++---------------- include/linux/netdevice.h | 2 +- include/net/switchdev.h | 30 +++++++++++++-------------- net/dsa/slave.c | 8 ++++---- net/switchdev/switchdev.c | 40 ++++++++++++++++++------------------ 5 files changed, 59 insertions(+), 58 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 418f62b19545..7473874daf1f 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4221,8 +4221,8 @@ static const struct net_device_ops rocker_port_netdev_ops = { * swdev interface ********************/ -static int rocker_port_swdev_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid) +static int rocker_port_switchdev_parent_id_get(struct net_device *dev, + struct netdev_phys_item_id *psid) { struct rocker_port *rocker_port = netdev_priv(dev); struct rocker *rocker = rocker_port->rocker; @@ -4232,18 +4232,19 @@ static int rocker_port_swdev_parent_id_get(struct net_device *dev, return 0; } -static int rocker_port_swdev_port_stp_update(struct net_device *dev, u8 state) +static int rocker_port_switchdev_port_stp_update(struct net_device *dev, + u8 state) { struct rocker_port *rocker_port = netdev_priv(dev); return rocker_port_stp_update(rocker_port, state); } -static int rocker_port_swdev_fib_ipv4_add(struct net_device *dev, - __be32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 nlflags, u32 tb_id) +static int rocker_port_switchdev_fib_ipv4_add(struct net_device *dev, + __be32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, + u32 nlflags, u32 tb_id) { struct rocker_port *rocker_port = netdev_priv(dev); int flags = 0; @@ -4252,10 +4253,10 @@ static int rocker_port_swdev_fib_ipv4_add(struct net_device *dev, fi, tb_id, flags); } -static int rocker_port_swdev_fib_ipv4_del(struct net_device *dev, - __be32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) +static int rocker_port_switchdev_fib_ipv4_del(struct net_device *dev, + __be32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, u32 tb_id) { struct rocker_port *rocker_port = netdev_priv(dev); int flags = ROCKER_OP_FLAG_REMOVE; @@ -4264,11 +4265,11 @@ static int rocker_port_swdev_fib_ipv4_del(struct net_device *dev, fi, tb_id, flags); } -static const struct swdev_ops rocker_port_swdev_ops = { - .swdev_parent_id_get = rocker_port_swdev_parent_id_get, - .swdev_port_stp_update = rocker_port_swdev_port_stp_update, - .swdev_fib_ipv4_add = rocker_port_swdev_fib_ipv4_add, - .swdev_fib_ipv4_del = rocker_port_swdev_fib_ipv4_del, +static const struct switchdev_ops rocker_port_switchdev_ops = { + .switchdev_parent_id_get = rocker_port_switchdev_parent_id_get, + .switchdev_port_stp_update = rocker_port_switchdev_port_stp_update, + .switchdev_fib_ipv4_add = rocker_port_switchdev_fib_ipv4_add, + .switchdev_fib_ipv4_del = rocker_port_switchdev_fib_ipv4_del, }; /******************** @@ -4623,7 +4624,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) rocker_port_dev_addr_init(rocker, rocker_port); dev->netdev_ops = &rocker_port_netdev_ops; dev->ethtool_ops = &rocker_port_ethtool_ops; - dev->swdev_ops = &rocker_port_swdev_ops; + dev->switchdev_ops = &rocker_port_switchdev_ops; netif_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx, NAPI_POLL_WEIGHT); netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a6d706b2a947..2b39235b9f13 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1567,7 +1567,7 @@ struct net_device { const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; #ifdef CONFIG_NET_SWITCHDEV - const struct swdev_ops *swdev_ops; + const struct switchdev_ops *switchdev_ops; #endif const struct header_ops *header_ops; diff --git a/include/net/switchdev.h b/include/net/switchdev.h index cd921fa0d63a..97b556daeef7 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -19,28 +19,28 @@ struct fib_info; /** * struct switchdev_ops - switchdev operations * - * @swdev_parent_id_get: Called to get an ID of the switch chip this port + * @switchdev_parent_id_get: Called to get an ID of the switch chip this port * is part of. If driver implements this, it indicates that it * represents a port of a switch chip. * - * @swdev_port_stp_update: Called to notify switch device port of bridge + * @switchdev_port_stp_update: Called to notify switch device port of bridge * port STP state change. * - * @swdev_fib_ipv4_add: Called to add/modify IPv4 route to switch device. + * @switchdev_fib_ipv4_add: Called to add/modify IPv4 route to switch device. * - * @swdev_fib_ipv4_del: Called to delete IPv4 route from switch device. + * @switchdev_fib_ipv4_del: Called to delete IPv4 route from switch device. */ -struct swdev_ops { - int (*swdev_parent_id_get)(struct net_device *dev, - struct netdev_phys_item_id *psid); - int (*swdev_port_stp_update)(struct net_device *dev, u8 state); - int (*swdev_fib_ipv4_add)(struct net_device *dev, __be32 dst, - int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, - u32 tb_id); - int (*swdev_fib_ipv4_del)(struct net_device *dev, __be32 dst, - int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id); +struct switchdev_ops { + int (*switchdev_parent_id_get)(struct net_device *dev, + struct netdev_phys_item_id *psid); + int (*switchdev_port_stp_update)(struct net_device *dev, u8 state); + int (*switchdev_fib_ipv4_add)(struct net_device *dev, __be32 dst, + int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 nlflags, + u32 tb_id); + int (*switchdev_fib_ipv4_del)(struct net_device *dev, __be32 dst, + int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id); }; enum switchdev_notifier_type { diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 03e041addea3..1546acf6ebd3 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -675,9 +675,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_get_iflink = dsa_slave_get_iflink, }; -static const struct swdev_ops dsa_slave_swdev_ops = { - .swdev_parent_id_get = dsa_slave_parent_id_get, - .swdev_port_stp_update = dsa_slave_stp_update, +static const struct switchdev_ops dsa_slave_switchdev_ops = { + .switchdev_parent_id_get = dsa_slave_parent_id_get, + .switchdev_port_stp_update = dsa_slave_stp_update, }; static void dsa_slave_adjust_link(struct net_device *dev) @@ -866,7 +866,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, eth_hw_addr_inherit(slave_dev, master); slave_dev->tx_queue_len = 0; slave_dev->netdev_ops = &dsa_slave_netdev_ops; - slave_dev->swdev_ops = &dsa_slave_swdev_ops; + slave_dev->switchdev_ops = &dsa_slave_switchdev_ops; netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one, NULL); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 52613ed49a8c..b7f44a23def5 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -28,11 +28,11 @@ int switchdev_parent_id_get(struct net_device *dev, struct netdev_phys_item_id *psid) { - const struct swdev_ops *ops = dev->swdev_ops; + const struct switchdev_ops *ops = dev->switchdev_ops; - if (!ops || !ops->swdev_parent_id_get) + if (!ops || !ops->switchdev_parent_id_get) return -EOPNOTSUPP; - return ops->swdev_parent_id_get(dev, psid); + return ops->switchdev_parent_id_get(dev, psid); } EXPORT_SYMBOL_GPL(switchdev_parent_id_get); @@ -46,13 +46,13 @@ EXPORT_SYMBOL_GPL(switchdev_parent_id_get); */ int switchdev_port_stp_update(struct net_device *dev, u8 state) { - const struct swdev_ops *ops = dev->swdev_ops; + const struct switchdev_ops *ops = dev->switchdev_ops; struct net_device *lower_dev; struct list_head *iter; int err = -EOPNOTSUPP; - if (ops && ops->swdev_port_stp_update) - return ops->swdev_port_stp_update(dev, state); + if (ops && ops->switchdev_port_stp_update) + return ops->switchdev_port_stp_update(dev, state); netdev_for_each_lower_dev(dev, lower_dev, iter) { err = switchdev_port_stp_update(lower_dev, state); @@ -239,17 +239,17 @@ EXPORT_SYMBOL_GPL(ndo_dflt_switchdev_port_bridge_dellink); static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) { - const struct swdev_ops *ops = dev->swdev_ops; + const struct switchdev_ops *ops = dev->switchdev_ops; struct net_device *lower_dev; struct net_device *port_dev; struct list_head *iter; /* Recusively search down until we find a sw port dev. - * (A sw port dev supports swdev_parent_id_get). + * (A sw port dev supports switchdev_parent_id_get). */ if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD && - ops && ops->swdev_parent_id_get) + ops && ops->switchdev_parent_id_get) return dev; netdev_for_each_lower_dev(dev, lower_dev, iter) { @@ -313,7 +313,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 nlflags, u32 tb_id) { struct net_device *dev; - const struct swdev_ops *ops; + const struct switchdev_ops *ops; int err = 0; /* Don't offload route if using custom ip rules or if @@ -331,12 +331,12 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, dev = switchdev_get_dev_by_nhs(fi); if (!dev) return 0; - ops = dev->swdev_ops; + ops = dev->switchdev_ops; - if (ops->swdev_fib_ipv4_add) { - err = ops->swdev_fib_ipv4_add(dev, htonl(dst), dst_len, - fi, tos, type, nlflags, - tb_id); + if (ops->switchdev_fib_ipv4_add) { + err = ops->switchdev_fib_ipv4_add(dev, htonl(dst), dst_len, + fi, tos, type, nlflags, + tb_id); if (!err) fi->fib_flags |= RTNH_F_EXTERNAL; } @@ -361,7 +361,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 tb_id) { struct net_device *dev; - const struct swdev_ops *ops; + const struct switchdev_ops *ops; int err = 0; if (!(fi->fib_flags & RTNH_F_EXTERNAL)) @@ -370,11 +370,11 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, dev = switchdev_get_dev_by_nhs(fi); if (!dev) return 0; - ops = dev->swdev_ops; + ops = dev->switchdev_ops; - if (ops->swdev_fib_ipv4_del) { - err = ops->swdev_fib_ipv4_del(dev, htonl(dst), dst_len, - fi, tos, type, tb_id); + if (ops->switchdev_fib_ipv4_del) { + err = ops->switchdev_fib_ipv4_del(dev, htonl(dst), dst_len, + fi, tos, type, tb_id); if (!err) fi->fib_flags &= ~RTNH_F_EXTERNAL; } -- cgit v1.2.3 From 3094333d9089d43e8b8f0418676fa6ae06c27b51 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:47:48 -0700 Subject: switchdev: introduce get/set attrs ops Add two new swdev ops for get/set switch port attributes. Most swdev interactions on a port are gets or sets on port attributes, so rather than adding ops for each attribute, let's define clean get/set ops for all attributes, and then we can have clear, consistent rules on how attributes propagate on stacked devs. Add the basic algorithms for get/set attr ops. Use the same recusive algo to walk lower devs we've used for STP updates, for example. For get, compare attr value for each lower dev and only return success if attr values match across all lower devs. For sets, set the same attr value for all lower devs. We'll use a two-phase prepare-commit transaction model for sets. In the first phase, the driver(s) are asked if attr set is OK. If all OK, the commit attr set in second phase. A driver would NACK the prepare phase if it can't set the attr due to lack of resources or support, within it's control. RTNL lock must be held across both phases because we'll recurse all lower devs first in prepare phase, and then recurse all lower devs again in commit phase. If any lower dev fails the prepare phase, we need to abort the transaction for all lower devs. If lower dev recusion isn't desired, allow a flag SWITCHDEV_F_NO_RECURSE to indicate get/set only work on port (lowest) device. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 43 ++++++++++++ net/switchdev/switchdev.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 212 insertions(+) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 97b556daeef7..282043844bcf 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -14,6 +14,25 @@ #include #include +#define SWITCHDEV_F_NO_RECURSE BIT(0) + +enum switchdev_trans { + SWITCHDEV_TRANS_NONE, + SWITCHDEV_TRANS_PREPARE, + SWITCHDEV_TRANS_ABORT, + SWITCHDEV_TRANS_COMMIT, +}; + +enum switchdev_attr_id { + SWITCHDEV_ATTR_UNDEFINED, +}; + +struct switchdev_attr { + enum switchdev_attr_id id; + enum switchdev_trans trans; + u32 flags; +}; + struct fib_info; /** @@ -23,6 +42,10 @@ struct fib_info; * is part of. If driver implements this, it indicates that it * represents a port of a switch chip. * + * @switchdev_port_attr_get: Get a port attribute (see switchdev_attr). + * + * @switchdev_port_attr_set: Set a port attribute (see switchdev_attr). + * * @switchdev_port_stp_update: Called to notify switch device port of bridge * port STP state change. * @@ -33,6 +56,10 @@ struct fib_info; struct switchdev_ops { int (*switchdev_parent_id_get)(struct net_device *dev, struct netdev_phys_item_id *psid); + int (*switchdev_port_attr_get)(struct net_device *dev, + struct switchdev_attr *attr); + int (*switchdev_port_attr_set)(struct net_device *dev, + struct switchdev_attr *attr); int (*switchdev_port_stp_update)(struct net_device *dev, u8 state); int (*switchdev_fib_ipv4_add)(struct net_device *dev, __be32 dst, int dst_len, struct fib_info *fi, @@ -68,6 +95,10 @@ switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info) int switchdev_parent_id_get(struct net_device *dev, struct netdev_phys_item_id *psid); +int switchdev_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr); +int switchdev_port_attr_set(struct net_device *dev, + struct switchdev_attr *attr); int switchdev_port_stp_update(struct net_device *dev, u8 state); int register_switchdev_notifier(struct notifier_block *nb); int unregister_switchdev_notifier(struct notifier_block *nb); @@ -95,6 +126,18 @@ static inline int switchdev_parent_id_get(struct net_device *dev, return -EOPNOTSUPP; } +static inline int switchdev_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) +{ + return -EOPNOTSUPP; +} + +static inline int switchdev_port_attr_set(struct net_device *dev, + struct switchdev_attr *attr) +{ + return -EOPNOTSUPP; +} + static inline int switchdev_port_stp_update(struct net_device *dev, u8 state) { diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index b7f44a23def5..8f47187dc185 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -36,6 +36,175 @@ int switchdev_parent_id_get(struct net_device *dev, } EXPORT_SYMBOL_GPL(switchdev_parent_id_get); +/** + * switchdev_port_attr_get - Get port attribute + * + * @dev: port device + * @attr: attribute to get + */ +int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) +{ + const struct switchdev_ops *ops = dev->switchdev_ops; + struct net_device *lower_dev; + struct list_head *iter; + struct switchdev_attr first = { + .id = SWITCHDEV_ATTR_UNDEFINED + }; + int err = -EOPNOTSUPP; + + if (ops && ops->switchdev_port_attr_get) + return ops->switchdev_port_attr_get(dev, attr); + + if (attr->flags & SWITCHDEV_F_NO_RECURSE) + return err; + + /* Switch device port(s) may be stacked under + * bond/team/vlan dev, so recurse down to get attr on + * each port. Return -ENODATA if attr values don't + * compare across ports. + */ + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = switchdev_port_attr_get(lower_dev, attr); + if (err) + break; + if (first.id == SWITCHDEV_ATTR_UNDEFINED) + first = *attr; + else if (memcmp(&first, attr, sizeof(*attr))) + return -ENODATA; + } + + return err; +} +EXPORT_SYMBOL_GPL(switchdev_port_attr_get); + +static int __switchdev_port_attr_set(struct net_device *dev, + struct switchdev_attr *attr) +{ + const struct switchdev_ops *ops = dev->switchdev_ops; + struct net_device *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (ops && ops->switchdev_port_attr_set) + return ops->switchdev_port_attr_set(dev, attr); + + if (attr->flags & SWITCHDEV_F_NO_RECURSE) + return err; + + /* Switch device port(s) may be stacked under + * bond/team/vlan dev, so recurse down to set attr on + * each port. + */ + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = __switchdev_port_attr_set(lower_dev, attr); + if (err) + break; + } + + return err; +} + +struct switchdev_attr_set_work { + struct work_struct work; + struct net_device *dev; + struct switchdev_attr attr; +}; + +static void switchdev_port_attr_set_work(struct work_struct *work) +{ + struct switchdev_attr_set_work *asw = + container_of(work, struct switchdev_attr_set_work, work); + int err; + + rtnl_lock(); + err = switchdev_port_attr_set(asw->dev, &asw->attr); + BUG_ON(err); + rtnl_unlock(); + + dev_put(asw->dev); + kfree(work); +} + +static int switchdev_port_attr_set_defer(struct net_device *dev, + struct switchdev_attr *attr) +{ + struct switchdev_attr_set_work *asw; + + asw = kmalloc(sizeof(*asw), GFP_ATOMIC); + if (!asw) + return -ENOMEM; + + INIT_WORK(&asw->work, switchdev_port_attr_set_work); + + dev_hold(dev); + asw->dev = dev; + memcpy(&asw->attr, attr, sizeof(asw->attr)); + + schedule_work(&asw->work); + + return 0; +} + +/** + * switchdev_port_attr_set - Set port attribute + * + * @dev: port device + * @attr: attribute to set + * + * Use a 2-phase prepare-commit transaction model to ensure + * system is not left in a partially updated state due to + * failure from driver/device. + */ +int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) +{ + int err; + + if (!rtnl_is_locked()) { + /* Running prepare-commit transaction across stacked + * devices requires nothing moves, so if rtnl_lock is + * not held, schedule a worker thread to hold rtnl_lock + * while setting attr. + */ + + return switchdev_port_attr_set_defer(dev, attr); + } + + /* Phase I: prepare for attr set. Driver/device should fail + * here if there are going to be issues in the commit phase, + * such as lack of resources or support. The driver/device + * should reserve resources needed for the commit phase here, + * but should not commit the attr. + */ + + attr->trans = SWITCHDEV_TRANS_PREPARE; + err = __switchdev_port_attr_set(dev, attr); + if (err) { + /* Prepare phase failed: abort the transaction. Any + * resources reserved in the prepare phase are + * released. + */ + + attr->trans = SWITCHDEV_TRANS_ABORT; + __switchdev_port_attr_set(dev, attr); + + return err; + } + + /* Phase II: commit attr set. This cannot fail as a fault + * of driver/device. If it does, it's a bug in the driver/device + * because the driver said everythings was OK in phase I. + */ + + attr->trans = SWITCHDEV_TRANS_COMMIT; + err = __switchdev_port_attr_set(dev, attr); + BUG_ON(err); + + return err; +} +EXPORT_SYMBOL_GPL(switchdev_port_attr_set); + /** * switchdev_port_stp_update - Notify switch device port of STP * state change -- cgit v1.2.3 From f8e20a9f87d33865cc1d67f13da0db8d457fc3c9 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:47:49 -0700 Subject: switchdev: convert parent_id_get to switchdev attr get Switch ID is just a gettable port attribute. Convert switchdev op switchdev_parent_id_get to a switchdev attr. Note: for sysfs and netlink interfaces, SWITCHDEV_ATTR_PORT_PARENT_ID is called with SWITCHDEV_F_NO_RECUSE to limit switch ID user-visiblity to only port netdevs. So when a port is stacked under bond/bridge, the user can only query switch id via the switch ports, but not via the upper devices Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.c | 17 +++++++++++----- include/net/switchdev.h | 18 ++++------------- net/core/net-sysfs.c | 10 +++++++--- net/core/rtnetlink.c | 9 ++++++--- net/dsa/slave.c | 16 ++++++++++----- net/switchdev/switchdev.c | 38 +++++++++++------------------------- 6 files changed, 51 insertions(+), 57 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 7473874daf1f..24282991d9fd 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4221,14 +4221,21 @@ static const struct net_device_ops rocker_port_netdev_ops = { * swdev interface ********************/ -static int rocker_port_switchdev_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid) +static int rocker_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) { struct rocker_port *rocker_port = netdev_priv(dev); struct rocker *rocker = rocker_port->rocker; - psid->id_len = sizeof(rocker->hw.id); - memcpy(&psid->id, &rocker->hw.id, psid->id_len); + switch (attr->id) { + case SWITCHDEV_ATTR_PORT_PARENT_ID: + attr->ppid.id_len = sizeof(rocker->hw.id); + memcpy(&attr->ppid.id, &rocker->hw.id, attr->ppid.id_len); + break; + default: + return -EOPNOTSUPP; + } + return 0; } @@ -4266,7 +4273,7 @@ static int rocker_port_switchdev_fib_ipv4_del(struct net_device *dev, } static const struct switchdev_ops rocker_port_switchdev_ops = { - .switchdev_parent_id_get = rocker_port_switchdev_parent_id_get, + .switchdev_port_attr_get = rocker_port_attr_get, .switchdev_port_stp_update = rocker_port_switchdev_port_stp_update, .switchdev_fib_ipv4_add = rocker_port_switchdev_fib_ipv4_add, .switchdev_fib_ipv4_del = rocker_port_switchdev_fib_ipv4_del, diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 282043844bcf..93316e7ab372 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -25,12 +25,16 @@ enum switchdev_trans { enum switchdev_attr_id { SWITCHDEV_ATTR_UNDEFINED, + SWITCHDEV_ATTR_PORT_PARENT_ID, }; struct switchdev_attr { enum switchdev_attr_id id; enum switchdev_trans trans; u32 flags; + union { + struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ + }; }; struct fib_info; @@ -38,10 +42,6 @@ struct fib_info; /** * struct switchdev_ops - switchdev operations * - * @switchdev_parent_id_get: Called to get an ID of the switch chip this port - * is part of. If driver implements this, it indicates that it - * represents a port of a switch chip. - * * @switchdev_port_attr_get: Get a port attribute (see switchdev_attr). * * @switchdev_port_attr_set: Set a port attribute (see switchdev_attr). @@ -54,8 +54,6 @@ struct fib_info; * @switchdev_fib_ipv4_del: Called to delete IPv4 route from switch device. */ struct switchdev_ops { - int (*switchdev_parent_id_get)(struct net_device *dev, - struct netdev_phys_item_id *psid); int (*switchdev_port_attr_get)(struct net_device *dev, struct switchdev_attr *attr); int (*switchdev_port_attr_set)(struct net_device *dev, @@ -93,8 +91,6 @@ switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info) #ifdef CONFIG_NET_SWITCHDEV -int switchdev_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid); int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr); int switchdev_port_attr_set(struct net_device *dev, @@ -120,12 +116,6 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi); #else -static inline int switchdev_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid) -{ - return -EOPNOTSUPP; -} - static inline int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index be86a7ce9282..5a9ce96f6d27 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -458,11 +458,15 @@ static ssize_t phys_switch_id_show(struct device *dev, return restart_syscall(); if (dev_isalive(netdev)) { - struct netdev_phys_item_id ppid; + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .flags = SWITCHDEV_F_NO_RECURSE, + }; - ret = switchdev_parent_id_get(netdev, &ppid); + ret = switchdev_port_attr_get(netdev, &attr); if (!ret) - ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id); + ret = sprintf(buf, "%*phN\n", attr.ppid.id_len, + attr.ppid.id); } rtnl_unlock(); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fcd41fcd7e70..c6c6b2c34926 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1004,16 +1004,19 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { int err; - struct netdev_phys_item_id psid; + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .flags = SWITCHDEV_F_NO_RECURSE, + }; - err = switchdev_parent_id_get(dev, &psid); + err = switchdev_port_attr_get(dev, &attr); if (err) { if (err == -EOPNOTSUPP) return 0; return err; } - if (nla_put(skb, IFLA_PHYS_SWITCH_ID, psid.id_len, psid.id)) + if (nla_put(skb, IFLA_PHYS_SWITCH_ID, attr.ppid.id_len, attr.ppid.id)) return -EMSGSIZE; return 0; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 1546acf6ebd3..de705b674ac9 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -382,14 +382,20 @@ static int dsa_slave_bridge_port_leave(struct net_device *dev) return ret; } -static int dsa_slave_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid) +static int dsa_slave_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - psid->id_len = sizeof(ds->index); - memcpy(&psid->id, &ds->index, psid->id_len); + switch (attr->id) { + case SWITCHDEV_ATTR_PORT_PARENT_ID: + attr->ppid.id_len = sizeof(ds->index); + memcpy(&attr->ppid.id, &ds->index, attr->ppid.id_len); + break; + default: + return -EOPNOTSUPP; + } return 0; } @@ -676,7 +682,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = { }; static const struct switchdev_ops dsa_slave_switchdev_ops = { - .switchdev_parent_id_get = dsa_slave_parent_id_get, + .switchdev_port_attr_get = dsa_slave_port_attr_get, .switchdev_port_stp_update = dsa_slave_stp_update, }; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 8f47187dc185..117fd0797abd 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -18,24 +18,6 @@ #include #include -/** - * switchdev_parent_id_get - Get ID of a switch - * @dev: port device - * @psid: switch ID - * - * Get ID of a switch this port is part of. - */ -int switchdev_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid) -{ - const struct switchdev_ops *ops = dev->switchdev_ops; - - if (!ops || !ops->switchdev_parent_id_get) - return -EOPNOTSUPP; - return ops->switchdev_parent_id_get(dev, psid); -} -EXPORT_SYMBOL_GPL(switchdev_parent_id_get); - /** * switchdev_port_attr_get - Get port attribute * @@ -414,11 +396,10 @@ static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) struct list_head *iter; /* Recusively search down until we find a sw port dev. - * (A sw port dev supports switchdev_parent_id_get). + * (A sw port dev supports switchdev_port_attr_get). */ - if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD && - ops && ops->switchdev_parent_id_get) + if (ops && ops->switchdev_port_attr_get) return dev; netdev_for_each_lower_dev(dev, lower_dev, iter) { @@ -432,8 +413,10 @@ static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) { - struct netdev_phys_item_id psid; - struct netdev_phys_item_id prev_psid; + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + }; + struct switchdev_attr prev_attr; struct net_device *dev = NULL; int nhsel; @@ -449,17 +432,18 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) if (!dev) return NULL; - if (switchdev_parent_id_get(dev, &psid)) + if (switchdev_port_attr_get(dev, &attr)) return NULL; if (nhsel > 0) { - if (prev_psid.id_len != psid.id_len) + if (prev_attr.ppid.id_len != attr.ppid.id_len) return NULL; - if (memcmp(prev_psid.id, psid.id, psid.id_len)) + if (memcmp(prev_attr.ppid.id, attr.ppid.id, + attr.ppid.id_len)) return NULL; } - prev_psid = psid; + prev_attr = attr; } return dev; -- cgit v1.2.3 From 3563606258cf3b8f02eabddb1cb45a94c44d9611 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:47:51 -0700 Subject: switchdev: convert STP update to switchdev attr set STP update is just a settable port attribute, so convert switchdev_port_stp_update to an attr set. For DSA, the prepare phase is skipped and STP updates are only done in the commit phase. This is because currently the DSA drivers don't need to allocate any memory for STP updates and the STP update will not fail to HW (unless something horrible goes wrong on the MDIO bus, in which case the prepare phase wouldn't have been able to predict anyway). Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.c | 14 +++++--------- include/net/switchdev.h | 13 ++----------- net/bridge/br_stp.c | 6 +++++- net/dsa/slave.c | 20 +++++++++++++++++++- net/switchdev/switchdev.c | 28 ---------------------------- 5 files changed, 31 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index fa0fa545c0d1..36827d39a70a 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4395,14 +4395,6 @@ static int rocker_port_attr_get(struct net_device *dev, return 0; } -static int rocker_port_switchdev_port_stp_update(struct net_device *dev, - u8 state) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - - return rocker_port_stp_update(rocker_port, SWITCHDEV_TRANS_NONE, state); -} - static void rocker_port_trans_abort(struct rocker_port *rocker_port) { struct list_head *mem, *tmp; @@ -4431,6 +4423,10 @@ static int rocker_port_attr_set(struct net_device *dev, } switch (attr->id) { + case SWITCHDEV_ATTR_PORT_STP_STATE: + err = rocker_port_stp_update(rocker_port, attr->trans, + attr->stp_state); + break; default: err = -EOPNOTSUPP; break; @@ -4466,7 +4462,7 @@ static int rocker_port_switchdev_fib_ipv4_del(struct net_device *dev, static const struct switchdev_ops rocker_port_switchdev_ops = { .switchdev_port_attr_get = rocker_port_attr_get, - .switchdev_port_stp_update = rocker_port_switchdev_port_stp_update, + .switchdev_port_attr_set = rocker_port_attr_set, .switchdev_fib_ipv4_add = rocker_port_switchdev_fib_ipv4_add, .switchdev_fib_ipv4_del = rocker_port_switchdev_fib_ipv4_del, }; diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 93316e7ab372..aec5e49661a2 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -26,6 +26,7 @@ enum switchdev_trans { enum switchdev_attr_id { SWITCHDEV_ATTR_UNDEFINED, SWITCHDEV_ATTR_PORT_PARENT_ID, + SWITCHDEV_ATTR_PORT_STP_STATE, }; struct switchdev_attr { @@ -34,6 +35,7 @@ struct switchdev_attr { u32 flags; union { struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ + u8 stp_state; /* PORT_STP_STATE */ }; }; @@ -46,9 +48,6 @@ struct fib_info; * * @switchdev_port_attr_set: Set a port attribute (see switchdev_attr). * - * @switchdev_port_stp_update: Called to notify switch device port of bridge - * port STP state change. - * * @switchdev_fib_ipv4_add: Called to add/modify IPv4 route to switch device. * * @switchdev_fib_ipv4_del: Called to delete IPv4 route from switch device. @@ -58,7 +57,6 @@ struct switchdev_ops { struct switchdev_attr *attr); int (*switchdev_port_attr_set)(struct net_device *dev, struct switchdev_attr *attr); - int (*switchdev_port_stp_update)(struct net_device *dev, u8 state); int (*switchdev_fib_ipv4_add)(struct net_device *dev, __be32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 nlflags, @@ -95,7 +93,6 @@ int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr); int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr); -int switchdev_port_stp_update(struct net_device *dev, u8 state); int register_switchdev_notifier(struct notifier_block *nb); int unregister_switchdev_notifier(struct notifier_block *nb); int call_switchdev_notifiers(unsigned long val, struct net_device *dev, @@ -128,12 +125,6 @@ static inline int switchdev_port_attr_set(struct net_device *dev, return -EOPNOTSUPP; } -static inline int switchdev_port_stp_update(struct net_device *dev, - u8 state) -{ - return -EOPNOTSUPP; -} - static inline int register_switchdev_notifier(struct notifier_block *nb) { return 0; diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 28e3f4bc01e0..b9300da31565 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -39,10 +39,14 @@ void br_log_state(const struct net_bridge_port *p) void br_set_state(struct net_bridge_port *p, unsigned int state) { + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_PORT_STP_STATE, + .stp_state = state, + }; int err; p->state = state; - err = switchdev_port_stp_update(p->dev, state); + err = switchdev_port_attr_set(p->dev, &attr); if (err && err != -EOPNOTSUPP) br_warn(p->br, "error setting offload STP state on port %u(%s)\n", (unsigned int) p->port_no, p->dev->name); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index de705b674ac9..3fb5210e318c 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -345,6 +345,24 @@ static int dsa_slave_stp_update(struct net_device *dev, u8 state) return ret; } +static int dsa_slave_port_attr_set(struct net_device *dev, + struct switchdev_attr *attr) +{ + int ret = 0; + + switch (attr->id) { + case SWITCHDEV_ATTR_PORT_STP_STATE: + if (attr->trans == SWITCHDEV_TRANS_COMMIT) + ret = dsa_slave_stp_update(dev, attr->stp_state); + break; + default: + ret = -EOPNOTSUPP; + break; + } + + return ret; +} + static int dsa_slave_bridge_port_join(struct net_device *dev, struct net_device *br) { @@ -683,7 +701,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = { static const struct switchdev_ops dsa_slave_switchdev_ops = { .switchdev_port_attr_get = dsa_slave_port_attr_get, - .switchdev_port_stp_update = dsa_slave_stp_update, + .switchdev_port_attr_set = dsa_slave_port_attr_set, }; static void dsa_slave_adjust_link(struct net_device *dev) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 117fd0797abd..a3c359004902 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -187,34 +187,6 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) } EXPORT_SYMBOL_GPL(switchdev_port_attr_set); -/** - * switchdev_port_stp_update - Notify switch device port of STP - * state change - * @dev: port device - * @state: port STP state - * - * Notify switch device port of bridge port STP state change. - */ -int switchdev_port_stp_update(struct net_device *dev, u8 state) -{ - const struct switchdev_ops *ops = dev->switchdev_ops; - struct net_device *lower_dev; - struct list_head *iter; - int err = -EOPNOTSUPP; - - if (ops && ops->switchdev_port_stp_update) - return ops->switchdev_port_stp_update(dev, state); - - netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = switchdev_port_stp_update(lower_dev, state); - if (err && err != -EOPNOTSUPP) - return err; - } - - return err; -} -EXPORT_SYMBOL_GPL(switchdev_port_stp_update); - static DEFINE_MUTEX(switchdev_mutex); static RAW_NOTIFIER_HEAD(switchdev_notif_chain); -- cgit v1.2.3 From 491d0f1533ac750260406dbf84cdad44fd3d8a29 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:47:52 -0700 Subject: switchdev: introduce switchdev add/del obj ops Like switchdev attr get/set, add new switchdev obj add/del. switchdev objs will be things like VLANs or FIB entries, so add/del fits better for objects than get/set used for attributes. Use same two-phase prepare-commit transaction model as in attr set. Signed-off-by: Scott Feldman Acked-by: Sridhar Samudrala Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 31 ++++++++++++++ net/switchdev/switchdev.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index aec5e49661a2..4f43300df72c 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -41,6 +41,15 @@ struct switchdev_attr { struct fib_info; +enum switchdev_obj_id { + SWITCHDEV_OBJ_UNDEFINED, +}; + +struct switchdev_obj { + enum switchdev_obj_id id; + enum switchdev_trans trans; +}; + /** * struct switchdev_ops - switchdev operations * @@ -48,6 +57,10 @@ struct fib_info; * * @switchdev_port_attr_set: Set a port attribute (see switchdev_attr). * + * @switchdev_port_obj_add: Add an object to port (see switchdev_obj). + * + * @switchdev_port_obj_del: Delete an object from port (see switchdev_obj). + * * @switchdev_fib_ipv4_add: Called to add/modify IPv4 route to switch device. * * @switchdev_fib_ipv4_del: Called to delete IPv4 route from switch device. @@ -57,6 +70,10 @@ struct switchdev_ops { struct switchdev_attr *attr); int (*switchdev_port_attr_set)(struct net_device *dev, struct switchdev_attr *attr); + int (*switchdev_port_obj_add)(struct net_device *dev, + struct switchdev_obj *obj); + int (*switchdev_port_obj_del)(struct net_device *dev, + struct switchdev_obj *obj); int (*switchdev_fib_ipv4_add)(struct net_device *dev, __be32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 nlflags, @@ -93,6 +110,8 @@ int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr); int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr); +int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj); +int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj); int register_switchdev_notifier(struct notifier_block *nb); int unregister_switchdev_notifier(struct notifier_block *nb); int call_switchdev_notifiers(unsigned long val, struct net_device *dev, @@ -125,6 +144,18 @@ static inline int switchdev_port_attr_set(struct net_device *dev, return -EOPNOTSUPP; } +static inline int switchdev_port_obj_add(struct net_device *dev, + struct switchdev_obj *obj) +{ + return -EOPNOTSUPP; +} + +static inline int switchdev_port_obj_del(struct net_device *dev, + struct switchdev_obj *obj) +{ + return -EOPNOTSUPP; +} + static inline int register_switchdev_notifier(struct notifier_block *nb) { return 0; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index a3c359004902..3d4d99a70b80 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -187,6 +187,113 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) } EXPORT_SYMBOL_GPL(switchdev_port_attr_set); +int __switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj) +{ + const struct switchdev_ops *ops = dev->switchdev_ops; + struct net_device *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (ops && ops->switchdev_port_obj_add) + return ops->switchdev_port_obj_add(dev, obj); + + /* Switch device port(s) may be stacked under + * bond/team/vlan dev, so recurse down to add object on + * each port. + */ + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = __switchdev_port_obj_add(lower_dev, obj); + if (err) + break; + } + + return err; +} + +/** + * switchdev_port_obj_add - Add port object + * + * @dev: port device + * @obj: object to add + * + * Use a 2-phase prepare-commit transaction model to ensure + * system is not left in a partially updated state due to + * failure from driver/device. + * + * rtnl_lock must be held. + */ +int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj) +{ + int err; + + ASSERT_RTNL(); + + /* Phase I: prepare for obj add. Driver/device should fail + * here if there are going to be issues in the commit phase, + * such as lack of resources or support. The driver/device + * should reserve resources needed for the commit phase here, + * but should not commit the obj. + */ + + obj->trans = SWITCHDEV_TRANS_PREPARE; + err = __switchdev_port_obj_add(dev, obj); + if (err) { + /* Prepare phase failed: abort the transaction. Any + * resources reserved in the prepare phase are + * released. + */ + + obj->trans = SWITCHDEV_TRANS_ABORT; + __switchdev_port_obj_add(dev, obj); + + return err; + } + + /* Phase II: commit obj add. This cannot fail as a fault + * of driver/device. If it does, it's a bug in the driver/device + * because the driver said everythings was OK in phase I. + */ + + obj->trans = SWITCHDEV_TRANS_COMMIT; + err = __switchdev_port_obj_add(dev, obj); + WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id); + + return err; +} +EXPORT_SYMBOL_GPL(switchdev_port_obj_add); + +/** + * switchdev_port_obj_del - Delete port object + * + * @dev: port device + * @obj: object to delete + */ +int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj) +{ + const struct switchdev_ops *ops = dev->switchdev_ops; + struct net_device *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (ops && ops->switchdev_port_obj_del) + return ops->switchdev_port_obj_del(dev, obj); + + /* Switch device port(s) may be stacked under + * bond/team/vlan dev, so recurse down to delete object on + * each port. + */ + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = switchdev_port_obj_del(lower_dev, obj); + if (err) + break; + } + + return err; +} +EXPORT_SYMBOL_GPL(switchdev_port_obj_del); + static DEFINE_MUTEX(switchdev_mutex); static RAW_NOTIFIER_HEAD(switchdev_notif_chain); -- cgit v1.2.3 From 6fc3016da7c1587aa59e71f8c4dbc4cf1343eab2 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:47:53 -0700 Subject: switchdev: add port vlan obj VLAN obj has flags (PVID and untagged) as well as start and end vid ranges. The switchdev driver can optimize programing the device using the ranges. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 4f43300df72c..e598c2d45786 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -43,11 +43,19 @@ struct fib_info; enum switchdev_obj_id { SWITCHDEV_OBJ_UNDEFINED, + SWITCHDEV_OBJ_PORT_VLAN, }; struct switchdev_obj { enum switchdev_obj_id id; enum switchdev_trans trans; + union { + struct switchdev_obj_vlan { /* PORT_VLAN */ + u16 flags; + u16 vid_start; + u16 vid_end; + } vlan; + }; }; /** -- cgit v1.2.3 From 6004c86718998aee1337efd3b087d6e17284632d Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:47:55 -0700 Subject: switchdev: add bridge port flags attr rocker: use switchdev get/set attr for bridge port flags Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.c | 25 +++++++++++++++++++++++++ include/net/switchdev.h | 2 ++ 2 files changed, 27 insertions(+) (limited to 'include') diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 84084806231a..ceb6a6436144 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4394,6 +4394,9 @@ static int rocker_port_attr_get(struct net_device *dev, attr->ppid.id_len = sizeof(rocker->hw.id); memcpy(&attr->ppid.id, &rocker->hw.id, attr->ppid.id_len); break; + case SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS: + attr->brport_flags = rocker_port->brport_flags; + break; default: return -EOPNOTSUPP; } @@ -4411,6 +4414,24 @@ static void rocker_port_trans_abort(struct rocker_port *rocker_port) } } +static int rocker_port_brport_flags_set(struct rocker_port *rocker_port, + enum switchdev_trans trans, + unsigned long brport_flags) +{ + unsigned long orig_flags; + int err = 0; + + orig_flags = rocker_port->brport_flags; + rocker_port->brport_flags = brport_flags; + if ((orig_flags ^ rocker_port->brport_flags) & BR_LEARNING) + err = rocker_port_set_learning(rocker_port, trans); + + if (trans == SWITCHDEV_TRANS_PREPARE) + rocker_port->brport_flags = orig_flags; + + return err; +} + static int rocker_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) { @@ -4433,6 +4454,10 @@ static int rocker_port_attr_set(struct net_device *dev, err = rocker_port_stp_update(rocker_port, attr->trans, attr->stp_state); break; + case SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS: + err = rocker_port_brport_flags_set(rocker_port, attr->trans, + attr->brport_flags); + break; default: err = -EOPNOTSUPP; break; diff --git a/include/net/switchdev.h b/include/net/switchdev.h index e598c2d45786..6cf6de1a90b4 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -27,6 +27,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_UNDEFINED, SWITCHDEV_ATTR_PORT_PARENT_ID, SWITCHDEV_ATTR_PORT_STP_STATE, + SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS, }; struct switchdev_attr { @@ -36,6 +37,7 @@ struct switchdev_attr { union { struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ u8 stp_state; /* PORT_STP_STATE */ + unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ }; }; -- cgit v1.2.3 From e71f220b342d78cfb8ee9f1b60f1351f7183f2a5 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:47:58 -0700 Subject: switchdev: remove old switchdev_port_bridge_setlink New attr-based bridge_setlink can recurse lower devs and recover on err, so remove old wrapper (including ndo_dflt_switchdev_port_bridge_setlink). Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 9 --------- net/switchdev/switchdev.c | 30 ------------------------------ 2 files changed, 39 deletions(-) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 6cf6de1a90b4..ce5ceb2dc677 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -132,8 +132,6 @@ int switchdev_port_bridge_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); int ndo_dflt_switchdev_port_bridge_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); -int ndo_dflt_switchdev_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 nlflags, u32 tb_id); int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, @@ -204,13 +202,6 @@ static inline int ndo_dflt_switchdev_port_bridge_dellink(struct net_device *dev, return 0; } -static inline int ndo_dflt_switchdev_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) -{ - return 0; -} - static inline int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index b01791a9b56d..dcdec9de9137 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -536,36 +536,6 @@ int switchdev_port_bridge_dellink(struct net_device *dev, } EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink); -/** - * ndo_dflt_switchdev_port_bridge_setlink - default ndo bridge setlink - * op for master devices - * - * @dev: port device - * @nlh: netlink msg with bridge port attributes - * @flags: bridge setlink flags - * - * Notify master device slaves of bridge port attributes - */ -int ndo_dflt_switchdev_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) -{ - struct net_device *lower_dev; - struct list_head *iter; - int ret = 0, err = 0; - - if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) - return ret; - - netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = switchdev_port_bridge_setlink(lower_dev, nlh, flags); - if (err && err != -EOPNOTSUPP) - ret = err; - } - - return ret; -} -EXPORT_SYMBOL_GPL(ndo_dflt_switchdev_port_bridge_setlink); - /** * ndo_dflt_switchdev_port_bridge_dellink - default ndo bridge dellink * op for master devices -- cgit v1.2.3 From 5c34e0221423aeabc0b085adc5fccda3f91e2c49 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:48:00 -0700 Subject: switchdev: add new switchdev_port_bridge_dellink Same change as setlink. Provide the wrapper op for SELF ndo_bridge_dellink and call into the switchdev driver to delete afspec VLANs. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 6 ++++++ net/switchdev/switchdev.c | 24 ++++++++++++------------ 2 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index ce5ceb2dc677..8ffadca9e760 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -164,6 +164,12 @@ static inline int switchdev_port_obj_del(struct net_device *dev, return -EOPNOTSUPP; } +static inline int switchdev_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + return -EOPNOTSUPP; +} + static inline int register_switchdev_notifier(struct notifier_block *nb) { return 0; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index dcdec9de9137..8ce678e397b4 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -512,27 +512,27 @@ int switchdev_port_bridge_setlink(struct net_device *dev, EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink); /** - * switchdev_port_bridge_dellink - Notify switch device port of bridge - * port attribute delete + * switchdev_port_bridge_dellink - Set bridge port attributes * * @dev: port device - * @nlh: netlink msg with bridge port attributes - * @flags: bridge setlink flags + * @nlh: netlink header + * @flags: netlink flags * - * Notify switch device port of bridge port attribute delete + * Called for SELF on rtnl_bridge_dellink to set bridge port + * attributes. */ int switchdev_port_bridge_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { - const struct net_device_ops *ops = dev->netdev_ops; - - if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) - return 0; + struct nlattr *afspec; - if (!ops->ndo_bridge_dellink) - return -EOPNOTSUPP; + afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), + IFLA_AF_SPEC); + if (afspec) + return switchdev_port_br_afspec(dev, afspec, + switchdev_port_obj_del); - return ops->ndo_bridge_dellink(dev, nlh, flags); + return 0; } EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink); -- cgit v1.2.3 From 87a5dae59e7abaad911ab719caa5548dd6df5557 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:48:02 -0700 Subject: switchdev: remove unused switchdev_port_bridge_dellink Now we can remove old wrappers for dellink. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 15 --------------- net/switchdev/switchdev.c | 30 ------------------------------ 2 files changed, 45 deletions(-) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 8ffadca9e760..397b1e6e04f6 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -130,8 +130,6 @@ int switchdev_port_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); int switchdev_port_bridge_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); -int ndo_dflt_switchdev_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 nlflags, u32 tb_id); int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, @@ -164,12 +162,6 @@ static inline int switchdev_port_obj_del(struct net_device *dev, return -EOPNOTSUPP; } -static inline int switchdev_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) -{ - return -EOPNOTSUPP; -} - static inline int register_switchdev_notifier(struct notifier_block *nb) { return 0; @@ -201,13 +193,6 @@ static inline int switchdev_port_bridge_dellink(struct net_device *dev, return -EOPNOTSUPP; } -static inline int ndo_dflt_switchdev_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) -{ - return 0; -} - static inline int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 8ce678e397b4..0e15b6f6bb56 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -536,36 +536,6 @@ int switchdev_port_bridge_dellink(struct net_device *dev, } EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink); -/** - * ndo_dflt_switchdev_port_bridge_dellink - default ndo bridge dellink - * op for master devices - * - * @dev: port device - * @nlh: netlink msg with bridge port attributes - * @flags: bridge dellink flags - * - * Notify master device slaves of bridge port attribute deletes - */ -int ndo_dflt_switchdev_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) -{ - struct net_device *lower_dev; - struct list_head *iter; - int ret = 0, err = 0; - - if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) - return ret; - - netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = switchdev_port_bridge_dellink(lower_dev, nlh, flags); - if (err && err != -EOPNOTSUPP) - ret = err; - } - - return ret; -} -EXPORT_SYMBOL_GPL(ndo_dflt_switchdev_port_bridge_dellink); - static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) { const struct switchdev_ops *ops = dev->switchdev_ops; -- cgit v1.2.3 From 8793d0a664a8a2c5e18e929c1f995c784c105705 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:48:04 -0700 Subject: switchdev: add new switchdev_port_bridge_getlink Like bridge_setlink, add switchdev wrapper to handle bridge_getlink and call into port driver to get port attrs. For now, only BR_LEARNING and BR_LEARNING_SYNC are returned. To add more, we'll probably want to break away from ndo_dflt_bridge_getlink() and build the netlink skb directly in the switchdev code. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 10 ++++++++++ net/switchdev/switchdev.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 397b1e6e04f6..e081d67f973a 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -126,6 +126,9 @@ int register_switchdev_notifier(struct notifier_block *nb); int unregister_switchdev_notifier(struct notifier_block *nb); int call_switchdev_notifiers(unsigned long val, struct net_device *dev, struct switchdev_notifier_info *info); +int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask, + int nlflags); int switchdev_port_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); int switchdev_port_bridge_dellink(struct net_device *dev, @@ -179,6 +182,13 @@ static inline int call_switchdev_notifiers(unsigned long val, return NOTIFY_DONE; } +static inline int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, + u32 seq, struct net_device *dev, + u32 filter_mask, int nlflags) +{ + return -EOPNOTSUPP; +} + static inline int switchdev_port_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 0e15b6f6bb56..9210355ec965 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -358,6 +358,34 @@ int call_switchdev_notifiers(unsigned long val, struct net_device *dev, } EXPORT_SYMBOL_GPL(call_switchdev_notifiers); +/** + * switchdev_port_bridge_getlink - Get bridge port attributes + * + * @dev: port device + * + * Called for SELF on rtnl_bridge_getlink to get bridge port + * attributes. + */ +int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask, + int nlflags) +{ + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS, + }; + u16 mode = BRIDGE_MODE_UNDEF; + u32 mask = BR_LEARNING | BR_LEARNING_SYNC; + int err; + + err = switchdev_port_attr_get(dev, &attr); + if (err) + return err; + + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, + attr.brport_flags, mask, nlflags); +} +EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink); + static int switchdev_port_br_setflag(struct net_device *dev, struct nlattr *nlattr, unsigned long brport_flag) -- cgit v1.2.3 From 58c2cb16b116d7feace621bd6b647bbabacfa225 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:48:06 -0700 Subject: switchdev: convert fib_ipv4_add/del over to switchdev_port_obj_add/del The IPv4 FIB ops convert nicely to the switchdev objs and we're left with only four switchdev ops: port get/set and port add/del. Other objs will follow, such as FDB. So go ahead and convert IPv4 FIB over to switchdev obj for consistency, anticipating more objs to come. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.c | 41 +++++++++++------------------- include/net/switchdev.h | 21 ++++++++-------- net/switchdev/switchdev.c | 49 +++++++++++++++++++++++------------- 3 files changed, 55 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 56ee316db1a1..1c906504cfe2 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4449,6 +4449,7 @@ static int rocker_port_obj_add(struct net_device *dev, struct switchdev_obj *obj) { struct rocker_port *rocker_port = netdev_priv(dev); + struct switchdev_obj_ipv4_fib *fib4; int err = 0; switch (obj->trans) { @@ -4467,6 +4468,12 @@ static int rocker_port_obj_add(struct net_device *dev, err = rocker_port_vlans_add(rocker_port, obj->trans, &obj->vlan); break; + case SWITCHDEV_OBJ_IPV4_FIB: + fib4 = &obj->ipv4_fib; + err = rocker_port_fib_ipv4(rocker_port, obj->trans, + fib4->dst, fib4->dst_len, + fib4->fi, fib4->tb_id, 0); + break; default: err = -EOPNOTSUPP; break; @@ -4508,12 +4515,19 @@ static int rocker_port_obj_del(struct net_device *dev, struct switchdev_obj *obj) { struct rocker_port *rocker_port = netdev_priv(dev); + struct switchdev_obj_ipv4_fib *fib4; int err = 0; switch (obj->id) { case SWITCHDEV_OBJ_PORT_VLAN: err = rocker_port_vlans_del(rocker_port, &obj->vlan); break; + case SWITCHDEV_OBJ_IPV4_FIB: + fib4 = &obj->ipv4_fib; + err = rocker_port_fib_ipv4(rocker_port, SWITCHDEV_TRANS_NONE, + fib4->dst, fib4->dst_len, fib4->fi, + fib4->tb_id, ROCKER_OP_FLAG_REMOVE); + break; default: err = -EOPNOTSUPP; break; @@ -4522,38 +4536,11 @@ static int rocker_port_obj_del(struct net_device *dev, return err; } -static int rocker_port_switchdev_fib_ipv4_add(struct net_device *dev, - __be32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 nlflags, u32 tb_id) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - int flags = 0; - - return rocker_port_fib_ipv4(rocker_port, SWITCHDEV_TRANS_NONE, - dst, dst_len, fi, tb_id, flags); -} - -static int rocker_port_switchdev_fib_ipv4_del(struct net_device *dev, - __be32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - int flags = ROCKER_OP_FLAG_REMOVE; - - return rocker_port_fib_ipv4(rocker_port, SWITCHDEV_TRANS_NONE, - dst, dst_len, fi, tb_id, flags); -} - static const struct switchdev_ops rocker_port_switchdev_ops = { .switchdev_port_attr_get = rocker_port_attr_get, .switchdev_port_attr_set = rocker_port_attr_set, .switchdev_port_obj_add = rocker_port_obj_add, .switchdev_port_obj_del = rocker_port_obj_del, - .switchdev_fib_ipv4_add = rocker_port_switchdev_fib_ipv4_add, - .switchdev_fib_ipv4_del = rocker_port_switchdev_fib_ipv4_del, }; /******************** diff --git a/include/net/switchdev.h b/include/net/switchdev.h index e081d67f973a..3b217b4cca27 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -46,6 +46,7 @@ struct fib_info; enum switchdev_obj_id { SWITCHDEV_OBJ_UNDEFINED, SWITCHDEV_OBJ_PORT_VLAN, + SWITCHDEV_OBJ_IPV4_FIB, }; struct switchdev_obj { @@ -57,6 +58,15 @@ struct switchdev_obj { u16 vid_start; u16 vid_end; } vlan; + struct switchdev_obj_ipv4_fib { /* IPV4_FIB */ + u32 dst; + int dst_len; + struct fib_info *fi; + u8 tos; + u8 type; + u32 nlflags; + u32 tb_id; + } ipv4_fib; }; }; @@ -70,10 +80,6 @@ struct switchdev_obj { * @switchdev_port_obj_add: Add an object to port (see switchdev_obj). * * @switchdev_port_obj_del: Delete an object from port (see switchdev_obj). - * - * @switchdev_fib_ipv4_add: Called to add/modify IPv4 route to switch device. - * - * @switchdev_fib_ipv4_del: Called to delete IPv4 route from switch device. */ struct switchdev_ops { int (*switchdev_port_attr_get)(struct net_device *dev, @@ -84,13 +90,6 @@ struct switchdev_ops { struct switchdev_obj *obj); int (*switchdev_port_obj_del)(struct net_device *dev, struct switchdev_obj *obj); - int (*switchdev_fib_ipv4_add)(struct net_device *dev, __be32 dst, - int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, - u32 tb_id); - int (*switchdev_fib_ipv4_del)(struct net_device *dev, __be32 dst, - int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id); }; enum switchdev_notifier_type { diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 9210355ec965..65d49d4477b9 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -641,8 +641,19 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 nlflags, u32 tb_id) { + struct switchdev_obj fib_obj = { + .id = SWITCHDEV_OBJ_IPV4_FIB, + .ipv4_fib = { + .dst = htonl(dst), + .dst_len = dst_len, + .fi = fi, + .tos = tos, + .type = type, + .nlflags = nlflags, + .tb_id = tb_id, + }, + }; struct net_device *dev; - const struct switchdev_ops *ops; int err = 0; /* Don't offload route if using custom ip rules or if @@ -660,15 +671,10 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, dev = switchdev_get_dev_by_nhs(fi); if (!dev) return 0; - ops = dev->switchdev_ops; - - if (ops->switchdev_fib_ipv4_add) { - err = ops->switchdev_fib_ipv4_add(dev, htonl(dst), dst_len, - fi, tos, type, nlflags, - tb_id); - if (!err) - fi->fib_flags |= RTNH_F_EXTERNAL; - } + + err = switchdev_port_obj_add(dev, &fib_obj); + if (!err) + fi->fib_flags |= RTNH_F_EXTERNAL; return err; } @@ -689,8 +695,19 @@ EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add); int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 tb_id) { + struct switchdev_obj fib_obj = { + .id = SWITCHDEV_OBJ_IPV4_FIB, + .ipv4_fib = { + .dst = htonl(dst), + .dst_len = dst_len, + .fi = fi, + .tos = tos, + .type = type, + .nlflags = 0, + .tb_id = tb_id, + }, + }; struct net_device *dev; - const struct switchdev_ops *ops; int err = 0; if (!(fi->fib_flags & RTNH_F_EXTERNAL)) @@ -699,14 +716,10 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, dev = switchdev_get_dev_by_nhs(fi); if (!dev) return 0; - ops = dev->switchdev_ops; - if (ops->switchdev_fib_ipv4_del) { - err = ops->switchdev_fib_ipv4_del(dev, htonl(dst), dst_len, - fi, tos, type, tb_id); - if (!err) - fi->fib_flags &= ~RTNH_F_EXTERNAL; - } + err = switchdev_port_obj_del(dev, &fib_obj); + if (!err) + fi->fib_flags &= ~RTNH_F_EXTERNAL; return err; } -- cgit v1.2.3 From 7889cbee8357aaed85898d028829dfb4f75bae2c Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:48:07 -0700 Subject: switchdev: remove NETIF_F_HW_SWITCH_OFFLOAD feature flag Roopa said remove the feature flag for this series and she'll work on bringing it back if needed at a later date. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 5 +---- drivers/net/ethernet/rocker/rocker.c | 3 +-- drivers/net/team/team.c | 2 +- include/linux/netdev_features.h | 5 +---- net/core/ethtool.c | 1 - 5 files changed, 4 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 147642dae3d0..a2e25de98bde 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1015,10 +1015,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev, netdev_features_t mask; struct slave *slave; - /* If any slave has the offload feature flag set, - * set the offload flag on the bond. - */ - mask = features | NETIF_F_HW_SWITCH_OFFLOAD; + mask = features; features &= ~NETIF_F_ONE_FOR_ALL; features |= NETIF_F_ALL_FOR_ALL; diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 1c906504cfe2..9715e3322706 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4906,8 +4906,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) rocker_carrier_init(rocker_port); dev->features |= NETIF_F_NETNS_LOCAL | - NETIF_F_HW_VLAN_CTAG_FILTER | - NETIF_F_HW_SWITCH_OFFLOAD; + NETIF_F_HW_VLAN_CTAG_FILTER; err = register_netdev(dev); if (err) { diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index bda32be596ef..1ec035a53c3d 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1924,7 +1924,7 @@ static netdev_features_t team_fix_features(struct net_device *dev, struct team *team = netdev_priv(dev); netdev_features_t mask; - mask = features | NETIF_F_HW_SWITCH_OFFLOAD; + mask = features; features &= ~NETIF_F_ONE_FOR_ALL; features |= NETIF_F_ALL_FOR_ALL; diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 7d59dc6ab789..9672781c593d 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -66,7 +66,6 @@ enum { NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */ NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */ NETIF_F_BUSY_POLL_BIT, /* Busy poll */ - NETIF_F_HW_SWITCH_OFFLOAD_BIT, /* HW switch offload */ /* * Add your fresh new feature above and remember to update @@ -125,7 +124,6 @@ enum { #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) #define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) -#define NETIF_F_HW_SWITCH_OFFLOAD __NETIF_F(HW_SWITCH_OFFLOAD) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ @@ -161,8 +159,7 @@ enum { */ #define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ NETIF_F_SG | NETIF_F_HIGHDMA | \ - NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED | \ - NETIF_F_HW_SWITCH_OFFLOAD) + NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED) /* * If one device doesn't support one of these features, then disable it diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1d00b8922902..eb0c3ace7458 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -98,7 +98,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_RXALL_BIT] = "rx-all", [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", [NETIF_F_BUSY_POLL_BIT] = "busy-poll", - [NETIF_F_HW_SWITCH_OFFLOAD_BIT] = "hw-switch-offload", }; static const char -- cgit v1.2.3 From a2029240e5836e73ebcc1a8ddb8c22d636f89c9a Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 11 May 2015 21:17:53 +0200 Subject: net: deinline netif_tx_stop_all_queues(), remove WARN_ON in netif_tx_stop_queue() These functions compile to 60 bytes of machine code each. With this .config: http://busybox.net/~vda/kernel_config there are 617 calls of netif_tx_stop_queue() and 49 calls of netif_tx_stop_all_queues() in vmlinux. To fix this, remove WARN_ON in netif_tx_stop_queue() as suggested by davem, and deinline netif_tx_stop_all_queues(). Change in code size is about 20k: text data bss dec hex filename 82426986 22255416 20627456 125309858 77813a2 vmlinux.before 82406248 22255416 20627456 125289120 777c2a0 vmlinux gcc-4.7.2 still creates deinlined version of netif_tx_stop_queue sometimes: $ nm --size-sort vmlinux | grep netif_tx_stop_queue | wc -l 190 ffffffff81b558a8 : ffffffff81b558a8: 55 push %rbp ffffffff81b558a9: 48 89 e5 mov %rsp,%rbp ffffffff81b558ac: f0 80 8f e0 01 00 00 lock orb $0x1,0x1e0(%rdi) ffffffff81b558b3: 01 ffffffff81b558b4: 5d pop %rbp ffffffff81b558b5: c3 retq This needs additional fixing. Signed-off-by: Denys Vlasenko CC: Alexei Starovoitov CC: Alexander Duyck CC: Joe Perches CC: David S. Miller CC: Jiri Pirko CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org CC: netfilter-devel@vger.kernel.org Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 +------------- net/core/dev.c | 11 +++++++++++ 2 files changed, 12 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2b39235b9f13..fa57915f440c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2559,10 +2559,6 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev) static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) { - if (WARN_ON(!dev_queue)) { - pr_info("netif_stop_queue() cannot be called before register_netdev()\n"); - return; - } set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } @@ -2578,15 +2574,7 @@ static inline void netif_stop_queue(struct net_device *dev) netif_tx_stop_queue(netdev_get_tx_queue(dev, 0)); } -static inline void netif_tx_stop_all_queues(struct net_device *dev) -{ - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - netif_tx_stop_queue(txq); - } -} +void netif_tx_stop_all_queues(struct net_device *dev); static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue) { diff --git a/net/core/dev.c b/net/core/dev.c index e5f77c40bbd1..fd012bbe0486 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6301,6 +6301,17 @@ static int netif_alloc_netdev_queues(struct net_device *dev) return 0; } +void netif_tx_stop_all_queues(struct net_device *dev) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + netif_tx_stop_queue(txq); + } +} +EXPORT_SYMBOL(netif_tx_stop_all_queues); + /** * register_netdevice - register a network device * @dev: device to register -- cgit v1.2.3 From cffc642d93f9324a06dfbd7da9af29652952a248 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 11 May 2015 22:22:44 -0700 Subject: test_bpf: add 173 new testcases for eBPF add an exhaustive set of eBPF tests bringing total to: test_bpf: Summary: 233 PASSED, 0 FAILED, [0/226 JIT'ed] Signed-off-by: Michael Holzheu Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 10 + lib/test_bpf.c | 2192 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 2202 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 3c03a6085b82..ce1d72d34382 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -207,6 +207,16 @@ struct bpf_prog_aux; .off = OFF, \ .imm = 0 }) +/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ + +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 3c41049d72d8..8bca780e3613 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1806,6 +1806,2198 @@ static struct bpf_test tests[] = { 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6}, { { 38, 256 } } }, + /* BPF_ALU | BPF_MOV | BPF_X */ + { + "ALU_MOV_X: dst = 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_MOV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_MOV_X: dst = 4294967295", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 4294967295), + BPF_ALU32_REG(BPF_MOV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295 } }, + }, + { + "ALU64_MOV_X: dst = 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_MOV_X: dst = 4294967295", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 4294967295), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295 } }, + }, + /* BPF_ALU | BPF_MOV | BPF_K */ + { + "ALU_MOV_K: dst = 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_MOV_K: dst = 4294967295", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 4294967295), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295 } }, + }, + { + "ALU_MOV_K: 0x0000ffffffff0000 = 0x00000000ffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0x00000000ffffffff), + BPF_ALU32_IMM(BPF_MOV, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_MOV_K: dst = 2", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_MOV_K: dst = 2147483647", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, 2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2147483647 } }, + }, + { + "ALU64_OR_K: dst = 0x0", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0x0), + BPF_ALU64_IMM(BPF_MOV, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_MOV_K: dst = -1", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_MOV, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_ADD | BPF_X */ + { + "ALU_ADD_X: 1 + 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_ADD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_ADD_X: 1 + 4294967294 = 4294967295", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967294), + BPF_ALU32_REG(BPF_ADD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295 } }, + }, + { + "ALU64_ADD_X: 1 + 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_ADD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_ADD_X: 1 + 4294967294 = 4294967295", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967294), + BPF_ALU64_REG(BPF_ADD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295 } }, + }, + /* BPF_ALU | BPF_ADD | BPF_K */ + { + "ALU_ADD_K: 1 + 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_ADD, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_ADD_K: 3 + 0 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_ADD, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_ADD_K: 1 + 4294967294 = 4294967295", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_ADD, R0, 4294967294), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295 } }, + }, + { + "ALU_ADD_K: 0 + (-1) = 0x00000000ffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0), + BPF_LD_IMM64(R3, 0x00000000ffffffff), + BPF_ALU32_IMM(BPF_ADD, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_ADD_K: 1 + 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_ADD, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_ADD_K: 3 + 0 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_ADD, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_ADD_K: 1 + 2147483646 = 2147483647", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_ADD, R0, 2147483646), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2147483647 } }, + }, + { + "ALU64_ADD_K: 2147483646 + -2147483647 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483646), + BPF_ALU64_IMM(BPF_ADD, R0, -2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, + { + "ALU64_ADD_K: 1 + 0 = 1", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x1), + BPF_LD_IMM64(R3, 0x1), + BPF_ALU64_IMM(BPF_ADD, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_ADD_K: 0 + (-1) = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_ADD, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_SUB | BPF_X */ + { + "ALU_SUB_X: 3 - 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU32_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_SUB_X: 4294967295 - 4294967294 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967294), + BPF_ALU32_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_SUB_X: 3 - 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_SUB_X: 4294967295 - 4294967294 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967294), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_ALU | BPF_SUB | BPF_K */ + { + "ALU_SUB_K: 3 - 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_SUB, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_SUB_K: 3 - 0 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_SUB, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_SUB_K: 4294967295 - 4294967294 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_SUB, R0, 4294967294), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_SUB_K: 3 - 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_SUB, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_SUB_K: 3 - 0 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_SUB, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_SUB_K: 4294967294 - 4294967295 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967294), + BPF_ALU64_IMM(BPF_SUB, R0, 4294967295), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, + { + "ALU64_ADD_K: 2147483646 - 2147483647 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483646), + BPF_ALU64_IMM(BPF_SUB, R0, 2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, + /* BPF_ALU | BPF_MUL | BPF_X */ + { + "ALU_MUL_X: 2 * 3 = 6", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 3), + BPF_ALU32_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 6 } }, + }, + { + "ALU_MUL_X: 2 * 0x7FFFFFF8 = 0xFFFFFFF0", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 0x7FFFFFF8), + BPF_ALU32_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xFFFFFFF0 } }, + }, + { + "ALU_MUL_X: -1 * -1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, -1), + BPF_ALU32_IMM(BPF_MOV, R1, -1), + BPF_ALU32_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_MUL_X: 2 * 3 = 6", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 3), + BPF_ALU64_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 6 } }, + }, + { + "ALU64_MUL_X: 1 * 2147483647 = 2147483647", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2147483647), + BPF_ALU64_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2147483647 } }, + }, + /* BPF_ALU | BPF_MUL | BPF_K */ + { + "ALU_MUL_K: 2 * 3 = 6", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MUL, R0, 3), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 6 } }, + }, + { + "ALU_MUL_K: 3 * 1 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MUL, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_MUL_K: 2 * 0x7FFFFFF8 = 0xFFFFFFF0", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MUL, R0, 0x7FFFFFF8), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xFFFFFFF0 } }, + }, + { + "ALU_MUL_K: 1 * (-1) = 0x00000000ffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x1), + BPF_LD_IMM64(R3, 0x00000000ffffffff), + BPF_ALU32_IMM(BPF_MUL, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_MUL_K: 2 * 3 = 6", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU64_IMM(BPF_MUL, R0, 3), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 6 } }, + }, + { + "ALU64_MUL_K: 3 * 1 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_MUL, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_MUL_K: 1 * 2147483647 = 2147483647", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_MUL, R0, 2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2147483647 } }, + }, + { + "ALU64_MUL_K: 1 * -2147483647 = -2147483647", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_MUL, R0, -2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -2147483647 } }, + }, + { + "ALU64_MUL_K: 1 * (-1) = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x1), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_MUL, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_DIV | BPF_X */ + { + "ALU_DIV_X: 6 / 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 6), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_DIV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_DIV_X: 4294967295 / 4294967295 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967295), + BPF_ALU32_REG(BPF_DIV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_DIV_X: 6 / 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 6), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_DIV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_DIV_X: 2147483647 / 2147483647 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483647), + BPF_ALU32_IMM(BPF_MOV, R1, 2147483647), + BPF_ALU64_REG(BPF_DIV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_DIV_X: 0xffffffffffffffff / (-1) = 0x0000000000000001", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffffffUL), + BPF_LD_IMM64(R4, 0xffffffffffffffffUL), + BPF_LD_IMM64(R3, 0x0000000000000001UL), + BPF_ALU64_REG(BPF_DIV, R2, R4), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_DIV | BPF_K */ + { + "ALU_DIV_K: 6 / 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 6), + BPF_ALU32_IMM(BPF_DIV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_DIV_K: 3 / 1 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_DIV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_DIV_K: 4294967295 / 4294967295 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_DIV, R0, 4294967295), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_DIV_K: 0xffffffffffffffff / (-1) = 0x1", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffffffUL), + BPF_LD_IMM64(R3, 0x1UL), + BPF_ALU32_IMM(BPF_DIV, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_DIV_K: 6 / 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 6), + BPF_ALU64_IMM(BPF_DIV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_DIV_K: 3 / 1 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_DIV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_DIV_K: 2147483647 / 2147483647 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483647), + BPF_ALU64_IMM(BPF_DIV, R0, 2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_DIV_K: 0xffffffffffffffff / (-1) = 0x0000000000000001", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffffffUL), + BPF_LD_IMM64(R3, 0x0000000000000001UL), + BPF_ALU64_IMM(BPF_DIV, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_MOD | BPF_X */ + { + "ALU_MOD_X: 3 % 2 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_MOD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_MOD_X: 4294967295 % 4294967293 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967293), + BPF_ALU32_REG(BPF_MOD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_MOD_X: 3 % 2 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_MOD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_MOD_X: 2147483647 % 2147483645 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483647), + BPF_ALU32_IMM(BPF_MOV, R1, 2147483645), + BPF_ALU64_REG(BPF_MOD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + /* BPF_ALU | BPF_MOD | BPF_K */ + { + "ALU_MOD_K: 3 % 2 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOD, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_MOD_K: 3 % 1 = 0", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOD, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + }, + { + "ALU_MOD_K: 4294967295 % 4294967293 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_MOD, R0, 4294967293), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_MOD_K: 3 % 2 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_MOD, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_MOD_K: 3 % 1 = 0", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_MOD, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + }, + { + "ALU64_MOD_K: 2147483647 % 2147483645 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483647), + BPF_ALU64_IMM(BPF_MOD, R0, 2147483645), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + /* BPF_ALU | BPF_AND | BPF_X */ + { + "ALU_AND_X: 3 & 2 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_AND, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_AND_X: 0xffffffff & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffff), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU32_REG(BPF_AND, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_AND_X: 3 & 2 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_AND, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_AND_X: 0xffffffff & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffff), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU64_REG(BPF_AND, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + /* BPF_ALU | BPF_AND | BPF_K */ + { + "ALU_AND_K: 3 & 2 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_AND, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_AND_K: 0xffffffff & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffff), + BPF_ALU32_IMM(BPF_AND, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_AND_K: 3 & 2 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_AND, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_AND_K: 0xffffffff & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffff), + BPF_ALU64_IMM(BPF_AND, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000ffff00000000", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0x0000000000000000), + BPF_ALU64_IMM(BPF_AND, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0x0000ffffffff0000), + BPF_ALU64_IMM(BPF_AND, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_AND_K: 0xffffffffffffffff & -1 = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffffff), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_AND, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_OR | BPF_X */ + { + "ALU_OR_X: 1 | 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_OR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_OR_X: 0x0 | 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU32_REG(BPF_OR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_OR_X: 1 | 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_OR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_OR_X: 0 | 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU64_REG(BPF_OR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + /* BPF_ALU | BPF_OR | BPF_K */ + { + "ALU_OR_K: 1 | 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_OR, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_OR_K: 0 & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_ALU32_IMM(BPF_OR, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_OR_K: 1 | 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_OR, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_OR_K: 0 & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_ALU64_IMM(BPF_OR, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffff00000000", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0x0000ffffffff0000), + BPF_ALU64_IMM(BPF_OR, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_OR_K: 0x0000ffffffff0000 | -1 = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_OR, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_OR_K: 0x000000000000000 | -1 = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000000000000000), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_OR, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_XOR | BPF_X */ + { + "ALU_XOR_X: 5 ^ 6 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 5), + BPF_ALU32_IMM(BPF_MOV, R1, 6), + BPF_ALU32_REG(BPF_XOR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_XOR_X: 0x1 ^ 0xffffffff = 0xfffffffe", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU32_REG(BPF_XOR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } }, + }, + { + "ALU64_XOR_X: 5 ^ 6 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 5), + BPF_ALU32_IMM(BPF_MOV, R1, 6), + BPF_ALU64_REG(BPF_XOR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_XOR_X: 1 ^ 0xffffffff = 0xfffffffe", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU64_REG(BPF_XOR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } }, + }, + /* BPF_ALU | BPF_XOR | BPF_K */ + { + "ALU_XOR_K: 5 ^ 6 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 5), + BPF_ALU32_IMM(BPF_XOR, R0, 6), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_XOR_K: 1 ^ 0xffffffff = 0xfffffffe", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_XOR, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } }, + }, + { + "ALU64_XOR_K: 5 ^ 6 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 5), + BPF_ALU64_IMM(BPF_XOR, R0, 6), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_XOR_K: 1 & 0xffffffff = 0xfffffffe", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_XOR, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } }, + }, + { + "ALU64_XOR_K: 0x0000ffffffff0000 ^ 0x0 = 0x0000ffffffff0000", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0x0000ffffffff0000), + BPF_ALU64_IMM(BPF_XOR, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_XOR_K: 0x0000ffffffff0000 ^ -1 = 0xffff00000000ffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0xffff00000000ffff), + BPF_ALU64_IMM(BPF_XOR, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_XOR_K: 0x000000000000000 ^ -1 = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000000000000000), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_XOR, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_LSH | BPF_X */ + { + "ALU_LSH_X: 1 << 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU32_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_LSH_X: 1 << 31 = 0x80000000", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 31), + BPF_ALU32_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x80000000 } }, + }, + { + "ALU64_LSH_X: 1 << 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_LSH_X: 1 << 31 = 0x80000000", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 31), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x80000000 } }, + }, + /* BPF_ALU | BPF_LSH | BPF_K */ + { + "ALU_LSH_K: 1 << 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_LSH, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_LSH_K: 1 << 31 = 0x80000000", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_LSH, R0, 31), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x80000000 } }, + }, + { + "ALU64_LSH_K: 1 << 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_LSH, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_LSH_K: 1 << 31 = 0x80000000", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_LSH, R0, 31), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x80000000 } }, + }, + /* BPF_ALU | BPF_RSH | BPF_X */ + { + "ALU_RSH_X: 2 >> 1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU32_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_RSH_X: 0x80000000 >> 31 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x80000000), + BPF_ALU32_IMM(BPF_MOV, R1, 31), + BPF_ALU32_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_RSH_X: 2 >> 1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_RSH_X: 0x80000000 >> 31 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x80000000), + BPF_ALU32_IMM(BPF_MOV, R1, 31), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_ALU | BPF_RSH | BPF_K */ + { + "ALU_RSH_K: 2 >> 1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_RSH, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_RSH_K: 0x80000000 >> 31 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x80000000), + BPF_ALU32_IMM(BPF_RSH, R0, 31), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_RSH_K: 2 >> 1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU64_IMM(BPF_RSH, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_RSH_K: 0x80000000 >> 31 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x80000000), + BPF_ALU64_IMM(BPF_RSH, R0, 31), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_ALU | BPF_ARSH | BPF_X */ + { + "ALU_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xff00ff0000000000LL), + BPF_ALU32_IMM(BPF_MOV, R1, 40), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffff00ff } }, + }, + /* BPF_ALU | BPF_ARSH | BPF_K */ + { + "ALU_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xff00ff0000000000LL), + BPF_ALU64_IMM(BPF_ARSH, R0, 40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffff00ff } }, + }, + /* BPF_ALU | BPF_NEG */ + { + "ALU_NEG: -(3) = -3", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 3), + BPF_ALU32_IMM(BPF_NEG, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -3 } }, + }, + { + "ALU_NEG: -(-3) = 3", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -3), + BPF_ALU32_IMM(BPF_NEG, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_NEG: -(3) = -3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_NEG, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -3 } }, + }, + { + "ALU64_NEG: -(-3) = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, -3), + BPF_ALU64_IMM(BPF_NEG, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + /* BPF_ALU | BPF_END | BPF_FROM_BE */ + { + "ALU_END_FROM_BE 16: 0x0123456789abcdef -> 0xcdef", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_BE, R0, 16), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, cpu_to_be16(0xcdef) } }, + }, + { + "ALU_END_FROM_BE 32: 0x0123456789abcdef -> 0x89abcdef", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_BE, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, cpu_to_be32(0x89abcdef) } }, + }, + { + "ALU_END_FROM_BE 64: 0x0123456789abcdef -> 0x89abcdef", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_BE, R0, 64), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, (u32) cpu_to_be64(0x0123456789abcdefLL) } }, + }, + /* BPF_ALU | BPF_END | BPF_FROM_LE */ + { + "ALU_END_FROM_LE 16: 0x0123456789abcdef -> 0xefcd", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_LE, R0, 16), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, cpu_to_le16(0xcdef) } }, + }, + { + "ALU_END_FROM_LE 32: 0x0123456789abcdef -> 0xefcdab89", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_LE, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, cpu_to_le32(0x89abcdef) } }, + }, + { + "ALU_END_FROM_LE 64: 0x0123456789abcdef -> 0x67452301", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_LE, R0, 64), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, (u32) cpu_to_le64(0x0123456789abcdefLL) } }, + }, + /* BPF_ST(X) | BPF_MEM | BPF_B/H/W/DW */ + { + "ST_MEM_B: Store/Load byte: max negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_B, R10, -40, 0xff), + BPF_LDX_MEM(BPF_B, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xff } }, + }, + { + "ST_MEM_B: Store/Load byte: max positive", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_H, R10, -40, 0x7f), + BPF_LDX_MEM(BPF_H, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x7f } }, + }, + { + "STX_MEM_B: Store/Load byte: max negative", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0xffLL), + BPF_STX_MEM(BPF_B, R10, R1, -40), + BPF_LDX_MEM(BPF_B, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xff } }, + }, + { + "ST_MEM_H: Store/Load half word: max negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_H, R10, -40, 0xffff), + BPF_LDX_MEM(BPF_H, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffff } }, + }, + { + "ST_MEM_H: Store/Load half word: max positive", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_H, R10, -40, 0x7fff), + BPF_LDX_MEM(BPF_H, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x7fff } }, + }, + { + "STX_MEM_H: Store/Load half word: max negative", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0xffffLL), + BPF_STX_MEM(BPF_H, R10, R1, -40), + BPF_LDX_MEM(BPF_H, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffff } }, + }, + { + "ST_MEM_W: Store/Load word: max negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_W, R10, -40, 0xffffffff), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ST_MEM_W: Store/Load word: max positive", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_W, R10, -40, 0x7fffffff), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x7fffffff } }, + }, + { + "STX_MEM_W: Store/Load word: max negative", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0xffffffffLL), + BPF_STX_MEM(BPF_W, R10, R1, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ST_MEM_DW: Store/Load double word: max negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ST_MEM_DW: Store/Load double word: max negative 2", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffff00000000ffff), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff), + BPF_LDX_MEM(BPF_DW, R2, R10, -40), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ST_MEM_DW: Store/Load double word: max positive", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_DW, R10, -40, 0x7fffffff), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x7fffffff } }, + }, + { + "STX_MEM_DW: Store/Load double word: max negative", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_STX_MEM(BPF_W, R10, R1, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + /* BPF_STX | BPF_XADD | BPF_W/DW */ + { + "STX_XADD_W: Test: 0x12 + 0x10 = 0x22", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12), + BPF_ST_MEM(BPF_W, R10, -40, 0x10), + BPF_STX_XADD(BPF_W, R10, R0, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x22 } }, + }, + { + "STX_XADD_DW: Test: 0x12 + 0x10 = 0x22", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12), + BPF_ST_MEM(BPF_DW, R10, -40, 0x10), + BPF_STX_XADD(BPF_DW, R10, R0, -40), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x22 } }, + }, + /* BPF_JMP | BPF_EXIT */ + { + "JMP_EXIT", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x4711), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 0x4712), + }, + INTERNAL, + { }, + { { 0, 0x4711 } }, + }, + /* BPF_JMP | BPF_JA */ + { + "JMP_JA: Unconditional jump: if (true) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSGT | BPF_K */ + { + "JMP_JSGT_K: Signed jump: if (-1 > -2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_JMP_IMM(BPF_JSGT, R1, -2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGT_K: Signed jump: if (-1 > -1) return 0", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_JMP_IMM(BPF_JSGT, R1, -1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSGE | BPF_K */ + { + "JMP_JSGE_K: Signed jump: if (-1 >= -2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_JMP_IMM(BPF_JSGE, R1, -2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGE_K: Signed jump: if (-1 >= -1) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_JMP_IMM(BPF_JSGE, R1, -1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JGT | BPF_K */ + { + "JMP_JGT_K: if (3 > 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JGT, R1, 2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JGE | BPF_K */ + { + "JMP_JGE_K: if (3 >= 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JGE, R1, 2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JGE_K: if (3 >= 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JGE, R1, 3, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JNE | BPF_K */ + { + "JMP_JNE_K: if (3 != 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JNE, R1, 2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JEQ | BPF_K */ + { + "JMP_JEQ_K: if (3 == 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JEQ, R1, 3, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSET | BPF_K */ + { + "JMP_JSET_K: if (0x3 & 0x2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JNE, R1, 2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSET_K: if (0x3 & 0xffffffff) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JNE, R1, 0xffffffff, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSGT | BPF_X */ + { + "JMP_JSGT_X: Signed jump: if (-1 > -2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -2), + BPF_JMP_REG(BPF_JSGT, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGT_X: Signed jump: if (-1 > -1) return 0", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -1), + BPF_JMP_REG(BPF_JSGT, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSGE | BPF_X */ + { + "JMP_JSGE_X: Signed jump: if (-1 >= -2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -2), + BPF_JMP_REG(BPF_JSGE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGE_X: Signed jump: if (-1 >= -1) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -1), + BPF_JMP_REG(BPF_JSGE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JGT | BPF_X */ + { + "JMP_JGT_X: if (3 > 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JGT, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JGE | BPF_X */ + { + "JMP_JGE_X: if (3 >= 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JGE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JGE_X: if (3 >= 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 3), + BPF_JMP_REG(BPF_JGE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JNE | BPF_X */ + { + "JMP_JNE_X: if (3 != 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JNE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JEQ | BPF_X */ + { + "JMP_JEQ_X: if (3 == 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 3), + BPF_JMP_REG(BPF_JEQ, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSET | BPF_X */ + { + "JMP_JSET_X: if (0x3 & 0x2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JNE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSET_X: if (0x3 & 0xffffffff) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 0xffffffff), + BPF_JMP_REG(BPF_JNE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, }; static struct net_device dev; -- cgit v1.2.3 From 9449c3cd90472141cf081af88181a56163ff7132 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Tue, 12 May 2015 18:29:44 +0800 Subject: net: make skb_dst_pop routine static As xfrm_output_one() is the only caller of skb_dst_pop(), we should make skb_dst_pop() localized. Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- include/net/dst.h | 12 ------------ net/xfrm/xfrm_output.c | 12 ++++++++++++ 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 22aa93f165f1..2bc73f8a00a9 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -349,18 +349,6 @@ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, __skb_tunnel_rx(skb, dev, net); } -/* Children define the path of the packet through the - * Linux networking. Thus, destinations are stackable. - */ - -static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) -{ - struct dst_entry *child = dst_clone(skb_dst(skb)->child); - - skb_dst_drop(skb); - return child; -} - int dst_discard_sk(struct sock *sk, struct sk_buff *skb); static inline int dst_discard(struct sk_buff *skb) { diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index fbcedbe33190..68ada2ca4b60 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -38,6 +38,18 @@ static int xfrm_skb_check_space(struct sk_buff *skb) return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); } +/* Children define the path of the packet through the + * Linux networking. Thus, destinations are stackable. + */ + +static struct dst_entry *skb_dst_pop(struct sk_buff *skb) +{ + struct dst_entry *child = dst_clone(skb_dst(skb)->child); + + skb_dst_drop(skb); + return child; +} + static int xfrm_output_one(struct sk_buff *skb, int err) { struct dst_entry *dst = skb_dst(skb); -- cgit v1.2.3 From 275e2bc0f25d5eb99c99ebb7293fc3722533124b Mon Sep 17 00:00:00 2001 From: Sergey Popovich Date: Sat, 2 May 2015 19:28:17 +0200 Subject: netfilter: ipset: Fix ext_*() macros So pointers returned by these macros could be referenced with -> directly. Signed-off-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 34b172301558..f88be7258e5f 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -122,13 +122,13 @@ struct ip_set_skbinfo { struct ip_set; #define ext_timeout(e, s) \ -(unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT]) +((unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT])) #define ext_counter(e, s) \ -(struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER]) +((struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER])) #define ext_comment(e, s) \ -(struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT]) +((struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT])) #define ext_skbinfo(e, s) \ -(struct ip_set_skbinfo *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_SKBINFO]) +((struct ip_set_skbinfo *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_SKBINFO])) typedef int (*ipset_adtfn)(struct ip_set *set, void *value, const struct ip_set_ext *ext, -- cgit v1.2.3 From 5eb764edee52e837638b8d55ceace2c68e248cd2 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Tue, 12 May 2015 23:03:53 -0700 Subject: switchdev: align comment with other comments in block Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/net/switchdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 3b217b4cca27..9f9a7cc573c3 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -53,7 +53,7 @@ struct switchdev_obj { enum switchdev_obj_id id; enum switchdev_trans trans; union { - struct switchdev_obj_vlan { /* PORT_VLAN */ + struct switchdev_obj_vlan { /* PORT_VLAN */ u16 flags; u16 vid_start; u16 vid_end; -- cgit v1.2.3 From 42275bd8fcb351f951781d8882f359d25976824b Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Wed, 13 May 2015 11:16:50 -0700 Subject: switchdev: don't use anonymous union on switchdev attr/obj structs Older gcc versions (e.g. gcc version 4.4.6) don't like anonymous unions which was causing build issues on the newly added switchdev attr/obj structs. Fix this by using named union on structs. Signed-off-by: Scott Feldman Reported-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.c | 18 ++++++++--------- include/net/switchdev.h | 4 ++-- net/bridge/br_stp.c | 2 +- net/core/net-sysfs.c | 4 ++-- net/core/rtnetlink.c | 3 ++- net/dsa/slave.c | 6 +++--- net/switchdev/switchdev.c | 39 ++++++++++++++++++------------------ 7 files changed, 39 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index ca533936658b..f0a9cb44be6b 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4339,11 +4339,11 @@ static int rocker_port_attr_get(struct net_device *dev, switch (attr->id) { case SWITCHDEV_ATTR_PORT_PARENT_ID: - attr->ppid.id_len = sizeof(rocker->hw.id); - memcpy(&attr->ppid.id, &rocker->hw.id, attr->ppid.id_len); + attr->u.ppid.id_len = sizeof(rocker->hw.id); + memcpy(&attr->u.ppid.id, &rocker->hw.id, attr->u.ppid.id_len); break; case SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS: - attr->brport_flags = rocker_port->brport_flags; + attr->u.brport_flags = rocker_port->brport_flags; break; default: return -EOPNOTSUPP; @@ -4400,11 +4400,11 @@ static int rocker_port_attr_set(struct net_device *dev, switch (attr->id) { case SWITCHDEV_ATTR_PORT_STP_STATE: err = rocker_port_stp_update(rocker_port, attr->trans, - attr->stp_state); + attr->u.stp_state); break; case SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS: err = rocker_port_brport_flags_set(rocker_port, attr->trans, - attr->brport_flags); + attr->u.brport_flags); break; default: err = -EOPNOTSUPP; @@ -4466,10 +4466,10 @@ static int rocker_port_obj_add(struct net_device *dev, switch (obj->id) { case SWITCHDEV_OBJ_PORT_VLAN: err = rocker_port_vlans_add(rocker_port, obj->trans, - &obj->vlan); + &obj->u.vlan); break; case SWITCHDEV_OBJ_IPV4_FIB: - fib4 = &obj->ipv4_fib; + fib4 = &obj->u.ipv4_fib; err = rocker_port_fib_ipv4(rocker_port, obj->trans, htonl(fib4->dst), fib4->dst_len, fib4->fi, fib4->tb_id, 0); @@ -4520,10 +4520,10 @@ static int rocker_port_obj_del(struct net_device *dev, switch (obj->id) { case SWITCHDEV_OBJ_PORT_VLAN: - err = rocker_port_vlans_del(rocker_port, &obj->vlan); + err = rocker_port_vlans_del(rocker_port, &obj->u.vlan); break; case SWITCHDEV_OBJ_IPV4_FIB: - fib4 = &obj->ipv4_fib; + fib4 = &obj->u.ipv4_fib; err = rocker_port_fib_ipv4(rocker_port, SWITCHDEV_TRANS_NONE, htonl(fib4->dst), fib4->dst_len, fib4->fi, fib4->tb_id, diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 9f9a7cc573c3..ea5b1c230d3d 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -38,7 +38,7 @@ struct switchdev_attr { struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ u8 stp_state; /* PORT_STP_STATE */ unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ - }; + } u; }; struct fib_info; @@ -67,7 +67,7 @@ struct switchdev_obj { u32 nlflags; u32 tb_id; } ipv4_fib; - }; + } u; }; /** diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index b9300da31565..45f1ff113af9 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -41,7 +41,7 @@ void br_set_state(struct net_bridge_port *p, unsigned int state) { struct switchdev_attr attr = { .id = SWITCHDEV_ATTR_PORT_STP_STATE, - .stp_state = state, + .u.stp_state = state, }; int err; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 5a9ce96f6d27..18b34d771ed4 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -465,8 +465,8 @@ static ssize_t phys_switch_id_show(struct device *dev, ret = switchdev_port_attr_get(netdev, &attr); if (!ret) - ret = sprintf(buf, "%*phN\n", attr.ppid.id_len, - attr.ppid.id); + ret = sprintf(buf, "%*phN\n", attr.u.ppid.id_len, + attr.u.ppid.id); } rtnl_unlock(); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c6c6b2c34926..141ccc357e2e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1016,7 +1016,8 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) return err; } - if (nla_put(skb, IFLA_PHYS_SWITCH_ID, attr.ppid.id_len, attr.ppid.id)) + if (nla_put(skb, IFLA_PHYS_SWITCH_ID, attr.u.ppid.id_len, + attr.u.ppid.id)) return -EMSGSIZE; return 0; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 3fb5210e318c..04ffad311704 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -353,7 +353,7 @@ static int dsa_slave_port_attr_set(struct net_device *dev, switch (attr->id) { case SWITCHDEV_ATTR_PORT_STP_STATE: if (attr->trans == SWITCHDEV_TRANS_COMMIT) - ret = dsa_slave_stp_update(dev, attr->stp_state); + ret = dsa_slave_stp_update(dev, attr->u.stp_state); break; default: ret = -EOPNOTSUPP; @@ -408,8 +408,8 @@ static int dsa_slave_port_attr_get(struct net_device *dev, switch (attr->id) { case SWITCHDEV_ATTR_PORT_PARENT_ID: - attr->ppid.id_len = sizeof(ds->index); - memcpy(&attr->ppid.id, &ds->index, attr->ppid.id_len); + attr->u.ppid.id_len = sizeof(ds->index); + memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len); break; default: return -EOPNOTSUPP; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 77f1b6e3f78e..0409f9b5bdbc 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -383,7 +383,7 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return err; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, - attr.brport_flags, mask, nlflags); + attr.u.brport_flags, mask, nlflags); } EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink); @@ -402,9 +402,9 @@ static int switchdev_port_br_setflag(struct net_device *dev, return err; if (flag) - attr.brport_flags |= brport_flag; + attr.u.brport_flags |= brport_flag; else - attr.brport_flags &= ~brport_flag; + attr.u.brport_flags &= ~brport_flag; return switchdev_port_attr_set(dev, &attr); } @@ -466,6 +466,7 @@ static int switchdev_port_br_afspec(struct net_device *dev, struct switchdev_obj obj = { .id = SWITCHDEV_OBJ_PORT_VLAN, }; + struct switchdev_obj_vlan *vlan = &obj.u.vlan; int rem; int err; @@ -475,30 +476,30 @@ static int switchdev_port_br_afspec(struct net_device *dev, if (nla_len(attr) != sizeof(struct bridge_vlan_info)) return -EINVAL; vinfo = nla_data(attr); - obj.vlan.flags = vinfo->flags; + vlan->flags = vinfo->flags; if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { - if (obj.vlan.vid_start) + if (vlan->vid_start) return -EINVAL; - obj.vlan.vid_start = vinfo->vid; + vlan->vid_start = vinfo->vid; } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) { - if (!obj.vlan.vid_start) + if (!vlan->vid_start) return -EINVAL; - obj.vlan.vid_end = vinfo->vid; - if (obj.vlan.vid_end <= obj.vlan.vid_start) + vlan->vid_end = vinfo->vid; + if (vlan->vid_end <= vlan->vid_start) return -EINVAL; err = f(dev, &obj); if (err) return err; - memset(&obj.vlan, 0, sizeof(obj.vlan)); + memset(vlan, 0, sizeof(*vlan)); } else { - if (obj.vlan.vid_start) + if (vlan->vid_start) return -EINVAL; - obj.vlan.vid_start = vinfo->vid; - obj.vlan.vid_end = vinfo->vid; + vlan->vid_start = vinfo->vid; + vlan->vid_end = vinfo->vid; err = f(dev, &obj); if (err) return err; - memset(&obj.vlan, 0, sizeof(obj.vlan)); + memset(vlan, 0, sizeof(*vlan)); } } @@ -613,10 +614,10 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) return NULL; if (nhsel > 0) { - if (prev_attr.ppid.id_len != attr.ppid.id_len) + if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len) return NULL; - if (memcmp(prev_attr.ppid.id, attr.ppid.id, - attr.ppid.id_len)) + if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id, + attr.u.ppid.id_len)) return NULL; } @@ -644,7 +645,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, { struct switchdev_obj fib_obj = { .id = SWITCHDEV_OBJ_IPV4_FIB, - .ipv4_fib = { + .u.ipv4_fib = { .dst = dst, .dst_len = dst_len, .fi = fi, @@ -698,7 +699,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, { struct switchdev_obj fib_obj = { .id = SWITCHDEV_OBJ_IPV4_FIB, - .ipv4_fib = { + .u.ipv4_fib = { .dst = dst, .dst_len = dst_len, .fi = fi, -- cgit v1.2.3 From e578d9c02587d57bfa7b560767c698a668a468c6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 May 2015 19:50:41 +0200 Subject: net: sched: use counter to break reclassify loops Seems all we want here is to avoid endless 'goto reclassify' loop. tc_classify_compat even resets this counter when something other than TC_ACT_RECLASSIFY is returned, so this skb-counter doesn't break hypothetical loops induced by something other than perpetual TC_ACT_RECLASSIFY return values. skb_act_clone is now identical to skb_clone, so just use that. Tested with following (bogus) filter: tc filter add dev eth0 parent ffff: \ protocol ip u32 match u32 0 0 police rate 10Kbit burst \ 64000 mtu 1500 action reclassify Acked-by: Daniel Borkmann Signed-off-by: Florian Westphal Acked-by: Alexei Starovoitov Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- Documentation/networking/tc-actions-env-rules.txt | 4 ---- include/net/sch_generic.h | 15 --------------- include/uapi/linux/pkt_cls.h | 2 +- net/sched/act_mirred.c | 2 +- net/sched/sch_api.c | 12 +++--------- 5 files changed, 5 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/Documentation/networking/tc-actions-env-rules.txt b/Documentation/networking/tc-actions-env-rules.txt index 95c71716b2e2..f37814693ad3 100644 --- a/Documentation/networking/tc-actions-env-rules.txt +++ b/Documentation/networking/tc-actions-env-rules.txt @@ -8,10 +8,6 @@ For example if your action queues a packet to be processed later, or intentionally branches by redirecting a packet, then you need to clone the packet. -There are certain fields in the skb tc_verd that need to be reset so we -avoid loops, etc. A few are generic enough that skb_act_clone() -resets them for you, so invoke skb_act_clone() rather than skb_clone(). - 2) If you munge any packet thou shalt call pskb_expand_head in the case someone else is referencing the skb. After that you "own" the skb. diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 1b0a2e88ed2b..2738f6f87908 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -739,21 +739,6 @@ static inline u32 qdisc_l2t(struct qdisc_rate_table* rtab, unsigned int pktlen) return rtab->data[slot]; } -#ifdef CONFIG_NET_CLS_ACT -static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask, - int action) -{ - struct sk_buff *n; - - n = skb_clone(skb, gfp_mask); - - if (n) { - n->tc_verd = SET_TC_VERD(n->tc_verd, 0); - } - return n; -} -#endif - struct psched_ratecfg { u64 rate_bytes_ps; /* bytes per second */ u32 mult; diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 596ffa0c7084..ffc112c8e1c2 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -44,13 +44,13 @@ bits 9,10,11: redirect counter - redirect TTL. Loop avoidance #define TC_OK2MUNGE _TC_MAKEMASK1(1) #define SET_TC_OK2MUNGE(v) ( TC_OK2MUNGE | (v & ~TC_OK2MUNGE)) #define CLR_TC_OK2MUNGE(v) ( v & ~TC_OK2MUNGE) -#endif #define S_TC_VERD _TC_MAKE32(2) #define M_TC_VERD _TC_MAKEMASK(4,S_TC_VERD) #define G_TC_VERD(x) _TC_GETVALUE(x,S_TC_VERD,M_TC_VERD) #define V_TC_VERD(x) _TC_MAKEVALUE(x,S_TC_VERD) #define SET_TC_VERD(v,n) ((V_TC_VERD(n)) | (v & ~M_TC_VERD)) +#endif #define S_TC_FROM _TC_MAKE32(6) #define M_TC_FROM _TC_MAKEMASK(2,S_TC_FROM) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 3f63ceac8e01..a42a3b257226 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -151,7 +151,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, } at = G_TC_AT(skb->tc_verd); - skb2 = skb_act_clone(skb, GFP_ATOMIC, m->tcf_action); + skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2 == NULL) goto out; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ad9eed70bc8f..0b74dc0ede9c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1816,13 +1816,8 @@ int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, continue; err = tp->classify(skb, tp, res); - if (err >= 0) { -#ifdef CONFIG_NET_CLS_ACT - if (err != TC_ACT_RECLASSIFY && skb->tc_verd) - skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0); -#endif + if (err >= 0) return err; - } } return -1; } @@ -1834,23 +1829,22 @@ int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, int err = 0; #ifdef CONFIG_NET_CLS_ACT const struct tcf_proto *otp = tp; + int limit = 0; reclassify: #endif err = tc_classify_compat(skb, tp, res); #ifdef CONFIG_NET_CLS_ACT if (err == TC_ACT_RECLASSIFY) { - u32 verd = G_TC_VERD(skb->tc_verd); tp = otp; - if (verd++ >= MAX_REC_LOOP) { + if (unlikely(limit++ >= MAX_REC_LOOP)) { net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n", tp->q->ops->id, tp->prio & 0xffff, ntohs(tp->protocol)); return TC_ACT_SHOT; } - skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); goto reclassify; } #endif -- cgit v1.2.3 From 1bd758eb1cab2fa5b71a23f9e5d3c8076f4ed650 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:07 +0200 Subject: net: change name of flow_dissector header to match the .c file name add couple of empty lines on the way. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/ethernet/cisco/enic/enic_clsf.c | 2 +- include/linux/skbuff.h | 2 +- include/net/flow_dissector.h | 68 +++++++++++++++++++++++++++++ include/net/flow_keys.h | 61 -------------------------- include/net/ip.h | 2 +- include/net/ipv6.h | 2 +- net/core/flow_dissector.c | 2 +- net/sched/cls_flow.c | 2 +- net/sched/sch_choke.c | 2 +- 10 files changed, 76 insertions(+), 69 deletions(-) create mode 100644 include/net/flow_dissector.h delete mode 100644 include/net/flow_keys.h (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a2e25de98bde..fe7c38721775 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -76,7 +76,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c index 0be6850be8a2..380f04903a36 100644 --- a/drivers/net/ethernet/cisco/enic/enic_clsf.c +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include "enic_res.h" #include "enic_clsf.h" diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c0b574a414e7..e35c2b13d434 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -34,7 +34,7 @@ #include #include #include -#include +#include /* A. Checksumming of received packets by device. * diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h new file mode 100644 index 000000000000..5e99a7b3963c --- /dev/null +++ b/include/net/flow_dissector.h @@ -0,0 +1,68 @@ +#ifndef _NET_FLOW_DISSECTOR_H +#define _NET_FLOW_DISSECTOR_H + +/* struct flow_keys: + * @src: source ip address in case of IPv4 + * For IPv6 it contains 32bit hash of src address + * @dst: destination ip address in case of IPv4 + * For IPv6 it contains 32bit hash of dst address + * @ports: port numbers of Transport header + * port16[0]: src port number + * port16[1]: dst port number + * @thoff: Transport header offset + * @n_proto: Network header protocol (eg. IPv4/IPv6) + * @ip_proto: Transport header protocol (eg. TCP/UDP) + * All the members, except thoff, are in network byte order. + */ +struct flow_keys { + /* (src,dst) must be grouped, in the same way than in IP header */ + __be32 src; + __be32 dst; + union { + __be32 ports; + __be16 port16[2]; + }; + u16 thoff; + __be16 n_proto; + u8 ip_proto; +}; + +bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, + void *data, __be16 proto, int nhoff, int hlen); + +static inline bool skb_flow_dissect(const struct sk_buff *skb, + struct flow_keys *flow) +{ + return __skb_flow_dissect(skb, flow, NULL, 0, 0, 0); +} + +__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, + void *data, int hlen_proto); + +static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, + int thoff, u8 ip_proto) +{ + return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); +} + +u32 flow_hash_from_keys(struct flow_keys *keys); + +unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len, + __be16 protocol); + +/* struct flow_keys_digest: + * + * This structure is used to hold a digest of the full flow keys. This is a + * larger "hash" of a flow to allow definitively matching specific flows where + * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so + * that it can by used in CB of skb (see sch_choke for an example). + */ +#define FLOW_KEYS_DIGEST_LEN 16 +struct flow_keys_digest { + u8 data[FLOW_KEYS_DIGEST_LEN]; +}; + +void make_flow_keys_digest(struct flow_keys_digest *digest, + const struct flow_keys *flow); + +#endif diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h deleted file mode 100644 index 6d6ef626811a..000000000000 --- a/include/net/flow_keys.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _NET_FLOW_KEYS_H -#define _NET_FLOW_KEYS_H - -/* struct flow_keys: - * @src: source ip address in case of IPv4 - * For IPv6 it contains 32bit hash of src address - * @dst: destination ip address in case of IPv4 - * For IPv6 it contains 32bit hash of dst address - * @ports: port numbers of Transport header - * port16[0]: src port number - * port16[1]: dst port number - * @thoff: Transport header offset - * @n_proto: Network header protocol (eg. IPv4/IPv6) - * @ip_proto: Transport header protocol (eg. TCP/UDP) - * All the members, except thoff, are in network byte order. - */ -struct flow_keys { - /* (src,dst) must be grouped, in the same way than in IP header */ - __be32 src; - __be32 dst; - union { - __be32 ports; - __be16 port16[2]; - }; - u16 thoff; - __be16 n_proto; - u8 ip_proto; -}; - -bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, - void *data, __be16 proto, int nhoff, int hlen); -static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) -{ - return __skb_flow_dissect(skb, flow, NULL, 0, 0, 0); -} -__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, - void *data, int hlen_proto); -static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) -{ - return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); -} -u32 flow_hash_from_keys(struct flow_keys *keys); -unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len, - __be16 protocol); - -/* struct flow_keys_digest: - * - * This structure is used to hold a digest of the full flow keys. This is a - * larger "hash" of a flow to allow definitively matching specific flows where - * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so - * that it can by used in CB of skb (see sch_choke for an example). - */ -#define FLOW_KEYS_DIGEST_LEN 16 -struct flow_keys_digest { - u8 data[FLOW_KEYS_DIGEST_LEN]; -}; - -void make_flow_keys_digest(struct flow_keys_digest *digest, - const struct flow_keys *flow); - -#endif diff --git a/include/net/ip.h b/include/net/ip.h index d14af7edd197..562eb653aebc 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -31,7 +31,7 @@ #include #include #include -#include +#include struct sock; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 53d25ef1699a..9932b86394a9 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #define SIN6_LEN_RFC2133 24 diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d3acc4dff4ae..eaa86b539221 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include /* copy saddr & daddr, possibly using 64bit load/store diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index a620c4e288a5..4c5a92298906 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index dfe3da75594c..2624e991a2d1 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include /* CHOKe stateless AQM for fair bandwidth allocation -- cgit v1.2.3 From b0a31431b4d81fb1ccc36ce64ce3fe6a0aca4031 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:08 +0200 Subject: flow_dissector: remove unused function flow_get_hlen declaration commit 56193d1bce ("net: Add function for parsing the header length out of linear ethernet frames") added this function declaration but it is defined nowhere. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 5e99a7b3963c..118ae6926a72 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -47,9 +47,6 @@ static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, u32 flow_hash_from_keys(struct flow_keys *keys); -unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len, - __be16 protocol); - /* struct flow_keys_digest: * * This structure is used to hold a digest of the full flow keys. This is a -- cgit v1.2.3 From 10b89ee43e849544eddfe34e535341fc077464ec Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:09 +0200 Subject: net: move *skb_get_poff declarations into correct header Since these functions are defined in flow_dissector.c, move header declarations from skbuff.h into flow_dissector.h Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ---- include/net/flow_dissector.h | 3 +++ net/core/filter.c | 1 + net/ethernet/eth.c | 1 + 4 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e35c2b13d434..17607ab9e7a2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3424,10 +3424,6 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, unsigned int transport_len, __sum16(*skb_chkf)(struct sk_buff *skb)); -u32 skb_get_poff(const struct sk_buff *skb); -u32 __skb_get_poff(const struct sk_buff *skb, void *data, - const struct flow_keys *keys, int hlen); - /** * skb_head_is_locked - Determine if the skb->head is locked down * @skb: skb to check diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 118ae6926a72..4570ccafe59d 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -46,6 +46,9 @@ static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, } u32 flow_hash_from_keys(struct flow_keys *keys); +u32 skb_get_poff(const struct sk_buff *skb); +u32 __skb_get_poff(const struct sk_buff *skb, void *data, + const struct flow_keys *keys, int hlen); /* struct flow_keys_digest: * diff --git a/net/core/filter.c b/net/core/filter.c index a831f193e2c7..6805717be614 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 9045e2a1108f..9332a0ab0698 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -58,6 +58,7 @@ #include #include #include +#include #include __setup("ether=", netdev_boot_setup); -- cgit v1.2.3 From 9c684b5083bc191c4b7b189c73d75587e167a474 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:11 +0200 Subject: net: move __skb_get_hash function declaration to flow_dissector.h Since the definition of the function is in flow_dissector.c, it makes sense to have the declaration in flow_dissector.h Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 - include/net/flow_dissector.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 17607ab9e7a2..ae2d1b7769d8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -918,7 +918,6 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type) skb->hash = hash; } -void __skb_get_hash(struct sk_buff *skb); static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 4570ccafe59d..e4ee761ca8c0 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -46,6 +46,7 @@ static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, } u32 flow_hash_from_keys(struct flow_keys *keys); +void __skb_get_hash(struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen); -- cgit v1.2.3 From 5605c76240aadc823e3d46ac9afde2f26fbcf019 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:12 +0200 Subject: net: move __skb_tx_hash to dev.c __skb_tx_hash function has no relation to flow_dissect so just move it to dev.c Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ include/linux/skbuff.h | 3 --- net/core/dev.c | 28 ++++++++++++++++++++++++++++ net/core/flow_dissector.c | 28 ---------------------------- 4 files changed, 31 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cd0951c1893d..d3ed01c18247 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2832,6 +2832,9 @@ static inline int netif_set_xps_queue(struct net_device *dev, } #endif +u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, + unsigned int num_tx_queues); + /* * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used * as a distribution range limit for the returned value. diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ae2d1b7769d8..b01c7fba7c17 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3299,9 +3299,6 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) return skb->queue_mapping != 0; } -u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, - unsigned int num_tx_queues); - static inline struct sec_path *skb_sec_path(struct sk_buff *skb) { #ifdef CONFIG_XFRM diff --git a/net/core/dev.c b/net/core/dev.c index 90a568a150b4..d044d2f8532b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2350,6 +2350,34 @@ void netif_device_attach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_attach); +/* + * Returns a Tx hash based on the given packet descriptor a Tx queues' number + * to be used as a distribution range. + */ +u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, + unsigned int num_tx_queues) +{ + u32 hash; + u16 qoffset = 0; + u16 qcount = num_tx_queues; + + if (skb_rx_queue_recorded(skb)) { + hash = skb_get_rx_queue(skb); + while (unlikely(hash >= num_tx_queues)) + hash -= num_tx_queues; + return hash; + } + + if (dev->num_tc) { + u8 tc = netdev_get_prio_tc_map(dev, skb->priority); + qoffset = dev->tc_to_txq[tc].offset; + qcount = dev->tc_to_txq[tc].count; + } + + return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset; +} +EXPORT_SYMBOL(__skb_tx_hash); + static void skb_warn_bad_offload(const struct sk_buff *skb) { static const netdev_features_t null_features = 0; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 0d9bc3a586ba..07ca11d06339 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -371,34 +371,6 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) } EXPORT_SYMBOL(skb_get_hash_perturb); -/* - * Returns a Tx hash based on the given packet descriptor a Tx queues' number - * to be used as a distribution range. - */ -u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, - unsigned int num_tx_queues) -{ - u32 hash; - u16 qoffset = 0; - u16 qcount = num_tx_queues; - - if (skb_rx_queue_recorded(skb)) { - hash = skb_get_rx_queue(skb); - while (unlikely(hash >= num_tx_queues)) - hash -= num_tx_queues; - return hash; - } - - if (dev->num_tc) { - u8 tc = netdev_get_prio_tc_map(dev, skb->priority); - qoffset = dev->tc_to_txq[tc].offset; - qcount = dev->tc_to_txq[tc].count; - } - - return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset; -} -EXPORT_SYMBOL(__skb_tx_hash); - u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen) { -- cgit v1.2.3 From fbff949e3bc7f3f7d9e8b3ef4855ec7138276a25 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:15 +0200 Subject: flow_dissector: introduce programable flow_dissector Introduce dissector infrastructure which allows user to specify which parts of skb he wants to dissect. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 61 ++++++++++++++++++++++++++++++++++++++++++++ net/core/flow_dissector.c | 48 ++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index e4ee761ca8c0..20239e807f77 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -1,6 +1,64 @@ #ifndef _NET_FLOW_DISSECTOR_H #define _NET_FLOW_DISSECTOR_H +/** + * struct flow_dissector_key_basic: + * @thoff: Transport header offset + * @n_proto: Network header protocol (eg. IPv4/IPv6) + * @ip_proto: Transport header protocol (eg. TCP/UDP) + */ +struct flow_dissector_key_basic { + u16 thoff; + __be16 n_proto; + u8 ip_proto; +}; + +/** + * struct flow_dissector_key_addrs: + * @src: source ip address in case of IPv4 + * For IPv6 it contains 32bit hash of src address + * @dst: destination ip address in case of IPv4 + * For IPv6 it contains 32bit hash of dst address + */ +struct flow_dissector_key_addrs { + /* (src,dst) must be grouped, in the same way than in IP header */ + __be32 src; + __be32 dst; +}; + +/** + * flow_dissector_key_tp_ports: + * @ports: port numbers of Transport header + * port16[0]: src port number + * port16[1]: dst port number + */ +struct flow_dissector_key_ports { + union { + __be32 ports; + __be16 port16[2]; + }; +}; + +enum flow_dissector_key_id { + FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ + FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_addrs */ + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */ + FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ + + FLOW_DISSECTOR_KEY_MAX, +}; + +struct flow_dissector_key { + enum flow_dissector_key_id key_id; + size_t offset; /* offset of struct flow_dissector_key_* + in target the struct */ +}; + +struct flow_dissector { + unsigned int used_keys; /* each bit repesents presence of one key id */ + unsigned short int offset[FLOW_DISSECTOR_KEY_MAX]; +}; + /* struct flow_keys: * @src: source ip address in case of IPv4 * For IPv6 it contains 32bit hash of src address @@ -27,6 +85,9 @@ struct flow_keys { u8 ip_proto; }; +void skb_flow_dissector_init(struct flow_dissector *flow_dissector, + const struct flow_dissector_key *key, + unsigned int key_count); bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, void *data, __be16 proto, int nhoff, int hlen); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d885e282908e..6883e22b8fc4 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -15,6 +16,53 @@ #include #include +static bool skb_flow_dissector_uses_key(struct flow_dissector *flow_dissector, + enum flow_dissector_key_id key_id) +{ + return flow_dissector->used_keys & (1 << key_id); +} + +static void skb_flow_dissector_set_key(struct flow_dissector *flow_dissector, + enum flow_dissector_key_id key_id) +{ + flow_dissector->used_keys |= (1 << key_id); +} + +static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector, + enum flow_dissector_key_id key_id, + void *target_container) +{ + return ((char *) target_container) + flow_dissector->offset[key_id]; +} + +void skb_flow_dissector_init(struct flow_dissector *flow_dissector, + const struct flow_dissector_key *key, + unsigned int key_count) +{ + unsigned int i; + + memset(flow_dissector, 0, sizeof(*flow_dissector)); + + for (i = 0; i < key_count; i++, key++) { + /* User should make sure that every key target offset is withing + * boundaries of unsigned short. + */ + BUG_ON(key->offset > USHRT_MAX); + BUG_ON(skb_flow_dissector_uses_key(flow_dissector, + key->key_id)); + + skb_flow_dissector_set_key(flow_dissector, key->key_id); + flow_dissector->offset[key->key_id] = key->offset; + } + + /* Ensure that the dissector always includes basic key. That way + * we are able to avoid handling lack of it in fast path. + */ + BUG_ON(!skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_BASIC)); +} +EXPORT_SYMBOL(skb_flow_dissector_init); + /* copy saddr & daddr, possibly using 64bit load/store * Equivalent to : flow->src = iph->saddr; * flow->dst = iph->daddr; -- cgit v1.2.3 From 06635a35d13d42b95422bba6633f175245cc644e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:16 +0200 Subject: flow_dissect: use programable dissector in skb_flow_dissect and friends Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 18 +-- drivers/net/ethernet/cisco/enic/enic_clsf.c | 27 ++-- drivers/net/ethernet/cisco/enic/enic_ethtool.c | 10 +- drivers/net/hyperv/netvsc_drv.c | 8 +- include/linux/skbuff.h | 4 +- include/net/flow_dissector.h | 63 ++++---- include/net/ip.h | 8 +- include/net/ipv6.h | 8 +- net/core/flow_dissector.c | 199 +++++++++++++++++-------- net/ethernet/eth.c | 6 +- net/sched/cls_flow.c | 20 +-- net/sched/sch_choke.c | 4 +- 12 files changed, 229 insertions(+), 146 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index fe7c38721775..ef26e0147050 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3051,16 +3051,16 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, int noff, proto = -1; if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) - return skb_flow_dissect(skb, fk); + return skb_flow_dissect_flow_keys(skb, fk); - fk->ports = 0; + fk->ports.ports = 0; noff = skb_network_offset(skb); if (skb->protocol == htons(ETH_P_IP)) { if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph)))) return false; iph = ip_hdr(skb); - fk->src = iph->saddr; - fk->dst = iph->daddr; + fk->addrs.src = iph->saddr; + fk->addrs.dst = iph->daddr; noff += iph->ihl << 2; if (!ip_is_fragment(iph)) proto = iph->protocol; @@ -3068,15 +3068,15 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6)))) return false; iph6 = ipv6_hdr(skb); - fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr); - fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr); + fk->addrs.src = (__force __be32)ipv6_addr_hash(&iph6->saddr); + fk->addrs.dst = (__force __be32)ipv6_addr_hash(&iph6->daddr); noff += sizeof(*iph6); proto = iph6->nexthdr; } else { return false; } if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) - fk->ports = skb_flow_get_ports(skb, noff, proto); + fk->ports.ports = skb_flow_get_ports(skb, noff, proto); return true; } @@ -3102,8 +3102,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) hash = bond_eth_hash(skb); else - hash = (__force u32)flow.ports; - hash ^= (__force u32)flow.dst ^ (__force u32)flow.src; + hash = (__force u32)flow.ports.ports; + hash ^= (__force u32)flow.addrs.dst ^ (__force u32)flow.addrs.src; hash ^= (hash >> 16); hash ^= (hash >> 8); diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c index 380f04903a36..d3d25c7e5b96 100644 --- a/drivers/net/ethernet/cisco/enic/enic_clsf.c +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c @@ -22,7 +22,7 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq) int res; struct filter data; - switch (keys->ip_proto) { + switch (keys->basic.ip_proto) { case IPPROTO_TCP: data.u.ipv4.protocol = PROTO_TCP; break; @@ -33,10 +33,10 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq) return -EPROTONOSUPPORT; }; data.type = FILTER_IPV4_5TUPLE; - data.u.ipv4.src_addr = ntohl(keys->src); - data.u.ipv4.dst_addr = ntohl(keys->dst); - data.u.ipv4.src_port = ntohs(keys->port16[0]); - data.u.ipv4.dst_port = ntohs(keys->port16[1]); + data.u.ipv4.src_addr = ntohl(keys->addrs.src); + data.u.ipv4.dst_addr = ntohl(keys->addrs.dst); + data.u.ipv4.src_port = ntohs(keys->ports.port16[0]); + data.u.ipv4.dst_port = ntohs(keys->ports.port16[1]); data.u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE; spin_lock_bh(&enic->devcmd_lock); @@ -158,11 +158,11 @@ static struct enic_rfs_fltr_node *htbl_key_search(struct hlist_head *h, struct enic_rfs_fltr_node *tpos; hlist_for_each_entry(tpos, h, node) - if (tpos->keys.src == k->src && - tpos->keys.dst == k->dst && - tpos->keys.ports == k->ports && - tpos->keys.ip_proto == k->ip_proto && - tpos->keys.n_proto == k->n_proto) + if (tpos->keys.addrs.src == k->addrs.src && + tpos->keys.addrs.dst == k->addrs.dst && + tpos->keys.ports.ports == k->ports.ports && + tpos->keys.basic.ip_proto == k->basic.ip_proto && + tpos->keys.basic.n_proto == k->basic.n_proto) return tpos; return NULL; } @@ -177,9 +177,10 @@ int enic_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, int res, i; enic = netdev_priv(dev); - res = skb_flow_dissect(skb, &keys); - if (!res || keys.n_proto != htons(ETH_P_IP) || - (keys.ip_proto != IPPROTO_TCP && keys.ip_proto != IPPROTO_UDP)) + res = skb_flow_dissect_flow_keys(skb, &keys); + if (!res || keys.basic.n_proto != htons(ETH_P_IP) || + (keys.basic.ip_proto != IPPROTO_TCP && + keys.basic.ip_proto != IPPROTO_UDP)) return -EPROTONOSUPPORT; tbl_idx = skb_get_hash_raw(skb) & ENIC_RFS_FLW_MASK; diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index 28d9ca675a27..7588f8dcc890 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -334,7 +334,7 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd) n = htbl_fltr_search(enic, (u16)fsp->location); if (!n) return -EINVAL; - switch (n->keys.ip_proto) { + switch (n->keys.basic.ip_proto) { case IPPROTO_TCP: fsp->flow_type = TCP_V4_FLOW; break; @@ -346,16 +346,16 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd) break; } - fsp->h_u.tcp_ip4_spec.ip4src = n->keys.src; + fsp->h_u.tcp_ip4_spec.ip4src = n->keys.addrs.src; fsp->m_u.tcp_ip4_spec.ip4src = (__u32)~0; - fsp->h_u.tcp_ip4_spec.ip4dst = n->keys.dst; + fsp->h_u.tcp_ip4_spec.ip4dst = n->keys.addrs.dst; fsp->m_u.tcp_ip4_spec.ip4dst = (__u32)~0; - fsp->h_u.tcp_ip4_spec.psrc = n->keys.port16[0]; + fsp->h_u.tcp_ip4_spec.psrc = n->keys.ports.port16[0]; fsp->m_u.tcp_ip4_spec.psrc = (__u16)~0; - fsp->h_u.tcp_ip4_spec.pdst = n->keys.port16[1]; + fsp->h_u.tcp_ip4_spec.pdst = n->keys.ports.port16[1]; fsp->m_u.tcp_ip4_spec.pdst = (__u16)~0; fsp->ring_cookie = n->rq_id; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 5993c7e2d723..8e5fe888a0ec 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -196,12 +196,12 @@ static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb) struct flow_keys flow; int data_len; - if (!skb_flow_dissect(skb, &flow) || - !(flow.n_proto == htons(ETH_P_IP) || - flow.n_proto == htons(ETH_P_IPV6))) + if (!skb_flow_dissect_flow_keys(skb, &flow) || + !(flow.basic.n_proto == htons(ETH_P_IP) || + flow.basic.n_proto == htons(ETH_P_IPV6))) return false; - if (flow.ip_proto == IPPROTO_TCP) + if (flow.basic.ip_proto == IPPROTO_TCP) data_len = 12; else data_len = 8; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b01c7fba7c17..f83aa6568cbf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1935,8 +1935,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb, if (skb_transport_header_was_set(skb)) return; - else if (skb_flow_dissect(skb, &keys)) - skb_set_transport_header(skb, keys.thoff); + else if (skb_flow_dissect_flow_keys(skb, &keys)) + skb_set_transport_header(skb, keys.basic.thoff); else skb_set_transport_header(skb, offset_hint); } diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 20239e807f77..0c8d406fb730 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -59,42 +59,47 @@ struct flow_dissector { unsigned short int offset[FLOW_DISSECTOR_KEY_MAX]; }; -/* struct flow_keys: - * @src: source ip address in case of IPv4 - * For IPv6 it contains 32bit hash of src address - * @dst: destination ip address in case of IPv4 - * For IPv6 it contains 32bit hash of dst address - * @ports: port numbers of Transport header - * port16[0]: src port number - * port16[1]: dst port number - * @thoff: Transport header offset - * @n_proto: Network header protocol (eg. IPv4/IPv6) - * @ip_proto: Transport header protocol (eg. TCP/UDP) - * All the members, except thoff, are in network byte order. - */ -struct flow_keys { - /* (src,dst) must be grouped, in the same way than in IP header */ - __be32 src; - __be32 dst; - union { - __be32 ports; - __be16 port16[2]; - }; - u16 thoff; - __be16 n_proto; - u8 ip_proto; -}; - void skb_flow_dissector_init(struct flow_dissector *flow_dissector, const struct flow_dissector_key *key, unsigned int key_count); -bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, + +bool __skb_flow_dissect(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, void *data, __be16 proto, int nhoff, int hlen); static inline bool skb_flow_dissect(const struct sk_buff *skb, - struct flow_keys *flow) + struct flow_dissector *flow_dissector, + void *target_container) +{ + return __skb_flow_dissect(skb, flow_dissector, target_container, + NULL, 0, 0, 0); +} + +struct flow_keys { + struct flow_dissector_key_addrs addrs; + struct flow_dissector_key_ports ports; + struct flow_dissector_key_basic basic; +}; + +extern struct flow_dissector flow_keys_dissector; +extern struct flow_dissector flow_keys_buf_dissector; + +static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, + struct flow_keys *flow) +{ + memset(flow, 0, sizeof(*flow)); + return __skb_flow_dissect(skb, &flow_keys_dissector, flow, + NULL, 0, 0, 0); +} + +static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow, + void *data, __be16 proto, + int nhoff, int hlen) { - return __skb_flow_dissect(skb, flow, NULL, 0, 0, 0); + memset(flow, 0, sizeof(*flow)); + return __skb_flow_dissect(NULL, &flow_keys_buf_dissector, flow, + data, proto, nhoff, hlen); } __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, diff --git a/include/net/ip.h b/include/net/ip.h index 562eb653aebc..b0443d4fe13f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -360,10 +360,10 @@ static inline void inet_set_txhash(struct sock *sk) struct inet_sock *inet = inet_sk(sk); struct flow_keys keys; - keys.src = inet->inet_saddr; - keys.dst = inet->inet_daddr; - keys.port16[0] = inet->inet_sport; - keys.port16[1] = inet->inet_dport; + keys.addrs.src = inet->inet_saddr; + keys.addrs.dst = inet->inet_daddr; + keys.ports.port16[0] = inet->inet_sport; + keys.ports.port16[1] = inet->inet_dport; sk->sk_txhash = flow_hash_from_keys(&keys); } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 9932b86394a9..9eed9761dfce 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -698,10 +698,10 @@ static inline void ip6_set_txhash(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); struct flow_keys keys; - keys.src = (__force __be32)ipv6_addr_hash(&np->saddr); - keys.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); - keys.port16[0] = inet->inet_sport; - keys.port16[1] = inet->inet_dport; + keys.addrs.src = (__force __be32)ipv6_addr_hash(&np->saddr); + keys.addrs.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); + keys.ports.port16[0] = inet->inet_sport; + keys.ports.port16[1] = inet->inet_dport; sk->sk_txhash = flow_hash_from_keys(&keys); } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 6883e22b8fc4..6a49acaa6651 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -63,17 +64,6 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, } EXPORT_SYMBOL(skb_flow_dissector_init); -/* copy saddr & daddr, possibly using 64bit load/store - * Equivalent to : flow->src = iph->saddr; - * flow->dst = iph->daddr; - */ -static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph) -{ - BUILD_BUG_ON(offsetof(typeof(*flow), dst) != - offsetof(typeof(*flow), src) + sizeof(flow->src)); - memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst)); -} - /** * __skb_flow_get_ports - extract the upper layer ports and return them * @skb: sk_buff to extract the ports from @@ -111,17 +101,27 @@ EXPORT_SYMBOL(__skb_flow_get_ports); /** * __skb_flow_dissect - extract the flow_keys struct and return it * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified + * @flow_dissector: list of keys to dissect + * @target_container: target structure to put dissected values into * @data: raw buffer pointer to the packet, if NULL use skb->data * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb) * @hlen: packet header length, if @data is NULL use skb_headlen(skb) * - * The function will try to retrieve the struct flow_keys from either the skbuff - * or a raw buffer specified by the rest parameters + * The function will try to retrieve individual keys into target specified + * by flow_dissector from either the skbuff or a raw buffer specified by the + * rest parameters. + * + * Caller must take care of zeroing target container memory. */ -bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, +bool __skb_flow_dissect(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, void *data, __be16 proto, int nhoff, int hlen) { + struct flow_dissector_key_basic *key_basic; + struct flow_dissector_key_addrs *key_addrs; + struct flow_dissector_key_ports *key_ports; u8 ip_proto; if (!data) { @@ -131,7 +131,12 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, hlen = skb_headlen(skb); } - memset(flow, 0, sizeof(*flow)); + /* It is ensured by skb_flow_dissector_init() that basic key will + * be always present. + */ + key_basic = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_BASIC, + target_container); again: switch (proto) { @@ -148,14 +153,13 @@ ip: if (ip_is_fragment(iph)) ip_proto = 0; - /* skip the address processing if skb is NULL. The assumption - * here is that if there is no skb we are not looking for flow - * info but lengths and protocols. - */ - if (!skb) + if (!skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS)) break; - - iph_to_flow_copy_addrs(flow, iph); + key_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + target_container); + memcpy(key_addrs, &iph->saddr, sizeof(*key_addrs)); break; } case htons(ETH_P_IPV6): { @@ -171,12 +175,15 @@ ipv6: ip_proto = iph->nexthdr; nhoff += sizeof(struct ipv6hdr); - /* see comment above in IPv4 section */ - if (!skb) + if (!skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) break; + key_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, + target_container); - flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); - flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); + key_addrs->src = (__force __be32)ipv6_addr_hash(&iph->saddr); + key_addrs->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); flow_label = ip6_flowlabel(iph); if (flow_label) { @@ -184,10 +191,18 @@ ipv6: * use that to represent the ports without any * further dissection. */ - flow->n_proto = proto; - flow->ip_proto = ip_proto; - flow->ports = flow_label; - flow->thoff = (u16)nhoff; + + key_basic->n_proto = proto; + key_basic->ip_proto = ip_proto; + key_basic->thoff = (u16)nhoff; + + if (!skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS)) + break; + key_ports = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS, + target_container); + key_ports->ports = flow_label; return true; } @@ -234,14 +249,22 @@ ipv6: hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) return false; - flow->src = hdr->srcnode; - flow->dst = 0; - flow->n_proto = proto; - flow->thoff = (u16)nhoff; + key_basic->n_proto = proto; + key_basic->thoff = (u16)nhoff; + + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) { + return true; + key_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, + target_container); + key_addrs->src = hdr->srcnode; + key_addrs->dst = 0; + } return true; } case htons(ETH_P_FCOE): - flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN); + key_basic->thoff = (u16)(nhoff + FCOE_HEADER_LEN); /* fall through */ default: return false; @@ -296,14 +319,24 @@ ipv6: break; } - flow->n_proto = proto; - flow->ip_proto = ip_proto; - flow->thoff = (u16) nhoff; - - /* unless skb is set we don't need to record port info */ - if (skb) - flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, - data, hlen); + /* It is ensured by skb_flow_dissector_init() that basic key will + * be always present. + */ + key_basic = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_BASIC, + target_container); + key_basic->n_proto = proto; + key_basic->ip_proto = ip_proto; + key_basic->thoff = (u16) nhoff; + + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS)) { + key_ports = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS, + target_container); + key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, + data, hlen); + } return true; } @@ -325,16 +358,16 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) u32 hash; /* get a consistent hash (same value on both flow directions) */ - if (((__force u32)keys->dst < (__force u32)keys->src) || - (((__force u32)keys->dst == (__force u32)keys->src) && - ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) { - swap(keys->dst, keys->src); - swap(keys->port16[0], keys->port16[1]); + if (((__force u32)keys->addrs.dst < (__force u32)keys->addrs.src) || + (((__force u32)keys->addrs.dst == (__force u32)keys->addrs.src) && + ((__force u16)keys->ports.port16[1] < (__force u16)keys->ports.port16[0]))) { + swap(keys->addrs.dst, keys->addrs.src); + swap(keys->ports.port16[0], keys->ports.port16[1]); } - hash = __flow_hash_3words((__force u32)keys->dst, - (__force u32)keys->src, - (__force u32)keys->ports, + hash = __flow_hash_3words((__force u32)keys->addrs.dst, + (__force u32)keys->addrs.src, + (__force u32)keys->ports.ports, keyval); if (!hash) hash = 1; @@ -352,7 +385,7 @@ EXPORT_SYMBOL(flow_hash_from_keys); static inline u32 ___skb_get_hash(const struct sk_buff *skb, struct flow_keys *keys, u32 keyval) { - if (!skb_flow_dissect(skb, keys)) + if (!skb_flow_dissect_flow_keys(skb, keys)) return 0; return __flow_hash_from_keys(keys, keyval); @@ -377,11 +410,11 @@ void make_flow_keys_digest(struct flow_keys_digest *digest, memset(digest, 0, sizeof(*digest)); - data->n_proto = flow->n_proto; - data->ip_proto = flow->ip_proto; - data->ports = flow->ports; - data->src = flow->src; - data->dst = flow->dst; + data->n_proto = flow->basic.n_proto; + data->ip_proto = flow->basic.ip_proto; + data->ports = flow->ports.ports; + data->src = flow->addrs.src; + data->dst = flow->addrs.dst; } EXPORT_SYMBOL(make_flow_keys_digest); @@ -404,7 +437,7 @@ void __skb_get_hash(struct sk_buff *skb) hash = ___skb_get_hash(skb, &keys, hashrnd); if (!hash) return; - if (keys.ports) + if (keys.ports.ports) skb->l4_hash = 1; skb->sw_hash = 1; skb->hash = hash; @@ -422,9 +455,9 @@ EXPORT_SYMBOL(skb_get_hash_perturb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen) { - u32 poff = keys->thoff; + u32 poff = keys->basic.thoff; - switch (keys->ip_proto) { + switch (keys->basic.ip_proto) { case IPPROTO_TCP: { /* access doff as u8 to avoid unaligned access */ const u8 *doff; @@ -478,8 +511,52 @@ u32 skb_get_poff(const struct sk_buff *skb) { struct flow_keys keys; - if (!skb_flow_dissect(skb, &keys)) + if (!skb_flow_dissect_flow_keys(skb, &keys)) return 0; return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); } + +static const struct flow_dissector_key flow_keys_dissector_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_BASIC, + .offset = offsetof(struct flow_keys, basic), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, + .offset = offsetof(struct flow_keys, addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, + .offset = offsetof(struct flow_keys, addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_PORTS, + .offset = offsetof(struct flow_keys, ports), + }, +}; + +static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_BASIC, + .offset = offsetof(struct flow_keys, basic), + }, +}; + +struct flow_dissector flow_keys_dissector __read_mostly; +EXPORT_SYMBOL(flow_keys_dissector); + +struct flow_dissector flow_keys_buf_dissector __read_mostly; + +static int __init init_default_flow_dissectors(void) +{ + skb_flow_dissector_init(&flow_keys_dissector, + flow_keys_dissector_keys, + ARRAY_SIZE(flow_keys_dissector_keys)); + skb_flow_dissector_init(&flow_keys_buf_dissector, + flow_keys_buf_dissector_keys, + ARRAY_SIZE(flow_keys_buf_dissector_keys)); + return 0; +} + +late_initcall_sync(init_default_flow_dissectors); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 9332a0ab0698..c3325bd2f3fb 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -131,9 +131,9 @@ u32 eth_get_headlen(void *data, unsigned int len) return len; /* parse any remaining L2/L3 headers, check for L4 */ - if (!__skb_flow_dissect(NULL, &keys, data, - eth->h_proto, sizeof(*eth), len)) - return max_t(u32, keys.thoff, sizeof(*eth)); + if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto, + sizeof(*eth), len)) + return max_t(u32, keys.basic.thoff, sizeof(*eth)); /* parse for any L4 headers */ return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 4c5a92298906..4b3e3e30bf4d 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -68,35 +68,35 @@ static inline u32 addr_fold(void *addr) static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow) { - if (flow->src) - return ntohl(flow->src); + if (flow->addrs.src) + return ntohl(flow->addrs.src); return addr_fold(skb->sk); } static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - if (flow->dst) - return ntohl(flow->dst); + if (flow->addrs.dst) + return ntohl(flow->addrs.dst); return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) { - return flow->ip_proto; + return flow->basic.ip_proto; } static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) { - if (flow->ports) - return ntohs(flow->port16[0]); + if (flow->ports.ports) + return ntohs(flow->ports.port16[0]); return addr_fold(skb->sk); } static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - if (flow->ports) - return ntohs(flow->port16[1]); + if (flow->ports.ports) + return ntohs(flow->ports.port16[1]); return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } @@ -295,7 +295,7 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp, keymask = f->keymask; if (keymask & FLOW_KEYS_NEEDED) - skb_flow_dissect(skb, &flow_keys); + skb_flow_dissect_flow_keys(skb, &flow_keys); for (n = 0; n < f->nkeys; n++) { key = ffs(keymask) - 1; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 2624e991a2d1..93d5742dc7e0 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -170,13 +170,13 @@ static bool choke_match_flow(struct sk_buff *skb1, if (!choke_skb_cb(skb1)->keys_valid) { choke_skb_cb(skb1)->keys_valid = 1; - skb_flow_dissect(skb1, &temp); + skb_flow_dissect_flow_keys(skb1, &temp); make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp); } if (!choke_skb_cb(skb2)->keys_valid) { choke_skb_cb(skb2)->keys_valid = 1; - skb_flow_dissect(skb2, &temp); + skb_flow_dissect_flow_keys(skb2, &temp); make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp); } -- cgit v1.2.3 From c3f8eaeb6ea501bd0e2e424f5dfecf952b12a06f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:17 +0200 Subject: flow_dissector: add missing header includes Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 0c8d406fb730..1add8918a155 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -1,6 +1,9 @@ #ifndef _NET_FLOW_DISSECTOR_H #define _NET_FLOW_DISSECTOR_H +#include +#include + /** * struct flow_dissector_key_basic: * @thoff: Transport header offset -- cgit v1.2.3 From b924933cbbfbdcaa2831a39780c116ec6e48c397 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:18 +0200 Subject: flow_dissector: introduce support for ipv6 addressses So far, only hashes made out of ipv6 addresses could be dissected. This patch introduces support for dissection of full ipv6 addresses. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 13 +++++++++++++ net/core/flow_dissector.c | 29 +++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 1add8918a155..586b12306a52 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -3,6 +3,7 @@ #include #include +#include /** * struct flow_dissector_key_basic: @@ -42,11 +43,23 @@ struct flow_dissector_key_ports { }; }; +/** + * struct flow_dissector_key_ipv6_addrs: + * @src: source ip address + * @dst: destination ip address + */ +struct flow_dissector_key_ipv6_addrs { + /* (src,dst) must be grouped, in the same way than in IP header */ + struct in6_addr src; + struct in6_addr dst; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_addrs */ FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ + FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 6a49acaa6651..1b95d5ccc9d6 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -175,16 +175,29 @@ ipv6: ip_proto = iph->nexthdr; nhoff += sizeof(struct ipv6hdr); - if (!skb_flow_dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) - break; - key_addrs = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, - target_container); + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) { + key_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, + target_container); - key_addrs->src = (__force __be32)ipv6_addr_hash(&iph->saddr); - key_addrs->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); + key_addrs->src = (__force __be32)ipv6_addr_hash(&iph->saddr); + key_addrs->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); + goto flow_label; + } + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { + struct flow_dissector_key_ipv6_addrs *key_ipv6_addrs; + key_ipv6_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + target_container); + + memcpy(key_ipv6_addrs, &iph->saddr, sizeof(*key_ipv6_addrs)); + goto flow_label; + } + break; +flow_label: flow_label = ip6_flowlabel(iph); if (flow_label) { /* Awesome, IPv6 packet has a flow label so we can -- cgit v1.2.3 From 67a900cc0436d74e7ff89042371760def087680d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:19 +0200 Subject: flow_dissector: introduce support for Ethernet addresses Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 13 +++++++++++++ net/core/flow_dissector.c | 12 ++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 586b12306a52..5eac9870689c 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -4,6 +4,7 @@ #include #include #include +#include /** * struct flow_dissector_key_basic: @@ -54,12 +55,24 @@ struct flow_dissector_key_ipv6_addrs { struct in6_addr dst; }; +/** + * struct flow_dissector_key_eth_addrs: + * @src: source Ethernet address + * @dst: destination Ethernet address + */ +struct flow_dissector_key_eth_addrs { + /* (dst,src) must be grouped, in the same way than in ETH header */ + unsigned char dst[ETH_ALEN]; + unsigned char src[ETH_ALEN]; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_addrs */ FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ + FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 1b95d5ccc9d6..7a0b391114a5 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -138,6 +139,17 @@ bool __skb_flow_dissect(const struct sk_buff *skb, FLOW_DISSECTOR_KEY_BASIC, target_container); + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct ethhdr *eth = eth_hdr(skb); + struct flow_dissector_key_eth_addrs *key_eth_addrs; + + key_eth_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + target_container); + memcpy(key_eth_addrs, ð->h_dest, sizeof(*key_eth_addrs)); + } + again: switch (proto) { case htons(ETH_P_IP): { -- cgit v1.2.3 From 59346afe7a5548ab3e9730aeff33993faa76abbe Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:20 +0200 Subject: flow_dissector: change port array into src, dst tuple Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_clsf.c | 4 ++-- drivers/net/ethernet/cisco/enic/enic_ethtool.c | 4 ++-- include/net/flow_dissector.h | 9 ++++++--- include/net/ip.h | 4 ++-- include/net/ipv6.h | 4 ++-- net/core/flow_dissector.c | 4 ++-- net/sched/cls_flow.c | 4 ++-- 7 files changed, 18 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c index d3d25c7e5b96..6739ebc08c47 100644 --- a/drivers/net/ethernet/cisco/enic/enic_clsf.c +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c @@ -35,8 +35,8 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq) data.type = FILTER_IPV4_5TUPLE; data.u.ipv4.src_addr = ntohl(keys->addrs.src); data.u.ipv4.dst_addr = ntohl(keys->addrs.dst); - data.u.ipv4.src_port = ntohs(keys->ports.port16[0]); - data.u.ipv4.dst_port = ntohs(keys->ports.port16[1]); + data.u.ipv4.src_port = ntohs(keys->ports.src); + data.u.ipv4.dst_port = ntohs(keys->ports.dst); data.u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE; spin_lock_bh(&enic->devcmd_lock); diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index 7588f8dcc890..117c0968dd0b 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -352,10 +352,10 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd) fsp->h_u.tcp_ip4_spec.ip4dst = n->keys.addrs.dst; fsp->m_u.tcp_ip4_spec.ip4dst = (__u32)~0; - fsp->h_u.tcp_ip4_spec.psrc = n->keys.ports.port16[0]; + fsp->h_u.tcp_ip4_spec.psrc = n->keys.ports.src; fsp->m_u.tcp_ip4_spec.psrc = (__u16)~0; - fsp->h_u.tcp_ip4_spec.pdst = n->keys.ports.port16[1]; + fsp->h_u.tcp_ip4_spec.pdst = n->keys.ports.dst; fsp->m_u.tcp_ip4_spec.pdst = (__u16)~0; fsp->ring_cookie = n->rq_id; diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 5eac9870689c..bac9c1421f58 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -34,13 +34,16 @@ struct flow_dissector_key_addrs { /** * flow_dissector_key_tp_ports: * @ports: port numbers of Transport header - * port16[0]: src port number - * port16[1]: dst port number + * src: source port number + * dst: destination port number */ struct flow_dissector_key_ports { union { __be32 ports; - __be16 port16[2]; + struct { + __be16 src; + __be16 dst; + }; }; }; diff --git a/include/net/ip.h b/include/net/ip.h index b0443d4fe13f..0ed6d768e606 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -362,8 +362,8 @@ static inline void inet_set_txhash(struct sock *sk) keys.addrs.src = inet->inet_saddr; keys.addrs.dst = inet->inet_daddr; - keys.ports.port16[0] = inet->inet_sport; - keys.ports.port16[1] = inet->inet_dport; + keys.ports.src = inet->inet_sport; + keys.ports.dst = inet->inet_dport; sk->sk_txhash = flow_hash_from_keys(&keys); } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 9eed9761dfce..aab8190d16e8 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -700,8 +700,8 @@ static inline void ip6_set_txhash(struct sock *sk) keys.addrs.src = (__force __be32)ipv6_addr_hash(&np->saddr); keys.addrs.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); - keys.ports.port16[0] = inet->inet_sport; - keys.ports.port16[1] = inet->inet_dport; + keys.ports.src = inet->inet_sport; + keys.ports.dst = inet->inet_dport; sk->sk_txhash = flow_hash_from_keys(&keys); } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 7a0b391114a5..204d09c42510 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -385,9 +385,9 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) /* get a consistent hash (same value on both flow directions) */ if (((__force u32)keys->addrs.dst < (__force u32)keys->addrs.src) || (((__force u32)keys->addrs.dst == (__force u32)keys->addrs.src) && - ((__force u16)keys->ports.port16[1] < (__force u16)keys->ports.port16[0]))) { + ((__force u16)keys->ports.dst < (__force u16)keys->ports.src))) { swap(keys->addrs.dst, keys->addrs.src); - swap(keys->ports.port16[0], keys->ports.port16[1]); + swap(keys->ports.src, keys->ports.dst); } hash = __flow_hash_3words((__force u32)keys->addrs.dst, diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 4b3e3e30bf4d..b4359924846c 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -88,7 +88,7 @@ static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flo static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) { if (flow->ports.ports) - return ntohs(flow->ports.port16[0]); + return ntohs(flow->ports.src); return addr_fold(skb->sk); } @@ -96,7 +96,7 @@ static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) { if (flow->ports.ports) - return ntohs(flow->ports.port16[1]); + return ntohs(flow->ports.dst); return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } -- cgit v1.2.3 From 77b9900ef53ae047e36a37d13a2aa33bb2d60641 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:21 +0200 Subject: tc: introduce Flower classifier This patch introduces a flow-based filter. So far, the very essential packet fields are supported. This patch is only the first step. There is a lot of potential performance improvements possible to implement. Also a lot of features are missing now. They will be addressed in follow-up patches. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 30 ++ net/sched/Kconfig | 10 + net/sched/Makefile | 1 + net/sched/cls_flower.c | 688 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 729 insertions(+) create mode 100644 net/sched/cls_flower.c (limited to 'include') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index ffc112c8e1c2..39fb53d67b11 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -409,6 +409,36 @@ enum { #define TCA_BPF_MAX (__TCA_BPF_MAX - 1) +/* Flower classifier */ + +enum { + TCA_FLOWER_UNSPEC, + TCA_FLOWER_CLASSID, + TCA_FLOWER_INDEV, + TCA_FLOWER_ACT, + TCA_FLOWER_KEY_ETH_DST, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_DST_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_TYPE, /* be16 */ + TCA_FLOWER_KEY_IP_PROTO, /* u8 */ + TCA_FLOWER_KEY_IPV4_SRC, /* be32 */ + TCA_FLOWER_KEY_IPV4_SRC_MASK, /* be32 */ + TCA_FLOWER_KEY_IPV4_DST, /* be32 */ + TCA_FLOWER_KEY_IPV4_DST_MASK, /* be32 */ + TCA_FLOWER_KEY_IPV6_SRC, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_SRC_MASK, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST_MASK, /* struct in6_addr */ + TCA_FLOWER_KEY_TCP_SRC, /* be16 */ + TCA_FLOWER_KEY_TCP_DST, /* be16 */ + TCA_FLOWER_KEY_UDP_SRC, /* be16 */ + TCA_FLOWER_KEY_UDP_DST, /* be16 */ + __TCA_FLOWER_MAX, +}; + +#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1) + /* Extended Matches */ struct tcf_ematch_tree_hdr { diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 2274e723a3df..5fd1c2f487d2 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -477,6 +477,16 @@ config NET_CLS_BPF To compile this code as a module, choose M here: the module will be called cls_bpf. +config NET_CLS_FLOWER + tristate "Flower classifier" + select NET_CLS + ---help--- + If you say Y here, you will be able to classify packets based on + a configurable combination of packet keys and masks. + + To compile this code as a module, choose M here: the module will + be called cls_flower. + config NET_EMATCH bool "Extended Matches" select NET_CLS diff --git a/net/sched/Makefile b/net/sched/Makefile index 7ca7f4c1b8c2..690c1689e090 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o obj-$(CONFIG_NET_CLS_BPF) += cls_bpf.o +obj-$(CONFIG_NET_CLS_FLOWER) += cls_flower.o obj-$(CONFIG_NET_EMATCH) += ematch.o obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c new file mode 100644 index 000000000000..9bc654c764cd --- /dev/null +++ b/net/sched/cls_flower.c @@ -0,0 +1,688 @@ +/* + * net/sched/cls_flower.c Flower classifier + * + * Copyright (c) 2015 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +struct fl_flow_key { + int indev_ifindex; + struct flow_dissector_key_basic basic; + struct flow_dissector_key_eth_addrs eth; + union { + struct flow_dissector_key_addrs ipv4; + struct flow_dissector_key_ipv6_addrs ipv6; + }; + struct flow_dissector_key_ports tp; +} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ + +struct fl_flow_mask_range { + unsigned short int start; + unsigned short int end; +}; + +struct fl_flow_mask { + struct fl_flow_key key; + struct fl_flow_mask_range range; + struct rcu_head rcu; +}; + +struct cls_fl_head { + struct rhashtable ht; + struct fl_flow_mask mask; + struct flow_dissector dissector; + u32 hgen; + bool mask_assigned; + struct list_head filters; + struct rhashtable_params ht_params; + struct rcu_head rcu; +}; + +struct cls_fl_filter { + struct rhash_head ht_node; + struct fl_flow_key mkey; + struct tcf_exts exts; + struct tcf_result res; + struct fl_flow_key key; + struct list_head list; + u32 handle; + struct rcu_head rcu; +}; + +static unsigned short int fl_mask_range(const struct fl_flow_mask *mask) +{ + return mask->range.end - mask->range.start; +} + +static void fl_mask_update_range(struct fl_flow_mask *mask) +{ + const u8 *bytes = (const u8 *) &mask->key; + size_t size = sizeof(mask->key); + size_t i, first = 0, last = size - 1; + + for (i = 0; i < sizeof(mask->key); i++) { + if (bytes[i]) { + if (!first && i) + first = i; + last = i; + } + } + mask->range.start = rounddown(first, sizeof(long)); + mask->range.end = roundup(last + 1, sizeof(long)); +} + +static void *fl_key_get_start(struct fl_flow_key *key, + const struct fl_flow_mask *mask) +{ + return (u8 *) key + mask->range.start; +} + +static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key, + struct fl_flow_mask *mask) +{ + const long *lkey = fl_key_get_start(key, mask); + const long *lmask = fl_key_get_start(&mask->key, mask); + long *lmkey = fl_key_get_start(mkey, mask); + int i; + + for (i = 0; i < fl_mask_range(mask); i += sizeof(long)) + *lmkey++ = *lkey++ & *lmask++; +} + +static void fl_clear_masked_range(struct fl_flow_key *key, + struct fl_flow_mask *mask) +{ + memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask)); +} + +static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res) +{ + struct cls_fl_head *head = rcu_dereference_bh(tp->root); + struct cls_fl_filter *f; + struct fl_flow_key skb_key; + struct fl_flow_key skb_mkey; + + fl_clear_masked_range(&skb_key, &head->mask); + skb_key.indev_ifindex = skb->skb_iif; + /* skb_flow_dissect() does not set n_proto in case an unknown protocol, + * so do it rather here. + */ + skb_key.basic.n_proto = skb->protocol; + skb_flow_dissect(skb, &head->dissector, &skb_key); + + fl_set_masked_key(&skb_mkey, &skb_key, &head->mask); + + f = rhashtable_lookup_fast(&head->ht, + fl_key_get_start(&skb_mkey, &head->mask), + head->ht_params); + if (f) { + *res = f->res; + return tcf_exts_exec(skb, &f->exts, res); + } + return -1; +} + +static int fl_init(struct tcf_proto *tp) +{ + struct cls_fl_head *head; + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (!head) + return -ENOBUFS; + + INIT_LIST_HEAD_RCU(&head->filters); + rcu_assign_pointer(tp->root, head); + + return 0; +} + +static void fl_destroy_filter(struct rcu_head *head) +{ + struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu); + + tcf_exts_destroy(&f->exts); + kfree(f); +} + +static bool fl_destroy(struct tcf_proto *tp, bool force) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f, *next; + + if (!force && !list_empty(&head->filters)) + return false; + + list_for_each_entry_safe(f, next, &head->filters, list) { + list_del_rcu(&f->list); + call_rcu(&f->rcu, fl_destroy_filter); + } + RCU_INIT_POINTER(tp->root, NULL); + if (head->mask_assigned) + rhashtable_destroy(&head->ht); + kfree_rcu(head, rcu); + return true; +} + +static unsigned long fl_get(struct tcf_proto *tp, u32 handle) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f; + + list_for_each_entry(f, &head->filters, list) + if (f->handle == handle) + return (unsigned long) f; + return 0; +} + +static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { + [TCA_FLOWER_UNSPEC] = { .type = NLA_UNSPEC }, + [TCA_FLOWER_CLASSID] = { .type = NLA_U32 }, + [TCA_FLOWER_INDEV] = { .type = NLA_STRING, + .len = IFNAMSIZ }, + [TCA_FLOWER_KEY_ETH_DST] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ETH_DST_MASK] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ETH_SRC] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ETH_SRC_MASK] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ETH_TYPE] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_IP_PROTO] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_IPV4_SRC] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_IPV4_DST] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_IPV6_DST] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 }, +}; + +static void fl_set_key_val(struct nlattr **tb, + void *val, int val_type, + void *mask, int mask_type, int len) +{ + if (!tb[val_type]) + return; + memcpy(val, nla_data(tb[val_type]), len); + if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type]) + memset(mask, 0xff, len); + else + memcpy(mask, nla_data(tb[mask_type]), len); +} + +static int fl_set_key(struct net *net, struct nlattr **tb, + struct fl_flow_key *key, struct fl_flow_key *mask) +{ + int err; + + if (tb[TCA_FLOWER_INDEV]) { + err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]); + if (err < 0) + return err; + key->indev_ifindex = err; + mask->indev_ifindex = 0xffffffff; + } + + fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST, + mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK, + sizeof(key->eth.dst)); + fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC, + mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK, + sizeof(key->eth.src)); + fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE, + &mask->basic.n_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.n_proto)); + if (key->basic.n_proto == htons(ETH_P_IP) || + key->basic.n_proto == htons(ETH_P_IPV6)) { + fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, + &mask->basic.ip_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.ip_proto)); + } + if (key->basic.n_proto == htons(ETH_P_IP)) { + fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC, + &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK, + sizeof(key->ipv4.src)); + fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST, + &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK, + sizeof(key->ipv4.dst)); + } else if (key->basic.n_proto == htons(ETH_P_IPV6)) { + fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC, + &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK, + sizeof(key->ipv6.src)); + fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST, + &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK, + sizeof(key->ipv6.dst)); + } + if (key->basic.ip_proto == IPPROTO_TCP) { + fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, + &mask->tp.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp.src)); + fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, + &mask->tp.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp.dst)); + } else if (key->basic.ip_proto == IPPROTO_UDP) { + fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, + &mask->tp.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp.src)); + fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, + &mask->tp.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp.dst)); + } + + return 0; +} + +static bool fl_mask_eq(struct fl_flow_mask *mask1, + struct fl_flow_mask *mask2) +{ + const long *lmask1 = fl_key_get_start(&mask1->key, mask1); + const long *lmask2 = fl_key_get_start(&mask2->key, mask2); + + return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) && + !memcmp(lmask1, lmask2, fl_mask_range(mask1)); +} + +static const struct rhashtable_params fl_ht_params = { + .key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */ + .head_offset = offsetof(struct cls_fl_filter, ht_node), + .automatic_shrinking = true, +}; + +static int fl_init_hashtable(struct cls_fl_head *head, + struct fl_flow_mask *mask) +{ + head->ht_params = fl_ht_params; + head->ht_params.key_len = fl_mask_range(mask); + head->ht_params.key_offset += mask->range.start; + + return rhashtable_init(&head->ht, &head->ht_params); +} + +#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member) +#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member)) +#define FL_KEY_MEMBER_END_OFFSET(member) \ + (FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member)) + +#define FL_KEY_IN_RANGE(mask, member) \ + (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end && \ + FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start) + +#define FL_KEY_SET(keys, cnt, id, member) \ + do { \ + keys[cnt].key_id = id; \ + keys[cnt].offset = FL_KEY_MEMBER_OFFSET(member); \ + cnt++; \ + } while(0); + +#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member) \ + do { \ + if (FL_KEY_IN_RANGE(mask, member)) \ + FL_KEY_SET(keys, cnt, id, member); \ + } while(0); + +static void fl_init_dissector(struct cls_fl_head *head, + struct fl_flow_mask *mask) +{ + struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX]; + size_t cnt = 0; + + FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic); + FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, + FLOW_DISSECTOR_KEY_ETH_ADDRS, eth); + FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); + FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); + FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, + FLOW_DISSECTOR_KEY_PORTS, tp); + + skb_flow_dissector_init(&head->dissector, keys, cnt); +} + +static int fl_check_assign_mask(struct cls_fl_head *head, + struct fl_flow_mask *mask) +{ + int err; + + if (head->mask_assigned) { + if (!fl_mask_eq(&head->mask, mask)) + return -EINVAL; + else + return 0; + } + + /* Mask is not assigned yet. So assign it and init hashtable + * according to that. + */ + err = fl_init_hashtable(head, mask); + if (err) + return err; + memcpy(&head->mask, mask, sizeof(head->mask)); + head->mask_assigned = true; + + fl_init_dissector(head, mask); + + return 0; +} + +static int fl_set_parms(struct net *net, struct tcf_proto *tp, + struct cls_fl_filter *f, struct fl_flow_mask *mask, + unsigned long base, struct nlattr **tb, + struct nlattr *est, bool ovr) +{ + struct tcf_exts e; + int err; + + tcf_exts_init(&e, TCA_FLOWER_ACT, 0); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + return err; + + if (tb[TCA_FLOWER_CLASSID]) { + f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]); + tcf_bind_filter(tp, &f->res, base); + } + + err = fl_set_key(net, tb, &f->key, &mask->key); + if (err) + goto errout; + + fl_mask_update_range(mask); + fl_set_masked_key(&f->mkey, &f->key, mask); + + tcf_exts_change(tp, &f->exts, &e); + + return 0; +errout: + tcf_exts_destroy(&e); + return err; +} + +static u32 fl_grab_new_handle(struct tcf_proto *tp, + struct cls_fl_head *head) +{ + unsigned int i = 0x80000000; + u32 handle; + + do { + if (++head->hgen == 0x7FFFFFFF) + head->hgen = 1; + } while (--i > 0 && fl_get(tp, head->hgen)); + + if (unlikely(i == 0)) { + pr_err("Insufficient number of handles\n"); + handle = 0; + } else { + handle = head->hgen; + } + + return handle; +} + +static int fl_change(struct net *net, struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, + u32 handle, struct nlattr **tca, + unsigned long *arg, bool ovr) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg; + struct cls_fl_filter *fnew; + struct nlattr *tb[TCA_FLOWER_MAX + 1]; + struct fl_flow_mask mask = {}; + int err; + + if (!tca[TCA_OPTIONS]) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy); + if (err < 0) + return err; + + if (fold && handle && fold->handle != handle) + return -EINVAL; + + fnew = kzalloc(sizeof(*fnew), GFP_KERNEL); + if (!fnew) + return -ENOBUFS; + + tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0); + + if (!handle) { + handle = fl_grab_new_handle(tp, head); + if (!handle) { + err = -EINVAL; + goto errout; + } + } + fnew->handle = handle; + + err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr); + if (err) + goto errout; + + err = fl_check_assign_mask(head, &mask); + if (err) + goto errout; + + err = rhashtable_insert_fast(&head->ht, &fnew->ht_node, + head->ht_params); + if (err) + goto errout; + if (fold) + rhashtable_remove_fast(&head->ht, &fold->ht_node, + head->ht_params); + + *arg = (unsigned long) fnew; + + if (fold) { + list_replace_rcu(&fnew->list, &fold->list); + tcf_unbind_filter(tp, &fold->res); + call_rcu(&fold->rcu, fl_destroy_filter); + } else { + list_add_tail_rcu(&fnew->list, &head->filters); + } + + return 0; + +errout: + kfree(fnew); + return err; +} + +static int fl_delete(struct tcf_proto *tp, unsigned long arg) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f = (struct cls_fl_filter *) arg; + + rhashtable_remove_fast(&head->ht, &f->ht_node, + head->ht_params); + list_del_rcu(&f->list); + tcf_unbind_filter(tp, &f->res); + call_rcu(&f->rcu, fl_destroy_filter); + return 0; +} + +static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f; + + list_for_each_entry_rcu(f, &head->filters, list) { + if (arg->count < arg->skip) + goto skip; + if (arg->fn(tp, (unsigned long) f, arg) < 0) { + arg->stop = 1; + break; + } +skip: + arg->count++; + } +} + +static int fl_dump_key_val(struct sk_buff *skb, + void *val, int val_type, + void *mask, int mask_type, int len) +{ + int err; + + if (!memchr_inv(mask, 0, len)) + return 0; + err = nla_put(skb, val_type, len, val); + if (err) + return err; + if (mask_type != TCA_FLOWER_UNSPEC) { + err = nla_put(skb, mask_type, len, mask); + if (err) + return err; + } + return 0; +} + +static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, + struct sk_buff *skb, struct tcmsg *t) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f = (struct cls_fl_filter *) fh; + struct nlattr *nest; + struct fl_flow_key *key, *mask; + + if (!f) + return skb->len; + + t->tcm_handle = f->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (!nest) + goto nla_put_failure; + + if (f->res.classid && + nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid)) + goto nla_put_failure; + + key = &f->key; + mask = &head->mask.key; + + if (mask->indev_ifindex) { + struct net_device *dev; + + dev = __dev_get_by_index(net, key->indev_ifindex); + if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name)) + goto nla_put_failure; + } + + if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST, + mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK, + sizeof(key->eth.dst)) || + fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC, + mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK, + sizeof(key->eth.src)) || + fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE, + &mask->basic.n_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.n_proto))) + goto nla_put_failure; + if ((key->basic.n_proto == htons(ETH_P_IP) || + key->basic.n_proto == htons(ETH_P_IPV6)) && + fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, + &mask->basic.ip_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.ip_proto))) + goto nla_put_failure; + + if (key->basic.n_proto == htons(ETH_P_IP) && + (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC, + &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK, + sizeof(key->ipv4.src)) || + fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST, + &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK, + sizeof(key->ipv4.dst)))) + goto nla_put_failure; + else if (key->basic.n_proto == htons(ETH_P_IPV6) && + (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC, + &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK, + sizeof(key->ipv6.src)) || + fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST, + &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK, + sizeof(key->ipv6.dst)))) + goto nla_put_failure; + + if (key->basic.ip_proto == IPPROTO_TCP && + (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, + &mask->tp.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp.src)) || + fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, + &mask->tp.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp.dst)))) + goto nla_put_failure; + else if (key->basic.ip_proto == IPPROTO_UDP && + (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, + &mask->tp.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp.src)) || + fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, + &mask->tp.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp.dst)))) + goto nla_put_failure; + + if (tcf_exts_dump(skb, &f->exts)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + + if (tcf_exts_dump_stats(skb, &f->exts) < 0) + goto nla_put_failure; + + return skb->len; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -1; +} + +static struct tcf_proto_ops cls_fl_ops __read_mostly = { + .kind = "flower", + .classify = fl_classify, + .init = fl_init, + .destroy = fl_destroy, + .get = fl_get, + .change = fl_change, + .delete = fl_delete, + .walk = fl_walk, + .dump = fl_dump, + .owner = THIS_MODULE, +}; + +static int __init cls_fl_init(void) +{ + return register_tcf_proto_ops(&cls_fl_ops); +} + +static void __exit cls_fl_exit(void) +{ + unregister_tcf_proto_ops(&cls_fl_ops); +} + +module_init(cls_fl_init); +module_exit(cls_fl_exit); + +MODULE_AUTHOR("Jiri Pirko "); +MODULE_DESCRIPTION("Flower classifier"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From a9b6391814d5d6b8668fca2dace86949b7244e2e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 12 May 2015 11:56:50 -0400 Subject: packet: rollover statistics Rollover indicates exceptional conditions. Export a counter to inform socket owners of this state. If no socket with sufficient room is found, rollover fails. Also count these events. Finally, also count when flows are rolled over early thanks to huge flow detection, to validate its correctness. Tested: Read counters in bench_rollover on all other tests in the patchset Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/uapi/linux/if_packet.h | 7 +++++++ net/packet/af_packet.c | 19 ++++++++++++++++++- net/packet/internal.h | 3 +++ 3 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 053bd102fbe0..d3d715f8c88f 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -54,6 +54,7 @@ struct sockaddr_ll { #define PACKET_FANOUT 18 #define PACKET_TX_HAS_OFF 19 #define PACKET_QDISC_BYPASS 20 +#define PACKET_ROLLOVER_STATS 21 #define PACKET_FANOUT_HASH 0 #define PACKET_FANOUT_LB 1 @@ -75,6 +76,12 @@ struct tpacket_stats_v3 { unsigned int tp_freeze_q_cnt; }; +struct tpacket_rollover_stats { + __aligned_u64 tp_all; + __aligned_u64 tp_huge; + __aligned_u64 tp_failed; +}; + union tpacket_stats_u { struct tpacket_stats stats1; struct tpacket_stats_v3 stats3; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8f0156b10f8d..31d58565726c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1395,7 +1395,7 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f, unsigned int num) { struct packet_sock *po, *po_next; - unsigned int i, j, room; + unsigned int i, j, room = ROOM_NONE; po = pkt_sk(f->arr[idx]); @@ -1413,6 +1413,9 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f, packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) { if (i != j) po->rollover->sock = i; + atomic_long_inc(&po->rollover->num); + if (room == ROOM_LOW) + atomic_long_inc(&po->rollover->num_huge); return i; } @@ -1420,6 +1423,7 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f, i = 0; } while (i != j); + atomic_long_inc(&po->rollover->num_failed); return idx; } @@ -1554,6 +1558,9 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) po->rollover = kzalloc(sizeof(*po->rollover), GFP_KERNEL); if (!po->rollover) return -ENOMEM; + atomic_long_set(&po->rollover->num, 0); + atomic_long_set(&po->rollover->num_huge, 0); + atomic_long_set(&po->rollover->num_failed, 0); } mutex_lock(&fanout_mutex); @@ -3584,6 +3591,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, struct packet_sock *po = pkt_sk(sk); void *data = &val; union tpacket_stats_u st; + struct tpacket_rollover_stats rstats; if (level != SOL_PACKET) return -ENOPROTOOPT; @@ -3659,6 +3667,15 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, ((u32)po->fanout->flags << 24)) : 0); break; + case PACKET_ROLLOVER_STATS: + if (!po->rollover) + return -EINVAL; + rstats.tp_all = atomic_long_read(&po->rollover->num); + rstats.tp_huge = atomic_long_read(&po->rollover->num_huge); + rstats.tp_failed = atomic_long_read(&po->rollover->num_failed); + data = &rstats; + lv = sizeof(rstats); + break; case PACKET_TX_HAS_OFF: val = po->tp_tx_has_off; break; diff --git a/net/packet/internal.h b/net/packet/internal.h index a9d30a17c714..c035d263c1e8 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -89,6 +89,9 @@ struct packet_fanout { struct packet_rollover { int sock; + atomic_long_t num; + atomic_long_t num_huge; + atomic_long_t num_failed; #define ROLLOVER_HLEN (L1_CACHE_BYTES / sizeof(u32)) u32 history[ROLLOVER_HLEN] ____cacheline_aligned; } ____cacheline_aligned_in_smp; -- cgit v1.2.3 From f0b5e8a42f37a880b8467e59dc814f4f21581d3d Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Tue, 12 May 2015 20:28:07 +0200 Subject: net: kill useless net_*_ingress_queue() definitions when NET_CLS_ACT is unset This fixes 4577139b2dabf589 ("net: use jump label patching for ingress qdisc in __netif_receive_skb_core"). The only client of this is sch_ingress and it depends on NET_CLS_ACT. So there is no way these definition can be of any help. Cc: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 7b8e260c4a27..bd29ab4b0941 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -82,14 +82,6 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); #ifdef CONFIG_NET_CLS_ACT void net_inc_ingress_queue(void); void net_dec_ingress_queue(void); -#else -static inline void net_inc_ingress_queue(void) -{ -} - -static inline void net_dec_ingress_queue(void) -{ -} #endif extern void rtnetlink_init(void); -- cgit v1.2.3 From 35d32e8fe4ab44180e46a0dd54abea6985398d00 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 13 May 2015 12:57:27 -0400 Subject: geneve: move definition of geneve_hdr() to geneve.h This is a static inline with identical definitions in multiple places... Signed-off-by: John W. Linville Signed-off-by: David S. Miller --- include/net/geneve.h | 5 +++++ net/ipv4/geneve.c | 5 ----- net/openvswitch/vport-geneve.c | 5 ----- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/geneve.h b/include/net/geneve.h index 14fb8d3390b4..2a0543a1899d 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -62,6 +62,11 @@ struct genevehdr { struct geneve_opt options[]; }; +static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) +{ + return (struct genevehdr *)(udp_hdr(skb) + 1); +} + #ifdef CONFIG_INET struct geneve_sock; diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 8e6a7fe27a4c..001843d41135 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -60,11 +60,6 @@ struct geneve_net { static int geneve_net_id; -static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) -{ - return (struct genevehdr *)(udp_hdr(skb) + 1); -} - static struct geneve_sock *geneve_find_sock(struct net *net, sa_family_t family, __be16 port) { diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index bf02fd5808c9..208c576bd1b6 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -46,11 +46,6 @@ static inline struct geneve_port *geneve_vport(const struct vport *vport) return vport_priv(vport); } -static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) -{ - return (struct genevehdr *)(udp_hdr(skb) + 1); -} - /* Convert 64 bit tunnel ID to 24 bit VNI. */ static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) { -- cgit v1.2.3 From 2d07dc79fe04a43d82a346ced6bbf07bdb523f1b Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 13 May 2015 12:57:30 -0400 Subject: geneve: add initial netdev driver for GENEVE tunnels This is an initial implementation of a netdev driver for GENEVE tunnels. This implementation uses a fixed UDP port, and only supports point-to-point links with specific partner endpoints. Only IPv4 links are supported at this time. Signed-off-by: John W. Linville Signed-off-by: David S. Miller --- drivers/net/Kconfig | 14 ++ drivers/net/Makefile | 1 + drivers/net/geneve.c | 503 +++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/if_link.h | 9 + 4 files changed, 527 insertions(+) create mode 100644 drivers/net/geneve.c (limited to 'include') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index df51d6025a90..019fceffc9e5 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -179,6 +179,20 @@ config VXLAN To compile this driver as a module, choose M here: the module will be called vxlan. +config GENEVE + tristate "Generic Network Virtualization Encapsulation netdev" + depends on INET && GENEVE_CORE + select NET_IP_TUNNEL + ---help--- + This allows one to create geneve virtual interfaces that provide + Layer 2 Networks over Layer 3 Networks. GENEVE is often used + to tunnel virtual network infrastructure in virtualized environments. + For more information see: + http://tools.ietf.org/html/draft-gross-geneve-02 + + To compile this driver as a module, choose M here: the module + will be called geneve. + config NETCONSOLE tristate "Network console logging support" ---help--- diff --git a/drivers/net/Makefile b/drivers/net/Makefile index e25fdd7d905e..c12cb22478a7 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_TUN) += tun.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VXLAN) += vxlan.o +obj-$(CONFIG_GENEVE) += geneve.o obj-$(CONFIG_NLMON) += nlmon.o # diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c new file mode 100644 index 000000000000..b7eafa4c1a67 --- /dev/null +++ b/drivers/net/geneve.c @@ -0,0 +1,503 @@ +/* + * GENEVE: Generic Network Virtualization Encapsulation + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +#define GENEVE_NETDEV_VER "0.6" + +#define GENEVE_UDP_PORT 6081 + +#define GENEVE_N_VID (1u << 24) +#define GENEVE_VID_MASK (GENEVE_N_VID - 1) + +#define VNI_HASH_BITS 10 +#define VNI_HASH_SIZE (1<rcv_data; + + /* Find the device for this VNI */ + hash = geneve_net_vni_hash(gnvh->vni); + vni_list_head = &gn->vni_list[hash]; + hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) { + if (!memcmp(gnvh->vni, dummy->vni, sizeof(dummy->vni)) && + iph->saddr == dummy->remote.sin_addr.s_addr) { + geneve = dummy; + break; + } + } + if (!geneve) + goto drop; + + /* Drop packets w/ critical options, + * since we don't support any... + */ + if (gnvh->critical) + goto drop; + + skb_reset_mac_header(skb); + skb_scrub_packet(skb, !net_eq(geneve->net, dev_net(geneve->dev))); + skb->protocol = eth_type_trans(skb, geneve->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + + /* Ignore packet loops (and multicast echo) */ + if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) + goto drop; + + skb_reset_network_header(skb); + + iph = ip_hdr(skb); /* Now inner IP header... */ + err = IP_ECN_decapsulate(iph, skb); + + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &iph->saddr, iph->tos); + if (err > 1) { + ++geneve->dev->stats.rx_frame_errors; + ++geneve->dev->stats.rx_errors; + goto drop; + } + } + + stats = this_cpu_ptr(geneve->dev->tstats); + u64_stats_update_begin(&stats->syncp); + stats->rx_packets++; + stats->rx_bytes += skb->len; + u64_stats_update_end(&stats->syncp); + + netif_rx(skb); + + return; +drop: + /* Consume bad packet */ + kfree_skb(skb); +} + +/* Setup stats when device is created */ +static int geneve_init(struct net_device *dev) +{ + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + + return 0; +} + +static void geneve_uninit(struct net_device *dev) +{ + free_percpu(dev->tstats); +} + +static int geneve_open(struct net_device *dev) +{ + struct geneve_dev *geneve = netdev_priv(dev); + struct net *net = geneve->net; + struct geneve_net *gn = net_generic(geneve->net, geneve_net_id); + struct geneve_sock *gs; + + gs = geneve_sock_add(net, htons(GENEVE_UDP_PORT), geneve_rx, gn, + false, false); + if (IS_ERR(gs)) + return PTR_ERR(gs); + + geneve->sock = gs; + + return 0; +} + +static int geneve_stop(struct net_device *dev) +{ + struct geneve_dev *geneve = netdev_priv(dev); + struct geneve_sock *gs = geneve->sock; + + geneve_sock_release(gs); + + return 0; +} + +static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct geneve_dev *geneve = netdev_priv(dev); + struct geneve_sock *gs = geneve->sock; + struct rtable *rt = NULL; + const struct iphdr *iip; /* interior IP header */ + struct flowi4 fl4; + int err; + __be16 sport; + __u8 tos, ttl = 0; + + iip = ip_hdr(skb); + + skb_reset_mac_header(skb); + + /* TODO: port min/max limits should be configurable */ + sport = udp_flow_src_port(dev_net(dev), skb, 0, 0, true); + + memset(&fl4, 0, sizeof(fl4)); + fl4.daddr = geneve->remote.sin_addr.s_addr; + rt = ip_route_output_key(geneve->net, &fl4); + if (IS_ERR(rt)) { + netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); + dev->stats.tx_carrier_errors++; + goto tx_error; + } + if (rt->dst.dev == dev) { /* is this necessary? */ + netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr); + dev->stats.collisions++; + goto rt_tx_error; + } + + /* TODO: tos and ttl should be configurable */ + + tos = ip_tunnel_ecn_encap(0, iip, skb); + + if (IN_MULTICAST(ntohl(fl4.daddr))) + ttl = 1; + + ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); + + /* no need to handle local destination and encap bypass...yet... */ + + err = geneve_xmit_skb(gs, rt, skb, fl4.saddr, fl4.daddr, + tos, ttl, 0, sport, htons(GENEVE_UDP_PORT), 0, + geneve->vni, 0, NULL, false, + !net_eq(geneve->net, dev_net(geneve->dev))); + if (err < 0) + ip_rt_put(rt); + + iptunnel_xmit_stats(err, &dev->stats, dev->tstats); + + return NETDEV_TX_OK; + +rt_tx_error: + ip_rt_put(rt); +tx_error: + dev->stats.tx_errors++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; +} + +static const struct net_device_ops geneve_netdev_ops = { + .ndo_init = geneve_init, + .ndo_uninit = geneve_uninit, + .ndo_open = geneve_open, + .ndo_stop = geneve_stop, + .ndo_start_xmit = geneve_xmit, + .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_change_mtu = eth_change_mtu, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = eth_mac_addr, +}; + +static void geneve_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strlcpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version)); + strlcpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver)); +} + +static const struct ethtool_ops geneve_ethtool_ops = { + .get_drvinfo = geneve_get_drvinfo, + .get_link = ethtool_op_get_link, +}; + +/* Info for udev, that this is a virtual tunnel endpoint */ +static struct device_type geneve_type = { + .name = "geneve", +}; + +/* Initialize the device structure. */ +static void geneve_setup(struct net_device *dev) +{ + ether_setup(dev); + + dev->netdev_ops = &geneve_netdev_ops; + dev->ethtool_ops = &geneve_ethtool_ops; + dev->destructor = free_netdev; + + SET_NETDEV_DEVTYPE(dev, &geneve_type); + + dev->tx_queue_len = 0; + dev->features |= NETIF_F_LLTX; + dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; + dev->features |= NETIF_F_RXCSUM; + dev->features |= NETIF_F_GSO_SOFTWARE; + + dev->vlan_features = dev->features; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; + + dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; + + netif_keep_dst(dev); + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; +} + +static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { + [IFLA_GENEVE_ID] = { .type = NLA_U32 }, + [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, +}; + +static int geneve_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + + if (!data) + return -EINVAL; + + if (data[IFLA_GENEVE_ID]) { + __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]); + + if (vni >= GENEVE_VID_MASK) + return -ERANGE; + } + + return 0; +} + +static int geneve_newlink(struct net *net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct geneve_net *gn = net_generic(net, geneve_net_id); + struct geneve_dev *dummy, *geneve = netdev_priv(dev); + struct hlist_head *vni_list_head; + struct sockaddr_in remote; /* IPv4 address for link partner */ + __u32 vni, hash; + int err; + + if (!data[IFLA_GENEVE_ID] || !data[IFLA_GENEVE_REMOTE]) + return -EINVAL; + + geneve->net = net; + geneve->dev = dev; + + vni = nla_get_u32(data[IFLA_GENEVE_ID]); + geneve->vni[0] = (vni & 0x00ff0000) >> 16; + geneve->vni[1] = (vni & 0x0000ff00) >> 8; + geneve->vni[2] = vni & 0x000000ff; + + geneve->remote.sin_addr.s_addr = + nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); + if (IN_MULTICAST(ntohl(geneve->remote.sin_addr.s_addr))) + return -EINVAL; + + remote = geneve->remote; + hash = geneve_net_vni_hash(geneve->vni); + vni_list_head = &gn->vni_list[hash]; + hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) { + if (!memcmp(geneve->vni, dummy->vni, sizeof(dummy->vni)) && + !memcmp(&remote, &dummy->remote, sizeof(dummy->remote))) + return -EBUSY; + } + + if (tb[IFLA_ADDRESS] == NULL) + eth_hw_addr_random(dev); + + err = register_netdevice(dev); + if (err) + return err; + + list_add(&geneve->next, &gn->geneve_list); + + hlist_add_head_rcu(&geneve->hlist, &gn->vni_list[hash]); + + return 0; +} + +static void geneve_dellink(struct net_device *dev, struct list_head *head) +{ + struct geneve_dev *geneve = netdev_priv(dev); + + if (!hlist_unhashed(&geneve->hlist)) + hlist_del_rcu(&geneve->hlist); + + list_del(&geneve->next); + unregister_netdevice_queue(dev, head); +} + +static size_t geneve_get_size(const struct net_device *dev) +{ + return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ + nla_total_size(sizeof(struct in_addr)) + /* IFLA_GENEVE_REMOTE */ + 0; +} + +static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct geneve_dev *geneve = netdev_priv(dev); + __u32 vni; + + vni = (geneve->vni[0] << 16) | (geneve->vni[1] << 8) | geneve->vni[2]; + if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) + goto nla_put_failure; + + if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, + geneve->remote.sin_addr.s_addr)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops geneve_link_ops __read_mostly = { + .kind = "geneve", + .maxtype = IFLA_GENEVE_MAX, + .policy = geneve_policy, + .priv_size = sizeof(struct geneve_dev), + .setup = geneve_setup, + .validate = geneve_validate, + .newlink = geneve_newlink, + .dellink = geneve_dellink, + .get_size = geneve_get_size, + .fill_info = geneve_fill_info, +}; + +static __net_init int geneve_init_net(struct net *net) +{ + struct geneve_net *gn = net_generic(net, geneve_net_id); + unsigned int h; + + INIT_LIST_HEAD(&gn->geneve_list); + + for (h = 0; h < VNI_HASH_SIZE; ++h) + INIT_HLIST_HEAD(&gn->vni_list[h]); + + return 0; +} + +static void __net_exit geneve_exit_net(struct net *net) +{ + struct geneve_net *gn = net_generic(net, geneve_net_id); + struct geneve_dev *geneve, *next; + struct net_device *dev, *aux; + LIST_HEAD(list); + + rtnl_lock(); + + /* gather any geneve devices that were moved into this ns */ + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == &geneve_link_ops) + unregister_netdevice_queue(dev, &list); + + /* now gather any other geneve devices that were created in this ns */ + list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) { + /* If geneve->dev is in the same netns, it was already added + * to the list by the previous loop. + */ + if (!net_eq(dev_net(geneve->dev), net)) + unregister_netdevice_queue(geneve->dev, &list); + } + + /* unregister the devices gathered above */ + unregister_netdevice_many(&list); + rtnl_unlock(); +} + +static struct pernet_operations geneve_net_ops = { + .init = geneve_init_net, + .exit = geneve_exit_net, + .id = &geneve_net_id, + .size = sizeof(struct geneve_net), +}; + +static int __init geneve_init_module(void) +{ + int rc; + + rc = register_pernet_subsys(&geneve_net_ops); + if (rc) + goto out1; + + rc = rtnl_link_register(&geneve_link_ops); + if (rc) + goto out2; + + return 0; +out2: + unregister_pernet_subsys(&geneve_net_ops); +out1: + return rc; +} +late_initcall(geneve_init_module); + +static void __exit geneve_cleanup_module(void) +{ + rtnl_link_unregister(&geneve_link_ops); + unregister_pernet_subsys(&geneve_net_ops); +} +module_exit(geneve_cleanup_module); + +MODULE_LICENSE("GPL"); +MODULE_VERSION(GENEVE_NETDEV_VER); +MODULE_AUTHOR("John W. Linville "); +MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic"); +MODULE_ALIAS_RTNL_LINK("geneve"); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6d6e502e1051..afccc9393fef 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -390,6 +390,15 @@ struct ifla_vxlan_port_range { __be16 high; }; +/* GENEVE section */ +enum { + IFLA_GENEVE_UNSPEC, + IFLA_GENEVE_ID, + IFLA_GENEVE_REMOTE, + __IFLA_GENEVE_MAX +}; +#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) + /* Bonding section */ enum { -- cgit v1.2.3 From 87d5c18ce1f41a2410d4fb26d5d68c55867450f5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 13 May 2015 18:19:34 +0200 Subject: netfilter: cleanup struct nf_hook_ops indentation Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netfilter.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 63560d0a8dfe..83be4a3cec98 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -79,16 +79,16 @@ typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, const struct nf_hook_state *state); struct nf_hook_ops { - struct list_head list; + struct list_head list; /* User fills in from here down. */ - nf_hookfn *hook; - struct module *owner; - void *priv; - u_int8_t pf; - unsigned int hooknum; + nf_hookfn *hook; + struct module *owner; + void *priv; + u_int8_t pf; + unsigned int hooknum; /* Hooks are ordered in ascending priority. */ - int priority; + int priority; }; struct nf_sockopt_ops { -- cgit v1.2.3 From f7191483461ce2ae579b6f7227fa7ce49e006656 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 13 May 2015 18:19:35 +0200 Subject: netfilter: add hook list to nf_hook_state Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netfilter.h | 7 +++++-- net/netfilter/core.c | 6 ++---- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 83be4a3cec98..388ed1952242 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -54,10 +54,12 @@ struct nf_hook_state { struct net_device *in; struct net_device *out; struct sock *sk; + struct list_head *hook_list; int (*okfn)(struct sock *, struct sk_buff *); }; static inline void nf_hook_state_init(struct nf_hook_state *p, + struct list_head *hook_list, unsigned int hook, int thresh, u_int8_t pf, struct net_device *indev, @@ -71,6 +73,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->in = indev; p->out = outdev; p->sk = sk; + p->hook_list = hook_list; p->okfn = okfn; } @@ -166,8 +169,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, if (nf_hooks_active(pf, hook)) { struct nf_hook_state state; - nf_hook_state_init(&state, hook, thresh, pf, - indev, outdev, sk, okfn); + nf_hook_state_init(&state, &nf_hooks[pf][hook], hook, thresh, + pf, indev, outdev, sk, okfn); return nf_hook_slow(skb, &state); } return 1; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index e6163017c42d..e418cfd603c0 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -166,11 +166,9 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state) /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); - elem = list_entry_rcu(&nf_hooks[state->pf][state->hook], - struct nf_hook_ops, list); + elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list); next_hook: - verdict = nf_iterate(&nf_hooks[state->pf][state->hook], skb, state, - &elem); + verdict = nf_iterate(state->hook_list, skb, state, &elem); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { -- cgit v1.2.3 From b8d0aad0c77f488d1d51a02d871a5cbc2d8032b9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 13 May 2015 18:19:36 +0200 Subject: netfilter: add nf_hook_list_active() In preparation to have netfilter ingress per-device hook list. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netfilter.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 388ed1952242..49d00638d1fa 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -134,26 +134,33 @@ extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #ifdef HAVE_JUMP_LABEL extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; -static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) +static inline bool nf_hook_list_active(struct list_head *nf_hook_list, + u_int8_t pf, unsigned int hook) { if (__builtin_constant_p(pf) && __builtin_constant_p(hook)) return static_key_false(&nf_hooks_needed[pf][hook]); - return !list_empty(&nf_hooks[pf][hook]); + return !list_empty(nf_hook_list); } #else -static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) +static inline bool nf_hook_list_active(struct list_head *nf_hook_list, + u_int8_t pf, unsigned int hook) { - return !list_empty(&nf_hooks[pf][hook]); + return !list_empty(nf_hook_list); } #endif +static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) +{ + return nf_hook_list_active(&nf_hooks[pf][hook], pf, hook); +} + int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); /** * nf_hook_thresh - call a netfilter hook - * + * * Returns 1 if the hook has allowed the packet to pass. The function * okfn must be invoked by the caller in this case. Any other return * value indicates the packet has been consumed by the hook. -- cgit v1.2.3 From 1cf51900f8545b358b5deaacfda348d990f671db Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 13 May 2015 18:19:37 +0200 Subject: net: add CONFIG_NET_INGRESS to enable ingress filtering This new config switch enables the ingress filtering infrastructure that is controlled through the ingress_needed static key. This prepares the introduction of the Netfilter ingress hook that resides under this unique static key. Note that CONFIG_SCH_INGRESS automatically selects this, that should be no problem since this also depends on CONFIG_NET_CLS_ACT. Signed-off-by: Pablo Neira Ayuso Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 +- net/Kconfig | 3 +++ net/core/dev.c | 7 ++++--- net/sched/Kconfig | 1 + 4 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index bd29ab4b0941..a2324fb45cf4 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -79,7 +79,7 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); -#ifdef CONFIG_NET_CLS_ACT +#ifdef CONFIG_NET_INGRESS void net_inc_ingress_queue(void); void net_dec_ingress_queue(void); #endif diff --git a/net/Kconfig b/net/Kconfig index 44dd5786ee91..57a7c5af3175 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -45,6 +45,9 @@ config COMPAT_NETLINK_MESSAGES Newly written code should NEVER need this option but do compat-independent messages instead! +config NET_INGRESS + bool + menu "Networking options" source "net/packet/Kconfig" diff --git a/net/core/dev.c b/net/core/dev.c index af549062ae8e..a5ef90016ce7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1630,7 +1630,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) } EXPORT_SYMBOL(call_netdevice_notifiers); -#ifdef CONFIG_NET_CLS_ACT +#ifdef CONFIG_NET_INGRESS static struct static_key ingress_needed __read_mostly; void net_inc_ingress_queue(void) @@ -3798,13 +3798,14 @@ another_round: } skip_taps: -#ifdef CONFIG_NET_CLS_ACT +#ifdef CONFIG_NET_INGRESS if (static_key_false(&ingress_needed)) { skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) goto unlock; } - +#endif +#ifdef CONFIG_NET_CLS_ACT skb->tc_verd = 0; ncls: #endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 5fd1c2f487d2..daa33432b716 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -312,6 +312,7 @@ config NET_SCH_PIE config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT + select NET_INGRESS ---help--- Say Y here if you want to use classifiers for incoming packets. If unsure, say Y. -- cgit v1.2.3 From e687ad60af09010936bbd0b2a3b5d90a8ee8353c Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 13 May 2015 18:19:38 +0200 Subject: netfilter: add netfilter ingress hook after handle_ing() under unique static key This patch adds the Netfilter ingress hook just after the existing tc ingress hook, that seems to be the consensus solution for this. Note that the Netfilter hook resides under the global static key that enables ingress filtering. Nonetheless, Netfilter still also has its own static key for minimal impact on the existing handle_ing(). * Without this patch: Result: OK: 6216490(c6216338+d152) usec, 100000000 (60byte,0frags) 16086246pps 7721Mb/sec (7721398080bps) errors: 100000000 42.46% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 25.92% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 7.81% kpktgend_0 [pktgen] [k] pktgen_thread_worker 5.62% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 2.70% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 2.34% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk 1.44% kpktgend_0 [kernel.kallsyms] [k] __build_skb * With this patch: Result: OK: 6214833(c6214731+d101) usec, 100000000 (60byte,0frags) 16090536pps 7723Mb/sec (7723457280bps) errors: 100000000 41.23% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 26.57% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 7.72% kpktgend_0 [pktgen] [k] pktgen_thread_worker 5.55% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 2.78% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 2.06% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk 1.43% kpktgend_0 [kernel.kallsyms] [k] __build_skb * Without this patch + tc ingress: tc filter add dev eth4 parent ffff: protocol ip prio 1 \ u32 match ip dst 4.3.2.1/32 Result: OK: 9269001(c9268821+d179) usec, 100000000 (60byte,0frags) 10788648pps 5178Mb/sec (5178551040bps) errors: 100000000 40.99% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 17.50% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 11.77% kpktgend_0 [cls_u32] [k] u32_classify 5.62% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat 5.18% kpktgend_0 [pktgen] [k] pktgen_thread_worker 3.23% kpktgend_0 [kernel.kallsyms] [k] tc_classify 2.97% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 1.83% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 1.50% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk 0.99% kpktgend_0 [kernel.kallsyms] [k] __build_skb * With this patch + tc ingress: tc filter add dev eth4 parent ffff: protocol ip prio 1 \ u32 match ip dst 4.3.2.1/32 Result: OK: 9308218(c9308091+d126) usec, 100000000 (60byte,0frags) 10743194pps 5156Mb/sec (5156733120bps) errors: 100000000 42.01% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 17.78% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 11.70% kpktgend_0 [cls_u32] [k] u32_classify 5.46% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat 5.16% kpktgend_0 [pktgen] [k] pktgen_thread_worker 2.98% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 2.84% kpktgend_0 [kernel.kallsyms] [k] tc_classify 1.96% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 1.57% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk Note that the results are very similar before and after. I can see gcc gets the code under the ingress static key out of the hot path. Then, on that cold branch, it generates the code to accomodate the netfilter ingress static key. My explanation for this is that this reduces the pressure on the instruction cache for non-users as the new code is out of the hot path, and it comes with minimal impact for tc ingress users. Using gcc version 4.8.4 on: Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 8 [...] L1d cache: 16K L1i cache: 64K L2 cache: 2048K L3 cache: 8192K Signed-off-by: Pablo Neira Ayuso Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ include/linux/netfilter.h | 1 + include/linux/netfilter_ingress.h | 41 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/netfilter.h | 6 ++++++ net/core/dev.c | 36 ++++++++++++++++++++++++++++++++++ net/netfilter/Kconfig | 7 +++++++ net/netfilter/core.c | 31 ++++++++++++++++++++++++++++- 7 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 include/linux/netfilter_ingress.h (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d3ed01c18247..51f8d2f5dc3f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1656,6 +1656,9 @@ struct net_device { struct tcf_proto __rcu *ingress_cl_list; #endif struct netdev_queue __rcu *ingress_queue; +#ifdef CONFIG_NETFILTER_INGRESS + struct list_head nf_hooks_ingress; +#endif unsigned char broadcast[MAX_ADDR_LEN]; #ifdef CONFIG_RFS_ACCEL diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 49d00638d1fa..f5ff5d156da8 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -86,6 +86,7 @@ struct nf_hook_ops { /* User fills in from here down. */ nf_hookfn *hook; + struct net_device *dev; struct module *owner; void *priv; u_int8_t pf; diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h new file mode 100644 index 000000000000..cb0727fe2b3d --- /dev/null +++ b/include/linux/netfilter_ingress.h @@ -0,0 +1,41 @@ +#ifndef _NETFILTER_INGRESS_H_ +#define _NETFILTER_INGRESS_H_ + +#include +#include + +#ifdef CONFIG_NETFILTER_INGRESS +static inline int nf_hook_ingress_active(struct sk_buff *skb) +{ + return nf_hook_list_active(&skb->dev->nf_hooks_ingress, + NFPROTO_NETDEV, NF_NETDEV_INGRESS); +} + +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + struct nf_hook_state state; + + nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress, + NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL, + skb->dev, NULL, NULL); + return nf_hook_slow(skb, &state); +} + +static inline void nf_hook_ingress_init(struct net_device *dev) +{ + INIT_LIST_HEAD(&dev->nf_hooks_ingress); +} +#else /* CONFIG_NETFILTER_INGRESS */ +static inline int nf_hook_ingress_active(struct sk_buff *skb) +{ + return 0; +} + +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + return 0; +} + +static inline void nf_hook_ingress_init(struct net_device *dev) {} +#endif /* CONFIG_NETFILTER_INGRESS */ +#endif /* _NETFILTER_INGRESS_H_ */ diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index ef1b1f88ca18..177027cce6b3 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -51,11 +51,17 @@ enum nf_inet_hooks { NF_INET_NUMHOOKS }; +enum nf_dev_hooks { + NF_NETDEV_INGRESS, + NF_NETDEV_NUMHOOKS +}; + enum { NFPROTO_UNSPEC = 0, NFPROTO_INET = 1, NFPROTO_IPV4 = 2, NFPROTO_ARP = 3, + NFPROTO_NETDEV = 5, NFPROTO_BRIDGE = 7, NFPROTO_IPV6 = 10, NFPROTO_DECNET = 12, diff --git a/net/core/dev.c b/net/core/dev.c index a5ef90016ce7..29f0d6e6542c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -135,6 +135,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -3666,6 +3667,13 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, return skb; } +#else +static inline struct sk_buff *handle_ing(struct sk_buff *skb, + struct packet_type **pt_prev, + int *ret, struct net_device *orig_dev) +{ + return skb; +} #endif /** @@ -3739,6 +3747,28 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb) } } +#ifdef CONFIG_NETFILTER_INGRESS +static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, + int *ret, struct net_device *orig_dev) +{ + if (nf_hook_ingress_active(skb)) { + if (*pt_prev) { + *ret = deliver_skb(skb, *pt_prev, orig_dev); + *pt_prev = NULL; + } + + return nf_hook_ingress(skb); + } + return 0; +} +#else +static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, + int *ret, struct net_device *orig_dev) +{ + return 0; +} +#endif + static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) { struct packet_type *ptype, *pt_prev; @@ -3803,6 +3833,9 @@ skip_taps: skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) goto unlock; + + if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) + goto unlock; } #endif #ifdef CONFIG_NET_CLS_ACT @@ -6968,6 +7001,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->group = INIT_NETDEV_GROUP; if (!dev->ethtool_ops) dev->ethtool_ops = &default_ethtool_ops; + + nf_hook_ingress_init(dev); + return dev; free_all: diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f70e34a68f70..db1c674397ad 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1,6 +1,13 @@ menu "Core Netfilter Configuration" depends on NET && INET && NETFILTER +config NETFILTER_INGRESS + bool "Netfilter ingress support" + select NET_INGRESS + help + This allows you to classify packets from ingress using the Netfilter + infrastructure. + config NETFILTER_NETLINK tristate diff --git a/net/netfilter/core.c b/net/netfilter/core.c index e418cfd603c0..653e32eac08c 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -64,10 +64,27 @@ static DEFINE_MUTEX(nf_hook_mutex); int nf_register_hook(struct nf_hook_ops *reg) { + struct list_head *nf_hook_list; struct nf_hook_ops *elem; mutex_lock(&nf_hook_mutex); - list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) { + switch (reg->pf) { + case NFPROTO_NETDEV: +#ifdef CONFIG_NETFILTER_INGRESS + if (reg->hooknum == NF_NETDEV_INGRESS) { + BUG_ON(reg->dev == NULL); + nf_hook_list = ®->dev->nf_hooks_ingress; + net_inc_ingress_queue(); + break; + } +#endif + /* Fall through. */ + default: + nf_hook_list = &nf_hooks[reg->pf][reg->hooknum]; + break; + } + + list_for_each_entry(elem, nf_hook_list, list) { if (reg->priority < elem->priority) break; } @@ -85,6 +102,18 @@ void nf_unregister_hook(struct nf_hook_ops *reg) mutex_lock(&nf_hook_mutex); list_del_rcu(®->list); mutex_unlock(&nf_hook_mutex); + switch (reg->pf) { + case NFPROTO_NETDEV: +#ifdef CONFIG_NETFILTER_INGRESS + if (reg->hooknum == NF_NETDEV_INGRESS) { + net_dec_ingress_queue(); + break; + } + break; +#endif + default: + break; + } #ifdef HAVE_JUMP_LABEL static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif -- cgit v1.2.3 From 7fb48c5bc3100f7674a8e26f42c1518196500728 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 3 May 2015 22:05:28 +0200 Subject: netfilter: bridge: neigh_head and physoutdev can't be used at same time The neigh_header is only needed when we detect DNAT after prerouting and neigh cache didn't have a mac address for us. The output port has not been chosen yet so we can re-use the storage area, bringing struct size down to 32 bytes on x86_64. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 8 +++++--- net/bridge/br_netfilter.c | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c0b574a414e7..3d932e64125a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -170,12 +170,14 @@ struct nf_bridge_info { BRNF_PROTO_UNCHANGED, BRNF_PROTO_8021Q, BRNF_PROTO_PPPOE - } orig_proto; + } orig_proto:8; bool pkt_otherhost; unsigned int mask; struct net_device *physindev; - struct net_device *physoutdev; - char neigh_header[8]; + union { + struct net_device *physoutdev; + char neigh_header[8]; + }; }; #endif diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index ab55e2472beb..13973da29b2a 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -973,6 +973,8 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) nf_bridge->neigh_header, ETH_HLEN - ETH_ALEN); skb->dev = nf_bridge->physindev; + + nf_bridge->physoutdev = NULL; br_handle_frame_finish(NULL, skb); } -- cgit v1.2.3 From a3b1c1eb50f9b3e0c73c37157d0c61b2e90ae580 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 6 May 2015 16:28:57 +0200 Subject: netfilter: ipset: deinline ip_set_put_extensions() On x86 allyesconfig build: The function compiles to 489 bytes of machine code. It has 25 callsites. text data bss dec hex filename 82441375 22255384 20627456 125324215 7784bb7 vmlinux.before 82434909 22255384 20627456 125317749 7783275 vmlinux Signed-off-by: Denys Vlasenko CC: Jozsef Kadlecsik CC: Eric W. Biederman CC: David S. Miller CC: Jan Engelhardt CC: Jiri Pirko CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org CC: netfilter-devel@vger.kernel.org Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 24 ++---------------------- net/netfilter/ipset/ip_set_core.c | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index f88be7258e5f..ffdfdc24952a 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -533,29 +533,9 @@ bitmap_bytes(u32 a, u32 b) #include #include -static inline int +int ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, - const void *e, bool active) -{ - if (SET_WITH_TIMEOUT(set)) { - unsigned long *timeout = ext_timeout(e, set); - - if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(active ? ip_set_timeout_get(timeout) - : *timeout))) - return -EMSGSIZE; - } - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, set))) - return -EMSGSIZE; - if (SET_WITH_COMMENT(set) && - ip_set_put_comment(skb, ext_comment(e, set))) - return -EMSGSIZE; - if (SET_WITH_SKBINFO(set) && - ip_set_put_skbinfo(skb, ext_skbinfo(e, set))) - return -EMSGSIZE; - return 0; -} + const void *e, bool active); #define IP_SET_INIT_KEXT(skb, opt, set) \ { .bytes = (skb)->len, .packets = 1, \ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 7f9c0565e715..475e4960a164 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -432,6 +432,31 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], } EXPORT_SYMBOL_GPL(ip_set_get_extensions); +int +ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, + const void *e, bool active) +{ + if (SET_WITH_TIMEOUT(set)) { + unsigned long *timeout = ext_timeout(e, set); + + if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(active ? ip_set_timeout_get(timeout) + : *timeout))) + return -EMSGSIZE; + } + if (SET_WITH_COUNTER(set) && + ip_set_put_counter(skb, ext_counter(e, set))) + return -EMSGSIZE; + if (SET_WITH_COMMENT(set) && + ip_set_put_comment(skb, ext_comment(e, set))) + return -EMSGSIZE; + if (SET_WITH_SKBINFO(set) && + ip_set_put_skbinfo(skb, ext_skbinfo(e, set))) + return -EMSGSIZE; + return 0; +} +EXPORT_SYMBOL_GPL(ip_set_put_extensions); + /* * Creating/destroying/renaming/swapping affect the existence and * the properties of a set. All of these can be executed from userspace -- cgit v1.2.3 From 922f2dd1b65a888e34c472979460dc23211750a2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 12 May 2015 10:33:24 -0700 Subject: net: phy: Add phy_ignore_ta_mask to account for broken turn-around Some PHY devices/switches will not release the turn-around line as they should do at the end of a MDIO transaction. To help with such situations, allow MDIO bus drivers to be made aware of such restrictions. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 685809835b5c..701c7a3946e0 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -181,6 +181,9 @@ struct mii_bus { /* PHY addresses to be ignored when probing */ u32 phy_mask; + /* PHY addresses to ignore the TA/read failure */ + u32 phy_ignore_ta_mask; + /* * Pointer to an array of interrupts, each PHY's * interrupt at the index matching its address -- cgit v1.2.3 From 264ea103a7473f51aced838e68ed384ea2c759f5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 May 2015 14:26:56 -0700 Subject: tcp: syncookies: extend validity range Now we allow storing more request socks per listener, we might hit syncookie mode less often and hit following bug in our stack : When we send a burst of syncookies, then exit this mode, tcp_synq_no_recent_overflow() can return false if the ACK packets coming from clients are coming three seconds after the end of syncookie episode. This is a way too strong requirement and conflicts with rest of syncookie code which allows ACK to be aged up to 2 minutes. Perfectly valid ACK packets are dropped just because clients might be in a crowded wifi environment or on another planet. So let's fix this, and also change tcp_synq_overflow() to not dirty a cache line for every syncookie we send, as we are under attack. Signed-off-by: Eric Dumazet Acked-by: Florian Westphal Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/net/tcp.h | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index b8ea12880fd9..7ace6acbf5fd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -326,18 +326,6 @@ static inline bool tcp_too_many_orphans(struct sock *sk, int shift) bool tcp_check_oom(struct sock *sk, int shift); -/* syncookies: remember time of last synqueue overflow */ -static inline void tcp_synq_overflow(struct sock *sk) -{ - tcp_sk(sk)->rx_opt.ts_recent_stamp = jiffies; -} - -/* syncookies: no recent synqueue overflow on this listening socket? */ -static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) -{ - unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; - return time_after(jiffies, last_overflow + TCP_TIMEOUT_FALLBACK); -} extern struct proto tcp_prot; @@ -483,13 +471,35 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); * i.e. a sent cookie is valid only at most for 2*60 seconds (or less if * the counter advances immediately after a cookie is generated). */ -#define MAX_SYNCOOKIE_AGE 2 +#define MAX_SYNCOOKIE_AGE 2 +#define TCP_SYNCOOKIE_PERIOD (60 * HZ) +#define TCP_SYNCOOKIE_VALID (MAX_SYNCOOKIE_AGE * TCP_SYNCOOKIE_PERIOD) + +/* syncookies: remember time of last synqueue overflow + * But do not dirty this field too often (once per second is enough) + */ +static inline void tcp_synq_overflow(struct sock *sk) +{ + unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + unsigned long now = jiffies; + + if (time_after(now, last_overflow + HZ)) + tcp_sk(sk)->rx_opt.ts_recent_stamp = now; +} + +/* syncookies: no recent synqueue overflow on this listening socket? */ +static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) +{ + unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + + return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID); +} static inline u32 tcp_cookie_time(void) { u64 val = get_jiffies_64(); - do_div(val, 60 * HZ); + do_div(val, TCP_SYNCOOKIE_PERIOD); return val; } -- cgit v1.2.3 From a4afd37b26f4b9f640310a89b7f8d176ae3460b1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 13 May 2015 13:12:43 +0200 Subject: test_bpf: add tests related to BPF_MAXINSNS Couple of torture test cases related to the bug fixed in 0b59d8806a31 ("ARM: net: delegate filter to kernel interpreter when imm_offset() return value can't fit into 12bits."). I've added a helper to allocate and fill the insn space. Output on x86_64 from my laptop: test_bpf: #233 BPF_MAXINSNS: Maximum possible literals jited:0 7 PASS test_bpf: #234 BPF_MAXINSNS: Single literal jited:0 8 PASS test_bpf: #235 BPF_MAXINSNS: Run/add until end jited:0 11553 PASS test_bpf: #236 BPF_MAXINSNS: Too many instructions PASS test_bpf: #237 BPF_MAXINSNS: Very long jump jited:0 9 PASS test_bpf: #238 BPF_MAXINSNS: Ctx heavy transformations jited:0 20329 20398 PASS test_bpf: #239 BPF_MAXINSNS: Call heavy transformations jited:0 32178 32475 PASS test_bpf: #240 BPF_MAXINSNS: Jump heavy test jited:0 10518 PASS test_bpf: #233 BPF_MAXINSNS: Maximum possible literals jited:1 4 PASS test_bpf: #234 BPF_MAXINSNS: Single literal jited:1 4 PASS test_bpf: #235 BPF_MAXINSNS: Run/add until end jited:1 1625 PASS test_bpf: #236 BPF_MAXINSNS: Too many instructions PASS test_bpf: #237 BPF_MAXINSNS: Very long jump jited:1 8 PASS test_bpf: #238 BPF_MAXINSNS: Ctx heavy transformations jited:1 3301 3174 PASS test_bpf: #239 BPF_MAXINSNS: Call heavy transformations jited:1 24107 23491 PASS test_bpf: #240 BPF_MAXINSNS: Jump heavy test jited:1 8651 PASS Signed-off-by: Daniel Borkmann Cc: Alexei Starovoitov Cc: Nicolas Schichan Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 8 ++ lib/test_bpf.c | 317 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 316 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index ce1d72d34382..200be4a74a33 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -277,6 +277,14 @@ struct bpf_prog_aux; .off = 0, \ .imm = 0 }) +/* Internal classic blocks for direct assignment */ + +#define __BPF_STMT(CODE, K) \ + ((struct sock_filter) BPF_STMT(CODE, K)) + +#define __BPF_JUMP(CODE, K, JT, JF) \ + ((struct sock_filter) BPF_JUMP(CODE, K, JT, JF)) + #define bytes_to_bpf_size(bytes) \ ({ \ int bpf_size = -EINVAL; \ diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 8bca780e3613..66358b21acce 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -21,6 +21,7 @@ #include #include #include +#include /* General test specific settings */ #define MAX_SUBTESTS 3 @@ -67,6 +68,10 @@ struct bpf_test { union { struct sock_filter insns[MAX_INSNS]; struct bpf_insn insns_int[MAX_INSNS]; + struct { + void *insns; + unsigned int len; + } ptr; } u; __u8 aux; __u8 data[MAX_DATA]; @@ -74,8 +79,190 @@ struct bpf_test { int data_size; __u32 result; } test[MAX_SUBTESTS]; + int (*fill_helper)(struct bpf_test *self); }; +/* Large test cases need separate allocation and fill handler. */ + +static int bpf_fill_maxinsns1(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + __u32 k = ~0; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + for (i = 0; i < len; i++, k--) + insn[i] = __BPF_STMT(BPF_RET | BPF_K, k); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns2(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + for (i = 0; i < len; i++) + insn[i] = __BPF_STMT(BPF_RET | BPF_K, 0xfefefefe); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns3(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + struct rnd_state rnd; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + prandom_seed_state(&rnd, 3141592653589793238ULL); + + for (i = 0; i < len - 1; i++) { + __u32 k = prandom_u32_state(&rnd); + + insn[i] = __BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, k); + } + + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_A, 0); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns4(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS + 1; + struct sock_filter *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + for (i = 0; i < len; i++) + insn[i] = __BPF_STMT(BPF_RET | BPF_K, 0xfefefefe); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns5(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + insn[0] = __BPF_JUMP(BPF_JMP | BPF_JA, len - 2, 0, 0); + + for (i = 1; i < len - 1; i++) + insn[i] = __BPF_STMT(BPF_RET | BPF_K, 0xfefefefe); + + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_K, 0xabababab); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns6(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + for (i = 0; i < len - 1; i++) + insn[i] = __BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + + SKF_AD_VLAN_TAG_PRESENT); + + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_A, 0); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns7(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + for (i = 0; i < len - 4; i++) + insn[i] = __BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + + SKF_AD_CPU); + + insn[len - 4] = __BPF_STMT(BPF_MISC | BPF_TAX, 0); + insn[len - 3] = __BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + + SKF_AD_CPU); + insn[len - 2] = __BPF_STMT(BPF_ALU | BPF_SUB | BPF_X, 0); + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_A, 0); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns8(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + int i, jmp_off = len - 3; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + insn[0] = __BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff); + + for (i = 1; i < len - 1; i++) + insn[i] = __BPF_JUMP(BPF_JMP | BPF_JGT, 0xffffffff, jmp_off--, 0); + + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_A, 0); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + static struct bpf_test tests[] = { { "TAX", @@ -3998,6 +4185,73 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Maximum possible literals", + { }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0xffffffff } }, + .fill_helper = bpf_fill_maxinsns1, + }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Single literal", + { }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0xfefefefe } }, + .fill_helper = bpf_fill_maxinsns2, + }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Run/add until end", + { }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0x947bf368 } }, + .fill_helper = bpf_fill_maxinsns3, + }, + { + "BPF_MAXINSNS: Too many instructions", + { }, + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, + { }, + { }, + .fill_helper = bpf_fill_maxinsns4, + }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Very long jump", + { }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0xabababab } }, + .fill_helper = bpf_fill_maxinsns5, + }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Ctx heavy transformations", + { }, + CLASSIC, + { }, + { + { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }, + { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) } + }, + .fill_helper = bpf_fill_maxinsns6, + }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Call heavy transformations", + { }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 1, 0 }, { 10, 0 } }, + .fill_helper = bpf_fill_maxinsns7, + }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Jump heavy test", + { }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0xffffffff } }, + .fill_helper = bpf_fill_maxinsns8, + }, }; static struct net_device dev; @@ -4051,10 +4305,15 @@ static void release_test_data(const struct bpf_test *test, void *data) kfree_skb(data); } -static int probe_filter_length(struct sock_filter *fp) +static int filter_length(int which) { - int len = 0; + struct sock_filter *fp; + int len; + if (tests[which].fill_helper) + return tests[which].u.ptr.len; + + fp = tests[which].u.insns; for (len = MAX_INSNS - 1; len > 0; --len) if (fp[len].code != 0 || fp[len].k != 0) break; @@ -4062,16 +4321,25 @@ static int probe_filter_length(struct sock_filter *fp) return len + 1; } +static void *filter_pointer(int which) +{ + if (tests[which].fill_helper) + return tests[which].u.ptr.insns; + else + return tests[which].u.insns; +} + static struct bpf_prog *generate_filter(int which, int *err) { - struct bpf_prog *fp; - struct sock_fprog_kern fprog; - unsigned int flen = probe_filter_length(tests[which].u.insns); __u8 test_type = tests[which].aux & TEST_TYPE_MASK; + unsigned int flen = filter_length(which); + void *fptr = filter_pointer(which); + struct sock_fprog_kern fprog; + struct bpf_prog *fp; switch (test_type) { case CLASSIC: - fprog.filter = tests[which].u.insns; + fprog.filter = fptr; fprog.len = flen; *err = bpf_prog_create(&fp, &fprog); @@ -4107,8 +4375,7 @@ static struct bpf_prog *generate_filter(int which, int *err) } fp->len = flen; - memcpy(fp->insnsi, tests[which].u.insns_int, - fp->len * sizeof(struct bpf_insn)); + memcpy(fp->insnsi, fptr, fp->len * sizeof(struct bpf_insn)); bpf_prog_select_runtime(fp); break; @@ -4180,6 +4447,29 @@ static int run_one(const struct bpf_prog *fp, struct bpf_test *test) return err_cnt; } +static __init int prepare_bpf_tests(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (tests[i].fill_helper && + tests[i].fill_helper(&tests[i]) < 0) + return -ENOMEM; + } + + return 0; +} + +static __init void destroy_bpf_tests(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (tests[i].fill_helper) + kfree(tests[i].u.ptr.insns); + } +} + static __init int test_bpf(void) { int i, err_cnt = 0, pass_cnt = 0; @@ -4227,7 +4517,16 @@ static __init int test_bpf(void) static int __init test_bpf_init(void) { - return test_bpf(); + int ret; + + ret = prepare_bpf_tests(); + if (ret < 0) + return ret; + + ret = test_bpf(); + + destroy_bpf_tests(); + return ret; } static void __exit test_bpf_exit(void) -- cgit v1.2.3 From ef7f3a5c7149ad2dbd1d8a71d0aa88a02d1dbcb8 Mon Sep 17 00:00:00 2001 From: Bert Vermeulen Date: Wed, 13 May 2015 13:35:39 +0200 Subject: mdio-gpio: Propagate mii_bus.phy_ignore_ta_mask This also changes mii_bus.phy_mask to u32 for consistency. Signed-off-by: Bert Vermeulen Signed-off-by: David S. Miller --- drivers/net/phy/mdio-gpio.c | 1 + include/linux/mdio-gpio.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 53d18150f4e2..7dc21e56a7aa 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -158,6 +158,7 @@ static struct mii_bus *mdio_gpio_bus_init(struct device *dev, new_bus->name = "GPIO Bitbanged MDIO", new_bus->phy_mask = pdata->phy_mask; + new_bus->phy_ignore_ta_mask = pdata->phy_ignore_ta_mask; new_bus->irq = pdata->irqs; new_bus->parent = dev; diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h index 66c30a763b10..11f00cdabe3d 100644 --- a/include/linux/mdio-gpio.h +++ b/include/linux/mdio-gpio.h @@ -23,7 +23,8 @@ struct mdio_gpio_platform_data { bool mdio_active_low; bool mdo_active_low; - unsigned int phy_mask; + u32 phy_mask; + u32 phy_ignore_ta_mask; int irqs[PHY_MAX_ADDR]; /* reset callback */ int (*reset)(struct mii_bus *bus); -- cgit v1.2.3 From 55917a21d0cc012bb6073bb05bb768fd51d8e237 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 14 May 2015 14:57:23 +0200 Subject: netfilter: x_tables: add context to know if extension runs from nft_compat Currently, we have four xtables extensions that cannot be used from the xt over nft compat layer. The problem is that they need real access to the full blown xt_entry to validate that the rule comes with the right dependencies. This check was introduced to overcome the lack of sufficient userspace dependency validation in iptables. To resolve this problem, this patch introduces a new field to the xt_tgchk_param structure that tell us if the extension is run from nft_compat context. The three affected extensions are: 1) CLUSTERIP, this target has been superseded by xt_cluster. So just bail out by returning -EINVAL. 2) TCPMSS. Relax the checking when used from nft_compat. If used with the wrong configuration, it will corrupt !syn packets by adding TCP MSS option. 3) ebt_stp. Relax the check to make sure it uses the reserved destination MAC address for STP. Signed-off-by: Pablo Neira Ayuso Tested-by: Arturo Borrero Gonzalez --- include/linux/netfilter/x_tables.h | 2 ++ net/bridge/netfilter/ebt_stp.c | 6 ++++-- net/ipv4/netfilter/ipt_CLUSTERIP.c | 5 +++++ net/netfilter/nft_compat.c | 2 ++ net/netfilter/xt_TCPMSS.c | 6 ++++++ 5 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index a3e215bb0241..09f38206c18f 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -62,6 +62,7 @@ struct xt_mtchk_param { void *matchinfo; unsigned int hook_mask; u_int8_t family; + bool nft_compat; }; /** @@ -92,6 +93,7 @@ struct xt_tgchk_param { void *targinfo; unsigned int hook_mask; u_int8_t family; + bool nft_compat; }; /* Target destructor parameters */ diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 071d87214dde..0c40570069ba 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -164,8 +164,10 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par) !(info->bitmask & EBT_STP_MASK)) return -EINVAL; /* Make sure the match only receives stp frames */ - if (!ether_addr_equal(e->destmac, bridge_ula) || - !ether_addr_equal(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC)) + if (!par->nft_compat && + (!ether_addr_equal(e->destmac, bridge_ula) || + !ether_addr_equal(e->destmsk, msk) || + !(e->bitmask & EBT_DESTMAC))) return -EINVAL; return 0; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 771ab3d01ad3..45cb16a6a4a3 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -367,6 +367,11 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) struct clusterip_config *config; int ret; + if (par->nft_compat) { + pr_err("cannot use CLUSTERIP target from nftables compat\n"); + return -EOPNOTSUPP; + } + if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 7f29cfc76349..66def315eb56 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -161,6 +161,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, par->hook_mask = 0; } par->family = ctx->afi->family; + par->nft_compat = true; } static void target_compat_from_user(struct xt_target *t, void *in, void *out) @@ -377,6 +378,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, par->hook_mask = 0; } par->family = ctx->afi->family; + par->nft_compat = true; } static void match_compat_from_user(struct xt_match *m, void *in, void *out) diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index e762de5ee89b..8c3190e2fc6a 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -277,6 +277,9 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par) "FORWARD, OUTPUT and POSTROUTING hooks\n"); return -EINVAL; } + if (par->nft_compat) + return 0; + xt_ematch_foreach(ematch, e) if (find_syn_match(ematch)) return 0; @@ -299,6 +302,9 @@ static int tcpmss_tg6_check(const struct xt_tgchk_param *par) "FORWARD, OUTPUT and POSTROUTING hooks\n"); return -EINVAL; } + if (par->nft_compat) + return 0; + xt_ematch_foreach(ematch, e) if (find_syn_match(ematch)) return 0; -- cgit v1.2.3 From d53a2aa3a116609c7db8799da31541c4ba5999eb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 May 2015 08:52:19 -0700 Subject: net: fix sparse error in csum_replace4() make C=2 CF=-D__CHECK_ENDIAN__ net/ipv4/netfilter/nf_nat_l3proto_ipv4.o CHECK net/ipv4/netfilter/nf_nat_l3proto_ipv4.c include/net/checksum.h:125:64: warning: incorrect type in argument 2 (different base types) include/net/checksum.h:125:64: expected restricted __wsum [usertype] addend include/net/checksum.h:125:64: got restricted __be32 [usertype] from include/net/checksum.h:125:71: warning: incorrect type in argument 2 (different base types) include/net/checksum.h:125:71: expected restricted __wsum [usertype] addend include/net/checksum.h:125:71: got restricted __be32 [usertype] to include/net/checksum.h:125:64: warning: incorrect type in argument 2 (different base types) include/net/checksum.h:125:64: expected restricted __wsum [usertype] addend include/net/checksum.h:125:64: got restricted __be32 [usertype] from include/net/checksum.h:125:71: warning: incorrect type in argument 2 (different base types) include/net/checksum.h:125:71: expected restricted __wsum [usertype] addend include/net/checksum.h:125:71: got restricted __be32 [usertype] to Fixes: 4565af0d406b ("net: optimise csum_replace4()") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/checksum.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/checksum.h b/include/net/checksum.h index 0a55ac715077..2d1d73cb773e 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -122,7 +122,9 @@ static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum) static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) { - *sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), from), to)); + __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from); + + *sum = csum_fold(csum_add(tmp, (__force __wsum)to)); } /* Implements RFC 1624 (Incremental Internet Checksum) -- cgit v1.2.3 From c91d46065209bfe6124396fc409765ced0601b59 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 May 2015 05:48:07 -0700 Subject: net: fix two sparse errors First one in __skb_checksum_validate_complete() fixes the following (and other callers) make C=2 CF=-D__CHECK_ENDIAN__ net/ipv4/tcp_ipv4.o CHECK net/ipv4/tcp_ipv4.c include/linux/skbuff.h:3052:24: warning: incorrect type in return expression (different base types) include/linux/skbuff.h:3052:24: expected restricted __sum16 include/linux/skbuff.h:3052:24: got int Second is fixing gso_make_checksum() : CHECK net/ipv4/gre_offload.c include/linux/skbuff.h:3360:14: warning: incorrect type in assignment (different base types) include/linux/skbuff.h:3360:14: expected unsigned short [unsigned] [usertype] csum include/linux/skbuff.h:3360:14: got restricted __sum16 include/linux/skbuff.h:3365:16: warning: incorrect type in return expression (different base types) include/linux/skbuff.h:3365:16: expected restricted __sum16 include/linux/skbuff.h:3365:16: got unsigned short [unsigned] [usertype] csum Fixes: 5a21232983aa7 ("net: Support for csum_bad in skbuff") Fixes: 7e2b10c1e52ca ("net: Support for multiple checksums with gso") Signed-off-by: Eric Dumazet CC: Tom Herbert Acked-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f83aa6568cbf..b57eebfb67e0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3051,7 +3051,7 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb, } } else if (skb->csum_bad) { /* ip_summed == CHECKSUM_NONE in this case */ - return 1; + return (__force __sum16)1; } skb->csum = psum; @@ -3353,15 +3353,14 @@ static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res) { int plen = SKB_GSO_CB(skb)->csum_start - skb_headroom(skb) - - skb_transport_offset(skb); - __u16 csum; + skb_transport_offset(skb); + __wsum partial; - csum = csum_fold(csum_partial(skb_transport_header(skb), - plen, skb->csum)); + partial = csum_partial(skb_transport_header(skb), plen, skb->csum); skb->csum = res; SKB_GSO_CB(skb)->csum_start -= plen; - return csum; + return csum_fold(partial); } static inline bool skb_is_gso(const struct sk_buff *skb) -- cgit v1.2.3 From 1a24e04e4b50939daa3041682b38b82c896ca438 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 May 2015 12:39:25 -0700 Subject: net: fix sk_mem_reclaim_partial() sk_mem_reclaim_partial() goal is to ensure each socket has one SK_MEM_QUANTUM forward allocation. This is needed both for performance and better handling of memory pressure situations in follow up patches. SK_MEM_QUANTUM is currently a page, but might be reduced to 4096 bytes as some arches have 64KB pages. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 6 +++--- net/core/sock.c | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index d882f4c8e438..4581a60636f8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1368,7 +1368,7 @@ static inline struct inode *SOCK_INODE(struct socket *socket) * Functions for memory accounting */ int __sk_mem_schedule(struct sock *sk, int size, int kind); -void __sk_mem_reclaim(struct sock *sk); +void __sk_mem_reclaim(struct sock *sk, int amount); #define SK_MEM_QUANTUM ((int)PAGE_SIZE) #define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM) @@ -1409,7 +1409,7 @@ static inline void sk_mem_reclaim(struct sock *sk) if (!sk_has_account(sk)) return; if (sk->sk_forward_alloc >= SK_MEM_QUANTUM) - __sk_mem_reclaim(sk); + __sk_mem_reclaim(sk, sk->sk_forward_alloc); } static inline void sk_mem_reclaim_partial(struct sock *sk) @@ -1417,7 +1417,7 @@ static inline void sk_mem_reclaim_partial(struct sock *sk) if (!sk_has_account(sk)) return; if (sk->sk_forward_alloc > SK_MEM_QUANTUM) - __sk_mem_reclaim(sk); + __sk_mem_reclaim(sk, sk->sk_forward_alloc - 1); } static inline void sk_mem_charge(struct sock *sk, int size) diff --git a/net/core/sock.c b/net/core/sock.c index c18738a795b0..29124fcdc42a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2069,12 +2069,13 @@ EXPORT_SYMBOL(__sk_mem_schedule); /** * __sk_reclaim - reclaim memory_allocated * @sk: socket + * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple) */ -void __sk_mem_reclaim(struct sock *sk) +void __sk_mem_reclaim(struct sock *sk, int amount) { - sk_memory_allocated_sub(sk, - sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT); - sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; + amount >>= SK_MEM_QUANTUM_SHIFT; + sk_memory_allocated_sub(sk, amount); + sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; if (sk_under_memory_pressure(sk) && (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) -- cgit v1.2.3 From a6c5ea4ccf0033591e6e476d7a273c0074c07aa7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 May 2015 12:39:26 -0700 Subject: tcp: rename sk_forced_wmem_schedule() to sk_forced_mem_schedule() We plan to use sk_forced_wmem_schedule() in input path as well, so make it non static and rename it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp_output.c | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 7ace6acbf5fd..841691a296dc 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -311,6 +311,8 @@ static inline bool tcp_out_of_memory(struct sock *sk) return false; } +void sk_forced_mem_schedule(struct sock *sk, int size); + static inline bool tcp_too_many_orphans(struct sock *sk, int shift) { struct percpu_counter *ocp = sk->sk_prot->orphan_count; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7386d32cd670..bac1a950d087 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2816,8 +2816,10 @@ begin_fwd: * connection tear down and (memory) recovery. * Otherwise tcp_send_fin() could be tempted to either delay FIN * or even be forced to close flow without any FIN. + * In general, we want to allow one skb per socket to avoid hangs + * with edge trigger epoll() */ -static void sk_forced_wmem_schedule(struct sock *sk, int size) +void sk_forced_mem_schedule(struct sock *sk, int size) { int amt, status; @@ -2864,7 +2866,7 @@ coalesce: return; } skb_reserve(skb, MAX_TCP_HEADER); - sk_forced_wmem_schedule(sk, skb->truesize); + sk_forced_mem_schedule(sk, skb->truesize); /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ tcp_init_nondata_skb(skb, tp->write_seq, TCPHDR_ACK | TCPHDR_FIN); -- cgit v1.2.3 From b8da51ebb1aa93908350f95efae73aecbc2e266c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 May 2015 12:39:27 -0700 Subject: tcp: introduce tcp_under_memory_pressure() Introduce an optimized version of sk_under_memory_pressure() for TCP. Our intent is to use it in fast paths. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 8 ++++++++ net/ipv4/tcp_input.c | 8 ++++---- net/ipv4/tcp_output.c | 4 ++-- net/ipv4/tcp_timer.c | 2 +- 4 files changed, 15 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 841691a296dc..0d85223efa4c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -286,6 +286,14 @@ extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; extern int tcp_memory_pressure; +/* optimized version of sk_under_memory_pressure() for TCP sockets */ +static inline bool tcp_under_memory_pressure(const struct sock *sk) +{ + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + return !!sk->sk_cgrp->memory_pressure; + + return tcp_memory_pressure; +} /* * The next routines deal with comparing 32 bit unsigned ints * and worry about wraparound (automatic with unsigned arithmetic). diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cf8b20ff6658..093779f7e893 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -359,7 +359,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) /* Check #1 */ if (tp->rcv_ssthresh < tp->window_clamp && (int)tp->rcv_ssthresh < tcp_space(sk) && - !sk_under_memory_pressure(sk)) { + !tcp_under_memory_pressure(sk)) { int incr; /* Check #2. Increase window, if skb with such overhead @@ -446,7 +446,7 @@ static void tcp_clamp_window(struct sock *sk) if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && - !sk_under_memory_pressure(sk) && + !tcp_under_memory_pressure(sk) && sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), sysctl_tcp_rmem[2]); @@ -4781,7 +4781,7 @@ static int tcp_prune_queue(struct sock *sk) if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) tcp_clamp_window(sk); - else if (sk_under_memory_pressure(sk)) + else if (tcp_under_memory_pressure(sk)) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); tcp_collapse_ofo_queue(sk); @@ -4825,7 +4825,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk) return false; /* If we are under global TCP memory pressure, do not expand. */ - if (sk_under_memory_pressure(sk)) + if (tcp_under_memory_pressure(sk)) return false; /* If we are under soft global TCP memory pressure, do not expand. */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bac1a950d087..08c2cc40b26d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2392,7 +2392,7 @@ u32 __tcp_select_window(struct sock *sk) if (free_space < (full_space >> 1)) { icsk->icsk_ack.quick = 0; - if (sk_under_memory_pressure(sk)) + if (tcp_under_memory_pressure(sk)) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); @@ -2843,7 +2843,7 @@ void tcp_send_fin(struct sock *sk) * Note: in the latter case, FIN packet will be sent after a timeout, * as TCP stack thinks it has already been transmitted. */ - if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) { + if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) { coalesce: TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; TCP_SKB_CB(tskb)->end_seq++; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 65bf670e8714..5b752f58a900 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -247,7 +247,7 @@ void tcp_delack_timer_handler(struct sock *sk) } out: - if (sk_under_memory_pressure(sk)) + if (tcp_under_memory_pressure(sk)) sk_mem_reclaim(sk); } -- cgit v1.2.3 From 45d4122ca7cdb3a4b91f392605cd22cfa75f1d99 Mon Sep 17 00:00:00 2001 From: "Samudrala, Sridhar" Date: Wed, 13 May 2015 21:55:43 -0700 Subject: switchdev: add support for fdb add/del/dump via switchdev_port_obj ops. - introduce port fdb obj and generic switchdev_port_fdb_add/del/dump() - use switchdev_port_fdb_add/del/dump in rocker/team/bonding ndo ops. - add support for fdb obj in switchdev_port_obj_add/del/dump() - switch rocker to implement fdb ops via switchdev_ops v3: updated to sync with named union changes. Signed-off-by: Sridhar Samudrala Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 3 + drivers/net/ethernet/rocker/rocker.c | 188 +++++++++++++++-------------------- drivers/net/team/team.c | 3 + include/net/switchdev.h | 50 ++++++++++ net/switchdev/switchdev.c | 175 ++++++++++++++++++++++++++++++++ 5 files changed, 312 insertions(+), 107 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ef26e0147050..2268438f3f63 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4039,6 +4039,9 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_bridge_setlink = switchdev_port_bridge_setlink, .ndo_bridge_getlink = switchdev_port_bridge_getlink, .ndo_bridge_dellink = switchdev_port_bridge_dellink, + .ndo_fdb_add = switchdev_port_fdb_add, + .ndo_fdb_del = switchdev_port_fdb_del, + .ndo_fdb_dump = switchdev_port_fdb_dump, .ndo_features_check = passthru_features_check, }; diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 3e13dcccd73d..0f5e962d691c 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4193,110 +4193,6 @@ static int rocker_port_vlan_rx_kill_vid(struct net_device *dev, ROCKER_OP_FLAG_REMOVE, vid); } -static int rocker_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid, - u16 nlm_flags) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - __be16 vlan_id = rocker_port_vid_to_vlan(rocker_port, vid, NULL); - int flags = 0; - - if (!rocker_port_is_bridged(rocker_port)) - return -EINVAL; - - return rocker_port_fdb(rocker_port, SWITCHDEV_TRANS_NONE, - addr, vlan_id, flags); -} - -static int rocker_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - __be16 vlan_id = rocker_port_vid_to_vlan(rocker_port, vid, NULL); - int flags = ROCKER_OP_FLAG_REMOVE; - - if (!rocker_port_is_bridged(rocker_port)) - return -EINVAL; - - return rocker_port_fdb(rocker_port, SWITCHDEV_TRANS_NONE, - addr, vlan_id, flags); -} - -static int rocker_fdb_fill_info(struct sk_buff *skb, - struct rocker_port *rocker_port, - const unsigned char *addr, u16 vid, - u32 portid, u32 seq, int type, - unsigned int flags) -{ - struct nlmsghdr *nlh; - struct ndmsg *ndm; - - nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); - if (!nlh) - return -EMSGSIZE; - - ndm = nlmsg_data(nlh); - ndm->ndm_family = AF_BRIDGE; - ndm->ndm_pad1 = 0; - ndm->ndm_pad2 = 0; - ndm->ndm_flags = NTF_SELF; - ndm->ndm_type = 0; - ndm->ndm_ifindex = rocker_port->dev->ifindex; - ndm->ndm_state = NUD_REACHABLE; - - if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) - goto nla_put_failure; - - if (vid && nla_put_u16(skb, NDA_VLAN, vid)) - goto nla_put_failure; - - nlmsg_end(skb, nlh); - return 0; - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; -} - -static int rocker_port_fdb_dump(struct sk_buff *skb, - struct netlink_callback *cb, - struct net_device *dev, - struct net_device *filter_dev, - int idx) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - struct rocker *rocker = rocker_port->rocker; - struct rocker_fdb_tbl_entry *found; - struct hlist_node *tmp; - int bkt; - unsigned long lock_flags; - const unsigned char *addr; - u16 vid; - int err; - - spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags); - hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) { - if (found->key.pport != rocker_port->pport) - continue; - if (idx < cb->args[0]) - goto skip; - addr = found->key.addr; - vid = rocker_port_vlan_to_vid(rocker_port, found->key.vlan_id); - err = rocker_fdb_fill_info(skb, rocker_port, addr, vid, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWNEIGH, NLM_F_MULTI); - if (err < 0) - break; -skip: - ++idx; - } - spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags); - return idx; -} - static int rocker_port_get_phys_port_name(struct net_device *dev, char *buf, size_t len) { @@ -4320,12 +4216,12 @@ static const struct net_device_ops rocker_port_netdev_ops = { .ndo_set_mac_address = rocker_port_set_mac_address, .ndo_vlan_rx_add_vid = rocker_port_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = rocker_port_vlan_rx_kill_vid, - .ndo_fdb_add = rocker_port_fdb_add, - .ndo_fdb_del = rocker_port_fdb_del, - .ndo_fdb_dump = rocker_port_fdb_dump, .ndo_bridge_getlink = switchdev_port_bridge_getlink, .ndo_bridge_setlink = switchdev_port_bridge_setlink, .ndo_bridge_dellink = switchdev_port_bridge_dellink, + .ndo_fdb_add = switchdev_port_fdb_add, + .ndo_fdb_del = switchdev_port_fdb_del, + .ndo_fdb_dump = switchdev_port_fdb_dump, .ndo_get_phys_port_name = rocker_port_get_phys_port_name, }; @@ -4447,6 +4343,19 @@ static int rocker_port_vlans_add(struct rocker_port *rocker_port, return 0; } +static int rocker_port_fdb_add(struct rocker_port *rocker_port, + enum switchdev_trans trans, + struct switchdev_obj_fdb *fdb) +{ + __be16 vlan_id = rocker_port_vid_to_vlan(rocker_port, fdb->vid, NULL); + int flags = 0; + + if (!rocker_port_is_bridged(rocker_port)) + return -EINVAL; + + return rocker_port_fdb(rocker_port, trans, fdb->addr, vlan_id, flags); +} + static int rocker_port_obj_add(struct net_device *dev, struct switchdev_obj *obj) { @@ -4476,6 +4385,9 @@ static int rocker_port_obj_add(struct net_device *dev, htonl(fib4->dst), fib4->dst_len, fib4->fi, fib4->tb_id, 0); break; + case SWITCHDEV_OBJ_PORT_FDB: + err = rocker_port_fdb_add(rocker_port, obj->trans, &obj->u.fdb); + break; default: err = -EOPNOTSUPP; break; @@ -4513,6 +4425,19 @@ static int rocker_port_vlans_del(struct rocker_port *rocker_port, return 0; } +static int rocker_port_fdb_del(struct rocker_port *rocker_port, + enum switchdev_trans trans, + struct switchdev_obj_fdb *fdb) +{ + __be16 vlan_id = rocker_port_vid_to_vlan(rocker_port, fdb->vid, NULL); + int flags = ROCKER_OP_FLAG_REMOVE; + + if (!rocker_port_is_bridged(rocker_port)) + return -EINVAL; + + return rocker_port_fdb(rocker_port, trans, fdb->addr, vlan_id, flags); +} + static int rocker_port_obj_del(struct net_device *dev, struct switchdev_obj *obj) { @@ -4531,6 +4456,54 @@ static int rocker_port_obj_del(struct net_device *dev, fib4->fi, fib4->tb_id, ROCKER_OP_FLAG_REMOVE); break; + case SWITCHDEV_OBJ_PORT_FDB: + err = rocker_port_fdb_del(rocker_port, obj->trans, &obj->u.fdb); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int rocker_port_fdb_dump(struct rocker_port *rocker_port, + struct switchdev_obj *obj) +{ + struct rocker *rocker = rocker_port->rocker; + struct switchdev_obj_fdb *fdb = &obj->u.fdb; + struct rocker_fdb_tbl_entry *found; + struct hlist_node *tmp; + unsigned long lock_flags; + int bkt; + int err = 0; + + spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags); + hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) { + if (found->key.pport != rocker_port->pport) + continue; + fdb->addr = found->key.addr; + fdb->vid = rocker_port_vlan_to_vid(rocker_port, + found->key.vlan_id); + err = obj->cb(rocker_port->dev, obj); + if (err) + break; + } + spin_unlock_irqrestore(&rocker->fdb_tbl_lock, lock_flags); + + return err; +} + +static int rocker_port_obj_dump(struct net_device *dev, + struct switchdev_obj *obj) +{ + struct rocker_port *rocker_port = netdev_priv(dev); + int err = 0; + + switch (obj->id) { + case SWITCHDEV_OBJ_PORT_FDB: + err = rocker_port_fdb_dump(rocker_port, obj); + break; default: err = -EOPNOTSUPP; break; @@ -4544,6 +4517,7 @@ static const struct switchdev_ops rocker_port_switchdev_ops = { .switchdev_port_attr_set = rocker_port_attr_set, .switchdev_port_obj_add = rocker_port_obj_add, .switchdev_port_obj_del = rocker_port_obj_del, + .switchdev_port_obj_dump = rocker_port_obj_dump, }; /******************** diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 1ec035a53c3d..daa054b3ff03 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1980,6 +1980,9 @@ static const struct net_device_ops team_netdev_ops = { .ndo_bridge_setlink = switchdev_port_bridge_setlink, .ndo_bridge_getlink = switchdev_port_bridge_getlink, .ndo_bridge_dellink = switchdev_port_bridge_dellink, + .ndo_fdb_add = switchdev_port_fdb_add, + .ndo_fdb_del = switchdev_port_fdb_del, + .ndo_fdb_dump = switchdev_port_fdb_dump, .ndo_features_check = passthru_features_check, }; diff --git a/include/net/switchdev.h b/include/net/switchdev.h index ea5b1c230d3d..437f8fe75705 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -47,11 +47,13 @@ enum switchdev_obj_id { SWITCHDEV_OBJ_UNDEFINED, SWITCHDEV_OBJ_PORT_VLAN, SWITCHDEV_OBJ_IPV4_FIB, + SWITCHDEV_OBJ_PORT_FDB, }; struct switchdev_obj { enum switchdev_obj_id id; enum switchdev_trans trans; + int (*cb)(struct net_device *dev, struct switchdev_obj *obj); union { struct switchdev_obj_vlan { /* PORT_VLAN */ u16 flags; @@ -67,6 +69,10 @@ struct switchdev_obj { u32 nlflags; u32 tb_id; } ipv4_fib; + struct switchdev_obj_fdb { /* PORT_FDB */ + const unsigned char *addr; + u16 vid; + } fdb; } u; }; @@ -80,6 +86,8 @@ struct switchdev_obj { * @switchdev_port_obj_add: Add an object to port (see switchdev_obj). * * @switchdev_port_obj_del: Delete an object from port (see switchdev_obj). + * + * @switchdev_port_obj_dump: Dump port objects (see switchdev_obj). */ struct switchdev_ops { int (*switchdev_port_attr_get)(struct net_device *dev, @@ -90,6 +98,8 @@ struct switchdev_ops { struct switchdev_obj *obj); int (*switchdev_port_obj_del)(struct net_device *dev, struct switchdev_obj *obj); + int (*switchdev_port_obj_dump)(struct net_device *dev, + struct switchdev_obj *obj); }; enum switchdev_notifier_type { @@ -121,6 +131,7 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr); int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj); int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj); +int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj); int register_switchdev_notifier(struct notifier_block *nb); int unregister_switchdev_notifier(struct notifier_block *nb); int call_switchdev_notifiers(unsigned long val, struct net_device *dev, @@ -137,6 +148,15 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 tb_id); void switchdev_fib_ipv4_abort(struct fib_info *fi); +int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr, + u16 vid, u16 nlm_flags); +int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr, + u16 vid); +int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev, + struct net_device *filter_dev, int idx); #else @@ -164,6 +184,12 @@ static inline int switchdev_port_obj_del(struct net_device *dev, return -EOPNOTSUPP; } +static inline int switchdev_port_obj_dump(struct net_device *dev, + struct switchdev_obj *obj) +{ + return -EOPNOTSUPP; +} + static inline int register_switchdev_notifier(struct notifier_block *nb) { return 0; @@ -221,6 +247,30 @@ static inline void switchdev_fib_ipv4_abort(struct fib_info *fi) { } +static inline int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, + u16 vid, u16 nlm_flags) +{ + return -EOPNOTSUPP; +} + +static inline int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + return -EOPNOTSUPP; +} + +static inline int switchdev_port_fdb_dump(struct sk_buff *skb, + struct netlink_callback *cb, + struct net_device *dev, + struct net_device *filter_dev, + int idx) +{ + return -EOPNOTSUPP; +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 0409f9b5bdbc..d4c8cf828240 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -296,6 +296,36 @@ int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj) } EXPORT_SYMBOL_GPL(switchdev_port_obj_del); +/** + * switchdev_port_obj_dump - Dump port objects + * + * @dev: port device + * @obj: object to dump + */ +int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj) +{ + const struct switchdev_ops *ops = dev->switchdev_ops; + struct net_device *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (ops && ops->switchdev_port_obj_dump) + return ops->switchdev_port_obj_dump(dev, obj); + + /* Switch device port(s) may be stacked under + * bond/team/vlan dev, so recurse down to dump objects on + * first port at bottom of stack. + */ + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = switchdev_port_obj_dump(lower_dev, obj); + break; + } + + return err; +} +EXPORT_SYMBOL_GPL(switchdev_port_obj_dump); + static DEFINE_MUTEX(switchdev_mutex); static RAW_NOTIFIER_HEAD(switchdev_notif_chain); @@ -566,6 +596,151 @@ int switchdev_port_bridge_dellink(struct net_device *dev, } EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink); +/** + * switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port + * + * @ndmsg: netlink hdr + * @nlattr: netlink attributes + * @dev: port device + * @addr: MAC address to add + * @vid: VLAN to add + * + * Add FDB entry to switch device. + */ +int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr, + u16 vid, u16 nlm_flags) +{ + struct switchdev_obj obj = { + .id = SWITCHDEV_OBJ_PORT_FDB, + .u.fdb = { + .addr = addr, + .vid = vid, + }, + }; + + return switchdev_port_obj_add(dev, &obj); +} +EXPORT_SYMBOL_GPL(switchdev_port_fdb_add); + +/** + * switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port + * + * @ndmsg: netlink hdr + * @nlattr: netlink attributes + * @dev: port device + * @addr: MAC address to delete + * @vid: VLAN to delete + * + * Delete FDB entry from switch device. + */ +int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr, + u16 vid) +{ + struct switchdev_obj obj = { + .id = SWITCHDEV_OBJ_PORT_FDB, + .u.fdb = { + .addr = addr, + .vid = vid, + }, + }; + + return switchdev_port_obj_del(dev, &obj); +} +EXPORT_SYMBOL_GPL(switchdev_port_fdb_del); + +struct switchdev_fdb_dump { + struct switchdev_obj obj; + struct sk_buff *skb; + struct netlink_callback *cb; + struct net_device *filter_dev; + int idx; +}; + +static int switchdev_port_fdb_dump_cb(struct net_device *dev, + struct switchdev_obj *obj) +{ + struct switchdev_fdb_dump *dump = + container_of(obj, struct switchdev_fdb_dump, obj); + u32 portid = NETLINK_CB(dump->cb->skb).portid; + u32 seq = dump->cb->nlh->nlmsg_seq; + struct nlmsghdr *nlh; + struct ndmsg *ndm; + struct net_device *master = netdev_master_upper_dev_get(dev); + + if (dump->idx < dump->cb->args[0]) + goto skip; + + if (master && dump->filter_dev != master) + goto skip; + + nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, + sizeof(*ndm), NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + ndm = nlmsg_data(nlh); + ndm->ndm_family = AF_BRIDGE; + ndm->ndm_pad1 = 0; + ndm->ndm_pad2 = 0; + ndm->ndm_flags = NTF_SELF; + ndm->ndm_type = 0; + ndm->ndm_ifindex = dev->ifindex; + ndm->ndm_state = NUD_REACHABLE; + + if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr)) + goto nla_put_failure; + + if (obj->u.fdb.vid && nla_put_u16(dump->skb, NDA_VLAN, obj->u.fdb.vid)) + goto nla_put_failure; + + nlmsg_end(dump->skb, nlh); + +skip: + dump->idx++; + return 0; + +nla_put_failure: + nlmsg_cancel(dump->skb, nlh); + return -EMSGSIZE; +} + +/** + * switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries + * + * @skb: netlink skb + * @cb: netlink callback + * @dev: port device + * @filter_dev: filter device + * @idx: + * + * Delete FDB entry from switch device. + */ +int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev, + struct net_device *filter_dev, int idx) +{ + struct switchdev_fdb_dump dump = { + .obj = { + .id = SWITCHDEV_OBJ_PORT_FDB, + .cb = switchdev_port_fdb_dump_cb, + }, + .skb = skb, + .cb = cb, + .filter_dev = filter_dev, + .idx = idx, + }; + int err; + + err = switchdev_port_obj_dump(dev, &dump.obj); + if (err) + return err; + + return dump.idx; +} +EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); + static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) { const struct switchdev_ops *ops = dev->switchdev_ops; -- cgit v1.2.3 From de133464c9e70808d3e5a861294bc55940988178 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 15 May 2015 14:47:32 -0700 Subject: netns: make nsid_lock per net The spinlock is used to protect netns_ids which is per net, so there is no need to use a global spinlock. Cc: Nicolas Dichtel Signed-off-by: Cong Wang Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/net/net_namespace.h | 1 + net/core/net_namespace.c | 32 ++++++++++++++++---------------- 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 3f850acc844e..72eb23723294 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -58,6 +58,7 @@ struct net { struct list_head exit_list; /* Use only net_mutex */ struct user_namespace *user_ns; /* Owning user namespace */ + spinlock_t nsid_lock; struct idr netns_ids; struct ns_common ns; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index d2f42da9479b..2c2eb1b629b1 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -28,7 +28,6 @@ static LIST_HEAD(pernet_list); static struct list_head *first_device = &pernet_list; DEFINE_MUTEX(net_mutex); -static DEFINE_SPINLOCK(nsid_lock); LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); @@ -218,10 +217,10 @@ int peernet2id_alloc(struct net *net, struct net *peer) bool alloc; int id; - spin_lock_irqsave(&nsid_lock, flags); + spin_lock_irqsave(&net->nsid_lock, flags); alloc = atomic_read(&peer->count) == 0 ? false : true; id = __peernet2id_alloc(net, peer, &alloc); - spin_unlock_irqrestore(&nsid_lock, flags); + spin_unlock_irqrestore(&net->nsid_lock, flags); if (alloc && id >= 0) rtnl_net_notifyid(net, RTM_NEWNSID, id); return id; @@ -234,9 +233,9 @@ int peernet2id(struct net *net, struct net *peer) unsigned long flags; int id; - spin_lock_irqsave(&nsid_lock, flags); + spin_lock_irqsave(&net->nsid_lock, flags); id = __peernet2id(net, peer); - spin_unlock_irqrestore(&nsid_lock, flags); + spin_unlock_irqrestore(&net->nsid_lock, flags); return id; } @@ -257,11 +256,11 @@ struct net *get_net_ns_by_id(struct net *net, int id) return NULL; rcu_read_lock(); - spin_lock_irqsave(&nsid_lock, flags); + spin_lock_irqsave(&net->nsid_lock, flags); peer = idr_find(&net->netns_ids, id); if (peer) get_net(peer); - spin_unlock_irqrestore(&nsid_lock, flags); + spin_unlock_irqrestore(&net->nsid_lock, flags); rcu_read_unlock(); return peer; @@ -282,6 +281,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) net->dev_base_seq = 1; net->user_ns = user_ns; idr_init(&net->netns_ids); + spin_lock_init(&net->nsid_lock); list_for_each_entry(ops, &pernet_list, list) { error = ops_init(ops, net); @@ -404,17 +404,17 @@ static void cleanup_net(struct work_struct *work) for_each_net(tmp) { int id; - spin_lock_irq(&nsid_lock); + spin_lock_irq(&tmp->nsid_lock); id = __peernet2id(tmp, net); if (id >= 0) idr_remove(&tmp->netns_ids, id); - spin_unlock_irq(&nsid_lock); + spin_unlock_irq(&tmp->nsid_lock); if (id >= 0) rtnl_net_notifyid(tmp, RTM_DELNSID, id); } - spin_lock_irq(&nsid_lock); + spin_lock_irq(&net->nsid_lock); idr_destroy(&net->netns_ids); - spin_unlock_irq(&nsid_lock); + spin_unlock_irq(&net->nsid_lock); } rtnl_unlock(); @@ -563,15 +563,15 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) if (IS_ERR(peer)) return PTR_ERR(peer); - spin_lock_irqsave(&nsid_lock, flags); + spin_lock_irqsave(&net->nsid_lock, flags); if (__peernet2id(net, peer) >= 0) { - spin_unlock_irqrestore(&nsid_lock, flags); + spin_unlock_irqrestore(&net->nsid_lock, flags); err = -EEXIST; goto out; } err = alloc_netid(net, peer, nsid); - spin_unlock_irqrestore(&nsid_lock, flags); + spin_unlock_irqrestore(&net->nsid_lock, flags); if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err); err = 0; @@ -695,9 +695,9 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) }; unsigned long flags; - spin_lock_irqsave(&nsid_lock, flags); + spin_lock_irqsave(&net->nsid_lock, flags); idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb); - spin_unlock_irqrestore(&nsid_lock, flags); + spin_unlock_irqrestore(&net->nsid_lock, flags); cb->args[0] = net_cb.idx; return skb->len; -- cgit v1.2.3 From 5cf422808244ca8f1177c72fe6e1ce8322794b57 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Fri, 15 May 2015 14:15:35 -0700 Subject: ipv4: introduce frag_expire_skip_icmp() Improve readability of skip ICMP for de-fragmentation expiration logic. This change will also make the logic easier to maintain when the following patches in this series are applied. Signed-off-by: Andy Zhou Signed-off-by: David S. Miller --- include/net/ip.h | 10 ++++++++++ net/ipv4/ip_fragment.c | 13 +++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 0ed6d768e606..43f6f39df9fc 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -478,6 +478,16 @@ enum ip_defrag_users { IP_DEFRAG_MACVLAN, }; +/* Return true if the value of 'user' is between 'lower_bond' + * and 'upper_bond' inclusively. + */ +static inline bool ip_defrag_user_in_between(u32 user, + enum ip_defrag_users lower_bond, + enum ip_defrag_users upper_bond) +{ + return user >= lower_bond && user <= upper_bond; +} + int ip_defrag(struct sk_buff *skb, u32 user); #ifdef CONFIG_INET struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cc1da6d9cb35..83424f1742b8 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -173,6 +173,13 @@ static void ipq_kill(struct ipq *ipq) inet_frag_kill(&ipq->q, &ip4_frags); } +static bool frag_expire_skip_icmp(u32 user) +{ + return user == IP_DEFRAG_AF_PACKET || + ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_IN, + __IP_DEFRAG_CONNTRACK_IN_END); +} + /* * Oops, a fragment queue timed out. Kill it and send an ICMP reply. */ @@ -217,10 +224,8 @@ static void ip_expire(unsigned long arg) /* Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. */ - if (qp->user == IP_DEFRAG_AF_PACKET || - ((qp->user >= IP_DEFRAG_CONNTRACK_IN) && - (qp->user <= __IP_DEFRAG_CONNTRACK_IN_END) && - (skb_rtable(head)->rt_type != RTN_LOCAL))) + if (frag_expire_skip_icmp(qp->user) && + (skb_rtable(head)->rt_type != RTN_LOCAL)) goto out_rcu_unlock; /* Send an ICMP "Fragment Reassembly Timeout" message. */ -- cgit v1.2.3 From 49d16b23cd1e61c028ee088c5a64e9ac6a9c6147 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Fri, 15 May 2015 14:15:37 -0700 Subject: bridge_netfilter: No ICMP packet on IPv4 fragmentation error When bridge netfilter re-fragments an IP packet for output, all packets that can not be re-fragmented to their original input size should be silently discarded. However, current bridge netfilter output path generates an ICMP packet with 'size exceeded MTU' message for such packets, this is a bug. This patch refactors the ip_fragment() API to allow two separate use cases. The bridge netfilter user case will not send ICMP, the routing output will, as before. Signed-off-by: Andy Zhou Acked-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/ip.h | 4 ++-- net/bridge/br_netfilter.c | 21 ++++++++++++++++++++- net/ipv4/ip_output.c | 40 ++++++++++++++++++++++++++++------------ 3 files changed, 50 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 43f6f39df9fc..cd7a6a458bb6 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -108,8 +108,8 @@ int ip_local_deliver(struct sk_buff *skb); int ip_mr_input(struct sk_buff *skb); int ip_output(struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct sock *sk, struct sk_buff *skb); -int ip_fragment(struct sock *sk, struct sk_buff *skb, - int (*output)(struct sock *, struct sk_buff *)); +int ip_do_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)); int ip_do_nat(struct sk_buff *skb); void ip_send_check(struct iphdr *ip); int __ip_local_out(struct sk_buff *skb); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 2b0e8bb49944..1d2eb32d8270 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -853,6 +853,25 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) return br_dev_queue_push_xmit(sk, skb); } +static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)) +{ + unsigned int mtu = ip_skb_dst_mtu(skb); + struct iphdr *iph = ip_hdr(skb); + struct rtable *rt = skb_rtable(skb); + struct net_device *dev = rt->dst.dev; + + if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || + (IPCB(skb)->frag_max_size && + IPCB(skb)->frag_max_size > mtu))) { + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + kfree_skb(skb); + return -EMSGSIZE; + } + + return ip_do_fragment(sk, skb, output); +} + static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) { int ret; @@ -886,7 +905,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) skb_copy_from_linear_data_offset(skb, -data->size, data->mac, data->size); - ret = ip_fragment(sk, skb, br_nf_push_frag_xmit); + ret = br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit); } else { nf_bridge_info_free(skb); ret = br_dev_queue_push_xmit(sk, skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 2acc5dc32807..8d91b922fcfe 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -83,6 +83,9 @@ int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; EXPORT_SYMBOL(sysctl_ip_default_ttl); +static int ip_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)); + /* Generate a checksum for an outgoing IP datagram. */ void ip_send_check(struct iphdr *iph) { @@ -478,6 +481,28 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } +static int ip_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)) +{ + struct iphdr *iph = ip_hdr(skb); + unsigned int mtu = ip_skb_dst_mtu(skb); + + if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || + (IPCB(skb)->frag_max_size && + IPCB(skb)->frag_max_size > mtu))) { + struct rtable *rt = skb_rtable(skb); + struct net_device *dev = rt->dst.dev; + + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + kfree_skb(skb); + return -EMSGSIZE; + } + + return ip_do_fragment(sk, skb, output); +} + /* * This IP datagram is too large to be sent in one piece. Break it up into * smaller pieces (each of size equal to IP header plus @@ -485,8 +510,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) * single device frame, and queue such a frame for sending. */ -int ip_fragment(struct sock *sk, struct sk_buff *skb, - int (*output)(struct sock *, struct sk_buff *)) +int ip_do_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)) { struct iphdr *iph; int ptr; @@ -507,15 +532,6 @@ int ip_fragment(struct sock *sk, struct sk_buff *skb, iph = ip_hdr(skb); mtu = ip_skb_dst_mtu(skb); - if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || - (IPCB(skb)->frag_max_size && - IPCB(skb)->frag_max_size > mtu))) { - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); - kfree_skb(skb); - return -EMSGSIZE; - } /* * Setup starting values. @@ -751,7 +767,7 @@ fail: IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); return err; } -EXPORT_SYMBOL(ip_fragment); +EXPORT_SYMBOL(ip_do_fragment); int ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) -- cgit v1.2.3 From 1a19cb680be0d4b06ce9a9d6516b8f45f544d3e8 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 17 May 2015 21:44:39 +0200 Subject: ieee802154: change transmit power to s32 This patch change the transmit power from s8 to s32. This prepares to store a mbm value instead dbm inside the transmit power variable. The old interface keep the a s8 dbm value, which should be backward compatibility when assign s8 to s32. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/at86rf230.c | 2 +- include/net/cfg802154.h | 2 +- include/net/mac802154.h | 2 +- net/ieee802154/nl802154.c | 6 +++--- net/mac802154/driver-ops.h | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 67d00fbc2e0e..02b6bb72304d 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -1194,7 +1194,7 @@ at86rf230_set_hw_addr_filt(struct ieee802154_hw *hw, } static int -at86rf230_set_txpower(struct ieee802154_hw *hw, s8 db) +at86rf230_set_txpower(struct ieee802154_hw *hw, s32 db) { struct at86rf230_local *lp = hw->priv; diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 6ea16c84293b..47804cddb46f 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -85,7 +85,7 @@ struct wpan_phy { u8 current_channel; u8 current_page; u32 channels_supported[IEEE802154_MAX_PAGE + 1]; - s8 transmit_power; + s32 transmit_power; struct wpan_phy_cca cca; __le64 perm_extended_addr; diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 7df28a4c23f9..400e4e85c53f 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -213,7 +213,7 @@ struct ieee802154_ops { int (*set_hw_addr_filt)(struct ieee802154_hw *hw, struct ieee802154_hw_addr_filt *filt, unsigned long changed); - int (*set_txpower)(struct ieee802154_hw *hw, s8 dbm); + int (*set_txpower)(struct ieee802154_hw *hw, s32 dbm); int (*set_lbt)(struct ieee802154_hw *hw, bool on); int (*set_cca_mode)(struct ieee802154_hw *hw, const struct wpan_phy_cca *cca); diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index d750d9778ba7..f3185c71af10 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -207,7 +207,7 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = { [NL802154_ATTR_PAGE] = { .type = NLA_U8, }, [NL802154_ATTR_CHANNEL] = { .type = NLA_U8, }, - [NL802154_ATTR_TX_POWER] = { .type = NLA_S8, }, + [NL802154_ATTR_TX_POWER] = { .type = NLA_S32, }, [NL802154_ATTR_CCA_MODE] = { .type = NLA_U32, }, [NL802154_ATTR_CCA_OPT] = { .type = NLA_U32, }, @@ -301,8 +301,8 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, goto nla_put_failure; } - if (nla_put_s8(msg, NL802154_ATTR_TX_POWER, - rdev->wpan_phy.transmit_power)) + if (nla_put_s32(msg, NL802154_ATTR_TX_POWER, + rdev->wpan_phy.transmit_power)) goto nla_put_failure; finish: diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h index a0533357b9ea..57c1bdbfaa91 100644 --- a/net/mac802154/driver-ops.h +++ b/net/mac802154/driver-ops.h @@ -58,7 +58,7 @@ drv_set_channel(struct ieee802154_local *local, u8 page, u8 channel) return local->ops->set_channel(&local->hw, page, channel); } -static inline int drv_set_tx_power(struct ieee802154_local *local, s8 dbm) +static inline int drv_set_tx_power(struct ieee802154_local *local, s32 dbm) { might_sleep(); -- cgit v1.2.3 From e2eb173aaacd1a1bcd255d3e74ffb719e47eeadb Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 17 May 2015 21:44:40 +0200 Subject: ieee802154: change transmit power to mbm This patch change the handling of transmit power level from dbm to mbm. This prepares to handle floating point transmit power levels values. The old netlink 802.15.4 will convert the dbm value to mbm for handling backward compatibility. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/at86rf230.c | 3 ++- include/net/cfg802154.h | 1 + include/net/mac802154.h | 4 ++-- net/ieee802154/nl-mac.c | 4 ++-- net/mac802154/driver-ops.h | 4 ++-- 5 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 02b6bb72304d..3a303e4a3c07 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -1194,9 +1194,10 @@ at86rf230_set_hw_addr_filt(struct ieee802154_hw *hw, } static int -at86rf230_set_txpower(struct ieee802154_hw *hw, s32 db) +at86rf230_set_txpower(struct ieee802154_hw *hw, s32 mbm) { struct at86rf230_local *lp = hw->priv; + s8 db = mbm / 100; /* typical maximum output is 5dBm with RG_PHY_TX_PWR 0x60, lower five * bits decrease power in 1dB steps. 0x60 represents extra PA gain of diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 47804cddb46f..b5b3f9f43084 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -85,6 +85,7 @@ struct wpan_phy { u8 current_channel; u8 current_page; u32 channels_supported[IEEE802154_MAX_PAGE + 1]; + /* current transmit_power in mBm */ s32 transmit_power; struct wpan_phy_cca cca; diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 400e4e85c53f..e863a8557c0a 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -171,7 +171,7 @@ struct ieee802154_hw { * Returns either zero, or negative errno. * * set_txpower: - * Set radio transmit power in dB. Called with pib_lock held. + * Set radio transmit power in mBm. Called with pib_lock held. * Returns either zero, or negative errno. * * set_lbt @@ -213,7 +213,7 @@ struct ieee802154_ops { int (*set_hw_addr_filt)(struct ieee802154_hw *hw, struct ieee802154_hw_addr_filt *filt, unsigned long changed); - int (*set_txpower)(struct ieee802154_hw *hw, s32 dbm); + int (*set_txpower)(struct ieee802154_hw *hw, s32 mbm); int (*set_lbt)(struct ieee802154_hw *hw, bool on); int (*set_cca_mode)(struct ieee802154_hw *hw, const struct wpan_phy_cca *cca); diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 2b4955d7aae5..4ba2e13f7a07 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -117,7 +117,7 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, rtnl_unlock(); if (nla_put_s8(msg, IEEE802154_ATTR_TXPOWER, - params.transmit_power) || + params.transmit_power / 100) || nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, params.lbt) || nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE, params.cca.mode) || @@ -510,7 +510,7 @@ int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info) ops->get_mac_params(dev, ¶ms); if (info->attrs[IEEE802154_ATTR_TXPOWER]) - params.transmit_power = nla_get_s8(info->attrs[IEEE802154_ATTR_TXPOWER]); + params.transmit_power = nla_get_s8(info->attrs[IEEE802154_ATTR_TXPOWER]) * 100; if (info->attrs[IEEE802154_ATTR_LBT_ENABLED]) params.lbt = nla_get_u8(info->attrs[IEEE802154_ATTR_LBT_ENABLED]); diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h index 57c1bdbfaa91..d289ae3f1e93 100644 --- a/net/mac802154/driver-ops.h +++ b/net/mac802154/driver-ops.h @@ -58,7 +58,7 @@ drv_set_channel(struct ieee802154_local *local, u8 page, u8 channel) return local->ops->set_channel(&local->hw, page, channel); } -static inline int drv_set_tx_power(struct ieee802154_local *local, s32 dbm) +static inline int drv_set_tx_power(struct ieee802154_local *local, s32 mbm) { might_sleep(); @@ -67,7 +67,7 @@ static inline int drv_set_tx_power(struct ieee802154_local *local, s32 dbm) return -EOPNOTSUPP; } - return local->ops->set_txpower(&local->hw, dbm); + return local->ops->set_txpower(&local->hw, mbm); } static inline int drv_set_cca_mode(struct ieee802154_local *local, -- cgit v1.2.3 From 32b23550ad64d9676f2218b3d5de46bacf98ef1d Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 17 May 2015 21:44:41 +0200 Subject: ieee802154: change cca ed level to mbm This patch change the handling of cca energy detection level from dbm to mbm. This prepares to handle floating point cca energy detection levels values. The old netlink 802.15.4 will convert the dbm value to mbm for handling backward compatibility. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/at86rf230.c | 3 ++- include/net/cfg802154.h | 1 + include/net/mac802154.h | 5 ++--- net/ieee802154/nl-mac.c | 4 ++-- net/mac802154/driver-ops.h | 4 ++-- 5 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 3a303e4a3c07..e68d45ed622e 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -1268,9 +1268,10 @@ at86rf23x_get_desens_steps(struct at86rf230_local *lp, s32 level) } static int -at86rf230_set_cca_ed_level(struct ieee802154_hw *hw, s32 level) +at86rf230_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm) { struct at86rf230_local *lp = hw->priv; + s32 level = mbm / 100; if (level < lp->data->rssi_base_val || level > 30) return -EINVAL; diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index b5b3f9f43084..9ced2c9fdbfc 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -91,6 +91,7 @@ struct wpan_phy { __le64 perm_extended_addr; + /* current cca ed threshold in mBm */ s32 cca_ed_level; /* PHY depended MAC PIB values */ diff --git a/include/net/mac802154.h b/include/net/mac802154.h index e863a8557c0a..71e245605ef8 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -184,7 +184,7 @@ struct ieee802154_hw { * Returns either zero, or negative errno. * * set_cca_ed_level - * Sets the CCA energy detection threshold in dBm. Called with pib_lock + * Sets the CCA energy detection threshold in mBm. Called with pib_lock * held. * Returns either zero, or negative errno. * @@ -217,8 +217,7 @@ struct ieee802154_ops { int (*set_lbt)(struct ieee802154_hw *hw, bool on); int (*set_cca_mode)(struct ieee802154_hw *hw, const struct wpan_phy_cca *cca); - int (*set_cca_ed_level)(struct ieee802154_hw *hw, - s32 level); + int (*set_cca_ed_level)(struct ieee802154_hw *hw, s32 mbm); int (*set_csma_params)(struct ieee802154_hw *hw, u8 min_be, u8 max_be, u8 retries); int (*set_frame_retries)(struct ieee802154_hw *hw, diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 4ba2e13f7a07..cdc1cc3543f6 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -122,7 +122,7 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE, params.cca.mode) || nla_put_s32(msg, IEEE802154_ATTR_CCA_ED_LEVEL, - params.cca_ed_level) || + params.cca_ed_level / 100) || nla_put_u8(msg, IEEE802154_ATTR_CSMA_RETRIES, params.csma_retries) || nla_put_u8(msg, IEEE802154_ATTR_CSMA_MIN_BE, @@ -519,7 +519,7 @@ int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info) params.cca.mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]); if (info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) - params.cca_ed_level = nla_get_s32(info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]); + params.cca_ed_level = nla_get_s32(info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) * 100; if (info->attrs[IEEE802154_ATTR_CSMA_RETRIES]) params.csma_retries = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_RETRIES]); diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h index d289ae3f1e93..caecd5f43aa7 100644 --- a/net/mac802154/driver-ops.h +++ b/net/mac802154/driver-ops.h @@ -96,7 +96,7 @@ static inline int drv_set_lbt_mode(struct ieee802154_local *local, bool mode) } static inline int -drv_set_cca_ed_level(struct ieee802154_local *local, s32 ed_level) +drv_set_cca_ed_level(struct ieee802154_local *local, s32 mbm) { might_sleep(); @@ -105,7 +105,7 @@ drv_set_cca_ed_level(struct ieee802154_local *local, s32 ed_level) return -EOPNOTSUPP; } - return local->ops->set_cca_ed_level(&local->hw, ed_level); + return local->ops->set_cca_ed_level(&local->hw, mbm); } static inline int drv_set_pan_id(struct ieee802154_local *local, __le16 pan_id) -- cgit v1.2.3 From 72f655e44db9c7e835ceba96dc03cbe979d3f80d Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 17 May 2015 21:44:42 +0200 Subject: ieee802154: introduce wpan_phy_supported This patch introduce the wpan_phy_supported struct for wpan_phy. There is currently no way to check if a transceiver can handle IEEE 802.15.4 complaint values. With this struct we can check before if the transceiver supports these values before sending to driver layer. Signed-off-by: Alexander Aring Suggested-by: Phoebe Buckheister Acked-by: Varka Bhadram Cc: Alan Ott Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/at86rf230.c | 8 ++++---- drivers/net/ieee802154/cc2520.c | 2 +- drivers/net/ieee802154/fakelb.c | 30 +++++++++++++++--------------- drivers/net/ieee802154/mrf24j40.c | 2 +- include/net/cfg802154.h | 6 +++++- net/ieee802154/nl-phy.c | 4 ++-- net/ieee802154/nl802154.c | 4 ++-- 7 files changed, 30 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index e68d45ed622e..151d57f4320d 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -1579,7 +1579,7 @@ at86rf230_detect_device(struct at86rf230_local *lp) case 3: chip = "at86rf231"; lp->data = &at86rf231_data; - lp->hw->phy->channels_supported[0] = 0x7FFF800; + lp->hw->phy->supported.channels[0] = 0x7FFF800; lp->hw->phy->current_channel = 11; lp->hw->phy->symbol_duration = 16; break; @@ -1587,15 +1587,15 @@ at86rf230_detect_device(struct at86rf230_local *lp) chip = "at86rf212"; lp->data = &at86rf212_data; lp->hw->flags |= IEEE802154_HW_LBT; - lp->hw->phy->channels_supported[0] = 0x00007FF; - lp->hw->phy->channels_supported[2] = 0x00007FF; + lp->hw->phy->supported.channels[0] = 0x00007FF; + lp->hw->phy->supported.channels[2] = 0x00007FF; lp->hw->phy->current_channel = 5; lp->hw->phy->symbol_duration = 25; break; case 11: chip = "at86rf233"; lp->data = &at86rf233_data; - lp->hw->phy->channels_supported[0] = 0x7FFF800; + lp->hw->phy->supported.channels[0] = 0x7FFF800; lp->hw->phy->current_channel = 13; lp->hw->phy->symbol_duration = 16; break; diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c index f833b8bb6663..84b28a05c5a1 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ -653,7 +653,7 @@ static int cc2520_register(struct cc2520_private *priv) ieee802154_random_extended_addr(&priv->hw->phy->perm_extended_addr); /* We do support only 2.4 Ghz */ - priv->hw->phy->channels_supported[0] = 0x7FFF800; + priv->hw->phy->supported.channels[0] = 0x7FFF800; priv->hw->flags = IEEE802154_HW_OMIT_CKSUM | IEEE802154_HW_AACK | IEEE802154_HW_AFILT; diff --git a/drivers/net/ieee802154/fakelb.c b/drivers/net/ieee802154/fakelb.c index dc2bfb600b4b..91bbf030a443 100644 --- a/drivers/net/ieee802154/fakelb.c +++ b/drivers/net/ieee802154/fakelb.c @@ -149,35 +149,35 @@ static int fakelb_add_one(struct device *dev, struct fakelb_priv *fake) priv->hw = hw; /* 868 MHz BPSK 802.15.4-2003 */ - hw->phy->channels_supported[0] |= 1; + hw->phy->supported.channels[0] |= 1; /* 915 MHz BPSK 802.15.4-2003 */ - hw->phy->channels_supported[0] |= 0x7fe; + hw->phy->supported.channels[0] |= 0x7fe; /* 2.4 GHz O-QPSK 802.15.4-2003 */ - hw->phy->channels_supported[0] |= 0x7FFF800; + hw->phy->supported.channels[0] |= 0x7FFF800; /* 868 MHz ASK 802.15.4-2006 */ - hw->phy->channels_supported[1] |= 1; + hw->phy->supported.channels[1] |= 1; /* 915 MHz ASK 802.15.4-2006 */ - hw->phy->channels_supported[1] |= 0x7fe; + hw->phy->supported.channels[1] |= 0x7fe; /* 868 MHz O-QPSK 802.15.4-2006 */ - hw->phy->channels_supported[2] |= 1; + hw->phy->supported.channels[2] |= 1; /* 915 MHz O-QPSK 802.15.4-2006 */ - hw->phy->channels_supported[2] |= 0x7fe; + hw->phy->supported.channels[2] |= 0x7fe; /* 2.4 GHz CSS 802.15.4a-2007 */ - hw->phy->channels_supported[3] |= 0x3fff; + hw->phy->supported.channels[3] |= 0x3fff; /* UWB Sub-gigahertz 802.15.4a-2007 */ - hw->phy->channels_supported[4] |= 1; + hw->phy->supported.channels[4] |= 1; /* UWB Low band 802.15.4a-2007 */ - hw->phy->channels_supported[4] |= 0x1e; + hw->phy->supported.channels[4] |= 0x1e; /* UWB High band 802.15.4a-2007 */ - hw->phy->channels_supported[4] |= 0xffe0; + hw->phy->supported.channels[4] |= 0xffe0; /* 750 MHz O-QPSK 802.15.4c-2009 */ - hw->phy->channels_supported[5] |= 0xf; + hw->phy->supported.channels[5] |= 0xf; /* 750 MHz MPSK 802.15.4c-2009 */ - hw->phy->channels_supported[5] |= 0xf0; + hw->phy->supported.channels[5] |= 0xf0; /* 950 MHz BPSK 802.15.4d-2009 */ - hw->phy->channels_supported[6] |= 0x3ff; + hw->phy->supported.channels[6] |= 0x3ff; /* 950 MHz GFSK 802.15.4d-2009 */ - hw->phy->channels_supported[6] |= 0x3ffc00; + hw->phy->supported.channels[6] |= 0x3ffc00; INIT_LIST_HEAD(&priv->list); priv->fake = fake; diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c index fba2dfd910f7..f2a1bd122a74 100644 --- a/drivers/net/ieee802154/mrf24j40.c +++ b/drivers/net/ieee802154/mrf24j40.c @@ -750,7 +750,7 @@ static int mrf24j40_probe(struct spi_device *spi) devrec->hw->priv = devrec; devrec->hw->parent = &devrec->spi->dev; - devrec->hw->phy->channels_supported[0] = CHANNEL_MASK; + devrec->hw->phy->supported.channels[0] = CHANNEL_MASK; devrec->hw->flags = IEEE802154_HW_OMIT_CKSUM | IEEE802154_HW_AACK | IEEE802154_HW_AFILT; diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 9ced2c9fdbfc..1941d7a79219 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -61,6 +61,10 @@ struct cfg802154_ops { struct wpan_dev *wpan_dev, bool mode); }; +struct wpan_phy_supported { + u32 channels[IEEE802154_MAX_PAGE + 1]; +}; + struct wpan_phy_cca { enum nl802154_cca_modes mode; enum nl802154_cca_opts opt; @@ -84,7 +88,7 @@ struct wpan_phy { */ u8 current_channel; u8 current_page; - u32 channels_supported[IEEE802154_MAX_PAGE + 1]; + struct wpan_phy_supported supported; /* current transmit_power in mBm */ s32 transmit_power; struct wpan_phy_cca cca; diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 346c6665d25e..cbc0d09351e0 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -56,8 +56,8 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, nla_put_u8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel)) goto nla_put_failure; for (i = 0; i < 32; i++) { - if (phy->channels_supported[i]) - buf[pages++] = phy->channels_supported[i] | (i << 27); + if (phy->supported.channels[i]) + buf[pages++] = phy->supported.channels[i] | (i << 27); } if (pages && nla_put(msg, IEEE802154_ATTR_CHANNEL_PAGE_LIST, diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index f3185c71af10..d271879a2174 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -248,7 +248,7 @@ nl802154_send_wpan_phy_channels(struct cfg802154_registered_device *rdev, for (page = 0; page <= IEEE802154_MAX_PAGE; page++) { if (nla_put_u32(msg, NL802154_ATTR_SUPPORTED_CHANNEL, - rdev->wpan_phy.channels_supported[page])) + rdev->wpan_phy.supported.channels[page])) return -ENOBUFS; } nla_nest_end(msg, nl_page); @@ -626,7 +626,7 @@ static int nl802154_set_channel(struct sk_buff *skb, struct genl_info *info) /* check 802.15.4 constraints */ if (page > IEEE802154_MAX_PAGE || channel > IEEE802154_MAX_CHANNEL || - !(rdev->wpan_phy.channels_supported[page] & BIT(channel))) + !(rdev->wpan_phy.supported.channels[page] & BIT(channel))) return -EINVAL; return rdev_set_channel(rdev, page, channel); -- cgit v1.2.3 From fea3318d20776a94afeea0460c6ee9904e60569e Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 17 May 2015 21:44:43 +0200 Subject: ieee802154: add several phy supported handling This patch adds support for phy supported handling for all other already existing handling 802.15.4 functionality. We assume now a fully 802.15.4 complaint transceiver at phy allocation. If a transceiver can support 802.15.4 default values only, then the values should be overwirtten by values the transceiver supports. If the transceiver doesn't set the according hardware flags, we assume the 802.15.4 defaults now which cannot be changed. Signed-off-by: Alexander Aring Suggested-by: Phoebe Buckheister Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 26 +++++++++++++++++++++++++- include/net/nl802154.h | 22 ++++++++++++++++++++++ net/ieee802154/nl802154.c | 22 +++++++++++++++++----- net/mac802154/main.c | 26 ++++++++++++++++++++++++++ 4 files changed, 90 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 1941d7a79219..23abd08a310a 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -61,8 +61,32 @@ struct cfg802154_ops { struct wpan_dev *wpan_dev, bool mode); }; +static inline bool +wpan_phy_supported_bool(bool b, enum nl802154_supported_bool_states st) +{ + switch (st) { + case NL802154_SUPPORTED_BOOL_TRUE: + return b; + case NL802154_SUPPORTED_BOOL_FALSE: + return !b; + case NL802154_SUPPORTED_BOOL_BOTH: + return true; + default: + WARN_ON(1); + } + + return false; +} + struct wpan_phy_supported { - u32 channels[IEEE802154_MAX_PAGE + 1]; + u32 channels[IEEE802154_MAX_PAGE + 1], + cca_modes, cca_opts; + enum nl802154_supported_bool_states lbt; + u8 min_minbe, max_minbe, min_maxbe, max_maxbe, + min_csma_backoffs, max_csma_backoffs; + s8 min_frame_retries, max_frame_retries; + size_t tx_powers_size, cca_ed_levels_size; + const s32 *tx_powers, *cca_ed_levels; }; struct wpan_phy_cca { diff --git a/include/net/nl802154.h b/include/net/nl802154.h index f8b5bc997959..055277156eef 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -162,4 +162,26 @@ enum nl802154_cca_opts { NL802154_CCA_OPT_ATTR_MAX = __NL802154_CCA_OPT_ATTR_AFTER_LAST - 1 }; +/** + * enum nl802154_supported_bool_states - bool states for bool capability entry + * + * @NL802154_SUPPORTED_BOOL_FALSE: indicates to set false + * @NL802154_SUPPORTED_BOOL_TRUE: indicates to set true + * @__NL802154_SUPPORTED_BOOL_INVALD: reserved + * @NL802154_SUPPORTED_BOOL_BOTH: indicates to set true and false + * @__NL802154_SUPPORTED_BOOL_AFTER_LAST: Internal + * @NL802154_SUPPORTED_BOOL_MAX: highest value for bool states + */ +enum nl802154_supported_bool_states { + NL802154_SUPPORTED_BOOL_FALSE, + NL802154_SUPPORTED_BOOL_TRUE, + /* to handle them in a mask */ + __NL802154_SUPPORTED_BOOL_INVALD, + NL802154_SUPPORTED_BOOL_BOTH, + + /* keep last */ + __NL802154_SUPPORTED_BOOL_AFTER_LAST, + NL802154_SUPPORTED_BOOL_MAX = __NL802154_SUPPORTED_BOOL_AFTER_LAST - 1 +}; + #endif /* __NL802154_H */ diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index d271879a2174..fa0c4048b0e8 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -642,7 +642,9 @@ static int nl802154_set_cca_mode(struct sk_buff *skb, struct genl_info *info) cca.mode = nla_get_u32(info->attrs[NL802154_ATTR_CCA_MODE]); /* checking 802.15.4 constraints */ - if (cca.mode < NL802154_CCA_ENERGY || cca.mode > NL802154_CCA_ATTR_MAX) + if (cca.mode < NL802154_CCA_ENERGY || + cca.mode > NL802154_CCA_ATTR_MAX || + !(rdev->wpan_phy.supported.cca_modes & BIT(cca.mode))) return -EINVAL; if (cca.mode == NL802154_CCA_ENERGY_CARRIER) { @@ -650,7 +652,8 @@ static int nl802154_set_cca_mode(struct sk_buff *skb, struct genl_info *info) return -EINVAL; cca.opt = nla_get_u32(info->attrs[NL802154_ATTR_CCA_OPT]); - if (cca.opt > NL802154_CCA_OPT_ATTR_MAX) + if (cca.opt > NL802154_CCA_OPT_ATTR_MAX || + !(rdev->wpan_phy.supported.cca_opts & BIT(cca.opt))) return -EINVAL; } @@ -744,7 +747,11 @@ nl802154_set_backoff_exponent(struct sk_buff *skb, struct genl_info *info) max_be = nla_get_u8(info->attrs[NL802154_ATTR_MAX_BE]); /* check 802.15.4 constraints */ - if (max_be < 3 || max_be > 8 || min_be > max_be) + if (min_be < rdev->wpan_phy.supported.min_minbe || + min_be > rdev->wpan_phy.supported.max_minbe || + max_be < rdev->wpan_phy.supported.min_maxbe || + max_be > rdev->wpan_phy.supported.max_maxbe || + min_be > max_be) return -EINVAL; return rdev_set_backoff_exponent(rdev, wpan_dev, min_be, max_be); @@ -769,7 +776,8 @@ nl802154_set_max_csma_backoffs(struct sk_buff *skb, struct genl_info *info) info->attrs[NL802154_ATTR_MAX_CSMA_BACKOFFS]); /* check 802.15.4 constraints */ - if (max_csma_backoffs > 5) + if (max_csma_backoffs < rdev->wpan_phy.supported.min_csma_backoffs || + max_csma_backoffs > rdev->wpan_phy.supported.max_csma_backoffs) return -EINVAL; return rdev_set_max_csma_backoffs(rdev, wpan_dev, max_csma_backoffs); @@ -793,7 +801,8 @@ nl802154_set_max_frame_retries(struct sk_buff *skb, struct genl_info *info) info->attrs[NL802154_ATTR_MAX_FRAME_RETRIES]); /* check 802.15.4 constraints */ - if (max_frame_retries < -1 || max_frame_retries > 7) + if (max_frame_retries < rdev->wpan_phy.supported.min_frame_retries || + max_frame_retries > rdev->wpan_phy.supported.max_frame_retries) return -EINVAL; return rdev_set_max_frame_retries(rdev, wpan_dev, max_frame_retries); @@ -813,6 +822,9 @@ static int nl802154_set_lbt_mode(struct sk_buff *skb, struct genl_info *info) return -EINVAL; mode = !!nla_get_u8(info->attrs[NL802154_ATTR_LBT_MODE]); + if (!wpan_phy_supported_bool(mode, rdev->wpan_phy.supported.lbt)) + return -EINVAL; + return rdev_set_lbt_mode(rdev, wpan_dev, mode); } diff --git a/net/mac802154/main.c b/net/mac802154/main.c index 08cb32dc8fd3..ddcd6ff8d39c 100644 --- a/net/mac802154/main.c +++ b/net/mac802154/main.c @@ -107,6 +107,15 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops) skb_queue_head_init(&local->skb_queue); + /* init supported flags with 802.15.4 default ranges */ + phy->supported.max_minbe = 8; + phy->supported.min_maxbe = 3; + phy->supported.max_maxbe = 8; + phy->supported.min_frame_retries = -1; + phy->supported.max_frame_retries = 7; + phy->supported.max_csma_backoffs = 5; + phy->supported.lbt = NL802154_SUPPORTED_BOOL_FALSE; + return &local->hw; } EXPORT_SYMBOL(ieee802154_alloc_hw); @@ -155,6 +164,23 @@ int ieee802154_register_hw(struct ieee802154_hw *hw) ieee802154_setup_wpan_phy_pib(local->phy); + if (!(hw->flags & IEEE802154_HW_CSMA_PARAMS)) { + local->phy->supported.min_csma_backoffs = 4; + local->phy->supported.max_csma_backoffs = 4; + local->phy->supported.min_maxbe = 5; + local->phy->supported.max_maxbe = 5; + local->phy->supported.min_minbe = 3; + local->phy->supported.max_minbe = 3; + } + + if (!(hw->flags & IEEE802154_HW_FRAME_RETRIES)) { + /* TODO should be 3, but our default value is -1 which means + * no ARET handling. + */ + local->phy->supported.min_frame_retries = -1; + local->phy->supported.max_frame_retries = -1; + } + rc = wpan_phy_register(local->phy); if (rc < 0) goto out_wq; -- cgit v1.2.3 From 791021bf13ec9d0fc14bfd8c9c4b368ace568239 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 17 May 2015 21:44:44 +0200 Subject: mac802154: check for really changes This patch adds check if the value is really changed inside pib/mib. If a transceiver do support only one value for e.g. max_be then this will also handle that the driver layer doesn't need to care about handling to set one value only. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 12 ++++++++++++ net/mac802154/cfg.c | 26 ++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 23abd08a310a..37abc1603285 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -94,6 +94,18 @@ struct wpan_phy_cca { enum nl802154_cca_opts opt; }; +static inline bool +wpan_phy_cca_cmp(const struct wpan_phy_cca *a, const struct wpan_phy_cca *b) +{ + if (a->mode != b->mode) + return false; + + if (a->mode == NL802154_CCA_ENERGY_CARRIER) + return a->opt == b->opt; + + return true; +} + struct wpan_phy { struct mutex pib_lock; diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c index c63601582c71..45c4dc39766e 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c @@ -73,6 +73,10 @@ ieee802154_set_channel(struct wpan_phy *wpan_phy, u8 page, u8 channel) ASSERT_RTNL(); + if (wpan_phy->current_page == page && + wpan_phy->current_channel == channel) + return 0; + ret = drv_set_channel(local, page, channel); if (!ret) { wpan_phy->current_page = page; @@ -91,6 +95,9 @@ ieee802154_set_cca_mode(struct wpan_phy *wpan_phy, ASSERT_RTNL(); + if (wpan_phy_cca_cmp(&wpan_phy->cca, cca)) + return 0; + /* check if phy support this setting */ if (!(local->hw.flags & IEEE802154_HW_CCA_MODE)) return -EOPNOTSUPP; @@ -108,6 +115,9 @@ ieee802154_set_pan_id(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, { ASSERT_RTNL(); + if (wpan_dev->pan_id == pan_id) + return 0; + wpan_dev->pan_id = pan_id; return 0; } @@ -121,6 +131,10 @@ ieee802154_set_backoff_exponent(struct wpan_phy *wpan_phy, ASSERT_RTNL(); + if (wpan_dev->min_be == min_be && + wpan_dev->max_be == max_be) + return 0; + if (!(local->hw.flags & IEEE802154_HW_CSMA_PARAMS)) return -EOPNOTSUPP; @@ -135,6 +149,9 @@ ieee802154_set_short_addr(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, { ASSERT_RTNL(); + if (wpan_dev->short_addr == short_addr) + return 0; + wpan_dev->short_addr = short_addr; return 0; } @@ -148,6 +165,9 @@ ieee802154_set_max_csma_backoffs(struct wpan_phy *wpan_phy, ASSERT_RTNL(); + if (wpan_dev->csma_retries == max_csma_backoffs) + return 0; + if (!(local->hw.flags & IEEE802154_HW_CSMA_PARAMS)) return -EOPNOTSUPP; @@ -164,6 +184,9 @@ ieee802154_set_max_frame_retries(struct wpan_phy *wpan_phy, ASSERT_RTNL(); + if (wpan_dev->frame_retries == max_frame_retries) + return 0; + if (!(local->hw.flags & IEEE802154_HW_FRAME_RETRIES)) return -EOPNOTSUPP; @@ -179,6 +202,9 @@ ieee802154_set_lbt_mode(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, ASSERT_RTNL(); + if (wpan_dev->lbt == mode) + return 0; + if (!(local->hw.flags & IEEE802154_HW_LBT)) return -EOPNOTSUPP; -- cgit v1.2.3 From edea8f7c75ec6c238130bd7e74d9f6f4c26e97b0 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 17 May 2015 21:44:46 +0200 Subject: cfg802154: introduce wpan phy flags This patch introduce a flag property for the wpan phy structure. The current flag settings in ieee802154_hw are accessable in mac802154 layer only which is okay for flags which indicates MAC handling which are done by phy. For real PHY layer settings like cca mode, transmit power, cca energy detection level. The difference between these flags are that the MAC handling flags are only handled in mac802154/HardMac layer e.g. on an interface up. The phy settings are direct netlink calls from nl802154 into the driver layer and the nl802154 need to have a chance to check if the driver supports this handling before sending to the next layer. We also check now on PHY flags while dumping and setting pib attributes. In comparing with MIB attributes the 802.15.4 gives us an default value which we assume when a transceiver implement less functionality. In case of MIB settings the nl802154 layer doesn't need to check on the ieee802154_hw flags then. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/at86rf230.c | 9 +++++++-- include/net/cfg802154.h | 16 ++++++++++++++++ include/net/mac802154.h | 29 +++++++---------------------- net/ieee802154/nl802154.c | 27 +++++++++++++++++---------- net/mac802154/mac_cmd.c | 6 +++--- 5 files changed, 50 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 151d57f4320d..b0ffd1cd0010 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -1566,8 +1566,13 @@ at86rf230_detect_device(struct at86rf230_local *lp) } lp->hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_AACK | - IEEE802154_HW_TXPOWER | IEEE802154_HW_ARET | - IEEE802154_HW_AFILT | IEEE802154_HW_PROMISCUOUS; + IEEE802154_HW_CSMA_PARAMS | + IEEE802154_HW_FRAME_RETRIES | IEEE802154_HW_AFILT | + IEEE802154_HW_PROMISCUOUS; + + lp->hw->phy->flags = WPAN_PHY_FLAG_TXPOWER | + WPAN_PHY_FLAG_CCA_ED_LEVEL | + WPAN_PHY_FLAG_CCA_MODE; lp->hw->phy->cca.mode = NL802154_CCA_ENERGY; diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 37abc1603285..a12c6c5247e9 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -106,6 +106,20 @@ wpan_phy_cca_cmp(const struct wpan_phy_cca *a, const struct wpan_phy_cca *b) return true; } +/** + * @WPAN_PHY_FLAG_TRANSMIT_POWER: Indicates that transceiver will support + * transmit power setting. + * @WPAN_PHY_FLAG_CCA_ED_LEVEL: Indicates that transceiver will support cca ed + * level setting. + * @WPAN_PHY_FLAG_CCA_MODE: Indicates that transceiver will support cca mode + * setting. + */ +enum wpan_phy_flags { + WPAN_PHY_FLAG_TXPOWER = BIT(1), + WPAN_PHY_FLAG_CCA_ED_LEVEL = BIT(2), + WPAN_PHY_FLAG_CCA_MODE = BIT(3), +}; + struct wpan_phy { struct mutex pib_lock; @@ -117,6 +131,8 @@ struct wpan_phy { */ const void *privid; + u32 flags; + /* * This is a PIB according to 802.15.4-2011. * We do not provide timing-related variables, as they diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 71e245605ef8..9605c7f7453f 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -89,41 +89,26 @@ struct ieee802154_hw { #define IEEE802154_HW_TX_OMIT_CKSUM 0x00000001 /* Indicates that receiver will autorespond with ACK frames. */ #define IEEE802154_HW_AACK 0x00000002 -/* Indicates that transceiver will support transmit power setting. */ -#define IEEE802154_HW_TXPOWER 0x00000004 /* Indicates that transceiver will support listen before transmit. */ -#define IEEE802154_HW_LBT 0x00000008 -/* Indicates that transceiver will support cca mode setting. */ -#define IEEE802154_HW_CCA_MODE 0x00000010 -/* Indicates that transceiver will support cca ed level setting. */ -#define IEEE802154_HW_CCA_ED_LEVEL 0x00000020 +#define IEEE802154_HW_LBT 0x00000004 /* Indicates that transceiver will support csma (max_be, min_be, csma retries) * settings. */ -#define IEEE802154_HW_CSMA_PARAMS 0x00000040 +#define IEEE802154_HW_CSMA_PARAMS 0x00000008 /* Indicates that transceiver will support ARET frame retries setting. */ -#define IEEE802154_HW_FRAME_RETRIES 0x00000080 +#define IEEE802154_HW_FRAME_RETRIES 0x00000010 /* Indicates that transceiver will support hardware address filter setting. */ -#define IEEE802154_HW_AFILT 0x00000100 +#define IEEE802154_HW_AFILT 0x00000020 /* Indicates that transceiver will support promiscuous mode setting. */ -#define IEEE802154_HW_PROMISCUOUS 0x00000200 +#define IEEE802154_HW_PROMISCUOUS 0x00000040 /* Indicates that receiver omits FCS. */ -#define IEEE802154_HW_RX_OMIT_CKSUM 0x00000400 +#define IEEE802154_HW_RX_OMIT_CKSUM 0x00000080 /* Indicates that receiver will not filter frames with bad checksum. */ -#define IEEE802154_HW_RX_DROP_BAD_CKSUM 0x00000800 +#define IEEE802154_HW_RX_DROP_BAD_CKSUM 0x00000100 /* Indicates that receiver omits FCS and xmitter will add FCS on it's own. */ #define IEEE802154_HW_OMIT_CKSUM (IEEE802154_HW_TX_OMIT_CKSUM | \ IEEE802154_HW_RX_OMIT_CKSUM) -/* This groups the most common CSMA support fields into one. */ -#define IEEE802154_HW_CSMA (IEEE802154_HW_CCA_MODE | \ - IEEE802154_HW_CCA_ED_LEVEL | \ - IEEE802154_HW_CSMA_PARAMS) - -/* This groups the most common ARET support fields into one. */ -#define IEEE802154_HW_ARET (IEEE802154_HW_CSMA | \ - IEEE802154_HW_FRAME_RETRIES) - /* struct ieee802154_ops - callbacks from mac802154 to the driver * * This structure contains various callbacks that the driver may diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index fa0c4048b0e8..40fb0be009b2 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -291,19 +291,23 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, goto nla_put_failure; /* cca mode */ - if (nla_put_u32(msg, NL802154_ATTR_CCA_MODE, - rdev->wpan_phy.cca.mode)) - goto nla_put_failure; - - if (rdev->wpan_phy.cca.mode == NL802154_CCA_ENERGY_CARRIER) { - if (nla_put_u32(msg, NL802154_ATTR_CCA_OPT, - rdev->wpan_phy.cca.opt)) + if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_MODE) { + if (nla_put_u32(msg, NL802154_ATTR_CCA_MODE, + rdev->wpan_phy.cca.mode)) goto nla_put_failure; + + if (rdev->wpan_phy.cca.mode == NL802154_CCA_ENERGY_CARRIER) { + if (nla_put_u32(msg, NL802154_ATTR_CCA_OPT, + rdev->wpan_phy.cca.opt)) + goto nla_put_failure; + } } - if (nla_put_s32(msg, NL802154_ATTR_TX_POWER, - rdev->wpan_phy.transmit_power)) - goto nla_put_failure; + if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER) { + if (nla_put_s32(msg, NL802154_ATTR_TX_POWER, + rdev->wpan_phy.transmit_power)) + goto nla_put_failure; + } finish: genlmsg_end(msg, hdr); @@ -637,6 +641,9 @@ static int nl802154_set_cca_mode(struct sk_buff *skb, struct genl_info *info) struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct wpan_phy_cca cca; + if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_MODE) + return -EOPNOTSUPP; + if (!info->attrs[NL802154_ATTR_CCA_MODE]) return -EINVAL; diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index bdccb4ecd30f..6dcbb3b5994c 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -91,19 +91,19 @@ static int mac802154_set_mac_params(struct net_device *dev, wpan_dev->frame_retries = params->frame_retries; wpan_dev->lbt = params->lbt; - if (local->hw.flags & IEEE802154_HW_TXPOWER) { + if (local->hw.phy->flags & WPAN_PHY_FLAG_TXPOWER) { ret = drv_set_tx_power(local, params->transmit_power); if (ret < 0) return ret; } - if (local->hw.flags & IEEE802154_HW_CCA_MODE) { + if (local->hw.phy->flags & WPAN_PHY_FLAG_CCA_MODE) { ret = drv_set_cca_mode(local, ¶ms->cca); if (ret < 0) return ret; } - if (local->hw.flags & IEEE802154_HW_CCA_ED_LEVEL) { + if (local->hw.phy->flags & WPAN_PHY_FLAG_CCA_ED_LEVEL) { ret = drv_set_cca_ed_level(local, params->cca_ed_level); if (ret < 0) return ret; -- cgit v1.2.3 From 65318680c97cca15e3678148b3a5acaa33e991ec Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 17 May 2015 21:44:47 +0200 Subject: ieee802154: add iftypes capability This patch adds capability flags for supported interface types. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 2 +- net/ieee802154/nl802154.c | 3 ++- net/mac802154/main.c | 6 ++++++ 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index a12c6c5247e9..11bbf17ad865 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -80,7 +80,7 @@ wpan_phy_supported_bool(bool b, enum nl802154_supported_bool_states st) struct wpan_phy_supported { u32 channels[IEEE802154_MAX_PAGE + 1], - cca_modes, cca_opts; + cca_modes, cca_opts, iftypes; enum nl802154_supported_bool_states lbt; u8 min_minbe, max_minbe, min_maxbe, max_maxbe, min_csma_backoffs, max_csma_backoffs; diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 40fb0be009b2..3afb20e43ff8 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -579,7 +579,8 @@ static int nl802154_new_interface(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL802154_ATTR_IFTYPE]) { type = nla_get_u32(info->attrs[NL802154_ATTR_IFTYPE]); - if (type > NL802154_IFTYPE_MAX) + if (type > NL802154_IFTYPE_MAX || + !(rdev->wpan_phy.supported.iftypes & BIT(type))) return -EINVAL; } diff --git a/net/mac802154/main.c b/net/mac802154/main.c index ddcd6ff8d39c..356b346e1ee8 100644 --- a/net/mac802154/main.c +++ b/net/mac802154/main.c @@ -116,6 +116,9 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops) phy->supported.max_csma_backoffs = 5; phy->supported.lbt = NL802154_SUPPORTED_BOOL_FALSE; + /* always supported */ + phy->supported.iftypes = BIT(NL802154_IFTYPE_NODE); + return &local->hw; } EXPORT_SYMBOL(ieee802154_alloc_hw); @@ -181,6 +184,9 @@ int ieee802154_register_hw(struct ieee802154_hw *hw) local->phy->supported.max_frame_retries = -1; } + if (hw->flags & IEEE802154_HW_PROMISCUOUS) + local->phy->supported.iftypes |= BIT(NL802154_IFTYPE_MONITOR); + rc = wpan_phy_register(local->phy); if (rc < 0) goto out_wq; -- cgit v1.2.3 From 0e66545701014814360b08a7a43ca652f82b6e5a Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 17 May 2015 21:44:53 +0200 Subject: nl802154: add support for dump phy capabilities This patch add support to nl802154 to dump all phy capabilities which is inside the wpan_phy_supported struct. Also we introduce a new method to dumping supported channels. The new method will offer a easier interface and has lesser netlink traffic. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/nl802154.h | 57 ++++++++++++++++++++++ net/ieee802154/nl802154.c | 117 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 173 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 055277156eef..0badebd1de7f 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -100,6 +100,8 @@ enum nl802154_attrs { NL802154_ATTR_EXTENDED_ADDR, + NL802154_ATTR_WPAN_PHY_CAPS, + /* add attributes here, update the policy in nl802154.c */ __NL802154_ATTR_AFTER_LAST, @@ -119,6 +121,61 @@ enum nl802154_iftype { NL802154_IFTYPE_MAX = NUM_NL802154_IFTYPES - 1 }; +/** + * enum nl802154_wpan_phy_capability_attr - wpan phy capability attributes + * + * @__NL802154_CAP_ATTR_INVALID: attribute number 0 is reserved + * @NL802154_CAP_ATTR_CHANNELS: a nested attribute for nl802154_channel_attr + * @NL802154_CAP_ATTR_TX_POWERS: a nested attribute for + * nl802154_wpan_phy_tx_power + * @NL802154_CAP_ATTR_MIN_CCA_ED_LEVEL: minimum value for cca_ed_level + * @NL802154_CAP_ATTR_MAX_CCA_ED_LEVEL: maxmimum value for cca_ed_level + * @NL802154_CAP_ATTR_CCA_MODES: nl802154_cca_modes flags + * @NL802154_CAP_ATTR_CCA_OPTS: nl802154_cca_opts flags + * @NL802154_CAP_ATTR_MIN_MINBE: minimum of minbe value + * @NL802154_CAP_ATTR_MAX_MINBE: maximum of minbe value + * @NL802154_CAP_ATTR_MIN_MAXBE: minimum of maxbe value + * @NL802154_CAP_ATTR_MAX_MINBE: maximum of maxbe value + * @NL802154_CAP_ATTR_MIN_CSMA_BACKOFFS: minimum of csma backoff value + * @NL802154_CAP_ATTR_MAX_CSMA_BACKOFFS: maximum of csma backoffs value + * @NL802154_CAP_ATTR_MIN_FRAME_RETRIES: minimum of frame retries value + * @NL802154_CAP_ATTR_MAX_FRAME_RETRIES: maximum of frame retries value + * @NL802154_CAP_ATTR_IFTYPES: nl802154_iftype flags + * @NL802154_CAP_ATTR_LBT: nl802154_supported_bool_states flags + * @NL802154_CAP_ATTR_MAX: highest cap attribute currently defined + * @__NL802154_CAP_ATTR_AFTER_LAST: internal use + */ +enum nl802154_wpan_phy_capability_attr { + __NL802154_CAP_ATTR_INVALID, + + NL802154_CAP_ATTR_IFTYPES, + + NL802154_CAP_ATTR_CHANNELS, + NL802154_CAP_ATTR_TX_POWERS, + + NL802154_CAP_ATTR_CCA_ED_LEVELS, + NL802154_CAP_ATTR_CCA_MODES, + NL802154_CAP_ATTR_CCA_OPTS, + + NL802154_CAP_ATTR_MIN_MINBE, + NL802154_CAP_ATTR_MAX_MINBE, + + NL802154_CAP_ATTR_MIN_MAXBE, + NL802154_CAP_ATTR_MAX_MAXBE, + + NL802154_CAP_ATTR_MIN_CSMA_BACKOFFS, + NL802154_CAP_ATTR_MAX_CSMA_BACKOFFS, + + NL802154_CAP_ATTR_MIN_FRAME_RETRIES, + NL802154_CAP_ATTR_MAX_FRAME_RETRIES, + + NL802154_CAP_ATTR_LBT, + + /* keep last */ + __NL802154_CAP_ATTR_AFTER_LAST, + NL802154_CAP_ATTR_MAX = __NL802154_CAP_ATTR_AFTER_LAST - 1 +}; + /** * enum nl802154_cca_modes - cca modes * diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 3afb20e43ff8..54f4959fc9bb 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -225,6 +225,8 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = { [NL802154_ATTR_MAX_FRAME_RETRIES] = { .type = NLA_S8, }, [NL802154_ATTR_LBT_MODE] = { .type = NLA_U8, }, + + [NL802154_ATTR_WPAN_PHY_CAPS] = { .type = NLA_NESTED }, }; /* message building helper */ @@ -235,6 +237,28 @@ static inline void *nl802154hdr_put(struct sk_buff *skb, u32 portid, u32 seq, return genlmsg_put(skb, portid, seq, &nl802154_fam, flags, cmd); } +static int +nl802154_put_flags(struct sk_buff *msg, int attr, u32 mask) +{ + struct nlattr *nl_flags = nla_nest_start(msg, attr); + int i; + + if (!nl_flags) + return -ENOBUFS; + + i = 0; + while (mask) { + if ((mask & 1) && nla_put_flag(msg, i)) + return -ENOBUFS; + + mask >>= 1; + i++; + } + + nla_nest_end(msg, nl_flags); + return 0; +} + static int nl802154_send_wpan_phy_channels(struct cfg802154_registered_device *rdev, struct sk_buff *msg) @@ -256,6 +280,92 @@ nl802154_send_wpan_phy_channels(struct cfg802154_registered_device *rdev, return 0; } +static int +nl802154_put_capabilities(struct sk_buff *msg, + struct cfg802154_registered_device *rdev) +{ + const struct wpan_phy_supported *caps = &rdev->wpan_phy.supported; + struct nlattr *nl_caps, *nl_channels; + int i; + + nl_caps = nla_nest_start(msg, NL802154_ATTR_WPAN_PHY_CAPS); + if (!nl_caps) + return -ENOBUFS; + + nl_channels = nla_nest_start(msg, NL802154_CAP_ATTR_CHANNELS); + if (!nl_channels) + return -ENOBUFS; + + for (i = 0; i <= IEEE802154_MAX_PAGE; i++) { + if (caps->channels[i]) { + if (nl802154_put_flags(msg, i, caps->channels[i])) + return -ENOBUFS; + } + } + + nla_nest_end(msg, nl_channels); + + if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_ED_LEVEL) { + struct nlattr *nl_ed_lvls; + + nl_ed_lvls = nla_nest_start(msg, + NL802154_CAP_ATTR_CCA_ED_LEVELS); + if (!nl_ed_lvls) + return -ENOBUFS; + + for (i = 0; i < caps->cca_ed_levels_size; i++) { + if (nla_put_s32(msg, i, caps->cca_ed_levels[i])) + return -ENOBUFS; + } + + nla_nest_end(msg, nl_ed_lvls); + } + + if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER) { + struct nlattr *nl_tx_pwrs; + + nl_tx_pwrs = nla_nest_start(msg, NL802154_CAP_ATTR_TX_POWERS); + if (!nl_tx_pwrs) + return -ENOBUFS; + + for (i = 0; i < caps->tx_powers_size; i++) { + if (nla_put_s32(msg, i, caps->tx_powers[i])) + return -ENOBUFS; + } + + nla_nest_end(msg, nl_tx_pwrs); + } + + if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_MODE) { + if (nl802154_put_flags(msg, NL802154_CAP_ATTR_CCA_MODES, + caps->cca_modes) || + nl802154_put_flags(msg, NL802154_CAP_ATTR_CCA_OPTS, + caps->cca_opts)) + return -ENOBUFS; + } + + if (nla_put_u8(msg, NL802154_CAP_ATTR_MIN_MINBE, caps->min_minbe) || + nla_put_u8(msg, NL802154_CAP_ATTR_MAX_MINBE, caps->max_minbe) || + nla_put_u8(msg, NL802154_CAP_ATTR_MIN_MAXBE, caps->min_maxbe) || + nla_put_u8(msg, NL802154_CAP_ATTR_MAX_MAXBE, caps->max_maxbe) || + nla_put_u8(msg, NL802154_CAP_ATTR_MIN_CSMA_BACKOFFS, + caps->min_csma_backoffs) || + nla_put_u8(msg, NL802154_CAP_ATTR_MAX_CSMA_BACKOFFS, + caps->max_csma_backoffs) || + nla_put_s8(msg, NL802154_CAP_ATTR_MIN_FRAME_RETRIES, + caps->min_frame_retries) || + nla_put_s8(msg, NL802154_CAP_ATTR_MAX_FRAME_RETRIES, + caps->max_frame_retries) || + nl802154_put_flags(msg, NL802154_CAP_ATTR_IFTYPES, + caps->iftypes) || + nla_put_u32(msg, NL802154_CAP_ATTR_LBT, caps->lbt)) + return -ENOBUFS; + + nla_nest_end(msg, nl_caps); + + return 0; +} + static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, enum nl802154_commands cmd, struct sk_buff *msg, u32 portid, u32 seq, @@ -286,7 +396,9 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, rdev->wpan_phy.current_channel)) goto nla_put_failure; - /* supported channels array */ + /* TODO remove this behaviour, we still keep support it for a while + * so users can change the behaviour to the new one. + */ if (nl802154_send_wpan_phy_channels(rdev, msg)) goto nla_put_failure; @@ -309,6 +421,9 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, goto nla_put_failure; } + if (nl802154_put_capabilities(msg, rdev)) + goto nla_put_failure; + finish: genlmsg_end(msg, hdr); return 0; -- cgit v1.2.3 From c9a70d43461d83818825ae065bb8fc887421e150 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 18 May 2015 16:31:47 +0530 Subject: net-next: ethtool: Added port speed macros. Signed-off-by: Parav Pandit Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 2e49fc880d29..ae832b45b44c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1264,15 +1264,19 @@ enum ethtool_sfeatures_retval_bits { * it was forced up into this mode or autonegotiated. */ -/* The forced speed, 10Mb, 100Mb, gigabit, [2.5|10|20|40|56]GbE. */ +/* The forced speed, 10Mb, 100Mb, gigabit, [2.5|5|10|20|25|40|50|56|100]GbE. */ #define SPEED_10 10 #define SPEED_100 100 #define SPEED_1000 1000 #define SPEED_2500 2500 +#define SPEED_5000 5000 #define SPEED_10000 10000 #define SPEED_20000 20000 +#define SPEED_25000 25000 #define SPEED_40000 40000 +#define SPEED_50000 50000 #define SPEED_56000 56000 +#define SPEED_100000 100000 #define SPEED_UNKNOWN -1 -- cgit v1.2.3 From 492135557dc090a1abb2cfbe1a412757e3ed68ab Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 19 May 2015 21:04:22 +0200 Subject: tcp: add rfc3168, section 6.1.1.1. fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This work as a follow-up of commit f7b3bec6f516 ("net: allow setting ecn via routing table") and adds RFC3168 section 6.1.1.1. fallback for outgoing ECN connections. In other words, this work adds a retry with a non-ECN setup SYN packet, as suggested from the RFC on the first timeout: [...] A host that receives no reply to an ECN-setup SYN within the normal SYN retransmission timeout interval MAY resend the SYN and any subsequent SYN retransmissions with CWR and ECE cleared. [...] Schematic client-side view when assuming the server is in tcp_ecn=2 mode, that is, Linux default since 2009 via commit 255cac91c3c9 ("tcp: extend ECN sysctl to allow server-side only ECN"): 1) Normal ECN-capable path: SYN ECE CWR -----> <----- SYN ACK ECE ACK -----> 2) Path with broken middlebox, when client has fallback: SYN ECE CWR ----X crappy middlebox drops packet (timeout, rtx) SYN -----> <----- SYN ACK ACK -----> In case we would not have the fallback implemented, the middlebox drop point would basically end up as: SYN ECE CWR ----X crappy middlebox drops packet (timeout, rtx) SYN ECE CWR ----X crappy middlebox drops packet (timeout, rtx) SYN ECE CWR ----X crappy middlebox drops packet (timeout, rtx) In any case, it's rather a smaller percentage of sites where there would occur such additional setup latency: it was found in end of 2014 that ~56% of IPv4 and 65% of IPv6 servers of Alexa 1 million list would negotiate ECN (aka tcp_ecn=2 default), 0.42% of these webservers will fail to connect when trying to negotiate with ECN (tcp_ecn=1) due to timeouts, which the fallback would mitigate with a slight latency trade-off. Recent related paper on this topic: Brian Trammell, Mirja Kühlewind, Damiano Boppart, Iain Learmonth, Gorry Fairhurst, and Richard Scheffenegger: "Enabling Internet-Wide Deployment of Explicit Congestion Notification." Proc. PAM 2015, New York. http://ecn.ethz.ch/ecn-pam15.pdf Thus, when net.ipv4.tcp_ecn=1 is being set, the patch will perform RFC3168, section 6.1.1.1. fallback on timeout. For users explicitly not wanting this which can be in DC use case, we add a net.ipv4.tcp_ecn_fallback knob that allows for disabling the fallback. tp->ecn_flags are not being cleared in tcp_ecn_clear_syn() on output, but rather we let tcp_ecn_rcv_synack() take that over on input path in case a SYN ACK ECE was delayed. Thus a spurious SYN retransmission will not prevent ECN being negotiated eventually in that case. Reference: https://www.ietf.org/proceedings/92/slides/slides-92-iccrg-1.pdf Reference: https://www.ietf.org/proceedings/89/slides/slides-89-tsvarea-1.pdf Signed-off-by: Daniel Borkmann Signed-off-by: Florian Westphal Signed-off-by: Mirja Kühlewind Signed-off-by: Brian Trammell Cc: Eric Dumazet Cc: Dave That Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/networking/dctcp.txt | 1 + Documentation/networking/ip-sysctl.txt | 9 +++++++++ include/net/netns/ipv4.h | 2 ++ include/net/tcp.h | 2 ++ net/ipv4/sysctl_net_ipv4.c | 7 +++++++ net/ipv4/tcp_ipv4.c | 5 ++++- net/ipv4/tcp_output.c | 13 +++++++++++++ 7 files changed, 38 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/networking/dctcp.txt b/Documentation/networking/dctcp.txt index 0d5dfbc89ec9..13a857753208 100644 --- a/Documentation/networking/dctcp.txt +++ b/Documentation/networking/dctcp.txt @@ -8,6 +8,7 @@ the data center network to provide multi-bit feedback to the end hosts. To enable it on end hosts: sysctl -w net.ipv4.tcp_congestion_control=dctcp + sysctl -w net.ipv4.tcp_ecn_fallback=0 (optional) All switches in the data center network running DCTCP must support ECN marking and be configured for marking when reaching defined switch buffer diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 5095c63e50ed..cb083e0d682c 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -267,6 +267,15 @@ tcp_ecn - INTEGER but do not request ECN on outgoing connections. Default: 2 +tcp_ecn_fallback - BOOLEAN + If the kernel detects that ECN connection misbehaves, enable fall + back to non-ECN. Currently, this knob implements the fallback + from RFC3168, section 6.1.1.1., but we reserve that in future, + additional detection mechanisms could be implemented under this + knob. The value is not used, if tcp_ecn or per route (or congestion + control) ECN settings are disabled. + Default: 1 (fallback enabled) + tcp_fack - BOOLEAN Enable FACK congestion avoidance and fast retransmission. The value is not used, if tcp_sack is not enabled. diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 614a49be68a9..6848b8bb2e63 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -77,6 +77,8 @@ struct netns_ipv4 { struct local_ports ip_local_ports; int sysctl_tcp_ecn; + int sysctl_tcp_ecn_fallback; + int sysctl_ip_no_pmtu_disc; int sysctl_ip_fwd_use_pmtu; int sysctl_ip_nonlocal_bind; diff --git a/include/net/tcp.h b/include/net/tcp.h index 0d85223efa4c..2bb2bad21d5c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -712,6 +712,8 @@ static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) #define TCPHDR_ECE 0x40 #define TCPHDR_CWR 0x80 +#define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR) + /* This is what the send packet queuing engine uses to pass * TCP per-packet control information to the transmission code. * We also store the host-order sequence numbers in here too. diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index c3852a7ff3c7..841de32f1fee 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -820,6 +820,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_ecn_fallback", + .data = &init_net.ipv4.sysctl_tcp_ecn_fallback, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .procname = "ip_local_port_range", .maxlen = sizeof(init_net.ipv4.ip_local_ports.range), diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 91cb4768a860..0cc4b5a630cd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2411,12 +2411,15 @@ static int __net_init tcp_sk_init(struct net *net) goto fail; *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; } + net->ipv4.sysctl_tcp_ecn = 2; + net->ipv4.sysctl_tcp_ecn_fallback = 1; + net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; - return 0; + return 0; fail: tcp_sk_exit(net); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 08c2cc40b26d..e29d43b5a0bb 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -350,6 +350,15 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) } } +static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) +{ + if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback) + /* tp->ecn_flags are cleared at a later point in time when + * SYN ACK is ultimatively being received. + */ + TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR); +} + static void tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th, struct sock *sk) @@ -2615,6 +2624,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) } } + /* RFC3168, section 6.1.1.1. ECN fallback */ + if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN) + tcp_ecn_clear_syn(sk, skb); + tcp_retrans_try_collapse(sk, skb, cur_mss); /* Make a copy, if the first transmission SKB clone we made -- cgit v1.2.3 From 06b2c61c92a9942769ee8da22d3ce8b8b935c038 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Tue, 19 May 2015 12:41:47 -0700 Subject: ip: remove unused function prototype ip_do_nat() function was removed prior to kernel 3.4. Remove the unnecessary function prototype as well. Reported-by: Florian Westphal Signed-off-by: Andy Zhou Signed-off-by: David S. Miller --- include/net/ip.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index cd7a6a458bb6..7921a36b805c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -110,7 +110,6 @@ int ip_output(struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct sock *sk, struct sk_buff *skb); int ip_do_fragment(struct sock *sk, struct sk_buff *skb, int (*output)(struct sock *, struct sk_buff *)); -int ip_do_nat(struct sk_buff *skb); void ip_send_check(struct iphdr *ip); int __ip_local_out(struct sk_buff *skb); int ip_local_out_sk(struct sock *sk, struct sk_buff *skb); -- cgit v1.2.3 From f8bdbb584749420da1a7fea8cc1df18e5c2c4d6c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 May 2015 15:04:53 +0200 Subject: mac80211: add missing drv_priv description for TXQ struct The kernel-doc description for the drv_priv member of struct ieee80211_txq was missing, leading to errors. Add a suitable description to fix that. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 67e0df14ba0f..887fe95b9805 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1728,6 +1728,7 @@ struct ieee80211_tx_control { * @sta: station table entry, %NULL for per-vif queue * @tid: the TID for this queue (unused for per-vif queue) * @ac: the AC for this queue + * @drv_priv: driver private area, sized by hw->txq_data_size * * The driver can obtain packets from this queue by calling * ieee80211_tx_dequeue(). -- cgit v1.2.3 From eb9344781a2f8381ed60cd9e662d9ced2d168ecb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 May 2015 13:26:55 -0700 Subject: tcp: add a force_schedule argument to sk_stream_alloc_skb() In commit 8e4d980ac215 ("tcp: fix behavior for epoll edge trigger") we fixed a possible hang of TCP sockets under memory pressure, by allowing sk_stream_alloc_skb() to use sk_forced_mem_schedule() if no packet is in socket write queue. It turns out there are other cases where we want to force memory schedule : tcp_fragment() & tso_fragment() need to split a big TSO packet into two smaller ones. If we block here because of TCP memory pressure, we can effectively block TCP socket from sending new data. If no further ACK is coming, this hang would be definitive, and socket has no chance to effectively reduce its memory usage. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 3 ++- net/ipv4/tcp.c | 19 +++++++++++-------- net/ipv4/tcp_output.c | 10 +++++----- 3 files changed, 18 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 4581a60636f8..26c1c3171e00 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2025,7 +2025,8 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk) } } -struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp); +struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, + bool force_schedule); /** * sk_page_frag - return an appropriate page_frag diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bb9bb844204f..ca1d476c80ef 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -808,7 +808,8 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, } EXPORT_SYMBOL(tcp_splice_read); -struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) +struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, + bool force_schedule) { struct sk_buff *skb; @@ -820,15 +821,15 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); if (likely(skb)) { - bool mem_schedule; + bool mem_scheduled; - if (skb_queue_len(&sk->sk_write_queue) == 0) { - mem_schedule = true; + if (force_schedule) { + mem_scheduled = true; sk_forced_mem_schedule(sk, skb->truesize); } else { - mem_schedule = sk_wmem_schedule(sk, skb->truesize); + mem_scheduled = sk_wmem_schedule(sk, skb->truesize); } - if (likely(mem_schedule)) { + if (likely(mem_scheduled)) { skb_reserve(skb, sk->sk_prot->max_header); /* * Make sure that we have exactly size bytes @@ -918,7 +919,8 @@ new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; - skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); + skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, + skb_queue_empty(&sk->sk_write_queue)); if (!skb) goto wait_for_memory; @@ -1154,7 +1156,8 @@ new_segment: skb = sk_stream_alloc_skb(sk, select_size(sk, sg), - sk->sk_allocation); + sk->sk_allocation, + skb_queue_empty(&sk->sk_write_queue)); if (!skb) goto wait_for_memory; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e29d43b5a0bb..3aebe0157dfa 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1172,7 +1172,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, return -ENOMEM; /* Get a new skb... force flag on. */ - buff = sk_stream_alloc_skb(sk, nsize, gfp); + buff = sk_stream_alloc_skb(sk, nsize, gfp, true); if (!buff) return -ENOMEM; /* We'll just try again later. */ @@ -1731,7 +1731,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, if (skb->len != skb->data_len) return tcp_fragment(sk, skb, len, mss_now, gfp); - buff = sk_stream_alloc_skb(sk, 0, gfp); + buff = sk_stream_alloc_skb(sk, 0, gfp, true); if (unlikely(!buff)) return -ENOMEM; @@ -1950,7 +1950,7 @@ static int tcp_mtu_probe(struct sock *sk) } /* We're allowed to probe. Build it now. */ - nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC); + nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false); if (!nskb) return -1; sk->sk_wmem_queued += nskb->truesize; @@ -3190,7 +3190,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) /* limit to order-0 allocations */ space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); - syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation); + syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation, false); if (!syn_data) goto fallback; syn_data->ip_summed = CHECKSUM_PARTIAL; @@ -3256,7 +3256,7 @@ int tcp_connect(struct sock *sk) return 0; } - buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); + buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true); if (unlikely(!buff)) return -ENOBUFS; -- cgit v1.2.3 From 04fd61ab36ec065e194ab5e74ae34a5240d992bb Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 19 May 2015 16:59:03 -0700 Subject: bpf: allow bpf programs to tail-call other bpf programs introduce bpf_tail_call(ctx, &jmp_table, index) helper function which can be used from BPF programs like: int bpf_prog(struct pt_regs *ctx) { ... bpf_tail_call(ctx, &jmp_table, index); ... } that is roughly equivalent to: int bpf_prog(struct pt_regs *ctx) { ... if (jmp_table[index]) return (*jmp_table[index])(ctx); ... } The important detail that it's not a normal call, but a tail call. The kernel stack is precious, so this helper reuses the current stack frame and jumps into another BPF program without adding extra call frame. It's trivially done in interpreter and a bit trickier in JITs. In case of x64 JIT the bigger part of generated assembler prologue is common for all programs, so it is simply skipped while jumping. Other JITs can do similar prologue-skipping optimization or do stack unwind before jumping into the next program. bpf_tail_call() arguments: ctx - context pointer jmp_table - one of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table index - index in the jump table Since all BPF programs are idenitified by file descriptor, user space need to populate the jmp_table with FDs of other BPF programs. If jmp_table[index] is empty the bpf_tail_call() doesn't jump anywhere and program execution continues as normal. New BPF_MAP_TYPE_PROG_ARRAY map type is introduced so that user space can populate this jmp_table array with FDs of other bpf programs. Programs can share the same jmp_table array or use multiple jmp_tables. The chain of tail calls can form unpredictable dynamic loops therefore tail_call_cnt is used to limit the number of calls and currently is set to 32. Use cases: Acked-by: Daniel Borkmann ========== - simplify complex programs by splitting them into a sequence of small programs - dispatch routine For tracing and future seccomp the program may be triggered on all system calls, but processing of syscall arguments will be different. It's more efficient to implement them as: int syscall_entry(struct seccomp_data *ctx) { bpf_tail_call(ctx, &syscall_jmp_table, ctx->nr /* syscall number */); ... default: process unknown syscall ... } int sys_write_event(struct seccomp_data *ctx) {...} int sys_read_event(struct seccomp_data *ctx) {...} syscall_jmp_table[__NR_write] = sys_write_event; syscall_jmp_table[__NR_read] = sys_read_event; For networking the program may call into different parsers depending on packet format, like: int packet_parser(struct __sk_buff *skb) { ... parse L2, L3 here ... __u8 ipproto = load_byte(skb, ... offsetof(struct iphdr, protocol)); bpf_tail_call(skb, &ipproto_jmp_table, ipproto); ... default: process unknown protocol ... } int parse_tcp(struct __sk_buff *skb) {...} int parse_udp(struct __sk_buff *skb) {...} ipproto_jmp_table[IPPROTO_TCP] = parse_tcp; ipproto_jmp_table[IPPROTO_UDP] = parse_udp; - for TC use case, bpf_tail_call() allows to implement reclassify-like logic - bpf_map_update_elem/delete calls into BPF_MAP_TYPE_PROG_ARRAY jump table are atomic, so user space can build chains of BPF programs on the fly Implementation details: ======================= - high performance of bpf_tail_call() is the goal. It could have been implemented without JIT changes as a wrapper on top of BPF_PROG_RUN() macro, but with two downsides: . all programs would have to pay performance penalty for this feature and tail call itself would be slower, since mandatory stack unwind, return, stack allocate would be done for every tailcall. . tailcall would be limited to programs running preempt_disabled, since generic 'void *ctx' doesn't have room for 'tail_call_cnt' and it would need to be either global per_cpu variable accessed by helper and by wrapper or global variable protected by locks. In this implementation x64 JIT bypasses stack unwind and jumps into the callee program after prologue. - bpf_prog_array_compatible() ensures that prog_type of callee and caller are the same and JITed/non-JITed flag is the same, since calling JITed program from non-JITed is invalid, since stack frames are different. Similarly calling kprobe type program from socket type program is invalid. - jump table is implemented as BPF_MAP_TYPE_PROG_ARRAY to reuse 'map' abstraction, its user space API and all of verifier logic. It's in the existing arraymap.c file, since several functions are shared with regular array map. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 22 +++++++++ include/linux/filter.h | 2 +- include/uapi/linux/bpf.h | 10 +++++ kernel/bpf/arraymap.c | 113 ++++++++++++++++++++++++++++++++++++++++++++--- kernel/bpf/core.c | 73 +++++++++++++++++++++++++++++- kernel/bpf/syscall.c | 23 +++++++++- kernel/bpf/verifier.c | 17 +++++++ kernel/trace/bpf_trace.c | 2 + net/core/filter.c | 2 + 9 files changed, 255 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d5cda067115a..8821b9a8689e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -126,6 +126,27 @@ struct bpf_prog_aux { struct work_struct work; }; +struct bpf_array { + struct bpf_map map; + u32 elem_size; + /* 'ownership' of prog_array is claimed by the first program that + * is going to use this map or by the first program which FD is stored + * in the map to make sure that all callers and callees have the same + * prog_type and JITed flag + */ + enum bpf_prog_type owner_prog_type; + bool owner_jited; + union { + char value[0] __aligned(8); + struct bpf_prog *prog[0] __aligned(8); + }; +}; +#define MAX_TAIL_CALL_CNT 32 + +u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); +void bpf_prog_array_map_clear(struct bpf_map *map); +bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); + #ifdef CONFIG_BPF_SYSCALL void bpf_register_prog_type(struct bpf_prog_type_list *tl); void bpf_register_map_type(struct bpf_map_type_list *tl); @@ -160,5 +181,6 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; +extern const struct bpf_func_proto bpf_tail_call_proto; #endif /* _LINUX_BPF_H */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 200be4a74a33..17724f6ea983 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -378,7 +378,7 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) int sk_filter(struct sock *sk, struct sk_buff *skb); -void bpf_prog_select_runtime(struct bpf_prog *fp); +int bpf_prog_select_runtime(struct bpf_prog *fp); void bpf_prog_free(struct bpf_prog *fp); struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a9ebdf5701e8..f0a9af8b4dae 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -113,6 +113,7 @@ enum bpf_map_type { BPF_MAP_TYPE_UNSPEC, BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_ARRAY, + BPF_MAP_TYPE_PROG_ARRAY, }; enum bpf_prog_type { @@ -210,6 +211,15 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_l4_csum_replace, + + /** + * bpf_tail_call(ctx, prog_array_map, index) - jump into another BPF program + * @ctx: context pointer passed to next program + * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY + * @index: index inside array that selects specific program to run + * Return: 0 on success + */ + BPF_FUNC_tail_call, __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 8a6616583f38..614bcd4c1d74 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -14,12 +14,7 @@ #include #include #include - -struct bpf_array { - struct bpf_map map; - u32 elem_size; - char value[0] __aligned(8); -}; +#include /* Called from syscall */ static struct bpf_map *array_map_alloc(union bpf_attr *attr) @@ -154,3 +149,109 @@ static int __init register_array_map(void) return 0; } late_initcall(register_array_map); + +static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) +{ + /* only bpf_prog file descriptors can be stored in prog_array map */ + if (attr->value_size != sizeof(u32)) + return ERR_PTR(-EINVAL); + return array_map_alloc(attr); +} + +static void prog_array_map_free(struct bpf_map *map) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + int i; + + synchronize_rcu(); + + /* make sure it's empty */ + for (i = 0; i < array->map.max_entries; i++) + BUG_ON(array->prog[i] != NULL); + kvfree(array); +} + +static void *prog_array_map_lookup_elem(struct bpf_map *map, void *key) +{ + return NULL; +} + +/* only called from syscall */ +static int prog_array_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *prog, *old_prog; + u32 index = *(u32 *)key, ufd; + + if (map_flags != BPF_ANY) + return -EINVAL; + + if (index >= array->map.max_entries) + return -E2BIG; + + ufd = *(u32 *)value; + prog = bpf_prog_get(ufd); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + if (!bpf_prog_array_compatible(array, prog)) { + bpf_prog_put(prog); + return -EINVAL; + } + + old_prog = xchg(array->prog + index, prog); + if (old_prog) + bpf_prog_put(old_prog); + + return 0; +} + +static int prog_array_map_delete_elem(struct bpf_map *map, void *key) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *old_prog; + u32 index = *(u32 *)key; + + if (index >= array->map.max_entries) + return -E2BIG; + + old_prog = xchg(array->prog + index, NULL); + if (old_prog) { + bpf_prog_put(old_prog); + return 0; + } else { + return -ENOENT; + } +} + +/* decrement refcnt of all bpf_progs that are stored in this map */ +void bpf_prog_array_map_clear(struct bpf_map *map) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + int i; + + for (i = 0; i < array->map.max_entries; i++) + prog_array_map_delete_elem(map, &i); +} + +static const struct bpf_map_ops prog_array_ops = { + .map_alloc = prog_array_map_alloc, + .map_free = prog_array_map_free, + .map_get_next_key = array_map_get_next_key, + .map_lookup_elem = prog_array_map_lookup_elem, + .map_update_elem = prog_array_map_update_elem, + .map_delete_elem = prog_array_map_delete_elem, +}; + +static struct bpf_map_type_list prog_array_type __read_mostly = { + .ops = &prog_array_ops, + .type = BPF_MAP_TYPE_PROG_ARRAY, +}; + +static int __init register_prog_array_map(void) +{ + bpf_register_map_type(&prog_array_type); + return 0; +} +late_initcall(register_prog_array_map); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 54f0e7fcd0e2..d44b25cbe460 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -176,6 +176,15 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) return 0; } +const struct bpf_func_proto bpf_tail_call_proto = { + .func = NULL, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +}; + /** * __bpf_prog_run - run eBPF program on a given context * @ctx: is the data we are operating on @@ -244,6 +253,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG, /* Call instruction */ [BPF_JMP | BPF_CALL] = &&JMP_CALL, + [BPF_JMP | BPF_CALL | BPF_X] = &&JMP_TAIL_CALL, /* Jumps */ [BPF_JMP | BPF_JA] = &&JMP_JA, [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X, @@ -286,6 +296,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B, [BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW, }; + u32 tail_call_cnt = 0; void *ptr; int off; @@ -431,6 +442,30 @@ select_insn: BPF_R4, BPF_R5); CONT; + JMP_TAIL_CALL: { + struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *prog; + u64 index = BPF_R3; + + if (unlikely(index >= array->map.max_entries)) + goto out; + + if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT)) + goto out; + + tail_call_cnt++; + + prog = READ_ONCE(array->prog[index]); + if (unlikely(!prog)) + goto out; + + ARG1 = BPF_R1; + insn = prog->insnsi; + goto select_insn; +out: + CONT; + } /* JMP */ JMP_JA: insn += insn->off; @@ -619,6 +654,40 @@ void __weak bpf_int_jit_compile(struct bpf_prog *prog) { } +bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) +{ + if (array->owner_prog_type) { + if (array->owner_prog_type != fp->type) + return false; + if (array->owner_jited != fp->jited) + return false; + } else { + array->owner_prog_type = fp->type; + array->owner_jited = fp->jited; + } + return true; +} + +static int check_tail_call(const struct bpf_prog *fp) +{ + struct bpf_prog_aux *aux = fp->aux; + int i; + + for (i = 0; i < aux->used_map_cnt; i++) { + struct bpf_array *array; + struct bpf_map *map; + + map = aux->used_maps[i]; + if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) + continue; + array = container_of(map, struct bpf_array, map); + if (!bpf_prog_array_compatible(array, fp)) + return -EINVAL; + } + + return 0; +} + /** * bpf_prog_select_runtime - select execution runtime for BPF program * @fp: bpf_prog populated with internal BPF program @@ -626,7 +695,7 @@ void __weak bpf_int_jit_compile(struct bpf_prog *prog) * try to JIT internal BPF program, if JIT is not available select interpreter * BPF program will be executed via BPF_PROG_RUN() macro */ -void bpf_prog_select_runtime(struct bpf_prog *fp) +int bpf_prog_select_runtime(struct bpf_prog *fp) { fp->bpf_func = (void *) __bpf_prog_run; @@ -634,6 +703,8 @@ void bpf_prog_select_runtime(struct bpf_prog *fp) bpf_int_jit_compile(fp); /* Lock whole bpf_prog as read-only */ bpf_prog_lock_ro(fp); + + return check_tail_call(fp); } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 3bae6c591914..98a69bd83069 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -68,6 +68,12 @@ static int bpf_map_release(struct inode *inode, struct file *filp) { struct bpf_map *map = filp->private_data; + if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) + /* prog_array stores refcnt-ed bpf_prog pointers + * release them all when user space closes prog_array_fd + */ + bpf_prog_array_map_clear(map); + bpf_map_put(map); return 0; } @@ -392,6 +398,19 @@ static void fixup_bpf_calls(struct bpf_prog *prog) */ BUG_ON(!prog->aux->ops->get_func_proto); + if (insn->imm == BPF_FUNC_tail_call) { + /* mark bpf_tail_call as different opcode + * to avoid conditional branch in + * interpeter for every normal call + * and to prevent accidental JITing by + * JIT compiler that doesn't support + * bpf_tail_call yet + */ + insn->imm = 0; + insn->code |= BPF_X; + continue; + } + fn = prog->aux->ops->get_func_proto(insn->imm); /* all functions that have prototype and verifier allowed * programs to call them, must be real in-kernel functions @@ -532,7 +551,9 @@ static int bpf_prog_load(union bpf_attr *attr) fixup_bpf_calls(prog); /* eBPF program is ready to be JITed */ - bpf_prog_select_runtime(prog); + err = bpf_prog_select_runtime(prog); + if (err < 0) + goto free_used_maps; err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); if (err < 0) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 47dcd3aa6e23..cfd9a40b9a5a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -907,6 +907,23 @@ static int check_call(struct verifier_env *env, int func_id) fn->ret_type, func_id); return -EINVAL; } + + if (map && map->map_type == BPF_MAP_TYPE_PROG_ARRAY && + func_id != BPF_FUNC_tail_call) + /* prog_array map type needs extra care: + * only allow to pass it into bpf_tail_call() for now. + * bpf_map_delete_elem() can be allowed in the future, + * while bpf_map_update_elem() must only be done via syscall + */ + return -EINVAL; + + if (func_id == BPF_FUNC_tail_call && + map->map_type != BPF_MAP_TYPE_PROG_ARRAY) + /* don't allow any other map type to be passed into + * bpf_tail_call() + */ + return -EINVAL; + return 0; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 2d56ce501632..646445e41bd4 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -172,6 +172,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_probe_read_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; + case BPF_FUNC_tail_call: + return &bpf_tail_call_proto; case BPF_FUNC_trace_printk: /* diff --git a/net/core/filter.c b/net/core/filter.c index 6805717be614..3adcca6f17a4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1421,6 +1421,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) return &bpf_get_prandom_u32_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; + case BPF_FUNC_tail_call: + return &bpf_tail_call_proto; default: return NULL; } -- cgit v1.2.3 From f5af1f57a2914e290de40e2c93716da8885c4965 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 May 2015 10:59:01 -0700 Subject: inet_hashinfo: remove bsocket counter We no longer need bsocket atomic counter, as inet_csk_get_port() calls bind_conflict() regardless of its value, after commit 2b05ad33e1e624e ("tcp: bind() fix autoselection to share ports") This patch removes overhead of maintaining this counter and double inet_csk_get_port() calls under pressure. Signed-off-by: Eric Dumazet Cc: Marcelo Ricardo Leitner Cc: Flavio Leitner Acked-by: Flavio Leitner Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 2 -- net/ipv4/inet_connection_sock.c | 5 ----- net/ipv4/inet_hashtables.c | 7 ------- 3 files changed, 14 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 73fe0f9525d9..774d24151d4a 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -148,8 +148,6 @@ struct inet_hashinfo { */ struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] ____cacheline_aligned_in_smp; - - atomic_t bsockets; }; static inline struct inet_ehash_bucket *inet_ehash_bucket( diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8976ca423a07..b95fb263a13f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -127,11 +127,6 @@ again: (tb->num_owners < smallest_size || smallest_size == -1)) { smallest_size = tb->num_owners; smallest_rover = rover; - if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 && - !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { - snum = smallest_rover; - goto tb_found; - } } if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { snum = rover; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index c6fb80bd5826..3766bddb3e8a 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -90,10 +90,6 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, const unsigned short snum) { - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - - atomic_inc(&hashinfo->bsockets); - inet_sk(sk)->inet_num = snum; sk_add_bind_node(sk, &tb->owners); tb->num_owners++; @@ -111,8 +107,6 @@ static void __inet_put_port(struct sock *sk) struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; struct inet_bind_bucket *tb; - atomic_dec(&hashinfo->bsockets); - spin_lock(&head->lock); tb = inet_csk(sk)->icsk_bind_hash; __sk_del_bind_node(sk); @@ -608,7 +602,6 @@ void inet_hashinfo_init(struct inet_hashinfo *h) { int i; - atomic_set(&h->bsockets, 0); for (i = 0; i < INET_LHTABLE_SIZE; i++) { spin_lock_init(&h->listening_hash[i].lock); INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head, -- cgit v1.2.3 From 2efd055c53c06b7e89c167c98069bab9afce7e59 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 20 May 2015 16:35:41 -0700 Subject: tcp: add tcpi_segs_in and tcpi_segs_out to tcp_info This patch tracks the total number of inbound and outbound segments on a TCP socket. One may use this number to have an idea on connection quality when compared against the retransmissions. RFC4898 named these : tcpEStatsPerfSegsIn and tcpEStatsPerfSegsOut These are a 32bit field each and can be fetched both from TCP_INFO getsockopt() if one has a handle on a TCP socket, or from inet_diag netlink facility (iproute2/ss patch will follow) Note that tp->segs_out was placed near tp->snd_nxt for good data locality and minimal performance impact, while tp->segs_in was placed near tp->bytes_received for the same reason. Join work with Eric Dumazet. Note that received SYN are accounted on the listener, but sent SYNACK are not accounted. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 7 ++++++- include/uapi/linux/tcp.h | 4 +++- net/ipv4/tcp.c | 2 ++ net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_minisocks.c | 1 + net/ipv4/tcp_output.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 7 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e6fb5df22db1..f0212026c77f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -149,11 +149,16 @@ struct tcp_sock { * sum(delta(rcv_nxt)), or how many bytes * were acked. */ + u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn + * total number of segments in. + */ u32 rcv_nxt; /* What we want to receive next */ u32 copied_seq; /* Head of yet unread data */ u32 rcv_wup; /* rcv_nxt on last window update sent */ u32 snd_nxt; /* Next sequence we send */ - + u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut + * The total number of segments sent. + */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked * sum(delta(snd_una)), or how many bytes * were acked. diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 51ebedba577f..65a77b071e22 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -192,8 +192,10 @@ struct tcp_info { __u64 tcpi_pacing_rate; __u64 tcpi_max_pacing_rate; - __u64 tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */ + __u64 tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */ __u64 tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */ + __u32 tcpi_segs_out; /* RFC4898 tcpEStatsPerfSegsOut */ + __u32 tcpi_segs_in; /* RFC4898 tcpEStatsPerfSegsIn */ }; /* for TCP_MD5SIG socket option */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0a3f9a00565b..7f3e721b9e69 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2695,6 +2695,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) spin_lock_bh(&sk->sk_lock.slock); info->tcpi_bytes_acked = tp->bytes_acked; info->tcpi_bytes_received = tp->bytes_received; + info->tcpi_segs_out = tp->segs_out; + info->tcpi_segs_in = tp->segs_in; spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0cc4b5a630cd..feb875769b8d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1626,6 +1626,7 @@ process: skb->dev = NULL; bh_lock_sock_nested(sk); + tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ebe2ab2596ed..b62d15c86946 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -448,6 +448,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; + newtp->segs_in = 0; newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3aebe0157dfa..534e5fdb04c1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1027,6 +1027,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb)); + tp->segs_out += tcp_skb_pcount(skb); /* OK, its time to fill skb_shinfo(skb)->gso_segs */ skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b6575d665568..beac6bf840b9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1421,6 +1421,7 @@ process: skb->dev = NULL; bh_lock_sock_nested(sk); + tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) -- cgit v1.2.3 From bd5850d39f10f9d216bff69bcbf5938680b862ae Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 May 2015 02:26:24 +0200 Subject: net: sched: pkt_cls: remove unused macros from uapi Jamal points out that this header also contains kernel internal magic that cannot be used from userspace for anything meaningful. Lets remove what the kernel doesn't use anymore and wrap remainder with __KERNEL__. Suggested-by: Jamal Hadi Salim Suggested-by: Alexei Starovoitov Signed-off-by: Florian Westphal Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 32 +++++--------------------------- 1 file changed, 5 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 39fb53d67b11..4f0d1bc3647d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -4,6 +4,7 @@ #include #include +#ifdef __KERNEL__ /* I think i could have done better macros ; for now this is stolen from * some arch/mips code - jhs */ @@ -35,23 +36,6 @@ bits 9,10,11: redirect counter - redirect TTL. Loop avoidance * * */ -#ifndef __KERNEL__ -/* backwards compat for userspace only */ -#define TC_MUNGED _TC_MAKEMASK1(0) -#define SET_TC_MUNGED(v) ( TC_MUNGED | (v & ~TC_MUNGED)) -#define CLR_TC_MUNGED(v) ( v & ~TC_MUNGED) - -#define TC_OK2MUNGE _TC_MAKEMASK1(1) -#define SET_TC_OK2MUNGE(v) ( TC_OK2MUNGE | (v & ~TC_OK2MUNGE)) -#define CLR_TC_OK2MUNGE(v) ( v & ~TC_OK2MUNGE) - -#define S_TC_VERD _TC_MAKE32(2) -#define M_TC_VERD _TC_MAKEMASK(4,S_TC_VERD) -#define G_TC_VERD(x) _TC_GETVALUE(x,S_TC_VERD,M_TC_VERD) -#define V_TC_VERD(x) _TC_MAKEVALUE(x,S_TC_VERD) -#define SET_TC_VERD(v,n) ((V_TC_VERD(n)) | (v & ~M_TC_VERD)) -#endif - #define S_TC_FROM _TC_MAKE32(6) #define M_TC_FROM _TC_MAKEMASK(2,S_TC_FROM) #define G_TC_FROM(x) _TC_GETVALUE(x,S_TC_FROM,M_TC_FROM) @@ -65,20 +49,16 @@ bits 9,10,11: redirect counter - redirect TTL. Loop avoidance #define SET_TC_NCLS(v) ( TC_NCLS | (v & ~TC_NCLS)) #define CLR_TC_NCLS(v) ( v & ~TC_NCLS) -#ifndef __KERNEL__ -#define S_TC_RTTL _TC_MAKE32(9) -#define M_TC_RTTL _TC_MAKEMASK(3,S_TC_RTTL) -#define G_TC_RTTL(x) _TC_GETVALUE(x,S_TC_RTTL,M_TC_RTTL) -#define V_TC_RTTL(x) _TC_MAKEVALUE(x,S_TC_RTTL) -#define SET_TC_RTTL(v,n) ((V_TC_RTTL(n)) | (v & ~M_TC_RTTL)) -#endif - #define S_TC_AT _TC_MAKE32(12) #define M_TC_AT _TC_MAKEMASK(2,S_TC_AT) #define G_TC_AT(x) _TC_GETVALUE(x,S_TC_AT,M_TC_AT) #define V_TC_AT(x) _TC_MAKEVALUE(x,S_TC_AT) #define SET_TC_AT(v,n) ((V_TC_AT(n)) | (v & ~M_TC_AT)) +#define MAX_REC_LOOP 4 +#define MAX_RED_LOOP 4 +#endif + /* Action attributes */ enum { TCA_ACT_UNSPEC, @@ -98,8 +78,6 @@ enum { #define TCA_ACT_NOUNBIND 0 #define TCA_ACT_REPLACE 1 #define TCA_ACT_NOREPLACE 0 -#define MAX_REC_LOOP 4 -#define MAX_RED_LOOP 4 #define TC_ACT_UNSPEC (-1) #define TC_ACT_OK 0 -- cgit v1.2.3 From 4a3a8c0c3a613e481bea931f0d65dc4a7efaa9b9 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 22 May 2015 17:43:52 +0200 Subject: mac802154: remove pib lock This patch removes the pib lock which is now replaced by rtnl lock. The new interface already use the rtnl lock only. Nevertheless this patch will fix issues while using new and old interface at the same time. Signed-off-by: Alexander Aring Reviewed-by: Stefan Schmidt Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 2 -- net/ieee802154/core.c | 2 -- net/ieee802154/nl-phy.c | 6 +++--- net/mac802154/iface.c | 7 ------- net/mac802154/mib.c | 4 ++-- 5 files changed, 5 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 11bbf17ad865..c6aa1d210182 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -121,8 +121,6 @@ enum wpan_phy_flags { }; struct wpan_phy { - struct mutex pib_lock; - /* If multiple wpan_phys are registered and you're handed e.g. * a regular netdev with assigned ieee802154_ptr, you won't * know whether it points to a wpan_phy your driver has registered diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c index 2ee00e8a0308..b0248e934230 100644 --- a/net/ieee802154/core.c +++ b/net/ieee802154/core.c @@ -121,8 +121,6 @@ wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size) /* atomic_inc_return makes it start at 1, make it start at 0 */ rdev->wpan_phy_idx--; - mutex_init(&rdev->wpan_phy.pib_lock); - INIT_LIST_HEAD(&rdev->wpan_dev_list); device_initialize(&rdev->wpan_phy.dev); dev_set_name(&rdev->wpan_phy.dev, PHY_NAME "%d", rdev->wpan_phy_idx); diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index cbc0d09351e0..77d73014bde3 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -50,7 +50,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, if (!hdr) goto out; - mutex_lock(&phy->pib_lock); + rtnl_lock(); if (nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) || nla_put_u8(msg, IEEE802154_ATTR_PAGE, phy->current_page) || nla_put_u8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel)) @@ -63,13 +63,13 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, nla_put(msg, IEEE802154_ATTR_CHANNEL_PAGE_LIST, pages * sizeof(uint32_t), buf)) goto nla_put_failure; - mutex_unlock(&phy->pib_lock); + rtnl_unlock(); kfree(buf); genlmsg_end(msg, hdr); return 0; nla_put_failure: - mutex_unlock(&phy->pib_lock); + rtnl_unlock(); genlmsg_cancel(msg, hdr); out: kfree(buf); diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 2a5878889289..22f478be7489 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -242,7 +242,6 @@ static int mac802154_wpan_open(struct net_device *dev) struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); struct ieee802154_local *local = sdata->local; struct wpan_dev *wpan_dev = &sdata->wpan_dev; - struct wpan_phy *phy = sdata->local->phy; rc = ieee802154_check_concurrent_iface(sdata, sdata->vif.type); if (rc < 0) @@ -252,8 +251,6 @@ static int mac802154_wpan_open(struct net_device *dev) if (rc < 0) return rc; - mutex_lock(&phy->pib_lock); - if (local->hw.flags & IEEE802154_HW_PROMISCUOUS) { rc = drv_set_promiscuous_mode(local, wpan_dev->promiscuous_mode); @@ -295,11 +292,7 @@ static int mac802154_wpan_open(struct net_device *dev) goto out; } - mutex_unlock(&phy->pib_lock); - return 0; - out: - mutex_unlock(&phy->pib_lock); return rc; } diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c index 5cf019a57fd7..033dfc7755c6 100644 --- a/net/mac802154/mib.c +++ b/net/mac802154/mib.c @@ -91,16 +91,16 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan) struct ieee802154_local *local = sdata->local; int res; + ASSERT_RTNL(); + BUG_ON(dev->type != ARPHRD_IEEE802154); res = drv_set_channel(local, page, chan); if (res) { pr_debug("set_channel failed\n"); } else { - mutex_lock(&local->phy->pib_lock); local->phy->current_channel = chan; local->phy->current_page = page; - mutex_unlock(&local->phy->pib_lock); } } -- cgit v1.2.3 From 344f8c119df742f2bf7098cf8fc326351f583249 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 22 May 2015 17:43:53 +0200 Subject: mac802154: use atomic ops for sequence incrementation This patch will use atomic operations for sequence number incrementation while MAC header generation. Upper layers like af_802154 or 6LoWPAN could call this function in a parallel context while generating 802.15.4 MAC header before queuing into wpan interfaces transmit queue. Signed-off-by: Alexander Aring Reviewed-by: Stefan Schmidt Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 4 ++-- include/net/ieee802154_netdev.h | 1 - net/ieee802154/6lowpan/core.c | 8 -------- net/mac802154/ieee802154_i.h | 1 - net/mac802154/iface.c | 9 ++++++--- net/mac802154/mac_cmd.c | 1 - net/mac802154/mib.c | 9 --------- 7 files changed, 8 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index c6aa1d210182..4de59aa96173 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -177,9 +177,9 @@ struct wpan_dev { __le64 extended_addr; /* MAC BSN field */ - u8 bsn; + atomic_t bsn; /* MAC DSN field */ - u8 dsn; + atomic_t dsn; u8 min_be; u8 max_be; diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 94a297052442..144fefb4d619 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -431,7 +431,6 @@ struct ieee802154_mlme_ops { */ __le16 (*get_pan_id)(const struct net_device *dev); __le16 (*get_short_addr)(const struct net_device *dev); - u8 (*get_dsn)(const struct net_device *dev); }; static inline struct ieee802154_mlme_ops * diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 0ae5822ef944..2e77fada7e54 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -69,13 +69,6 @@ static __le16 lowpan_get_short_addr(const struct net_device *dev) return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); } -static u8 lowpan_get_dsn(const struct net_device *dev) -{ - struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; - - return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev); -} - static struct header_ops lowpan_header_ops = { .create = lowpan_header_create, }; @@ -106,7 +99,6 @@ static const struct net_device_ops lowpan_netdev_ops = { static struct ieee802154_mlme_ops lowpan_mlme = { .get_pan_id = lowpan_get_pan_id, .get_short_addr = lowpan_get_short_addr, - .get_dsn = lowpan_get_dsn, }; static void lowpan_setup(struct net_device *dev) diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h index 127ba18386fc..56b3847466e9 100644 --- a/net/mac802154/ieee802154_i.h +++ b/net/mac802154/ieee802154_i.h @@ -141,7 +141,6 @@ __le16 mac802154_dev_get_short_addr(const struct net_device *dev); __le16 mac802154_dev_get_pan_id(const struct net_device *dev); void mac802154_dev_set_pan_id(struct net_device *dev, __le16 val); void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan); -u8 mac802154_dev_get_dsn(const struct net_device *dev); int mac802154_get_params(struct net_device *dev, struct ieee802154_llsec_params *params); diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 22f478be7489..0ec7bc402e29 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -368,7 +368,7 @@ static int mac802154_header_create(struct sk_buff *skb, hdr.fc.type = cb->type; hdr.fc.security_enabled = cb->secen; hdr.fc.ack_request = cb->ackreq; - hdr.seq = ieee802154_mlme_ops(dev)->get_dsn(dev); + hdr.seq = atomic_inc_return(&dev->ieee802154_ptr->dsn) & 0xFF; if (mac802154_set_header_security(sdata, &hdr, cb) < 0) return -EINVAL; @@ -468,13 +468,16 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata, enum nl802154_iftype type) { struct wpan_dev *wpan_dev = &sdata->wpan_dev; + u8 tmp; /* set some type-dependent values */ sdata->vif.type = type; sdata->wpan_dev.iftype = type; - get_random_bytes(&wpan_dev->bsn, 1); - get_random_bytes(&wpan_dev->dsn, 1); + get_random_bytes(&tmp, sizeof(tmp)); + atomic_set(&wpan_dev->bsn, tmp); + get_random_bytes(&tmp, sizeof(tmp)); + atomic_set(&wpan_dev->dsn, tmp); /* defaults per 802.15.4-2011 */ wpan_dev->min_be = 3; diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index 6dcbb3b5994c..3b347d4d3d4c 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -153,7 +153,6 @@ struct ieee802154_mlme_ops mac802154_mlme_wpan = { .start_req = mac802154_mlme_start_req, .get_pan_id = mac802154_dev_get_pan_id, .get_short_addr = mac802154_dev_get_short_addr, - .get_dsn = mac802154_dev_get_dsn, .llsec = &mac802154_llsec_ops, diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c index 033dfc7755c6..00a62de9cc23 100644 --- a/net/mac802154/mib.c +++ b/net/mac802154/mib.c @@ -76,15 +76,6 @@ void mac802154_dev_set_pan_id(struct net_device *dev, __le16 val) spin_unlock_bh(&sdata->mib_lock); } -u8 mac802154_dev_get_dsn(const struct net_device *dev) -{ - struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); - - BUG_ON(dev->type != ARPHRD_IEEE802154); - - return sdata->wpan_dev.dsn++; -} - void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan) { struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); -- cgit v1.2.3 From c947f7e1e31a708f5a4ea8c1a627bec578cd9223 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 22 May 2015 17:43:54 +0200 Subject: mac802154: remove mib lock This patch removes the mib lock. The new locking mechanism is to protect the mib values with the rtnl lock. Note that this isn't always necessary if we have an interface up the most mib values are readonly (e.g. address settings). With this behaviour we can remove locking in hotpath like frame parsing completely. It depends on context if we need to hold the rtnl lock or not, this makes the callbacks of ieee802154_mlme_ops unnecessary because these callbacks hols always the locks. Signed-off-by: Alexander Aring Reviewed-by: Stefan Schmidt Signed-off-by: Marcel Holtmann --- include/net/ieee802154_netdev.h | 9 -------- net/ieee802154/6lowpan/core.c | 20 ----------------- net/ieee802154/6lowpan/tx.c | 2 +- net/ieee802154/nl-mac.c | 14 ++++++++---- net/ieee802154/socket.c | 12 +++++----- net/mac802154/ieee802154_i.h | 6 ----- net/mac802154/iface.c | 8 ------- net/mac802154/mac_cmd.c | 6 ++--- net/mac802154/mib.c | 50 ----------------------------------------- net/mac802154/rx.c | 5 ----- 10 files changed, 18 insertions(+), 114 deletions(-) (limited to 'include') diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 144fefb4d619..84a72a1382a8 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -422,15 +422,6 @@ struct ieee802154_mlme_ops { struct ieee802154_mac_params *params); struct ieee802154_llsec_ops *llsec; - - /* The fields below are required. */ - - /* - * FIXME: these should become the part of PIB/MIB interface. - * However we still don't have IB interface of any kind - */ - __le16 (*get_pan_id)(const struct net_device *dev); - __le16 (*get_short_addr)(const struct net_device *dev); }; static inline struct ieee802154_mlme_ops * diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 2e77fada7e54..f20a387a1011 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -55,20 +55,6 @@ LIST_HEAD(lowpan_devices); static int lowpan_open_count; -static __le16 lowpan_get_pan_id(const struct net_device *dev) -{ - struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; - - return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev); -} - -static __le16 lowpan_get_short_addr(const struct net_device *dev) -{ - struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; - - return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); -} - static struct header_ops lowpan_header_ops = { .create = lowpan_header_create, }; @@ -96,11 +82,6 @@ static const struct net_device_ops lowpan_netdev_ops = { .ndo_start_xmit = lowpan_xmit, }; -static struct ieee802154_mlme_ops lowpan_mlme = { - .get_pan_id = lowpan_get_pan_id, - .get_short_addr = lowpan_get_short_addr, -}; - static void lowpan_setup(struct net_device *dev) { dev->addr_len = IEEE802154_ADDR_LEN; @@ -116,7 +97,6 @@ static void lowpan_setup(struct net_device *dev) dev->netdev_ops = &lowpan_netdev_ops; dev->header_ops = &lowpan_header_ops; - dev->ml_priv = &lowpan_mlme; dev->destructor = free_netdev; dev->features |= NETIF_F_NETNS_LOCAL; } diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c index 2349070bd534..98acf7319754 100644 --- a/net/ieee802154/6lowpan/tx.c +++ b/net/ieee802154/6lowpan/tx.c @@ -207,7 +207,7 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *dev) /* prepare wpan address data */ sa.mode = IEEE802154_ADDR_LONG; - sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + sa.pan_id = lowpan_dev_info(dev)->real_dev->ieee802154_ptr->pan_id; sa.extended_addr = ieee802154_devaddr_from_raw(saddr); /* intra-PAN communications */ diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index cdc1cc3543f6..ada58a81b753 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -97,8 +97,10 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, BUG_ON(!phy); get_device(&phy->dev); - short_addr = ops->get_short_addr(dev); - pan_id = ops->get_pan_id(dev); + rtnl_lock(); + short_addr = dev->ieee802154_ptr->short_addr; + pan_id = dev->ieee802154_ptr->pan_id; + rtnl_unlock(); if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) || @@ -244,7 +246,9 @@ int ieee802154_associate_resp(struct sk_buff *skb, struct genl_info *info) addr.mode = IEEE802154_ADDR_LONG; addr.extended_addr = nla_get_hwaddr( info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]); - addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + rtnl_lock(); + addr.pan_id = dev->ieee802154_ptr->pan_id; + rtnl_unlock(); ret = ieee802154_mlme_ops(dev)->assoc_resp(dev, &addr, nla_get_shortaddr(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]), @@ -281,7 +285,9 @@ int ieee802154_disassociate_req(struct sk_buff *skb, struct genl_info *info) addr.short_addr = nla_get_shortaddr( info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]); } - addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + rtnl_lock(); + addr.pan_id = dev->ieee802154_ptr->pan_id; + rtnl_unlock(); ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr, nla_get_u8(info->attrs[IEEE802154_ATTR_REASON])); diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 7aaaf967df58..e5cc2537c2a3 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -64,10 +64,8 @@ ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) if (tmp->type != ARPHRD_IEEE802154) continue; - pan_id = ieee802154_mlme_ops(tmp)->get_pan_id(tmp); - short_addr = - ieee802154_mlme_ops(tmp)->get_short_addr(tmp); - + pan_id = tmp->ieee802154_ptr->pan_id; + short_addr = tmp->ieee802154_ptr->short_addr; if (pan_id == addr->pan_id && short_addr == addr->short_addr) { dev = tmp; @@ -797,9 +795,9 @@ static int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) /* Data frame processing */ BUG_ON(dev->type != ARPHRD_IEEE802154); - pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev); - hw_addr = ieee802154_devaddr_from_raw(dev->dev_addr); + pan_id = dev->ieee802154_ptr->pan_id; + short_addr = dev->ieee802154_ptr->short_addr; + hw_addr = dev->ieee802154_ptr->extended_addr; read_lock(&dgram_lock); sk_for_each(sk, &dgram_head) { diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h index 56b3847466e9..eec668f3637f 100644 --- a/net/mac802154/ieee802154_i.h +++ b/net/mac802154/ieee802154_i.h @@ -86,8 +86,6 @@ struct ieee802154_sub_if_data { unsigned long state; char name[IFNAMSIZ]; - spinlock_t mib_lock; - /* protects sec from concurrent access by netlink. access by * encrypt/decrypt/header_create safe without additional protection. */ @@ -136,10 +134,6 @@ ieee802154_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); enum hrtimer_restart ieee802154_xmit_ifs_timer(struct hrtimer *timer); /* MIB callbacks */ -void mac802154_dev_set_short_addr(struct net_device *dev, __le16 val); -__le16 mac802154_dev_get_short_addr(const struct net_device *dev); -__le16 mac802154_dev_get_pan_id(const struct net_device *dev); -void mac802154_dev_set_pan_id(struct net_device *dev, __le16 val); void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan); int mac802154_get_params(struct net_device *dev, diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 0ec7bc402e29..f30788d2702f 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -63,7 +63,6 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) int err = -ENOIOCTLCMD; rtnl_lock(); - spin_lock_bh(&sdata->mib_lock); switch (cmd) { case SIOCGIFADDR: @@ -88,7 +87,6 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } case SIOCSIFADDR: if (netif_running(dev)) { - spin_unlock_bh(&sdata->mib_lock); rtnl_unlock(); return -EBUSY; } @@ -111,7 +109,6 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; } - spin_unlock_bh(&sdata->mib_lock); rtnl_unlock(); return err; } @@ -374,8 +371,6 @@ static int mac802154_header_create(struct sk_buff *skb, return -EINVAL; if (!saddr) { - spin_lock_bh(&sdata->mib_lock); - if (wpan_dev->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST) || wpan_dev->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) || wpan_dev->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST)) { @@ -387,8 +382,6 @@ static int mac802154_header_create(struct sk_buff *skb, } hdr.source.pan_id = wpan_dev->pan_id; - - spin_unlock_bh(&sdata->mib_lock); } else { hdr.source = *(const struct ieee802154_addr *)saddr; } @@ -500,7 +493,6 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata, sdata->dev->ml_priv = &mac802154_mlme_wpan; wpan_dev->promiscuous_mode = false; - spin_lock_init(&sdata->mib_lock); mutex_init(&sdata->sec_mtx); mac802154_llsec_init(&sdata->sec); diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index 3b347d4d3d4c..5220c2b2735b 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -43,8 +43,8 @@ static int mac802154_mlme_start_req(struct net_device *dev, BUG_ON(addr->mode != IEEE802154_ADDR_SHORT); - mac802154_dev_set_pan_id(dev, addr->pan_id); - mac802154_dev_set_short_addr(dev, addr->short_addr); + dev->ieee802154_ptr->pan_id = addr->pan_id; + dev->ieee802154_ptr->short_addr = addr->short_addr; mac802154_dev_set_page_channel(dev, page, channel); if (ops->llsec) { @@ -151,8 +151,6 @@ static struct ieee802154_llsec_ops mac802154_llsec_ops = { struct ieee802154_mlme_ops mac802154_mlme_wpan = { .start_req = mac802154_mlme_start_req, - .get_pan_id = mac802154_dev_get_pan_id, - .get_short_addr = mac802154_dev_get_short_addr, .llsec = &mac802154_llsec_ops, diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c index 00a62de9cc23..73f94fbf8785 100644 --- a/net/mac802154/mib.c +++ b/net/mac802154/mib.c @@ -26,56 +26,6 @@ #include "ieee802154_i.h" #include "driver-ops.h" -void mac802154_dev_set_short_addr(struct net_device *dev, __le16 val) -{ - struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); - - BUG_ON(dev->type != ARPHRD_IEEE802154); - - spin_lock_bh(&sdata->mib_lock); - sdata->wpan_dev.short_addr = val; - spin_unlock_bh(&sdata->mib_lock); -} - -__le16 mac802154_dev_get_short_addr(const struct net_device *dev) -{ - struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); - __le16 ret; - - BUG_ON(dev->type != ARPHRD_IEEE802154); - - spin_lock_bh(&sdata->mib_lock); - ret = sdata->wpan_dev.short_addr; - spin_unlock_bh(&sdata->mib_lock); - - return ret; -} - -__le16 mac802154_dev_get_pan_id(const struct net_device *dev) -{ - struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); - __le16 ret; - - BUG_ON(dev->type != ARPHRD_IEEE802154); - - spin_lock_bh(&sdata->mib_lock); - ret = sdata->wpan_dev.pan_id; - spin_unlock_bh(&sdata->mib_lock); - - return ret; -} - -void mac802154_dev_set_pan_id(struct net_device *dev, __le16 val) -{ - struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); - - BUG_ON(dev->type != ARPHRD_IEEE802154); - - spin_lock_bh(&sdata->mib_lock); - sdata->wpan_dev.pan_id = val; - spin_unlock_bh(&sdata->mib_lock); -} - void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan) { struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index c0d67b2b4132..e0f10063cac3 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -47,8 +47,6 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, pr_debug("getting packet via slave interface %s\n", sdata->dev->name); - spin_lock_bh(&sdata->mib_lock); - span = wpan_dev->pan_id; sshort = wpan_dev->short_addr; @@ -83,13 +81,10 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, skb->pkt_type = PACKET_OTHERHOST; break; default: - spin_unlock_bh(&sdata->mib_lock); pr_debug("invalid dest mode\n"); goto fail; } - spin_unlock_bh(&sdata->mib_lock); - skb->dev = sdata->dev; rc = mac802154_llsec_decrypt(&sdata->sec, skb); -- cgit v1.2.3 From be12a1fe298e8be04d5215364f94654dff81b0bc Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 21 May 2015 16:59:58 +0200 Subject: net: skbuff: add skb_append_pagefrags and use it Signed-off-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +++ net/core/skbuff.c | 18 ++++++++++++++++++ net/ipv4/ip_output.c | 15 ++++----------- 3 files changed, 25 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b617095adb88..f708936cdd23 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -861,6 +861,9 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, int len, int odd, struct sk_buff *skb), void *from, int length); +int skb_append_pagefrags(struct sk_buff *skb, struct page *page, + int offset, size_t size); + struct skb_seq_state { __u32 lower_offset; __u32 upper_offset; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f3fe9bd9e672..4f2babeaf18d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2915,6 +2915,24 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, } EXPORT_SYMBOL(skb_append_datato_frags); +int skb_append_pagefrags(struct sk_buff *skb, struct page *page, + int offset, size_t size) +{ + int i = skb_shinfo(skb)->nr_frags; + + if (skb_can_coalesce(skb, i, page, offset)) { + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size); + } else if (i < MAX_SKB_FRAGS) { + get_page(page); + skb_fill_page_desc(skb, i, page, offset, size); + } else { + return -EMSGSIZE; + } + + return 0; +} +EXPORT_SYMBOL_GPL(skb_append_pagefrags); + /** * skb_pull_rcsum - pull skb and update receive checksum * @skb: buffer to update diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8d91b922fcfe..451b009dae75 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1233,11 +1233,9 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, } while (size > 0) { - int i; - - if (skb_is_gso(skb)) + if (skb_is_gso(skb)) { len = size; - else { + } else { /* Check if the remaining data fits into current packet. */ len = mtu - skb->len; @@ -1289,15 +1287,10 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, continue; } - i = skb_shinfo(skb)->nr_frags; if (len > size) len = size; - if (skb_can_coalesce(skb, i, page, offset)) { - skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len); - } else if (i < MAX_SKB_FRAGS) { - get_page(page); - skb_fill_page_desc(skb, i, page, offset, len); - } else { + + if (skb_append_pagefrags(skb, page, offset, len)) { err = -EMSGSIZE; goto error; } -- cgit v1.2.3 From a60e3cc7c92973a31fad0fd04dc5cf4355d3d1ef Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 21 May 2015 17:00:00 +0200 Subject: net: make skb_splice_bits more configureable Prepare skb_splice_bits to be able to deal with AF_UNIX sockets. AF_UNIX sockets don't use lock_sock/release_sock and thus we have to use a callback to make the locking and unlocking configureable. Signed-off-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 11 +++++++++-- net/core/skbuff.c | 45 ++++++++++++++++++++++++++++----------------- net/ipv4/tcp.c | 5 +++-- 3 files changed, 40 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f708936cdd23..6b41c15efa27 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -35,6 +35,7 @@ #include #include #include +#include /* A. Checksumming of received packets by device. * @@ -2699,9 +2700,15 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, __wsum csum); -int skb_splice_bits(struct sk_buff *skb, unsigned int offset, +ssize_t skb_socket_splice(struct sock *sk, + struct pipe_inode_info *pipe, + struct splice_pipe_desc *spd); +int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int len, - unsigned int flags); + unsigned int flags, + ssize_t (*splice_cb)(struct sock *, + struct pipe_inode_info *, + struct splice_pipe_desc *)); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4f2babeaf18d..02769fa4f5c8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1870,15 +1870,39 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, return false; } +ssize_t skb_socket_splice(struct sock *sk, + struct pipe_inode_info *pipe, + struct splice_pipe_desc *spd) +{ + int ret; + + /* Drop the socket lock, otherwise we have reverse + * locking dependencies between sk_lock and i_mutex + * here as compared to sendfile(). We enter here + * with the socket lock held, and splice_to_pipe() will + * grab the pipe inode lock. For sendfile() emulation, + * we call into ->sendpage() with the i_mutex lock held + * and networking will grab the socket lock. + */ + release_sock(sk); + ret = splice_to_pipe(pipe, spd); + lock_sock(sk); + + return ret; +} + /* * Map data from the skb to a pipe. Should handle both the linear part, * the fragments, and the frag list. It does NOT handle frag lists within * the frag list, if such a thing exists. We'd probably need to recurse to * handle that cleanly. */ -int skb_splice_bits(struct sk_buff *skb, unsigned int offset, +int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int tlen, - unsigned int flags) + unsigned int flags, + ssize_t (*splice_cb)(struct sock *, + struct pipe_inode_info *, + struct splice_pipe_desc *)) { struct partial_page partial[MAX_SKB_FRAGS]; struct page *pages[MAX_SKB_FRAGS]; @@ -1891,7 +1915,6 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, .spd_release = sock_spd_release, }; struct sk_buff *frag_iter; - struct sock *sk = skb->sk; int ret = 0; /* @@ -1914,20 +1937,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, } done: - if (spd.nr_pages) { - /* - * Drop the socket lock, otherwise we have reverse - * locking dependencies between sk_lock and i_mutex - * here as compared to sendfile(). We enter here - * with the socket lock held, and splice_to_pipe() will - * grab the pipe inode lock. For sendfile() emulation, - * we call into ->sendpage() with the i_mutex lock held - * and networking will grab the socket lock. - */ - release_sock(sk); - ret = splice_to_pipe(pipe, &spd); - lock_sock(sk); - } + if (spd.nr_pages) + ret = splice_cb(sk, pipe, &spd); return ret; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 90afcec3f427..65f791f74845 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -695,8 +695,9 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, struct tcp_splice_state *tss = rd_desc->arg.data; int ret; - ret = skb_splice_bits(skb, offset, tss->pipe, min(rd_desc->count, len), - tss->flags); + ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe, + min(rd_desc->count, len), tss->flags, + skb_socket_splice); if (ret > 0) rd_desc->count -= ret; return ret; -- cgit v1.2.3 From 286c2349f6665c3e67f464a5faa14a0e28be4842 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 22 May 2015 20:55:56 -0700 Subject: ipv6: Clean up ipv6_select_ident() and ip6_fragment() This patch changes the ipv6_select_ident() signature to return a fragment id instead of taking a whole frag_hdr as a param to only set the frag_hdr->identification. It also cleans up ip6_fragment() to obtain the fragment id at the beginning instead of using multiple "if" later to check fragment id has been generated or not. Signed-off-by: Martin KaFai Lau Cc: Hannes Frederic Sowa Cc: Steffen Klassert Cc: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ipv6.h | 3 +-- net/ipv6/ip6_output.c | 17 ++++++----------- net/ipv6/output_core.c | 5 ++--- 3 files changed, 9 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index aab8190d16e8..8c4f881edbd2 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -671,8 +671,7 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } -void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr, - struct rt6_info *rt); +u32 ipv6_select_ident(struct net *net, struct rt6_info *rt); void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb); int ip6_dst_hoplimit(struct dst_entry *dst); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index bc09cb97b840..05e2cdf938bd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -551,7 +551,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, struct frag_hdr *fh; unsigned int mtu, hlen, left, len; int hroom, troom; - __be32 frag_id = 0; + __be32 frag_id; int ptr, offset = 0, err = 0; u8 *prevhdr, nexthdr = 0; struct net *net = dev_net(skb_dst(skb)->dev); @@ -584,6 +584,8 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, } mtu -= hlen + sizeof(struct frag_hdr); + frag_id = ipv6_select_ident(net, rt); + if (skb_has_frag_list(skb)) { int first_len = skb_pagelen(skb); struct sk_buff *frag2; @@ -632,11 +634,10 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, skb_reset_network_header(skb); memcpy(skb_network_header(skb), tmp_hdr, hlen); - ipv6_select_ident(net, fh, rt); fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(IP6_MF); - frag_id = fh->identification; + fh->identification = frag_id; first_len = skb_pagelen(skb); skb->data_len = first_len - skb_headlen(skb); @@ -778,11 +779,7 @@ slow_path: */ fh->nexthdr = nexthdr; fh->reserved = 0; - if (!frag_id) { - ipv6_select_ident(net, fh, rt); - frag_id = fh->identification; - } else - fh->identification = frag_id; + fh->identification = frag_id; /* * Copy a block of the IP datagram. @@ -1064,7 +1061,6 @@ static inline int ip6_ufo_append_data(struct sock *sk, { struct sk_buff *skb; - struct frag_hdr fhdr; int err; /* There is support for UDP large send offload by network @@ -1106,8 +1102,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - sizeof(struct frag_hdr)) & ~7; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - ipv6_select_ident(sock_net(sk), &fhdr, rt); - skb_shinfo(skb)->ip6_frag_id = fhdr.identification; + skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk), rt); append: return skb_append_datato_frags(sk, skb, getfrag, from, diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 85892af57364..ef0e2326496b 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -60,8 +60,7 @@ void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); -void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr, - struct rt6_info *rt) +u32 ipv6_select_ident(struct net *net, struct rt6_info *rt) { static u32 ip6_idents_hashrnd __read_mostly; u32 id; @@ -70,7 +69,7 @@ void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr, id = __ipv6_select_ident(net, ip6_idents_hashrnd, &rt->rt6i_dst.addr, &rt->rt6i_src.addr); - fhdr->identification = htonl(id); + return htonl(id); } EXPORT_SYMBOL(ipv6_select_ident); -- cgit v1.2.3 From fd0273d7939f2ce3247f6aac5f6b9a0135d4cd39 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 22 May 2015 20:55:57 -0700 Subject: ipv6: Remove external dependency on rt6i_dst and rt6i_src This patch removes the assumptions that the returned rt is always a RTF_CACHE entry with the rt6i_dst and rt6i_src containing the destination and source address. The dst and src can be recovered from the calling site. We may consider to rename (rt6i_dst, rt6i_src) to (rt6i_key_dst, rt6i_key_src) later. Signed-off-by: Martin KaFai Lau Reviewed-by: Hannes Frederic Sowa Cc: Steffen Klassert Cc: Julian Anastasov Signed-off-by: David S. Miller --- drivers/scsi/cxgbi/libcxgbi.c | 2 +- include/net/ipv6.h | 4 +++- net/ipv6/icmp.c | 2 +- net/ipv6/ip6_output.c | 13 ++++++++----- net/ipv6/ndisc.c | 2 +- net/ipv6/output_core.c | 10 ++++++---- net/ipv6/tcp_ipv6.c | 2 +- net/netfilter/ipvs/ip_vs_xmit.c | 4 ++-- net/sctp/ipv6.c | 3 ++- 9 files changed, 25 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index eb58afcfb73b..45d30398d7c3 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -728,7 +728,7 @@ static struct cxgbi_sock *cxgbi_check_route6(struct sockaddr *dst_addr) } ndev = n->dev; - if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { + if (ipv6_addr_is_multicast(&daddr6->sin6_addr)) { pr_info("multi-cast route %pI6 port %u, dev %s.\n", daddr6->sin6_addr.s6_addr, ntohs(daddr6->sin6_port), ndev->name); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8c4f881edbd2..b950a2000b7f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -671,7 +671,9 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } -u32 ipv6_select_ident(struct net *net, struct rt6_info *rt); +u32 ipv6_select_ident(struct net *net, + const struct in6_addr *daddr, + const struct in6_addr *saddr); void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb); int ip6_dst_hoplimit(struct dst_entry *dst); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 2c2b5d51f15c..24b359d1425b 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -207,7 +207,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, struct inet_peer *peer; peer = inet_getpeer_v6(net->ipv6.peers, - &rt->rt6i_dst.addr, 1); + &fl6->daddr, 1); res = inet_peer_xrlim_allow(peer, tmo); if (peer) inet_putpeer(peer); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 05e2cdf938bd..3097f2c5e890 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -459,7 +459,7 @@ int ip6_forward(struct sk_buff *skb) else target = &hdr->daddr; - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); + peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1); /* Limit redirects both by destination (here) and by source (inside ndisc_send_redirect) @@ -584,7 +584,8 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, } mtu -= hlen + sizeof(struct frag_hdr); - frag_id = ipv6_select_ident(net, rt); + frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, + &ipv6_hdr(skb)->saddr); if (skb_has_frag_list(skb)) { int first_len = skb_pagelen(skb); @@ -1057,7 +1058,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, int transhdrlen, int mtu, unsigned int flags, - struct rt6_info *rt) + const struct flowi6 *fl6) { struct sk_buff *skb; @@ -1102,7 +1103,9 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - sizeof(struct frag_hdr)) & ~7; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk), rt); + skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk), + &fl6->daddr, + &fl6->saddr); append: return skb_append_datato_frags(sk, skb, getfrag, from, @@ -1327,7 +1330,7 @@ emsgsize: (sk->sk_type == SOCK_DGRAM)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, - transhdrlen, mtu, flags, rt); + transhdrlen, mtu, flags, fl6); if (err) goto error; return 0; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 96f153c0846b..0a05b35a90fc 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1506,7 +1506,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) "Redirect: destination is not a neighbour\n"); goto release; } - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); + peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1); ret = inet_peer_xrlim_allow(peer, 1*HZ); if (peer) inet_putpeer(peer); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index ef0e2326496b..055e85cb7b65 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -10,7 +10,8 @@ #include static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, - struct in6_addr *dst, struct in6_addr *src) + const struct in6_addr *dst, + const struct in6_addr *src) { u32 hash, id; @@ -60,15 +61,16 @@ void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); -u32 ipv6_select_ident(struct net *net, struct rt6_info *rt) +u32 ipv6_select_ident(struct net *net, + const struct in6_addr *daddr, + const struct in6_addr *saddr) { static u32 ip6_idents_hashrnd __read_mostly; u32 id; net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); - id = __ipv6_select_ident(net, ip6_idents_hashrnd, &rt->rt6i_dst.addr, - &rt->rt6i_src.addr); + id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr); return htonl(id); } EXPORT_SYMBOL(ipv6_select_ident); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index fefe4455e1f3..1a9390a370c0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -262,7 +262,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, rt = (struct rt6_info *) dst; if (tcp_death_row.sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && - ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr)) + ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr)) tcp_fetch_timewait_stamp(sk, dst); icsk->icsk_ext_hdr_len = 0; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 19986ec5f21a..38f862728f75 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -781,7 +781,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* From world but DNAT to loopback address? */ if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && - ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { + ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) { IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, "ip_vs_nat_xmit_v6(): " "stopping DNAT to loopback address"); @@ -1346,7 +1346,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* From world but DNAT to loopback address? */ if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && - ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { + ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) { IP_VS_DBG(1, "%s(): " "stopping DNAT to loopback %pI6\n", __func__, &cp->daddr.in6); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e703ff7fed40..17a0120ae5a5 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -332,7 +332,8 @@ out: rt = (struct rt6_info *)dst; t->dst = dst; t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; - pr_debug("rt6_dst:%pI6 rt6_src:%pI6\n", &rt->rt6i_dst.addr, + pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n", + &rt->rt6i_dst.addr, rt->rt6i_dst.plen, &fl6->saddr); } else { t->dst = NULL; -- cgit v1.2.3 From 2647a9b07032c5a95ddee1fcb65d95bddbc6b7f9 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 22 May 2015 20:55:58 -0700 Subject: ipv6: Remove external dependency on rt6i_gateway and RTF_ANYCAST When creating a RTF_CACHE route, RTF_ANYCAST is set based on rt6i_dst. Also, rt6i_gateway is always set to the nexthop while the nexthop could be a gateway or the rt6i_dst.addr. After removing the rt6i_dst and rt6i_src dependency in the last patch, we also need to stop the caller from depending on rt6i_gateway and RTF_ANYCAST. Signed-off-by: Martin KaFai Lau Cc: Hannes Frederic Sowa Cc: Steffen Klassert Cc: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip6_route.h | 19 ++++++++++++++----- net/bluetooth/6lowpan.c | 2 +- net/ipv6/icmp.c | 4 ++-- net/ipv6/ip6_output.c | 5 +++-- net/ipv6/route.c | 6 +----- net/netfilter/nf_conntrack_h323_main.c | 4 ++-- net/netfilter/xt_addrtype.c | 2 +- 7 files changed, 24 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 5e192068e6cb..4caf7d697dee 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -163,11 +163,14 @@ static inline bool ipv6_unicast_destination(const struct sk_buff *skb) return rt->rt6i_flags & RTF_LOCAL; } -static inline bool ipv6_anycast_destination(const struct sk_buff *skb) +static inline bool ipv6_anycast_destination(const struct dst_entry *dst, + const struct in6_addr *daddr) { - struct rt6_info *rt = (struct rt6_info *) skb_dst(skb); + struct rt6_info *rt = (struct rt6_info *)dst; - return rt->rt6i_flags & RTF_ANYCAST; + return rt->rt6i_flags & RTF_ANYCAST || + (rt->rt6i_dst.plen != 128 && + ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); } int ip6_fragment(struct sock *sk, struct sk_buff *skb, @@ -194,9 +197,15 @@ static inline bool ip6_sk_ignore_df(const struct sock *sk) inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT; } -static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt) +static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, + struct in6_addr *daddr) { - return &rt->rt6i_gateway; + if (rt->rt6i_flags & RTF_GATEWAY) + return &rt->rt6i_gateway; + else if (rt->rt6i_flags & RTF_CACHE) + return &rt->rt6i_dst.addr; + else + return daddr; } #endif diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 1742b849fcff..f3d6046c8ee7 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -192,7 +192,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_dev *dev, if (ipv6_addr_any(nexthop)) return NULL; } else { - nexthop = rt6_nexthop(rt); + nexthop = rt6_nexthop(rt, daddr); /* We need to remember the address because it is needed * by bt_xmit() when sending the packet. In bt_xmit(), the diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 24b359d1425b..713d7434c911 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -337,7 +337,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, * We won't send icmp if the destination is known * anycast. */ - if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) { + if (ipv6_anycast_destination(dst, &fl6->daddr)) { net_dbg_ratelimited("icmp6_send: acast source\n"); dst_release(dst); return ERR_PTR(-EINVAL); @@ -564,7 +564,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (!ipv6_unicast_destination(skb) && !(net->ipv6.sysctl.anycast_src_echo_reply && - ipv6_anycast_destination(skb))) + ipv6_anycast_destination(skb_dst(skb), saddr))) saddr = NULL; memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3097f2c5e890..61741676b987 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) } rcu_read_lock_bh(); - nexthop = rt6_nexthop((struct rt6_info *)dst); + nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); @@ -934,7 +934,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, */ rt = (struct rt6_info *) *dst; rcu_read_lock_bh(); - n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); + n = __ipv6_neigh_lookup_noref(rt->dst.dev, + rt6_nexthop(rt, &fl6->daddr)); err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; rcu_read_unlock_bh(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0c889cb89cc3..ce3b1754b4f5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1945,11 +1945,7 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, if (rt->rt6i_idev) in6_dev_hold(rt->rt6i_idev); rt->dst.lastuse = jiffies; - - if (ort->rt6i_flags & RTF_GATEWAY) - rt->rt6i_gateway = ort->rt6i_gateway; - else - rt->rt6i_gateway = *dest; + rt->rt6i_gateway = ort->rt6i_gateway; rt->rt6i_flags = ort->rt6i_flags; rt6_set_from(rt, ort); rt->rt6i_metric = 0; diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 1d69f5b9748f..9511af04dc81 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -779,8 +779,8 @@ static int callforward_do_filter(struct net *net, flowi6_to_flowi(&fl1), false)) { if (!afinfo->route(net, (struct dst_entry **)&rt2, flowi6_to_flowi(&fl2), false)) { - if (ipv6_addr_equal(rt6_nexthop(rt1), - rt6_nexthop(rt2)) && + if (ipv6_addr_equal(rt6_nexthop(rt1, &fl1.daddr), + rt6_nexthop(rt2, &fl2.daddr)) && rt1->dst.dev == rt2->dst.dev) ret = 1; dst_release(&rt2->dst); diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index fab6eea1bf38..5b4743cc0436 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -73,7 +73,7 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, if (dev == NULL && rt->rt6i_flags & RTF_LOCAL) ret |= XT_ADDRTYPE_LOCAL; - if (rt->rt6i_flags & RTF_ANYCAST) + if (ipv6_anycast_destination((struct dst_entry *)rt, addr)) ret |= XT_ADDRTYPE_ANYCAST; dst_release(&rt->dst); -- cgit v1.2.3 From 45e4fd26683c9a5f88600d91b08a484f7f09226a Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 22 May 2015 20:56:00 -0700 Subject: ipv6: Only create RTF_CACHE routes after encountering pmtu exception This patch creates a RTF_CACHE routes only after encountering a pmtu exception. After ip6_rt_update_pmtu() has inserted the RTF_CACHE route to the fib6 tree, the rt->rt6i_node->fn_sernum is bumped which will fail the ip6_dst_check() and trigger a relookup. Signed-off-by: Martin KaFai Lau Cc: Hannes Frederic Sowa Cc: Steffen Klassert Cc: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- net/ipv6/ip6_fib.c | 1 + net/ipv6/route.c | 100 ++++++++++++++++++++++++------------------------ 3 files changed, 53 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 4caf7d697dee..784ee3d01dbf 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -202,7 +202,7 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, { if (rt->rt6i_flags & RTF_GATEWAY) return &rt->rt6i_gateway; - else if (rt->rt6i_flags & RTF_CACHE) + else if (unlikely(rt->rt6i_flags & RTF_CACHE)) return &rt->rt6i_dst.addr; else return daddr; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index bde57b113009..83341b3a248d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -738,6 +738,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, rt6_clean_expires(iter); else rt6_set_expires(iter, rt->dst.expires); + iter->rt6i_pmtu = rt->rt6i_pmtu; return -EEXIST; } /* If we have the same destination and the same metric, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f199d6357b31..e7ae2430dfed 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -873,16 +873,13 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, struct flowi6 *fl6, int flags) { struct fib6_node *fn, *saved_fn; - struct rt6_info *rt, *nrt; + struct rt6_info *rt; int strict = 0; - int attempts = 3; - int err; strict |= flags & RT6_LOOKUP_F_IFACE; if (net->ipv6.devconf_all->forwarding == 0) strict |= RT6_LOOKUP_F_REACHABLE; -redo_fib6_lookup_lock: read_lock_bh(&table->tb6_lock); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); @@ -901,46 +898,12 @@ redo_rt6_select: strict &= ~RT6_LOOKUP_F_REACHABLE; fn = saved_fn; goto redo_rt6_select; - } else { - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); - goto out2; } } dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (rt->rt6i_flags & RTF_CACHE) - goto out2; - - if (!rt6_is_gw_or_nonexthop(rt) || - !(rt->dst.flags & DST_HOST) || !(rt->rt6i_flags & RTF_LOCAL)) - nrt = ip6_rt_cache_alloc(rt, &fl6->daddr, &fl6->saddr); - else - goto out2; - - ip6_rt_put(rt); - rt = nrt ? : net->ipv6.ip6_null_entry; - - dst_hold(&rt->dst); - if (nrt) { - err = ip6_ins_rt(nrt); - if (!err) - goto out2; - } - - if (--attempts <= 0) - goto out2; - - /* - * Race condition! In the gap, when table->tb6_lock was - * released someone could insert this route. Relookup. - */ - ip6_rt_put(rt); - goto redo_fib6_lookup_lock; - -out2: rt6_dst_from_metrics_check(rt); rt->dst.lastuse = jiffies; rt->dst.__use++; @@ -1113,24 +1076,63 @@ static void ip6_link_failure(struct sk_buff *skb) } } -static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) +static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) +{ + struct net *net = dev_net(rt->dst.dev); + + rt->rt6i_flags |= RTF_MODIFIED; + rt->rt6i_pmtu = mtu; + rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); +} + +static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, + const struct ipv6hdr *iph, u32 mtu) { struct rt6_info *rt6 = (struct rt6_info *)dst; - dst_confirm(dst); - if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) { - struct net *net = dev_net(dst->dev); + if (rt6->rt6i_flags & RTF_LOCAL) + return; - rt6->rt6i_flags |= RTF_MODIFIED; - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; + dst_confirm(dst); + mtu = max_t(u32, mtu, IPV6_MIN_MTU); + if (mtu >= dst_mtu(dst)) + return; - rt6->rt6i_pmtu = mtu; - rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); + if (rt6->rt6i_flags & RTF_CACHE) { + rt6_do_update_pmtu(rt6, mtu); + } else { + const struct in6_addr *daddr, *saddr; + struct rt6_info *nrt6; + + if (iph) { + daddr = &iph->daddr; + saddr = &iph->saddr; + } else if (sk) { + daddr = &sk->sk_v6_daddr; + saddr = &inet6_sk(sk)->saddr; + } else { + return; + } + nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); + if (nrt6) { + rt6_do_update_pmtu(nrt6, mtu); + + /* ip6_ins_rt(nrt6) will bump the + * rt6->rt6i_node->fn_sernum + * which will fail the next rt6_check() and + * invalidate the sk->sk_dst_cache. + */ + ip6_ins_rt(nrt6); + } } } +static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) +{ + __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu); +} + void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, u32 mark) { @@ -1147,7 +1149,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) - ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu)); + __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu)); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_update_pmtu); -- cgit v1.2.3 From b197df4f0f3782782e9ea8996e91b65ae33e8dd9 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 22 May 2015 20:56:01 -0700 Subject: ipv6: Add rt6_get_cookie() function Instead of doing the rt6->rt6i_node check whenever we need to get the route's cookie. Refactor it into rt6_get_cookie(). It is a prep work to handle FLOWI_FLAG_KNOWN_NH and also percpu rt6_info later. Signed-off-by: Martin KaFai Lau Cc: Hannes Frederic Sowa Cc: Steffen Klassert Cc: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 5 +++++ include/net/ip6_route.h | 2 +- net/ipv6/ip6_tunnel.c | 2 +- net/ipv6/tcp_ipv6.c | 3 +-- net/ipv6/xfrm6_policy.c | 6 ++---- net/netfilter/ipvs/ip_vs_xmit.c | 2 +- net/sctp/ipv6.c | 2 +- 7 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index e00018011028..a4bece6797da 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -159,6 +159,11 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) rt0->rt6i_flags |= RTF_EXPIRES; } +static inline u32 rt6_get_cookie(const struct rt6_info *rt) +{ + return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; +} + static inline void ip6_rt_put(struct rt6_info *rt) { /* dst_release() accepts a NULL parameter. diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 784ee3d01dbf..297629aadb19 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -145,7 +145,7 @@ static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst, #ifdef CONFIG_IPV6_SUBTREES np->saddr_cache = saddr; #endif - np->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; + np->dst_cookie = rt6_get_cookie(rt); } static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5cafd92c2312..2e67b660118b 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -151,7 +151,7 @@ EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *) dst; - t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; + t->dst_cookie = rt6_get_cookie(rt); dst_release(t->dst_cache); t->dst_cache = dst; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1a9390a370c0..7be3d858cbf0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -99,8 +99,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) dst_hold(dst); sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; - if (rt->rt6i_node) - inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum; + inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); } } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 6ae256b4c55a..ed0583c1b9fc 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -76,8 +76,7 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, { if (dst->ops->family == AF_INET6) { struct rt6_info *rt = (struct rt6_info *)dst; - if (rt->rt6i_node) - path->path_cookie = rt->rt6i_node->fn_sernum; + path->path_cookie = rt6_get_cookie(rt); } path->u.rt6.rt6i_nfheader_len = nfheader_len; @@ -105,8 +104,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, RTF_LOCAL); xdst->u.rt6.rt6i_metric = rt->rt6i_metric; xdst->u.rt6.rt6i_node = rt->rt6i_node; - if (rt->rt6i_node) - xdst->route_cookie = rt->rt6i_node->fn_sernum; + xdst->route_cookie = rt6_get_cookie(rt); xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; xdst->u.rt6.rt6i_dst = rt->rt6i_dst; xdst->u.rt6.rt6i_src = rt->rt6i_src; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 38f862728f75..5eff9f6dcfb8 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -435,7 +435,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, goto err_unreach; } rt = (struct rt6_info *) dst; - cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; + cookie = rt6_get_cookie(rt); __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); spin_unlock_bh(&dest->dst_lock); IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 17a0120ae5a5..e917d27328ea 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -331,7 +331,7 @@ out: rt = (struct rt6_info *)dst; t->dst = dst; - t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; + t->dst_cookie = rt6_get_cookie(rt); pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n", &rt->rt6i_dst.addr, rt->rt6i_dst.plen, &fl6->saddr); -- cgit v1.2.3 From 3da59bd94583d1239e4fbdee452265a160b9cd71 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 22 May 2015 20:56:03 -0700 Subject: ipv6: Create RTF_CACHE clone when FLOWI_FLAG_KNOWN_NH is set This patch always creates RTF_CACHE clone with DST_NOCACHE when FLOWI_FLAG_KNOWN_NH is set so that the rt6i_dst is set to the fl6->daddr. Signed-off-by: Martin KaFai Lau Acked-by: Julian Anastasov Tested-by: Julian Anastasov Cc: Hannes Frederic Sowa Cc: Steffen Klassert Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 3 +++ net/ipv6/route.c | 59 ++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 52 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index a4bece6797da..5556111022eb 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -161,6 +161,9 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) static inline u32 rt6_get_cookie(const struct rt6_info *rt) { + if (unlikely(rt->dst.flags & DST_NOCACHE)) + rt = (struct rt6_info *)(rt->dst.from); + return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e7ae2430dfed..9e4c3c5d1591 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -901,13 +901,34 @@ redo_rt6_select: } } - dst_hold(&rt->dst); + dst_use(&rt->dst, jiffies); read_unlock_bh(&table->tb6_lock); - rt6_dst_from_metrics_check(rt); - rt->dst.lastuse = jiffies; - rt->dst.__use++; + if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) { + goto done; + } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && + !(rt->rt6i_flags & RTF_GATEWAY))) { + /* Create a RTF_CACHE clone which will not be + * owned by the fib6 tree. It is for the special case where + * the daddr in the skb during the neighbor look-up is different + * from the fl6->daddr used to look-up route here. + */ + + struct rt6_info *uncached_rt; + + uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); + dst_release(&rt->dst); + + if (uncached_rt) + uncached_rt->dst.flags |= DST_NOCACHE; + else + uncached_rt = net->ipv6.ip6_null_entry; + dst_hold(&uncached_rt->dst); + return uncached_rt; + } +done: + rt6_dst_from_metrics_check(rt); return rt; } @@ -1019,6 +1040,26 @@ static void rt6_dst_from_metrics_check(struct rt6_info *rt) dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true); } +static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) +{ + if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) + return NULL; + + if (rt6_check_expired(rt)) + return NULL; + + return &rt->dst; +} + +static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) +{ + if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && + rt6_check((struct rt6_info *)(rt->dst.from), cookie)) + return &rt->dst; + else + return NULL; +} + static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) { struct rt6_info *rt; @@ -1029,15 +1070,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) * DST_OBSOLETE_FORCE_CHK which forces validation calls down * into this function always. */ - if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) - return NULL; - - if (rt6_check_expired(rt)) - return NULL; rt6_dst_from_metrics_check(rt); - return dst; + if (unlikely(dst->flags & DST_NOCACHE)) + return rt6_dst_from_check(rt, cookie); + else + return rt6_check(rt, cookie); } static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) -- cgit v1.2.3 From 8d0b94afdca84598912347e61defa846a0988d04 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 22 May 2015 20:56:04 -0700 Subject: ipv6: Keep track of DST_NOCACHE routes in case of iface down/unregister This patch keeps track of the DST_NOCACHE routes in a list and replaces its dev with loopback during the iface down/unregister event. Signed-off-by: Martin KaFai Lau Cc: Hannes Frederic Sowa Cc: Steffen Klassert Cc: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 3 ++ net/ipv6/route.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 79 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 5556111022eb..cc8f03c10c43 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -120,6 +120,9 @@ struct rt6_info { struct rt6key rt6i_src; struct rt6key rt6i_prefsrc; + struct list_head rt6i_uncached; + struct uncached_list *rt6i_uncached_list; + struct inet6_dev *rt6i_idev; u32 rt6i_metric; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9e4c3c5d1591..94ce1e0b5a33 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -105,6 +105,67 @@ static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *gwaddr, int ifindex); #endif +struct uncached_list { + spinlock_t lock; + struct list_head head; +}; + +static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list); + +static void rt6_uncached_list_add(struct rt6_info *rt) +{ + struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list); + + rt->dst.flags |= DST_NOCACHE; + rt->rt6i_uncached_list = ul; + + spin_lock_bh(&ul->lock); + list_add_tail(&rt->rt6i_uncached, &ul->head); + spin_unlock_bh(&ul->lock); +} + +static void rt6_uncached_list_del(struct rt6_info *rt) +{ + if (!list_empty(&rt->rt6i_uncached)) { + struct uncached_list *ul = rt->rt6i_uncached_list; + + spin_lock_bh(&ul->lock); + list_del(&rt->rt6i_uncached); + spin_unlock_bh(&ul->lock); + } +} + +static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) +{ + struct net_device *loopback_dev = net->loopback_dev; + int cpu; + + for_each_possible_cpu(cpu) { + struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); + struct rt6_info *rt; + + spin_lock_bh(&ul->lock); + list_for_each_entry(rt, &ul->head, rt6i_uncached) { + struct inet6_dev *rt_idev = rt->rt6i_idev; + struct net_device *rt_dev = rt->dst.dev; + + if (rt_idev && (rt_idev->dev == dev || !dev) && + rt_idev->dev != loopback_dev) { + rt->rt6i_idev = in6_dev_get(loopback_dev); + in6_dev_put(rt_idev); + } + + if (rt_dev && (rt_dev == dev || !dev) && + rt_dev != loopback_dev) { + rt->dst.dev = loopback_dev; + dev_hold(rt->dst.dev); + dev_put(rt_dev); + } + } + spin_unlock_bh(&ul->lock); + } +} + static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) { struct rt6_info *rt = (struct rt6_info *)dst; @@ -262,6 +323,7 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); INIT_LIST_HEAD(&rt->rt6i_siblings); + INIT_LIST_HEAD(&rt->rt6i_uncached); } return rt; } @@ -269,11 +331,14 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; - struct inet6_dev *idev = rt->rt6i_idev; struct dst_entry *from = dst->from; + struct inet6_dev *idev; dst_destroy_metrics_generic(dst); + rt6_uncached_list_del(rt); + + idev = rt->rt6i_idev; if (idev) { rt->rt6i_idev = NULL; in6_dev_put(idev); @@ -920,7 +985,7 @@ redo_rt6_select: dst_release(&rt->dst); if (uncached_rt) - uncached_rt->dst.flags |= DST_NOCACHE; + rt6_uncached_list_add(uncached_rt); else uncached_rt = net->ipv6.ip6_null_entry; dst_hold(&uncached_rt->dst); @@ -2367,6 +2432,7 @@ void rt6_ifdown(struct net *net, struct net_device *dev) fib6_clean_all(net, fib6_ifdown, &adn); icmp6_clean_all(fib6_ifdown, &adn); + rt6_uncached_list_flush_dev(net, dev); } struct rt6_mtu_change_arg { @@ -3263,6 +3329,7 @@ static struct notifier_block ip6_route_dev_notifier = { int __init ip6_route_init(void) { int ret; + int cpu; ret = -ENOMEM; ip6_dst_ops_template.kmem_cachep = @@ -3322,6 +3389,13 @@ int __init ip6_route_init(void) if (ret) goto out_register_late_subsys; + for_each_possible_cpu(cpu) { + struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); + + INIT_LIST_HEAD(&ul->head); + spin_lock_init(&ul->lock); + } + out: return ret; -- cgit v1.2.3 From d52d3997f843ffefaa8d8462790ffcaca6c74192 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 22 May 2015 20:56:06 -0700 Subject: ipv6: Create percpu rt6_info After the patch 'ipv6: Only create RTF_CACHE routes after encountering pmtu exception', we need to compensate the performance hit (bouncing dst->__refcnt). Signed-off-by: Martin KaFai Lau Cc: Hannes Frederic Sowa Cc: Steffen Klassert Cc: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 3 +- include/uapi/linux/ipv6_route.h | 1 + net/ipv6/ip6_fib.c | 24 +++++++- net/ipv6/route.c | 132 +++++++++++++++++++++++++++++++++++----- 4 files changed, 142 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index cc8f03c10c43..3b76849c190f 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -124,6 +124,7 @@ struct rt6_info { struct uncached_list *rt6i_uncached_list; struct inet6_dev *rt6i_idev; + struct rt6_info * __percpu *rt6i_pcpu; u32 rt6i_metric; u32 rt6i_pmtu; @@ -164,7 +165,7 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) static inline u32 rt6_get_cookie(const struct rt6_info *rt) { - if (unlikely(rt->dst.flags & DST_NOCACHE)) + if (rt->rt6i_flags & RTF_PCPU || unlikely(rt->dst.flags & DST_NOCACHE)) rt = (struct rt6_info *)(rt->dst.from); return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h index 2be7bd174751..f6598d1c886e 100644 --- a/include/uapi/linux/ipv6_route.h +++ b/include/uapi/linux/ipv6_route.h @@ -34,6 +34,7 @@ #define RTF_PREF(pref) ((pref) << 27) #define RTF_PREF_MASK 0x18000000 +#define RTF_PCPU 0x40000000 #define RTF_LOCAL 0x80000000 diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 83341b3a248d..55d19861ab20 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -154,10 +154,32 @@ static void node_free(struct fib6_node *fn) kmem_cache_free(fib6_node_kmem, fn); } +static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) +{ + int cpu; + + if (!non_pcpu_rt->rt6i_pcpu) + return; + + for_each_possible_cpu(cpu) { + struct rt6_info **ppcpu_rt; + struct rt6_info *pcpu_rt; + + ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu); + pcpu_rt = *ppcpu_rt; + if (pcpu_rt) { + dst_free(&pcpu_rt->dst); + *ppcpu_rt = NULL; + } + } +} + static void rt6_release(struct rt6_info *rt) { - if (atomic_dec_and_test(&rt->rt6i_ref)) + if (atomic_dec_and_test(&rt->rt6i_ref)) { + rt6_free_pcpu(rt); dst_free(&rt->dst); + } } static void fib6_link_table(struct net *net, struct fib6_table *tb) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 90c8eaa24565..1a1122a6bbf5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -165,11 +165,18 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) } } +static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt) +{ + return dst_metrics_write_ptr(rt->dst.from); +} + static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) { struct rt6_info *rt = (struct rt6_info *)dst; - if (rt->rt6i_flags & RTF_CACHE) + if (rt->rt6i_flags & RTF_PCPU) + return rt6_pcpu_cow_metrics(rt); + else if (rt->rt6i_flags & RTF_CACHE) return NULL; else return dst_cow_metrics_generic(dst, old); @@ -309,10 +316,10 @@ static const struct rt6_info ip6_blk_hole_entry_template = { #endif /* allocate dst with ip6_dst_ops */ -static inline struct rt6_info *ip6_dst_alloc(struct net *net, - struct net_device *dev, - int flags, - struct fib6_table *table) +static struct rt6_info *__ip6_dst_alloc(struct net *net, + struct net_device *dev, + int flags, + struct fib6_table *table) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0, DST_OBSOLETE_FORCE_CHK, flags); @@ -327,6 +334,34 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, return rt; } +static struct rt6_info *ip6_dst_alloc(struct net *net, + struct net_device *dev, + int flags, + struct fib6_table *table) +{ + struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags, table); + + if (rt) { + rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); + if (rt->rt6i_pcpu) { + int cpu; + + for_each_possible_cpu(cpu) { + struct rt6_info **p; + + p = per_cpu_ptr(rt->rt6i_pcpu, cpu); + /* no one shares rt */ + *p = NULL; + } + } else { + dst_destroy((struct dst_entry *)rt); + return NULL; + } + } + + return rt; +} + static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; @@ -335,6 +370,9 @@ static void ip6_dst_destroy(struct dst_entry *dst) dst_destroy_metrics_generic(dst); + if (rt->rt6i_pcpu) + free_percpu(rt->rt6i_pcpu); + rt6_uncached_list_del(rt); idev = rt->rt6i_idev; @@ -912,11 +950,11 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, * Clone the route. */ - if (ort->rt6i_flags & RTF_CACHE) + if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) ort = (struct rt6_info *)ort->dst.from; - rt = ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, - 0, ort->rt6i_table); + rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, + 0, ort->rt6i_table); if (!rt) return NULL; @@ -943,6 +981,54 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, return rt; } +static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) +{ + struct rt6_info *pcpu_rt; + + pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev), + rt->dst.dev, rt->dst.flags, + rt->rt6i_table); + + if (!pcpu_rt) + return NULL; + ip6_rt_copy_init(pcpu_rt, rt); + pcpu_rt->rt6i_protocol = rt->rt6i_protocol; + pcpu_rt->rt6i_flags |= RTF_PCPU; + return pcpu_rt; +} + +/* It should be called with read_lock_bh(&tb6_lock) acquired */ +static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) +{ + struct rt6_info *pcpu_rt, *prev, **p; + + p = this_cpu_ptr(rt->rt6i_pcpu); + pcpu_rt = *p; + + if (pcpu_rt) + goto done; + + pcpu_rt = ip6_rt_pcpu_alloc(rt); + if (!pcpu_rt) { + struct net *net = dev_net(rt->dst.dev); + + pcpu_rt = net->ipv6.ip6_null_entry; + goto done; + } + + prev = cmpxchg(p, NULL, pcpu_rt); + if (prev) { + /* If someone did it before us, return prev instead */ + dst_destroy(&pcpu_rt->dst); + pcpu_rt = prev; + } + +done: + dst_hold(&pcpu_rt->dst); + rt6_dst_from_metrics_check(pcpu_rt); + return pcpu_rt; +} + static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, int flags) { @@ -975,11 +1061,13 @@ redo_rt6_select: } } - dst_use(&rt->dst, jiffies); - read_unlock_bh(&table->tb6_lock); if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) { - goto done; + dst_use(&rt->dst, jiffies); + read_unlock_bh(&table->tb6_lock); + + rt6_dst_from_metrics_check(rt); + return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && !(rt->rt6i_flags & RTF_GATEWAY))) { /* Create a RTF_CACHE clone which will not be @@ -990,6 +1078,9 @@ redo_rt6_select: struct rt6_info *uncached_rt; + dst_use(&rt->dst, jiffies); + read_unlock_bh(&table->tb6_lock); + uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); dst_release(&rt->dst); @@ -997,13 +1088,22 @@ redo_rt6_select: rt6_uncached_list_add(uncached_rt); else uncached_rt = net->ipv6.ip6_null_entry; + dst_hold(&uncached_rt->dst); return uncached_rt; - } -done: - rt6_dst_from_metrics_check(rt); - return rt; + } else { + /* Get a percpu copy */ + + struct rt6_info *pcpu_rt; + + rt->dst.lastuse = jiffies; + rt->dst.__use++; + pcpu_rt = rt6_get_pcpu_route(rt); + read_unlock_bh(&table->tb6_lock); + + return pcpu_rt; + } } static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, @@ -1147,7 +1247,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt6_dst_from_metrics_check(rt); - if (unlikely(dst->flags & DST_NOCACHE)) + if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE)) return rt6_dst_from_check(rt, cookie); else return rt6_check(rt, cookie); -- cgit v1.2.3 From 7f1598678d4c05e3e085bf780a5ab3119637ac3c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 May 2015 16:02:21 -0700 Subject: ipv6: ipv6_select_ident() returns a __be32 ipv6_select_ident() returns a 32bit value in network order. Fixes: 286c2349f666 ("ipv6: Clean up ipv6_select_ident() and ip6_fragment()") Signed-off-by: Eric Dumazet Reported-by: kbuild test robot Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/net/ipv6.h | 6 +++--- net/ipv6/output_core.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index b950a2000b7f..35d485c78080 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -671,9 +671,9 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } -u32 ipv6_select_ident(struct net *net, - const struct in6_addr *daddr, - const struct in6_addr *saddr); +__be32 ipv6_select_ident(struct net *net, + const struct in6_addr *daddr, + const struct in6_addr *saddr); void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb); int ip6_dst_hoplimit(struct dst_entry *dst); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 055e85cb7b65..21678acd4521 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -61,9 +61,9 @@ void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); -u32 ipv6_select_ident(struct net *net, - const struct in6_addr *daddr, - const struct in6_addr *saddr) +__be32 ipv6_select_ident(struct net *net, + const struct in6_addr *daddr, + const struct in6_addr *saddr) { static u32 ip6_idents_hashrnd __read_mostly; u32 id; -- cgit v1.2.3 From cacce073bfd2b4091fbc58e906e7a9a21f538ff6 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Thu, 14 May 2015 23:05:49 +0200 Subject: bcma: add module_bcma_driver() This makes it possible to save some lines of code in drivers with an simple bcma driver registration. Signed-off-by: Hauke Mehrtens Signed-off-by: Kalle Valo --- include/linux/bcma/bcma.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index e34f906647d3..2ff4a9961e1d 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -305,6 +305,15 @@ int __bcma_driver_register(struct bcma_driver *drv, struct module *owner); extern void bcma_driver_unregister(struct bcma_driver *drv); +/* module_bcma_driver() - Helper macro for drivers that don't do + * anything special in module init/exit. This eliminates a lot of + * boilerplate. Each module may only use this macro once, and + * calling it replaces module_init() and module_exit() + */ +#define module_bcma_driver(__bcma_driver) \ + module_driver(__bcma_driver, bcma_driver_register, \ + bcma_driver_unregister) + /* Set a fallback SPROM. * See kdoc at the function definition for complete documentation. */ extern int bcma_arch_register_fallback_sprom( -- cgit v1.2.3 From 80279fb7ba5b71981a60988b0307afa43f78f6b1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 22 May 2015 16:22:20 +0200 Subject: cfg80211: properly send NL80211_ATTR_DISCONNECTED_BY_AP in disconnect When we disconnect from the AP, drivers call cfg80211_disconnect(). This doesn't know whether the disconnection was initiated locally or by the AP though, which can cause problems with the supplicant, for example with WPS. This issue obviously doesn't show up with any mac80211 based driver since mac80211 doesn't call this function. Fix this by requiring drivers to indicate whether the disconnect is locally generated or not. I've tried to update the drivers, but may not have gotten the values correct, and some drivers may currently not be able to report correct values. In case of doubt I left it at false, which is the current behaviour. For libertas, make adjustments as indicated by Dan Williams. Reported-by: Matthieu Mauger Tested-by: Matthieu Mauger Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 4 ++-- drivers/net/wireless/ath/wil6210/main.c | 2 +- drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c | 4 ++-- drivers/net/wireless/libertas/cfg.c | 13 ++++++------- drivers/net/wireless/libertas/cfg.h | 3 ++- drivers/net/wireless/libertas/cmd.h | 3 ++- drivers/net/wireless/libertas/cmdresp.c | 13 ++++++++----- drivers/net/wireless/mwifiex/join.c | 2 +- drivers/net/wireless/mwifiex/sta_event.c | 2 +- drivers/net/wireless/rndis_wlan.c | 2 +- drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c | 2 +- drivers/staging/wlan-ng/cfg80211.c | 2 +- include/net/cfg80211.h | 4 +++- net/wireless/core.h | 1 + net/wireless/sme.c | 4 +++- net/wireless/util.c | 3 ++- 16 files changed, 37 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index cce4625a53ad..a511ef3614b9 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -889,7 +889,7 @@ void ath6kl_cfg80211_disconnect_event(struct ath6kl_vif *vif, u8 reason, GFP_KERNEL); } else if (vif->sme_state == SME_CONNECTED) { cfg80211_disconnected(vif->ndev, proto_reason, - NULL, 0, GFP_KERNEL); + NULL, 0, false, GFP_KERNEL); } vif->sme_state = SME_DISCONNECTED; @@ -3467,7 +3467,7 @@ void ath6kl_cfg80211_stop(struct ath6kl_vif *vif) GFP_KERNEL); break; case SME_CONNECTED: - cfg80211_disconnected(vif->ndev, 0, NULL, 0, GFP_KERNEL); + cfg80211_disconnected(vif->ndev, 0, NULL, 0, true, GFP_KERNEL); break; } diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index c2a238426425..38b953e108a7 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -224,7 +224,7 @@ static void _wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid, if (test_bit(wil_status_fwconnected, wil->status)) { clear_bit(wil_status_fwconnected, wil->status); cfg80211_disconnected(ndev, reason_code, - NULL, 0, GFP_KERNEL); + NULL, 0, false, GFP_KERNEL); } else if (test_bit(wil_status_fwconnecting, wil->status)) { cfg80211_connect_result(ndev, bssid, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, diff --git a/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c index 8a15ebbce4a3..2e4e42245b8f 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c @@ -1262,7 +1262,7 @@ static void brcmf_link_down(struct brcmf_cfg80211_vif *vif, u16 reason) } clear_bit(BRCMF_VIF_STATUS_CONNECTED, &vif->sme_state); cfg80211_disconnected(vif->wdev.netdev, reason, NULL, 0, - GFP_KERNEL); + true, GFP_KERNEL); } clear_bit(BRCMF_VIF_STATUS_CONNECTING, &vif->sme_state); @@ -1928,7 +1928,7 @@ brcmf_cfg80211_disconnect(struct wiphy *wiphy, struct net_device *ndev, clear_bit(BRCMF_VIF_STATUS_CONNECTED, &ifp->vif->sme_state); clear_bit(BRCMF_VIF_STATUS_CONNECTING, &ifp->vif->sme_state); - cfg80211_disconnected(ndev, reason_code, NULL, 0, GFP_KERNEL); + cfg80211_disconnected(ndev, reason_code, NULL, 0, true, GFP_KERNEL); memcpy(&scbval.ea, &profile->bssid, ETH_ALEN); scbval.val = cpu_to_le32(reason_code); diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c index 1a4d558022d8..8317afd065b4 100644 --- a/drivers/net/wireless/libertas/cfg.c +++ b/drivers/net/wireless/libertas/cfg.c @@ -835,14 +835,13 @@ static int lbs_cfg_scan(struct wiphy *wiphy, * Events */ -void lbs_send_disconnect_notification(struct lbs_private *priv) +void lbs_send_disconnect_notification(struct lbs_private *priv, + bool locally_generated) { lbs_deb_enter(LBS_DEB_CFG80211); - cfg80211_disconnected(priv->dev, - 0, - NULL, 0, - GFP_KERNEL); + cfg80211_disconnected(priv->dev, 0, NULL, 0, locally_generated, + GFP_KERNEL); lbs_deb_leave(LBS_DEB_CFG80211); } @@ -1458,7 +1457,7 @@ int lbs_disconnect(struct lbs_private *priv, u16 reason) cfg80211_disconnected(priv->dev, reason, - NULL, 0, + NULL, 0, true, GFP_KERNEL); priv->connect_status = LBS_DISCONNECTED; @@ -2031,7 +2030,7 @@ static int lbs_leave_ibss(struct wiphy *wiphy, struct net_device *dev) ret = lbs_cmd_with_response(priv, CMD_802_11_AD_HOC_STOP, &cmd); /* TODO: consider doing this at MACREG_INT_CODE_ADHOC_BCN_LOST time */ - lbs_mac_event_disconnected(priv); + lbs_mac_event_disconnected(priv, true); lbs_deb_leave_args(LBS_DEB_CFG80211, "ret %d", ret); return ret; diff --git a/drivers/net/wireless/libertas/cfg.h b/drivers/net/wireless/libertas/cfg.h index 10995f59fe34..acccc2922401 100644 --- a/drivers/net/wireless/libertas/cfg.h +++ b/drivers/net/wireless/libertas/cfg.h @@ -10,7 +10,8 @@ struct wireless_dev *lbs_cfg_alloc(struct device *dev); int lbs_cfg_register(struct lbs_private *priv); void lbs_cfg_free(struct lbs_private *priv); -void lbs_send_disconnect_notification(struct lbs_private *priv); +void lbs_send_disconnect_notification(struct lbs_private *priv, + bool locally_generated); void lbs_send_mic_failureevent(struct lbs_private *priv, u32 event); void lbs_scan_done(struct lbs_private *priv); diff --git a/drivers/net/wireless/libertas/cmd.h b/drivers/net/wireless/libertas/cmd.h index 4279e8ab95f2..0c5444b02c64 100644 --- a/drivers/net/wireless/libertas/cmd.h +++ b/drivers/net/wireless/libertas/cmd.h @@ -68,7 +68,8 @@ int lbs_process_command_response(struct lbs_private *priv, u8 *data, u32 len); /* From cmdresp.c */ -void lbs_mac_event_disconnected(struct lbs_private *priv); +void lbs_mac_event_disconnected(struct lbs_private *priv, + bool locally_generated); diff --git a/drivers/net/wireless/libertas/cmdresp.c b/drivers/net/wireless/libertas/cmdresp.c index 65f18f1e869c..e5442e8956f7 100644 --- a/drivers/net/wireless/libertas/cmdresp.c +++ b/drivers/net/wireless/libertas/cmdresp.c @@ -19,10 +19,13 @@ * reset link state etc. * * @priv: A pointer to struct lbs_private structure + * @locally_generated: indicates disconnect was requested locally + * (usually by userspace) * * returns: n/a */ -void lbs_mac_event_disconnected(struct lbs_private *priv) +void lbs_mac_event_disconnected(struct lbs_private *priv, + bool locally_generated) { if (priv->connect_status != LBS_CONNECTED) return; @@ -36,7 +39,7 @@ void lbs_mac_event_disconnected(struct lbs_private *priv) msleep_interruptible(1000); if (priv->wdev->iftype == NL80211_IFTYPE_STATION) - lbs_send_disconnect_notification(priv); + lbs_send_disconnect_notification(priv, locally_generated); /* report disconnect to upper layer */ netif_stop_queue(priv->dev); @@ -229,17 +232,17 @@ int lbs_process_event(struct lbs_private *priv, u32 event) case MACREG_INT_CODE_DEAUTHENTICATED: lbs_deb_cmd("EVENT: deauthenticated\n"); - lbs_mac_event_disconnected(priv); + lbs_mac_event_disconnected(priv, false); break; case MACREG_INT_CODE_DISASSOCIATED: lbs_deb_cmd("EVENT: disassociated\n"); - lbs_mac_event_disconnected(priv); + lbs_mac_event_disconnected(priv, false); break; case MACREG_INT_CODE_LINK_LOST_NO_SCAN: lbs_deb_cmd("EVENT: link lost\n"); - lbs_mac_event_disconnected(priv); + lbs_mac_event_disconnected(priv, true); break; case MACREG_INT_CODE_PS_SLEEP: diff --git a/drivers/net/wireless/mwifiex/join.c b/drivers/net/wireless/mwifiex/join.c index 411a6c2f4aca..080ec3422db9 100644 --- a/drivers/net/wireless/mwifiex/join.c +++ b/drivers/net/wireless/mwifiex/join.c @@ -1421,7 +1421,7 @@ int mwifiex_deauthenticate(struct mwifiex_private *priv, u8 *mac) ret = mwifiex_deauthenticate_infra(priv, mac); if (ret) cfg80211_disconnected(priv->netdev, 0, NULL, 0, - GFP_KERNEL); + true, GFP_KERNEL); break; case NL80211_IFTYPE_ADHOC: return mwifiex_send_cmd(priv, HostCmd_CMD_802_11_AD_HOC_STOP, diff --git a/drivers/net/wireless/mwifiex/sta_event.c b/drivers/net/wireless/mwifiex/sta_event.c index 0dc7a1d3993d..c9064b88d6a4 100644 --- a/drivers/net/wireless/mwifiex/sta_event.c +++ b/drivers/net/wireless/mwifiex/sta_event.c @@ -133,7 +133,7 @@ mwifiex_reset_connect_state(struct mwifiex_private *priv, u16 reason_code) if (priv->bss_mode == NL80211_IFTYPE_STATION || priv->bss_mode == NL80211_IFTYPE_P2P_CLIENT) { cfg80211_disconnected(priv->netdev, reason_code, NULL, 0, - GFP_KERNEL); + false, GFP_KERNEL); } eth_zero_addr(priv->cfg_bssid); diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index d72ff8e7125d..43db6976102f 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -2861,7 +2861,7 @@ static void rndis_wlan_do_link_down_work(struct usbnet *usbdev) deauthenticate(usbdev); - cfg80211_disconnected(usbdev->net, 0, NULL, 0, GFP_KERNEL); + cfg80211_disconnected(usbdev->net, 0, NULL, 0, true, GFP_KERNEL); } netif_carrier_off(usbdev->net); diff --git a/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c index bc95ce89af06..5ab2f6978209 100644 --- a/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c @@ -379,7 +379,7 @@ void rtw_cfg80211_indicate_disconnect(struct rtw_adapter *padapter) GFP_ATOMIC); } else { cfg80211_disconnected(padapter->pnetdev, 0, NULL, - 0, GFP_ATOMIC); + 0, false, GFP_ATOMIC); } } } diff --git a/drivers/staging/wlan-ng/cfg80211.c b/drivers/staging/wlan-ng/cfg80211.c index 7c87aecf4744..342e2b30c48f 100644 --- a/drivers/staging/wlan-ng/cfg80211.c +++ b/drivers/staging/wlan-ng/cfg80211.c @@ -722,7 +722,7 @@ void prism2_connect_result(wlandevice_t *wlandev, u8 failed) void prism2_disconnected(wlandevice_t *wlandev) { cfg80211_disconnected(wlandev->netdev, 0, NULL, - 0, GFP_KERNEL); + 0, false, GFP_KERNEL); } void prism2_roamed(wlandevice_t *wlandev) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d63ecec73090..a741678f24a2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4575,13 +4575,15 @@ void cfg80211_roamed_bss(struct net_device *dev, struct cfg80211_bss *bss, * @ie: information elements of the deauth/disassoc frame (may be %NULL) * @ie_len: length of IEs * @reason: reason code for the disconnection, set it to 0 if unknown + * @locally_generated: disconnection was requested locally * @gfp: allocation flags * * After it calls this function, the driver should enter an idle state * and not try to connect to any AP any more. */ void cfg80211_disconnected(struct net_device *dev, u16 reason, - const u8 *ie, size_t ie_len, gfp_t gfp); + const u8 *ie, size_t ie_len, + bool locally_generated, gfp_t gfp); /** * cfg80211_ready_on_channel - notification of remain_on_channel start diff --git a/net/wireless/core.h b/net/wireless/core.h index 801cd49c5a0c..311eef26bf88 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -222,6 +222,7 @@ struct cfg80211_event { const u8 *ie; size_t ie_len; u16 reason; + bool locally_generated; } dc; struct { u8 bssid[ETH_ALEN]; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index d11454f87bac..8020b5b094d4 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -938,7 +938,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, } void cfg80211_disconnected(struct net_device *dev, u16 reason, - const u8 *ie, size_t ie_len, gfp_t gfp) + const u8 *ie, size_t ie_len, + bool locally_generated, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); @@ -954,6 +955,7 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason, ev->dc.ie_len = ie_len; memcpy((void *)ev->dc.ie, ie, ie_len); ev->dc.reason = reason; + ev->dc.locally_generated = locally_generated; spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); diff --git a/net/wireless/util.c b/net/wireless/util.c index 70051ab52f4f..4cb34557b873 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -887,7 +887,8 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) case EVENT_DISCONNECTED: __cfg80211_disconnected(wdev->netdev, ev->dc.ie, ev->dc.ie_len, - ev->dc.reason, true); + ev->dc.reason, + !ev->dc.locally_generated); break; case EVENT_IBSS_JOINED: __cfg80211_ibss_joined(wdev->netdev, ev->ij.bssid, -- cgit v1.2.3 From ebddf1a8d78aa3436353fae75c4396e50cb2d6cf Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 26 May 2015 18:41:20 +0200 Subject: netfilter: nf_tables: allow to bind table to net_device This patch adds the internal NFT_AF_NEEDS_DEV flag to indicate that you must attach this table to a net_device. This change is required by the follow up patch that introduces the new netdev table. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 8 ++++++ include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 46 ++++++++++++++++++++++++++++---- 3 files changed, 51 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e6bcf55dcf20..3d6f48ca40a7 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -819,6 +819,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) * @name: name of the table + * @dev: this table is bound to this device (if any) */ struct nft_table { struct list_head list; @@ -828,6 +829,11 @@ struct nft_table { u32 use; u16 flags; char name[NFT_TABLE_MAXNAMELEN]; + struct net_device *dev; +}; + +enum nft_af_flags { + NFT_AF_NEEDS_DEV = (1 << 0), }; /** @@ -838,6 +844,7 @@ struct nft_table { * @nhooks: number of hooks in this family * @owner: module owner * @tables: used internally + * @flags: family flags * @nops: number of hook ops in this family * @hook_ops_init: initialization function for chain hook ops * @hooks: hookfn overrides for packet validation @@ -848,6 +855,7 @@ struct nft_af_info { unsigned int nhooks; struct module *owner; struct list_head tables; + u32 flags; unsigned int nops; void (*hook_ops_init)(struct nf_hook_ops *, unsigned int); diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 5fa1cd04762e..89a671e0f5e7 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -146,12 +146,14 @@ enum nft_table_flags { * @NFTA_TABLE_NAME: name of the table (NLA_STRING) * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32) * @NFTA_TABLE_USE: number of chains in this table (NLA_U32) + * @NFTA_TABLE_DEV: net device name (NLA_STRING) */ enum nft_table_attributes { NFTA_TABLE_UNSPEC, NFTA_TABLE_NAME, NFTA_TABLE_FLAGS, NFTA_TABLE_USE, + NFTA_TABLE_DEV, __NFTA_TABLE_MAX }; #define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ad9d11fb29fd..2fd4e99dd074 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -399,6 +399,8 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, + [NFTA_TABLE_DEV] = { .type = NLA_STRING, + .len = IFNAMSIZ - 1 }, }; static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, @@ -423,6 +425,10 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use))) goto nla_put_failure; + if (table->dev && + nla_put_string(skb, NFTA_TABLE_DEV, table->dev->name)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0; @@ -608,6 +614,11 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if (flags == ctx->table->flags) return 0; + if ((ctx->afi->flags & NFT_AF_NEEDS_DEV) && + ctx->nla[NFTA_TABLE_DEV] && + nla_strcmp(ctx->nla[NFTA_TABLE_DEV], ctx->table->dev->name)) + return -EOPNOTSUPP; + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, sizeof(struct nft_trans_table)); if (trans == NULL) @@ -645,6 +656,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, struct nft_table *table; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct net_device *dev = NULL; u32 flags = 0; struct nft_ctx ctx; int err; @@ -679,30 +691,50 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, return -EINVAL; } + if (afi->flags & NFT_AF_NEEDS_DEV) { + char ifname[IFNAMSIZ]; + + if (!nla[NFTA_TABLE_DEV]) + return -EOPNOTSUPP; + + nla_strlcpy(ifname, nla[NFTA_TABLE_DEV], IFNAMSIZ); + dev = dev_get_by_name(net, ifname); + if (!dev) + return -ENOENT; + } else if (nla[NFTA_TABLE_DEV]) { + return -EOPNOTSUPP; + } + + err = -EAFNOSUPPORT; if (!try_module_get(afi->owner)) - return -EAFNOSUPPORT; + goto err1; err = -ENOMEM; table = kzalloc(sizeof(*table), GFP_KERNEL); if (table == NULL) - goto err1; + goto err2; nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN); INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); table->flags = flags; + table->dev = dev; nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); if (err < 0) - goto err2; + goto err3; list_add_tail_rcu(&table->list, &afi->tables); return 0; -err2: +err3: kfree(table); -err1: +err2: module_put(afi->owner); +err1: + if (dev != NULL) + dev_put(dev); + return err; } @@ -806,6 +838,9 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx) { BUG_ON(ctx->table->use > 0); + if (ctx->table->dev) + dev_put(ctx->table->dev); + kfree(ctx->table); module_put(ctx->afi->owner); } @@ -1361,6 +1396,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, ops->priority = priority; ops->priv = chain; ops->hook = afi->hooks[ops->hooknum]; + ops->dev = table->dev; if (hookfn) ops->hook = hookfn; if (afi->hook_ops_init) -- cgit v1.2.3 From ed6c4136f1571bd6ab362afc3410905a8a69ca42 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 26 May 2015 18:41:40 +0200 Subject: netfilter: nf_tables: add netdev table to filter from ingress This allows us to create netdev tables that contain ingress chains. Use skb_header_pointer() as we may see shared sk_buffs at this stage. This change provides access to the existing nf_tables features from the ingress hook. Signed-off-by: Pablo Neira Ayuso --- include/net/netns/nftables.h | 1 + net/netfilter/Kconfig | 5 ++ net/netfilter/Makefile | 1 + net/netfilter/nf_tables_netdev.c | 183 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 190 insertions(+) create mode 100644 net/netfilter/nf_tables_netdev.c (limited to 'include') diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index eee608b12cc9..c80781146019 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -13,6 +13,7 @@ struct netns_nftables { struct nft_af_info *inet; struct nft_af_info *arp; struct nft_af_info *bridge; + struct nft_af_info *netdev; unsigned int base_seq; u8 gencursor; }; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 9a89e7c67d78..bd5aaebaa9a2 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -456,6 +456,11 @@ config NF_TABLES_INET help This option enables support for a mixed IPv4/IPv6 "inet" table. +config NF_TABLES_NETDEV + tristate "Netfilter nf_tables netdev tables support" + help + This option enables support for the "netdev" table. + config NFT_EXTHDR tristate "Netfilter nf_tables IPv6 exthdr module" help diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index a87d8b8ec730..70d026d46fe7 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -75,6 +75,7 @@ nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o obj-$(CONFIG_NF_TABLES) += nf_tables.o obj-$(CONFIG_NF_TABLES_INET) += nf_tables_inet.o +obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o obj-$(CONFIG_NFT_COMPAT) += nft_compat.o obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o obj-$(CONFIG_NFT_META) += nft_meta.o diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c new file mode 100644 index 000000000000..04cb17057f46 --- /dev/null +++ b/net/netfilter/nf_tables_netdev.c @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2015 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +static inline void +nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt, + const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct iphdr *iph, _iph; + u32 len, thoff; + + nft_set_pktinfo(pkt, ops, skb, state); + + iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph), + &_iph); + if (!iph) + return; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + return; + + len = ntohs(iph->tot_len); + thoff = iph->ihl * 4; + if (skb->len < len) + return; + else if (len < thoff) + return; + + pkt->tprot = iph->protocol; + pkt->xt.thoff = thoff; + pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET; +} + +static inline void +__nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, + const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6hdr *ip6h, _ip6h; + unsigned int thoff = 0; + unsigned short frag_off; + int protohdr; + u32 pkt_len; + + ip6h = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*ip6h), + &_ip6h); + if (!ip6h) + return; + + if (ip6h->version != 6) + return; + + pkt_len = ntohs(ip6h->payload_len); + if (pkt_len + sizeof(*ip6h) > skb->len) + return; + + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); + if (protohdr < 0) + return; + + pkt->tprot = protohdr; + pkt->xt.thoff = thoff; + pkt->xt.fragoff = frag_off; +#endif +} + +static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, + const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + nft_set_pktinfo(pkt, ops, skb, state); + __nft_netdev_set_pktinfo_ipv6(pkt, ops, skb, state); +} + +static unsigned int +nft_do_chain_netdev(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nft_pktinfo pkt; + + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + nft_netdev_set_pktinfo_ipv4(&pkt, ops, skb, state); + break; + case htons(ETH_P_IPV6): + nft_netdev_set_pktinfo_ipv6(&pkt, ops, skb, state); + break; + default: + nft_set_pktinfo(&pkt, ops, skb, state); + break; + } + + return nft_do_chain(&pkt, ops); +} + +static struct nft_af_info nft_af_netdev __read_mostly = { + .family = NFPROTO_NETDEV, + .nhooks = NF_NETDEV_NUMHOOKS, + .owner = THIS_MODULE, + .flags = NFT_AF_NEEDS_DEV, + .nops = 1, + .hooks = { + [NF_NETDEV_INGRESS] = nft_do_chain_netdev, + }, +}; + +static int nf_tables_netdev_init_net(struct net *net) +{ + net->nft.netdev = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.netdev == NULL) + return -ENOMEM; + + memcpy(net->nft.netdev, &nft_af_netdev, sizeof(nft_af_netdev)); + + if (nft_register_afinfo(net, net->nft.netdev) < 0) + goto err; + + return 0; +err: + kfree(net->nft.netdev); + return -ENOMEM; +} + +static void nf_tables_netdev_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.netdev); + kfree(net->nft.netdev); +} + +static struct pernet_operations nf_tables_netdev_net_ops = { + .init = nf_tables_netdev_init_net, + .exit = nf_tables_netdev_exit_net, +}; + +static const struct nf_chain_type nft_filter_chain_netdev = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_NETDEV, + .owner = THIS_MODULE, + .hook_mask = (1 << NF_NETDEV_INGRESS), +}; + +static int __init nf_tables_netdev_init(void) +{ + int ret; + + nft_register_chain_type(&nft_filter_chain_netdev); + ret = register_pernet_subsys(&nf_tables_netdev_net_ops); + if (ret < 0) + nft_unregister_chain_type(&nft_filter_chain_netdev); + + return ret; +} + +static void __exit nf_tables_netdev_exit(void) +{ + unregister_pernet_subsys(&nf_tables_netdev_net_ops); + nft_unregister_chain_type(&nft_filter_chain_netdev); +} + +module_init(nf_tables_netdev_init); +module_exit(nf_tables_netdev_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_ALIAS_NFT_FAMILY(5); /* NFPROTO_NETDEV */ -- cgit v1.2.3 From d0997b44c9081071e39417b188f7db6d1ea37341 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Mon, 25 May 2015 15:38:33 +0300 Subject: ieee802154: Remove ieee802154_reduced_mlme_ops references. As there doesn't seem to be a definition of it or any users of it. Signed-off-by: Lennert Buytenhek Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/ieee802154_netdev.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 84a72a1382a8..0a87975128ec 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -430,10 +430,4 @@ ieee802154_mlme_ops(const struct net_device *dev) return dev->ml_priv; } -static inline struct ieee802154_reduced_mlme_ops * -ieee802154_reduced_mlme_ops(const struct net_device *dev) -{ - return dev->ml_priv; -} - #endif -- cgit v1.2.3 From 095dc8e0c3686d586a01a50abc3e1bb9ac633054 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 May 2015 07:55:34 -0700 Subject: tcp: fix/cleanup inet_ehash_locks_alloc() If tcp ehash table is constrained to a very small number of buckets (eg boot parameter thash_entries=128), then we can crash if spinlock array has more entries. While we are at it, un-inline inet_ehash_locks_alloc() and make following changes : - Budget 2 cache lines per cpu worth of 'spinlocks' - Try to kmalloc() the array to avoid extra TLB pressure. (Most servers at Google allocate 8192 bytes for this hash table) - Get rid of various #ifdef Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 47 +++---------------------------------------- net/ipv4/inet_hashtables.c | 31 ++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 774d24151d4a..b73c88a19dd4 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -24,7 +24,6 @@ #include #include #include -#include #include #include @@ -164,52 +163,12 @@ static inline spinlock_t *inet_ehash_lockp( return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask]; } -static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) -{ - unsigned int i, size = 256; -#if defined(CONFIG_PROVE_LOCKING) - unsigned int nr_pcpus = 2; -#else - unsigned int nr_pcpus = num_possible_cpus(); -#endif - if (nr_pcpus >= 4) - size = 512; - if (nr_pcpus >= 8) - size = 1024; - if (nr_pcpus >= 16) - size = 2048; - if (nr_pcpus >= 32) - size = 4096; - if (sizeof(spinlock_t) != 0) { -#ifdef CONFIG_NUMA - if (size * sizeof(spinlock_t) > PAGE_SIZE) - hashinfo->ehash_locks = vmalloc(size * sizeof(spinlock_t)); - else -#endif - hashinfo->ehash_locks = kmalloc(size * sizeof(spinlock_t), - GFP_KERNEL); - if (!hashinfo->ehash_locks) - return ENOMEM; - for (i = 0; i < size; i++) - spin_lock_init(&hashinfo->ehash_locks[i]); - } - hashinfo->ehash_locks_mask = size - 1; - return 0; -} +int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo); static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) { - if (hashinfo->ehash_locks) { -#ifdef CONFIG_NUMA - unsigned int size = (hashinfo->ehash_locks_mask + 1) * - sizeof(spinlock_t); - if (size > PAGE_SIZE) - vfree(hashinfo->ehash_locks); - else -#endif - kfree(hashinfo->ehash_locks); - hashinfo->ehash_locks = NULL; - } + kvfree(hashinfo->ehash_locks); + hashinfo->ehash_locks = NULL; } struct inet_bind_bucket * diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 3766bddb3e8a..185efef0f125 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -609,3 +610,33 @@ void inet_hashinfo_init(struct inet_hashinfo *h) } } EXPORT_SYMBOL_GPL(inet_hashinfo_init); + +int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) +{ + unsigned int i, nblocks = 1; + + if (sizeof(spinlock_t) != 0) { + /* allocate 2 cache lines or at least one spinlock per cpu */ + nblocks = max_t(unsigned int, + 2 * L1_CACHE_BYTES / sizeof(spinlock_t), + 1); + nblocks = roundup_pow_of_two(nblocks * num_possible_cpus()); + + /* no more locks than number of hash buckets */ + nblocks = min(nblocks, hashinfo->ehash_mask + 1); + + hashinfo->ehash_locks = kmalloc_array(nblocks, sizeof(spinlock_t), + GFP_KERNEL | __GFP_NOWARN); + if (!hashinfo->ehash_locks) + hashinfo->ehash_locks = vmalloc(nblocks * sizeof(spinlock_t)); + + if (!hashinfo->ehash_locks) + return -ENOMEM; + + for (i = 0; i < nblocks; i++) + spin_lock_init(&hashinfo->ehash_locks[i]); + } + hashinfo->ehash_locks_mask = nblocks - 1; + return 0; +} +EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc); -- cgit v1.2.3 From e463d88c36d42211aa72ed76d32fb8bf37820ef1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 26 May 2015 12:19:58 -0700 Subject: net: phy: Add phy_interface_is_rgmii helper RGMII interfaces come in 4 different flavors that the PHY library needs to care about: regular RGMII (no delays), RGMII with either RX or TX delay, and both. In order to avoid errors of checking only for one type of RGMII interface and miss the 3 others, introduce a convenience function which tests for all values. Suggested-by: David S. Miller Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 701c7a3946e0..a26c3f84b8dd 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -677,6 +677,17 @@ static inline bool phy_is_internal(struct phy_device *phydev) return phydev->is_internal; } +/** + * phy_interface_is_rgmii - Convenience function for testing if a PHY interface + * is RGMII (all variants) + * @phydev: the phy_device struct + */ +static inline bool phy_interface_is_rgmii(struct phy_device *phydev) +{ + return phydev->interface >= PHY_INTERFACE_MODE_RGMII && + phydev->interface <= PHY_INTERFACE_MODE_RGMII_TXID; +} + /** * phy_write_mmd - Convenience function for writing a register * on an MMD on a given PHY. -- cgit v1.2.3 From 0f999b09f5c1b135e840501840dbcd01fad66f79 Mon Sep 17 00:00:00 2001 From: Varka Bhadram Date: Wed, 27 May 2015 09:10:54 +0530 Subject: ieee802154: add set transmit power support This patch adds transmission power setting support for IEEE-802.15.4 devices via nl802154. Signed-off-by: Varka Bhadram Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 1 + net/ieee802154/nl802154.c | 30 ++++++++++++++++++++++++++++++ net/ieee802154/rdev-ops.h | 12 ++++++++++++ net/ieee802154/trace.h | 15 +++++++++++++++ net/mac802154/cfg.c | 19 +++++++++++++++++++ 5 files changed, 77 insertions(+) (limited to 'include') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 4de59aa96173..2e3bb012c2ff 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -44,6 +44,7 @@ struct cfg802154_ops { int (*set_channel)(struct wpan_phy *wpan_phy, u8 page, u8 channel); int (*set_cca_mode)(struct wpan_phy *wpan_phy, const struct wpan_phy_cca *cca); + int (*set_tx_power)(struct wpan_phy *wpan_phy, s32 power); int (*set_pan_id)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 pan_id); int (*set_short_addr)(struct wpan_phy *wpan_phy, diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 212a795b33c9..5c9d524ba02c 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -783,6 +783,28 @@ static int nl802154_set_cca_mode(struct sk_buff *skb, struct genl_info *info) return rdev_set_cca_mode(rdev, &cca); } +static int nl802154_set_tx_power(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + s32 power; + int i; + + if (!(rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER)) + return -EOPNOTSUPP; + + if (!info->attrs[NL802154_ATTR_TX_POWER]) + return -EINVAL; + + power = nla_get_s32(info->attrs[NL802154_ATTR_TX_POWER]); + + for (i = 0; i < rdev->wpan_phy.supported.tx_powers_size; i++) { + if (power == rdev->wpan_phy.supported.tx_powers[i]) + return rdev_set_tx_power(rdev, power); + } + + return -EINVAL; +} + static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; @@ -1093,6 +1115,14 @@ static const struct genl_ops nl802154_ops[] = { .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, }, + { + .cmd = NL802154_CMD_SET_TX_POWER, + .doit = nl802154_set_tx_power, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | + NL802154_FLAG_NEED_RTNL, + }, { .cmd = NL802154_CMD_SET_PAN_ID, .doit = nl802154_set_pan_id, diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h index 7b5a9dd94fe5..36456118a6ec 100644 --- a/net/ieee802154/rdev-ops.h +++ b/net/ieee802154/rdev-ops.h @@ -74,6 +74,18 @@ rdev_set_cca_mode(struct cfg802154_registered_device *rdev, return ret; } +static inline int +rdev_set_tx_power(struct cfg802154_registered_device *rdev, + s32 power) +{ + int ret; + + trace_802154_rdev_set_tx_power(&rdev->wpan_phy, power); + ret = rdev->ops->set_tx_power(&rdev->wpan_phy, power); + trace_802154_rdev_return_int(&rdev->wpan_phy, ret); + return ret; +} + static inline int rdev_set_pan_id(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, __le16 pan_id) diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h index 5ac25eb6ed17..1f1cecc420d7 100644 --- a/net/ieee802154/trace.h +++ b/net/ieee802154/trace.h @@ -93,6 +93,21 @@ TRACE_EVENT(802154_rdev_set_channel, __entry->page, __entry->channel) ); +TRACE_EVENT(802154_rdev_set_tx_power, + TP_PROTO(struct wpan_phy *wpan_phy, s32 power), + TP_ARGS(wpan_phy, power), + TP_STRUCT__entry( + WPAN_PHY_ENTRY + __field(s32, power) + ), + TP_fast_assign( + WPAN_PHY_ASSIGN; + __entry->power = power; + ), + TP_printk(WPAN_PHY_PR_FMT ", power: %d", WPAN_PHY_PR_ARG, + __entry->power) +); + TRACE_EVENT(802154_rdev_set_cca_mode, TP_PROTO(struct wpan_phy *wpan_phy, const struct wpan_phy_cca *cca), TP_ARGS(wpan_phy, cca), diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c index d5290ea49dca..ab12fa296eb3 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c @@ -105,6 +105,24 @@ ieee802154_set_cca_mode(struct wpan_phy *wpan_phy, return ret; } +static int +ieee802154_set_tx_power(struct wpan_phy *wpan_phy, s32 power) +{ + struct ieee802154_local *local = wpan_phy_priv(wpan_phy); + int ret; + + ASSERT_RTNL(); + + if (wpan_phy->transmit_power == power) + return 0; + + ret = drv_set_tx_power(local, power); + if (!ret) + wpan_phy->transmit_power = power; + + return ret; +} + static int ieee802154_set_pan_id(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 pan_id) @@ -195,6 +213,7 @@ const struct cfg802154_ops mac802154_config_ops = { .del_virtual_intf = ieee802154_del_iface, .set_channel = ieee802154_set_channel, .set_cca_mode = ieee802154_set_cca_mode, + .set_tx_power = ieee802154_set_tx_power, .set_pan_id = ieee802154_set_pan_id, .set_short_addr = ieee802154_set_short_addr, .set_backoff_exponent = ieee802154_set_backoff_exponent, -- cgit v1.2.3 From d6b915e29f4adea94bc02ba7675bb4f84e6a1abd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 22 May 2015 16:32:51 +0200 Subject: ip_fragment: don't forward defragmented DF packet We currently always send fragments without DF bit set. Thus, given following setup: mtu1500 - mtu1500:1400 - mtu1400:1280 - mtu1280 A R1 R2 B Where R1 and R2 run linux with netfilter defragmentation/conntrack enabled, then if Host A sent a fragmented packet _with_ DF set to B, R1 will respond with icmp too big error if one of these fragments exceeded 1400 bytes. However, if R1 receives fragment sizes 1200 and 100, it would forward the reassembled packet without refragmenting, i.e. R2 will send an icmp error in response to a packet that was never sent, citing mtu that the original sender never exceeded. The other minor issue is that a refragmentation on R1 will conceal the MTU of R2-B since refragmentation does not set DF bit on the fragments. This modifies ip_fragment so that we track largest fragment size seen both for DF and non-DF packets, and set frag_max_size to the largest value. If the DF fragment size is larger or equal to the non-df one, we will consider the packet a path mtu probe: We set DF bit on the reassembled skb and also tag it with a new IPCB flag to force refragmentation even if skb fits outdev mtu. We will also set DF bit on each fragment in this case. Joint work with Hannes Frederic Sowa. Reported-by: Jesse Gross Signed-off-by: Florian Westphal Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/inet_frag.h | 2 +- include/net/ip.h | 1 + net/ipv4/ip_fragment.c | 31 ++++++++++++++++++++++++++----- net/ipv4/ip_output.c | 12 ++++++++++-- 4 files changed, 38 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 8d1765577acc..e1300b3dd597 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -43,7 +43,7 @@ enum { * @len: total length of the original datagram * @meat: length of received fragments so far * @flags: fragment queue flags - * @max_size: (ipv4 only) maximum received fragment size with IP_DF set + * @max_size: maximum received fragment size * @net: namespace that this frag belongs to */ struct inet_frag_queue { diff --git a/include/net/ip.h b/include/net/ip.h index 7921a36b805c..9b976cf99122 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -45,6 +45,7 @@ struct inet_skb_parm { #define IPSKB_FRAG_COMPLETE BIT(3) #define IPSKB_REROUTED BIT(4) #define IPSKB_DOREDIRECT BIT(5) +#define IPSKB_FRAG_PMTU BIT(6) u16 frag_max_size; }; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 47fa64ee82b1..a50dc6d408d1 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -75,6 +75,7 @@ struct ipq { __be16 id; u8 protocol; u8 ecn; /* RFC3168 support */ + u16 max_df_size; /* largest frag with DF set seen */ int iif; unsigned int rid; struct inet_peer *peer; @@ -326,6 +327,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { struct sk_buff *prev, *next; struct net_device *dev; + unsigned int fragsize; int flags, offset; int ihl, end; int err = -ENOENT; @@ -481,9 +483,14 @@ found: if (offset == 0) qp->q.flags |= INET_FRAG_FIRST_IN; + fragsize = skb->len + ihl; + + if (fragsize > qp->q.max_size) + qp->q.max_size = fragsize; + if (ip_hdr(skb)->frag_off & htons(IP_DF) && - skb->len + ihl > qp->q.max_size) - qp->q.max_size = skb->len + ihl; + fragsize > qp->max_df_size) + qp->max_df_size = fragsize; if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && qp->q.meat == qp->q.len) { @@ -613,13 +620,27 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, head->next = NULL; head->dev = dev; head->tstamp = qp->q.stamp; - IPCB(head)->frag_max_size = qp->q.max_size; + IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size); iph = ip_hdr(head); - /* max_size != 0 implies at least one fragment had IP_DF set */ - iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0; iph->tot_len = htons(len); iph->tos |= ecn; + + /* When we set IP_DF on a refragmented skb we must also force a + * call to ip_fragment to avoid forwarding a DF-skb of size s while + * original sender only sent fragments of size f (where f < s). + * + * We only set DF/IPSKB_FRAG_PMTU if such DF fragment was the largest + * frag seen to avoid sending tiny DF-fragments in case skb was built + * from one very small df-fragment and one large non-df frag. + */ + if (qp->max_df_size == qp->q.max_size) { + IPCB(head)->flags |= IPSKB_FRAG_PMTU; + iph->frag_off = htons(IP_DF); + } else { + iph->frag_off = 0; + } + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); qp->q.fragments = NULL; qp->q.fragments_tail = NULL; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d6dd8ba04441..f5f5ef1cebd5 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -278,7 +278,7 @@ static int ip_finish_output(struct sock *sk, struct sk_buff *skb) if (skb_is_gso(skb)) return ip_finish_output_gso(sk, skb, mtu); - if (skb->len > mtu) + if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU)) return ip_fragment(sk, skb, mtu, ip_finish_output2); return ip_finish_output2(sk, skb); @@ -492,7 +492,10 @@ static int ip_fragment(struct sock *sk, struct sk_buff *skb, { struct iphdr *iph = ip_hdr(skb); - if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || + if ((iph->frag_off & htons(IP_DF)) == 0) + return ip_do_fragment(sk, skb, output); + + if (unlikely(!skb->ignore_df || (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size > mtu))) { struct rtable *rt = skb_rtable(skb); @@ -537,6 +540,8 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb, iph = ip_hdr(skb); mtu = ip_skb_dst_mtu(skb); + if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu) + mtu = IPCB(skb)->frag_max_size; /* * Setup starting values. @@ -732,6 +737,9 @@ slow_path: iph = ip_hdr(skb2); iph->frag_off = htons((offset >> 3)); + if (IPCB(skb)->flags & IPSKB_FRAG_PMTU) + iph->frag_off |= htons(IP_DF); + /* ANK: dirty, but effective trick. Upgrade options only if * the segment to be fragmented was THE FIRST (otherwise, * options are already fixed) and make it ONCE -- cgit v1.2.3 From b69644c1c72e179738dd5c7e52e99d8550189472 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 27 May 2015 13:42:10 +0200 Subject: nl802154: add support to set cca ed level This patch adds support for setting the current cca ed level value over nl802154. Signed-off-by: Alexander Aring Reviewed-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 1 + net/ieee802154/nl802154.c | 30 ++++++++++++++++++++++++++++++ net/ieee802154/rdev-ops.h | 11 +++++++++++ net/ieee802154/trace.h | 15 +++++++++++++++ net/mac802154/cfg.c | 19 +++++++++++++++++++ 5 files changed, 76 insertions(+) (limited to 'include') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 2e3bb012c2ff..290a9a69af07 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -44,6 +44,7 @@ struct cfg802154_ops { int (*set_channel)(struct wpan_phy *wpan_phy, u8 page, u8 channel); int (*set_cca_mode)(struct wpan_phy *wpan_phy, const struct wpan_phy_cca *cca); + int (*set_cca_ed_level)(struct wpan_phy *wpan_phy, s32 ed_level); int (*set_tx_power)(struct wpan_phy *wpan_phy, s32 power); int (*set_pan_id)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 pan_id); diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index a89d9d024126..7dbb1f4ce7df 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -790,6 +790,28 @@ static int nl802154_set_cca_mode(struct sk_buff *skb, struct genl_info *info) return rdev_set_cca_mode(rdev, &cca); } +static int nl802154_set_cca_ed_level(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + s32 ed_level; + int i; + + if (!(rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_ED_LEVEL)) + return -EOPNOTSUPP; + + if (!info->attrs[NL802154_ATTR_CCA_ED_LEVEL]) + return -EINVAL; + + ed_level = nla_get_s32(info->attrs[NL802154_ATTR_CCA_ED_LEVEL]); + + for (i = 0; i < rdev->wpan_phy.supported.cca_ed_levels_size; i++) { + if (ed_level == rdev->wpan_phy.supported.cca_ed_levels[i]) + return rdev_set_cca_ed_level(rdev, ed_level); + } + + return -EINVAL; +} + static int nl802154_set_tx_power(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; @@ -1122,6 +1144,14 @@ static const struct genl_ops nl802154_ops[] = { .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, }, + { + .cmd = NL802154_CMD_SET_CCA_ED_LEVEL, + .doit = nl802154_set_cca_ed_level, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | + NL802154_FLAG_NEED_RTNL, + }, { .cmd = NL802154_CMD_SET_TX_POWER, .doit = nl802154_set_tx_power, diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h index 36456118a6ec..b2155a123f6c 100644 --- a/net/ieee802154/rdev-ops.h +++ b/net/ieee802154/rdev-ops.h @@ -74,6 +74,17 @@ rdev_set_cca_mode(struct cfg802154_registered_device *rdev, return ret; } +static inline int +rdev_set_cca_ed_level(struct cfg802154_registered_device *rdev, s32 ed_level) +{ + int ret; + + trace_802154_rdev_set_cca_ed_level(&rdev->wpan_phy, ed_level); + ret = rdev->ops->set_cca_ed_level(&rdev->wpan_phy, ed_level); + trace_802154_rdev_return_int(&rdev->wpan_phy, ret); + return ret; +} + static inline int rdev_set_tx_power(struct cfg802154_registered_device *rdev, s32 power) diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h index ac790bb1b4fe..73eb7605c1eb 100644 --- a/net/ieee802154/trace.h +++ b/net/ieee802154/trace.h @@ -123,6 +123,21 @@ TRACE_EVENT(802154_rdev_set_cca_mode, WPAN_CCA_PR_ARG) ); +TRACE_EVENT(802154_rdev_set_cca_ed_level, + TP_PROTO(struct wpan_phy *wpan_phy, s32 ed_level), + TP_ARGS(wpan_phy, ed_level), + TP_STRUCT__entry( + WPAN_PHY_ENTRY + __field(s32, ed_level) + ), + TP_fast_assign( + WPAN_PHY_ASSIGN; + __entry->ed_level = ed_level; + ), + TP_printk(WPAN_PHY_PR_FMT ", ed_level: %d", WPAN_PHY_PR_ARG, + __entry->ed_level) +); + DECLARE_EVENT_CLASS(802154_le16_template, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 le16arg), diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c index ab12fa296eb3..317c4662e544 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c @@ -105,6 +105,24 @@ ieee802154_set_cca_mode(struct wpan_phy *wpan_phy, return ret; } +static int +ieee802154_set_cca_ed_level(struct wpan_phy *wpan_phy, s32 ed_level) +{ + struct ieee802154_local *local = wpan_phy_priv(wpan_phy); + int ret; + + ASSERT_RTNL(); + + if (wpan_phy->cca_ed_level == ed_level) + return 0; + + ret = drv_set_cca_ed_level(local, ed_level); + if (!ret) + wpan_phy->cca_ed_level = ed_level; + + return ret; +} + static int ieee802154_set_tx_power(struct wpan_phy *wpan_phy, s32 power) { @@ -213,6 +231,7 @@ const struct cfg802154_ops mac802154_config_ops = { .del_virtual_intf = ieee802154_del_iface, .set_channel = ieee802154_set_channel, .set_cca_mode = ieee802154_set_cca_mode, + .set_cca_ed_level = ieee802154_set_cca_ed_level, .set_tx_power = ieee802154_set_tx_power, .set_pan_id = ieee802154_set_pan_id, .set_short_addr = ieee802154_set_short_addr, -- cgit v1.2.3 From e5c4708b2b6fec0300db60ee3cf4b4ae96430a12 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 26 May 2015 19:20:14 -0700 Subject: pci: Add Cavium PCI vendor id This vendor id will be used by network (vNIC), USB (xHCI), SATA (AHCI), GPIO, I2C, MMC and maybe other drivers for ThunderX SoC. Acked-by: Bjorn Helgaas Signed-off-by: Sunil Goutham Signed-off-by: Aleksey Makarov Signed-off-by: David S. Miller --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2f7b9a40f627..2972c7f3aa1d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2329,6 +2329,8 @@ #define PCI_DEVICE_ID_ALTIMA_AC9100 0x03ea #define PCI_DEVICE_ID_ALTIMA_AC1003 0x03eb +#define PCI_VENDOR_ID_CAVIUM 0x177d + #define PCI_VENDOR_ID_BELKIN 0x1799 #define PCI_DEVICE_ID_BELKIN_F5D7010V7 0x701f -- cgit v1.2.3 From ed2dfd900992aa7b6b3d0abd8ec9a7e9d2c7f827 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 May 2015 11:34:37 -0700 Subject: tcp/dccp: warn user for preferred ip_local_port_range After commit 07f4c90062f8f ("tcp/dccp: try to not exhaust ip_local_port_range in connect()") it is advised to have an even number of ports described in /proc/sys/net/ipv4/ip_local_port_range This means start/end values should have a different parity. Let's warn sysadmins of this, so that they can update their settings if they want to. Suggested-by: David S. Miller Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + net/ipv4/sysctl_net_ipv4.c | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 6848b8bb2e63..c68926b4899c 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -19,6 +19,7 @@ struct sock; struct local_ports { seqlock_t lock; int range[2]; + bool warned; }; struct ping_group_range { diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e64892769607..0330ab2e2b63 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -45,7 +45,13 @@ static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; /* Update system visible IP port range */ static void set_local_port_range(struct net *net, int range[2]) { + bool same_parity = !((range[0] ^ range[1]) & 1); + write_seqlock(&net->ipv4.ip_local_ports.lock); + if (same_parity && !net->ipv4.ip_local_ports.warned) { + net->ipv4.ip_local_ports.warned = true; + pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n"); + } net->ipv4.ip_local_ports.range[0] = range[0]; net->ipv4.ip_local_ports.range[1] = range[1]; write_sequnlock(&net->ipv4.ip_local_ports.lock); -- cgit v1.2.3 From 8cf6f497de405ca9eb87ffb34d90699962d10125 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 26 May 2015 08:22:49 -0700 Subject: ethtool: Add helper routines to pass vf to rx_flow_spec The ring_cookie is 64 bits wide which is much larger than can be used for actual queue index values. So provide some helper routines to pack a VF index into the cookie. This is useful to steer packets to a VF ring without having to know the queue layout of the device. CC: Alex Duyck Signed-off-by: John Fastabend Signed-off-by: Jeff Kirsher --- include/uapi/linux/ethtool.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index ae832b45b44c..0594933cdf55 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -796,6 +796,31 @@ struct ethtool_rx_flow_spec { __u32 location; }; +/* How rings are layed out when accessing virtual functions or + * offloaded queues is device specific. To allow users to do flow + * steering and specify these queues the ring cookie is partitioned + * into a 32bit queue index with an 8 bit virtual function id. + * This also leaves the 3bytes for further specifiers. It is possible + * future devices may support more than 256 virtual functions if + * devices start supporting PCIe w/ARI. However at the moment I + * do not know of any devices that support this so I do not reserve + * space for this at this time. If a future patch consumes the next + * byte it should be aware of this possiblity. + */ +#define ETHTOOL_RX_FLOW_SPEC_RING 0x00000000FFFFFFFFLL +#define ETHTOOL_RX_FLOW_SPEC_RING_VF 0x000000FF00000000LL +#define ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF 32 +static inline __u64 ethtool_get_flow_spec_ring(__u64 ring_cookie) +{ + return ETHTOOL_RX_FLOW_SPEC_RING & ring_cookie; +}; + +static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie) +{ + return (ETHTOOL_RX_FLOW_SPEC_RING_VF & ring_cookie) >> + ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF; +}; + /** * struct ethtool_rxnfc - command to get or set RX flow classification rules * @cmd: Specific command number - %ETHTOOL_GRXFH, %ETHTOOL_SRXFH, -- cgit v1.2.3 From f7b81d67d0547c6ce246c89900eaa41303a3f89a Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 27 May 2015 11:02:46 -0700 Subject: clk: qcom: Add support for NSS/GMAC clocks and resets Add the NSS/GMAC clocks and the TCM clock and NSS resets. Signed-off-by: Stephen Boyd Signed-off-by: Mathieu Olivari Signed-off-by: David S. Miller --- drivers/clk/qcom/gcc-ipq806x.c | 594 ++++++++++++++++++++++++++- include/dt-bindings/clock/qcom,gcc-ipq806x.h | 2 + include/dt-bindings/reset/qcom,gcc-ipq806x.h | 43 ++ 3 files changed, 638 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/clk/qcom/gcc-ipq806x.c b/drivers/clk/qcom/gcc-ipq806x.c index a50936a17376..563969942a1d 100644 --- a/drivers/clk/qcom/gcc-ipq806x.c +++ b/drivers/clk/qcom/gcc-ipq806x.c @@ -140,12 +140,47 @@ static struct clk_regmap pll14_vote = { }, }; +#define NSS_PLL_RATE(f, _l, _m, _n, i) \ + { \ + .freq = f, \ + .l = _l, \ + .m = _m, \ + .n = _n, \ + .ibits = i, \ + } + +static struct pll_freq_tbl pll18_freq_tbl[] = { + NSS_PLL_RATE(550000000, 44, 0, 1, 0x01495625), + NSS_PLL_RATE(733000000, 58, 16, 25, 0x014b5625), +}; + +static struct clk_pll pll18 = { + .l_reg = 0x31a4, + .m_reg = 0x31a8, + .n_reg = 0x31ac, + .config_reg = 0x31b4, + .mode_reg = 0x31a0, + .status_reg = 0x31b8, + .status_bit = 16, + .post_div_shift = 16, + .post_div_width = 1, + .freq_tbl = pll18_freq_tbl, + .clkr.hw.init = &(struct clk_init_data){ + .name = "pll18", + .parent_names = (const char *[]){ "pxo" }, + .num_parents = 1, + .ops = &clk_pll_ops, + }, +}; + enum { P_PXO, P_PLL8, P_PLL3, P_PLL0, P_CXO, + P_PLL14, + P_PLL18, }; static const struct parent_map gcc_pxo_pll8_map[] = { @@ -197,6 +232,22 @@ static const char *gcc_pxo_pll8_pll0_map[] = { "pll0_vote", }; +static const struct parent_map gcc_pxo_pll8_pll14_pll18_pll0_map[] = { + { P_PXO, 0 }, + { P_PLL8, 4 }, + { P_PLL0, 2 }, + { P_PLL14, 5 }, + { P_PLL18, 1 } +}; + +static const char *gcc_pxo_pll8_pll14_pll18_pll0[] = { + "pxo", + "pll8_vote", + "pll0_vote", + "pll14", + "pll18", +}; + static struct freq_tbl clk_tbl_gsbi_uart[] = { { 1843200, P_PLL8, 2, 6, 625 }, { 3686400, P_PLL8, 2, 12, 625 }, @@ -2202,6 +2253,472 @@ static struct clk_branch ebi2_aon_clk = { }, }; +static const struct freq_tbl clk_tbl_gmac[] = { + { 133000000, P_PLL0, 1, 50, 301 }, + { 266000000, P_PLL0, 1, 127, 382 }, + { } +}; + +static struct clk_dyn_rcg gmac_core1_src = { + .ns_reg[0] = 0x3cac, + .ns_reg[1] = 0x3cb0, + .md_reg[0] = 0x3ca4, + .md_reg[1] = 0x3ca8, + .bank_reg = 0x3ca0, + .mn[0] = { + .mnctr_en_bit = 8, + .mnctr_reset_bit = 7, + .mnctr_mode_shift = 5, + .n_val_shift = 16, + .m_val_shift = 16, + .width = 8, + }, + .mn[1] = { + .mnctr_en_bit = 8, + .mnctr_reset_bit = 7, + .mnctr_mode_shift = 5, + .n_val_shift = 16, + .m_val_shift = 16, + .width = 8, + }, + .s[0] = { + .src_sel_shift = 0, + .parent_map = gcc_pxo_pll8_pll14_pll18_pll0_map, + }, + .s[1] = { + .src_sel_shift = 0, + .parent_map = gcc_pxo_pll8_pll14_pll18_pll0_map, + }, + .p[0] = { + .pre_div_shift = 3, + .pre_div_width = 2, + }, + .p[1] = { + .pre_div_shift = 3, + .pre_div_width = 2, + }, + .mux_sel_bit = 0, + .freq_tbl = clk_tbl_gmac, + .clkr = { + .enable_reg = 0x3ca0, + .enable_mask = BIT(1), + .hw.init = &(struct clk_init_data){ + .name = "gmac_core1_src", + .parent_names = gcc_pxo_pll8_pll14_pll18_pll0, + .num_parents = 5, + .ops = &clk_dyn_rcg_ops, + }, + }, +}; + +static struct clk_branch gmac_core1_clk = { + .halt_reg = 0x3c20, + .halt_bit = 4, + .hwcg_reg = 0x3cb4, + .hwcg_bit = 6, + .clkr = { + .enable_reg = 0x3cb4, + .enable_mask = BIT(4), + .hw.init = &(struct clk_init_data){ + .name = "gmac_core1_clk", + .parent_names = (const char *[]){ + "gmac_core1_src", + }, + .num_parents = 1, + .ops = &clk_branch_ops, + .flags = CLK_SET_RATE_PARENT, + }, + }, +}; + +static struct clk_dyn_rcg gmac_core2_src = { + .ns_reg[0] = 0x3ccc, + .ns_reg[1] = 0x3cd0, + .md_reg[0] = 0x3cc4, + .md_reg[1] = 0x3cc8, + .bank_reg = 0x3ca0, + .mn[0] = { + .mnctr_en_bit = 8, + .mnctr_reset_bit = 7, + .mnctr_mode_shift = 5, + .n_val_shift = 16, + .m_val_shift = 16, + .width = 8, + }, + .mn[1] = { + .mnctr_en_bit = 8, + .mnctr_reset_bit = 7, + .mnctr_mode_shift = 5, + .n_val_shift = 16, + .m_val_shift = 16, + .width = 8, + }, + .s[0] = { + .src_sel_shift = 0, + .parent_map = gcc_pxo_pll8_pll14_pll18_pll0_map, + }, + .s[1] = { + .src_sel_shift = 0, + .parent_map = gcc_pxo_pll8_pll14_pll18_pll0_map, + }, + .p[0] = { + .pre_div_shift = 3, + .pre_div_width = 2, + }, + .p[1] = { + .pre_div_shift = 3, + .pre_div_width = 2, + }, + .mux_sel_bit = 0, + .freq_tbl = clk_tbl_gmac, + .clkr = { + .enable_reg = 0x3cc0, + .enable_mask = BIT(1), + .hw.init = &(struct clk_init_data){ + .name = "gmac_core2_src", + .parent_names = gcc_pxo_pll8_pll14_pll18_pll0, + .num_parents = 5, + .ops = &clk_dyn_rcg_ops, + }, + }, +}; + +static struct clk_branch gmac_core2_clk = { + .halt_reg = 0x3c20, + .halt_bit = 5, + .hwcg_reg = 0x3cd4, + .hwcg_bit = 6, + .clkr = { + .enable_reg = 0x3cd4, + .enable_mask = BIT(4), + .hw.init = &(struct clk_init_data){ + .name = "gmac_core2_clk", + .parent_names = (const char *[]){ + "gmac_core2_src", + }, + .num_parents = 1, + .ops = &clk_branch_ops, + .flags = CLK_SET_RATE_PARENT, + }, + }, +}; + +static struct clk_dyn_rcg gmac_core3_src = { + .ns_reg[0] = 0x3cec, + .ns_reg[1] = 0x3cf0, + .md_reg[0] = 0x3ce4, + .md_reg[1] = 0x3ce8, + .bank_reg = 0x3ce0, + .mn[0] = { + .mnctr_en_bit = 8, + .mnctr_reset_bit = 7, + .mnctr_mode_shift = 5, + .n_val_shift = 16, + .m_val_shift = 16, + .width = 8, + }, + .mn[1] = { + .mnctr_en_bit = 8, + .mnctr_reset_bit = 7, + .mnctr_mode_shift = 5, + .n_val_shift = 16, + .m_val_shift = 16, + .width = 8, + }, + .s[0] = { + .src_sel_shift = 0, + .parent_map = gcc_pxo_pll8_pll14_pll18_pll0_map, + }, + .s[1] = { + .src_sel_shift = 0, + .parent_map = gcc_pxo_pll8_pll14_pll18_pll0_map, + }, + .p[0] = { + .pre_div_shift = 3, + .pre_div_width = 2, + }, + .p[1] = { + .pre_div_shift = 3, + .pre_div_width = 2, + }, + .mux_sel_bit = 0, + .freq_tbl = clk_tbl_gmac, + .clkr = { + .enable_reg = 0x3ce0, + .enable_mask = BIT(1), + .hw.init = &(struct clk_init_data){ + .name = "gmac_core3_src", + .parent_names = gcc_pxo_pll8_pll14_pll18_pll0, + .num_parents = 5, + .ops = &clk_dyn_rcg_ops, + }, + }, +}; + +static struct clk_branch gmac_core3_clk = { + .halt_reg = 0x3c20, + .halt_bit = 6, + .hwcg_reg = 0x3cf4, + .hwcg_bit = 6, + .clkr = { + .enable_reg = 0x3cf4, + .enable_mask = BIT(4), + .hw.init = &(struct clk_init_data){ + .name = "gmac_core3_clk", + .parent_names = (const char *[]){ + "gmac_core3_src", + }, + .num_parents = 1, + .ops = &clk_branch_ops, + .flags = CLK_SET_RATE_PARENT, + }, + }, +}; + +static struct clk_dyn_rcg gmac_core4_src = { + .ns_reg[0] = 0x3d0c, + .ns_reg[1] = 0x3d10, + .md_reg[0] = 0x3d04, + .md_reg[1] = 0x3d08, + .bank_reg = 0x3d00, + .mn[0] = { + .mnctr_en_bit = 8, + .mnctr_reset_bit = 7, + .mnctr_mode_shift = 5, + .n_val_shift = 16, + .m_val_shift = 16, + .width = 8, + }, + .mn[1] = { + .mnctr_en_bit = 8, + .mnctr_reset_bit = 7, + .mnctr_mode_shift = 5, + .n_val_shift = 16, + .m_val_shift = 16, + .width = 8, + }, + .s[0] = { + .src_sel_shift = 0, + .parent_map = gcc_pxo_pll8_pll14_pll18_pll0_map, + }, + .s[1] = { + .src_sel_shift = 0, + .parent_map = gcc_pxo_pll8_pll14_pll18_pll0_map, + }, + .p[0] = { + .pre_div_shift = 3, + .pre_div_width = 2, + }, + .p[1] = { + .pre_div_shift = 3, + .pre_div_width = 2, + }, + .mux_sel_bit = 0, + .freq_tbl = clk_tbl_gmac, + .clkr = { + .enable_reg = 0x3d00, + .enable_mask = BIT(1), + .hw.init = &(struct clk_init_data){ + .name = "gmac_core4_src", + .parent_names = gcc_pxo_pll8_pll14_pll18_pll0, + .num_parents = 5, + .ops = &clk_dyn_rcg_ops, + }, + }, +}; + +static struct clk_branch gmac_core4_clk = { + .halt_reg = 0x3c20, + .halt_bit = 7, + .hwcg_reg = 0x3d14, + .hwcg_bit = 6, + .clkr = { + .enable_reg = 0x3d14, + .enable_mask = BIT(4), + .hw.init = &(struct clk_init_data){ + .name = "gmac_core4_clk", + .parent_names = (const char *[]){ + "gmac_core4_src", + }, + .num_parents = 1, + .ops = &clk_branch_ops, + .flags = CLK_SET_RATE_PARENT, + }, + }, +}; + +static const struct freq_tbl clk_tbl_nss_tcm[] = { + { 266000000, P_PLL0, 3, 0, 0 }, + { 400000000, P_PLL0, 2, 0, 0 }, + { } +}; + +static struct clk_dyn_rcg nss_tcm_src = { + .ns_reg[0] = 0x3dc4, + .ns_reg[1] = 0x3dc8, + .bank_reg = 0x3dc0, + .s[0] = { + .src_sel_shift = 0, + .parent_map = gcc_pxo_pll8_pll14_pll18_pll0_map, + }, + .s[1] = { + .src_sel_shift = 0, + .parent_map = gcc_pxo_pll8_pll14_pll18_pll0_map, + }, + .p[0] = { + .pre_div_shift = 3, + .pre_div_width = 4, + }, + .p[1] = { + .pre_div_shift = 3, + .pre_div_width = 4, + }, + .mux_sel_bit = 0, + .freq_tbl = clk_tbl_nss_tcm, + .clkr = { + .enable_reg = 0x3dc0, + .enable_mask = BIT(1), + .hw.init = &(struct clk_init_data){ + .name = "nss_tcm_src", + .parent_names = gcc_pxo_pll8_pll14_pll18_pll0, + .num_parents = 5, + .ops = &clk_dyn_rcg_ops, + }, + }, +}; + +static struct clk_branch nss_tcm_clk = { + .halt_reg = 0x3c20, + .halt_bit = 14, + .clkr = { + .enable_reg = 0x3dd0, + .enable_mask = BIT(6) | BIT(4), + .hw.init = &(struct clk_init_data){ + .name = "nss_tcm_clk", + .parent_names = (const char *[]){ + "nss_tcm_src", + }, + .num_parents = 1, + .ops = &clk_branch_ops, + .flags = CLK_SET_RATE_PARENT, + }, + }, +}; + +static const struct freq_tbl clk_tbl_nss[] = { + { 110000000, P_PLL18, 1, 1, 5 }, + { 275000000, P_PLL18, 2, 0, 0 }, + { 550000000, P_PLL18, 1, 0, 0 }, + { 733000000, P_PLL18, 1, 0, 0 }, + { } +}; + +static struct clk_dyn_rcg ubi32_core1_src_clk = { + .ns_reg[0] = 0x3d2c, + .ns_reg[1] = 0x3d30, + .md_reg[0] = 0x3d24, + .md_reg[1] = 0x3d28, + .bank_reg = 0x3d20, + .mn[0] = { + .mnctr_en_bit = 8, + .mnctr_reset_bit = 7, + .mnctr_mode_shift = 5, + .n_val_shift = 16, + .m_val_shift = 16, + .width = 8, + }, + .mn[1] = { + .mnctr_en_bit = 8, + .mnctr_reset_bit = 7, + .mnctr_mode_shift = 5, + .n_val_shift = 16, + .m_val_shift = 16, + .width = 8, + }, + .s[0] = { + .src_sel_shift = 0, + .parent_map = gcc_pxo_pll8_pll14_pll18_pll0_map, + }, + .s[1] = { + .src_sel_shift = 0, + .parent_map = gcc_pxo_pll8_pll14_pll18_pll0_map, + }, + .p[0] = { + .pre_div_shift = 3, + .pre_div_width = 2, + }, + .p[1] = { + .pre_div_shift = 3, + .pre_div_width = 2, + }, + .mux_sel_bit = 0, + .freq_tbl = clk_tbl_nss, + .clkr = { + .enable_reg = 0x3d20, + .enable_mask = BIT(1), + .hw.init = &(struct clk_init_data){ + .name = "ubi32_core1_src_clk", + .parent_names = gcc_pxo_pll8_pll14_pll18_pll0, + .num_parents = 5, + .ops = &clk_dyn_rcg_ops, + .flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE, + }, + }, +}; + +static struct clk_dyn_rcg ubi32_core2_src_clk = { + .ns_reg[0] = 0x3d4c, + .ns_reg[1] = 0x3d50, + .md_reg[0] = 0x3d44, + .md_reg[1] = 0x3d48, + .bank_reg = 0x3d40, + .mn[0] = { + .mnctr_en_bit = 8, + .mnctr_reset_bit = 7, + .mnctr_mode_shift = 5, + .n_val_shift = 16, + .m_val_shift = 16, + .width = 8, + }, + .mn[1] = { + .mnctr_en_bit = 8, + .mnctr_reset_bit = 7, + .mnctr_mode_shift = 5, + .n_val_shift = 16, + .m_val_shift = 16, + .width = 8, + }, + .s[0] = { + .src_sel_shift = 0, + .parent_map = gcc_pxo_pll8_pll14_pll18_pll0_map, + }, + .s[1] = { + .src_sel_shift = 0, + .parent_map = gcc_pxo_pll8_pll14_pll18_pll0_map, + }, + .p[0] = { + .pre_div_shift = 3, + .pre_div_width = 2, + }, + .p[1] = { + .pre_div_shift = 3, + .pre_div_width = 2, + }, + .mux_sel_bit = 0, + .freq_tbl = clk_tbl_nss, + .clkr = { + .enable_reg = 0x3d40, + .enable_mask = BIT(1), + .hw.init = &(struct clk_init_data){ + .name = "ubi32_core2_src_clk", + .parent_names = gcc_pxo_pll8_pll14_pll18_pll0, + .num_parents = 5, + .ops = &clk_dyn_rcg_ops, + .flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE, + }, + }, +}; + static struct clk_regmap *gcc_ipq806x_clks[] = { [PLL0] = &pll0.clkr, [PLL0_VOTE] = &pll0_vote, @@ -2211,6 +2728,7 @@ static struct clk_regmap *gcc_ipq806x_clks[] = { [PLL8_VOTE] = &pll8_vote, [PLL14] = &pll14.clkr, [PLL14_VOTE] = &pll14_vote, + [PLL18] = &pll18.clkr, [GSBI1_UART_SRC] = &gsbi1_uart_src.clkr, [GSBI1_UART_CLK] = &gsbi1_uart_clk.clkr, [GSBI2_UART_SRC] = &gsbi2_uart_src.clkr, @@ -2307,6 +2825,18 @@ static struct clk_regmap *gcc_ipq806x_clks[] = { [USB_FS1_SYSTEM_CLK] = &usb_fs1_sys_clk.clkr, [EBI2_CLK] = &ebi2_clk.clkr, [EBI2_AON_CLK] = &ebi2_aon_clk.clkr, + [GMAC_CORE1_CLK_SRC] = &gmac_core1_src.clkr, + [GMAC_CORE1_CLK] = &gmac_core1_clk.clkr, + [GMAC_CORE2_CLK_SRC] = &gmac_core2_src.clkr, + [GMAC_CORE2_CLK] = &gmac_core2_clk.clkr, + [GMAC_CORE3_CLK_SRC] = &gmac_core3_src.clkr, + [GMAC_CORE3_CLK] = &gmac_core3_clk.clkr, + [GMAC_CORE4_CLK_SRC] = &gmac_core4_src.clkr, + [GMAC_CORE4_CLK] = &gmac_core4_clk.clkr, + [UBI32_CORE1_CLK_SRC] = &ubi32_core1_src_clk.clkr, + [UBI32_CORE2_CLK_SRC] = &ubi32_core2_src_clk.clkr, + [NSSTCM_CLK_SRC] = &nss_tcm_src.clkr, + [NSSTCM_CLK] = &nss_tcm_clk.clkr, }; static const struct qcom_reset_map gcc_ipq806x_resets[] = { @@ -2425,6 +2955,48 @@ static const struct qcom_reset_map gcc_ipq806x_resets[] = { [USB30_1_PHY_RESET] = { 0x3b58, 0 }, [NSSFB0_RESET] = { 0x3b60, 6 }, [NSSFB1_RESET] = { 0x3b60, 7 }, + [UBI32_CORE1_CLKRST_CLAMP_RESET] = { 0x3d3c, 3}, + [UBI32_CORE1_CLAMP_RESET] = { 0x3d3c, 2 }, + [UBI32_CORE1_AHB_RESET] = { 0x3d3c, 1 }, + [UBI32_CORE1_AXI_RESET] = { 0x3d3c, 0 }, + [UBI32_CORE2_CLKRST_CLAMP_RESET] = { 0x3d5c, 3 }, + [UBI32_CORE2_CLAMP_RESET] = { 0x3d5c, 2 }, + [UBI32_CORE2_AHB_RESET] = { 0x3d5c, 1 }, + [UBI32_CORE2_AXI_RESET] = { 0x3d5c, 0 }, + [GMAC_CORE1_RESET] = { 0x3cbc, 0 }, + [GMAC_CORE2_RESET] = { 0x3cdc, 0 }, + [GMAC_CORE3_RESET] = { 0x3cfc, 0 }, + [GMAC_CORE4_RESET] = { 0x3d1c, 0 }, + [GMAC_AHB_RESET] = { 0x3e24, 0 }, + [NSS_CH0_RST_RX_CLK_N_RESET] = { 0x3b60, 0 }, + [NSS_CH0_RST_TX_CLK_N_RESET] = { 0x3b60, 1 }, + [NSS_CH0_RST_RX_125M_N_RESET] = { 0x3b60, 2 }, + [NSS_CH0_HW_RST_RX_125M_N_RESET] = { 0x3b60, 3 }, + [NSS_CH0_RST_TX_125M_N_RESET] = { 0x3b60, 4 }, + [NSS_CH1_RST_RX_CLK_N_RESET] = { 0x3b60, 5 }, + [NSS_CH1_RST_TX_CLK_N_RESET] = { 0x3b60, 6 }, + [NSS_CH1_RST_RX_125M_N_RESET] = { 0x3b60, 7 }, + [NSS_CH1_HW_RST_RX_125M_N_RESET] = { 0x3b60, 8 }, + [NSS_CH1_RST_TX_125M_N_RESET] = { 0x3b60, 9 }, + [NSS_CH2_RST_RX_CLK_N_RESET] = { 0x3b60, 10 }, + [NSS_CH2_RST_TX_CLK_N_RESET] = { 0x3b60, 11 }, + [NSS_CH2_RST_RX_125M_N_RESET] = { 0x3b60, 12 }, + [NSS_CH2_HW_RST_RX_125M_N_RESET] = { 0x3b60, 13 }, + [NSS_CH2_RST_TX_125M_N_RESET] = { 0x3b60, 14 }, + [NSS_CH3_RST_RX_CLK_N_RESET] = { 0x3b60, 15 }, + [NSS_CH3_RST_TX_CLK_N_RESET] = { 0x3b60, 16 }, + [NSS_CH3_RST_RX_125M_N_RESET] = { 0x3b60, 17 }, + [NSS_CH3_HW_RST_RX_125M_N_RESET] = { 0x3b60, 18 }, + [NSS_CH3_RST_TX_125M_N_RESET] = { 0x3b60, 19 }, + [NSS_RST_RX_250M_125M_N_RESET] = { 0x3b60, 20 }, + [NSS_RST_TX_250M_125M_N_RESET] = { 0x3b60, 21 }, + [NSS_QSGMII_TXPI_RST_N_RESET] = { 0x3b60, 22 }, + [NSS_QSGMII_CDR_RST_N_RESET] = { 0x3b60, 23 }, + [NSS_SGMII2_CDR_RST_N_RESET] = { 0x3b60, 24 }, + [NSS_SGMII3_CDR_RST_N_RESET] = { 0x3b60, 25 }, + [NSS_CAL_PRBS_RST_N_RESET] = { 0x3b60, 26 }, + [NSS_LCKDT_RST_N_RESET] = { 0x3b60, 27 }, + [NSS_SRDS_N_RESET] = { 0x3b60, 28 }, }; static const struct regmap_config gcc_ipq806x_regmap_config = { @@ -2453,6 +3025,8 @@ static int gcc_ipq806x_probe(struct platform_device *pdev) { struct clk *clk; struct device *dev = &pdev->dev; + struct regmap *regmap; + int ret; /* Temporary until RPM clocks supported */ clk = clk_register_fixed_rate(dev, "cxo", NULL, CLK_IS_ROOT, 25000000); @@ -2463,7 +3037,25 @@ static int gcc_ipq806x_probe(struct platform_device *pdev) if (IS_ERR(clk)) return PTR_ERR(clk); - return qcom_cc_probe(pdev, &gcc_ipq806x_desc); + ret = qcom_cc_probe(pdev, &gcc_ipq806x_desc); + if (ret) + return ret; + + regmap = dev_get_regmap(dev, NULL); + if (!regmap) + return -ENODEV; + + /* Setup PLL18 static bits */ + regmap_update_bits(regmap, 0x31a4, 0xffffffc0, 0x40000400); + regmap_write(regmap, 0x31b0, 0x3080); + + /* Set GMAC footswitch sleep/wakeup values */ + regmap_write(regmap, 0x3cb8, 8); + regmap_write(regmap, 0x3cd8, 8); + regmap_write(regmap, 0x3cf8, 8); + regmap_write(regmap, 0x3d18, 8); + + return 0; } static int gcc_ipq806x_remove(struct platform_device *pdev) diff --git a/include/dt-bindings/clock/qcom,gcc-ipq806x.h b/include/dt-bindings/clock/qcom,gcc-ipq806x.h index ebd63fd05649..dc4254b8cbbc 100644 --- a/include/dt-bindings/clock/qcom,gcc-ipq806x.h +++ b/include/dt-bindings/clock/qcom,gcc-ipq806x.h @@ -289,5 +289,7 @@ #define UBI32_CORE1_CLK 279 #define UBI32_CORE2_CLK 280 #define EBI2_AON_CLK 281 +#define NSSTCM_CLK_SRC 282 +#define NSSTCM_CLK 283 #endif diff --git a/include/dt-bindings/reset/qcom,gcc-ipq806x.h b/include/dt-bindings/reset/qcom,gcc-ipq806x.h index 0ad5ef930b5d..de9c8140931a 100644 --- a/include/dt-bindings/reset/qcom,gcc-ipq806x.h +++ b/include/dt-bindings/reset/qcom,gcc-ipq806x.h @@ -129,4 +129,47 @@ #define USB30_1_PHY_RESET 112 #define NSSFB0_RESET 113 #define NSSFB1_RESET 114 +#define UBI32_CORE1_CLKRST_CLAMP_RESET 115 +#define UBI32_CORE1_CLAMP_RESET 116 +#define UBI32_CORE1_AHB_RESET 117 +#define UBI32_CORE1_AXI_RESET 118 +#define UBI32_CORE2_CLKRST_CLAMP_RESET 119 +#define UBI32_CORE2_CLAMP_RESET 120 +#define UBI32_CORE2_AHB_RESET 121 +#define UBI32_CORE2_AXI_RESET 122 +#define GMAC_CORE1_RESET 123 +#define GMAC_CORE2_RESET 124 +#define GMAC_CORE3_RESET 125 +#define GMAC_CORE4_RESET 126 +#define GMAC_AHB_RESET 127 +#define NSS_CH0_RST_RX_CLK_N_RESET 128 +#define NSS_CH0_RST_TX_CLK_N_RESET 129 +#define NSS_CH0_RST_RX_125M_N_RESET 130 +#define NSS_CH0_HW_RST_RX_125M_N_RESET 131 +#define NSS_CH0_RST_TX_125M_N_RESET 132 +#define NSS_CH1_RST_RX_CLK_N_RESET 133 +#define NSS_CH1_RST_TX_CLK_N_RESET 134 +#define NSS_CH1_RST_RX_125M_N_RESET 135 +#define NSS_CH1_HW_RST_RX_125M_N_RESET 136 +#define NSS_CH1_RST_TX_125M_N_RESET 137 +#define NSS_CH2_RST_RX_CLK_N_RESET 138 +#define NSS_CH2_RST_TX_CLK_N_RESET 139 +#define NSS_CH2_RST_RX_125M_N_RESET 140 +#define NSS_CH2_HW_RST_RX_125M_N_RESET 141 +#define NSS_CH2_RST_TX_125M_N_RESET 142 +#define NSS_CH3_RST_RX_CLK_N_RESET 143 +#define NSS_CH3_RST_TX_CLK_N_RESET 144 +#define NSS_CH3_RST_RX_125M_N_RESET 145 +#define NSS_CH3_HW_RST_RX_125M_N_RESET 146 +#define NSS_CH3_RST_TX_125M_N_RESET 147 +#define NSS_RST_RX_250M_125M_N_RESET 148 +#define NSS_RST_TX_250M_125M_N_RESET 149 +#define NSS_QSGMII_TXPI_RST_N_RESET 150 +#define NSS_QSGMII_CDR_RST_N_RESET 151 +#define NSS_SGMII2_CDR_RST_N_RESET 152 +#define NSS_SGMII3_CDR_RST_N_RESET 153 +#define NSS_CAL_PRBS_RST_N_RESET 154 +#define NSS_LCKDT_RST_N_RESET 155 +#define NSS_SRDS_N_RESET 156 + #endif -- cgit v1.2.3 From 5790cf3c00c2f92aacba348e13f8a9a8f5dd96bd Mon Sep 17 00:00:00 2001 From: Mathieu Olivari Date: Wed, 27 May 2015 11:02:47 -0700 Subject: stmmac: add phy-handle support to the platform layer On stmmac driver, PHY specification in device-tree was done using the non-standard property "snps,phy-addr". Specifying a PHY on a different MDIO bus that the one within the stmmac controller doesn't seem to be possible when device-tree is used. This change adds support for the phy-handle property, as specified in Documentation/devicetree/bindings/net/ethernet.txt. Signed-off-by: Mathieu Olivari Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 28 ++++++++++++++-------- .../net/ethernet/stmicro/stmmac/stmmac_platform.c | 6 ++++- include/linux/stmmac.h | 1 + 3 files changed, 24 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e4f273976071..31c64169f2ec 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -52,6 +52,7 @@ #include "stmmac_ptp.h" #include "stmmac.h" #include +#include #define STMMAC_ALIGN(x) L1_CACHE_ALIGN(x) @@ -816,18 +817,25 @@ static int stmmac_init_phy(struct net_device *dev) priv->speed = 0; priv->oldduplex = -1; - if (priv->plat->phy_bus_name) - snprintf(bus_id, MII_BUS_ID_SIZE, "%s-%x", - priv->plat->phy_bus_name, priv->plat->bus_id); - else - snprintf(bus_id, MII_BUS_ID_SIZE, "stmmac-%x", - priv->plat->bus_id); + if (priv->plat->phy_node) { + phydev = of_phy_connect(dev, priv->plat->phy_node, + &stmmac_adjust_link, 0, interface); + } else { + if (priv->plat->phy_bus_name) + snprintf(bus_id, MII_BUS_ID_SIZE, "%s-%x", + priv->plat->phy_bus_name, priv->plat->bus_id); + else + snprintf(bus_id, MII_BUS_ID_SIZE, "stmmac-%x", + priv->plat->bus_id); - snprintf(phy_id_fmt, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id, - priv->plat->phy_addr); - pr_debug("stmmac_init_phy: trying to attach to %s\n", phy_id_fmt); + snprintf(phy_id_fmt, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id, + priv->plat->phy_addr); + pr_debug("stmmac_init_phy: trying to attach to %s\n", + phy_id_fmt); - phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link, interface); + phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link, + interface); + } if (IS_ERR(phydev)) { pr_err("%s: Could not attach to PHY\n", dev->name); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 1664c0186f5b..8d23155a1a7e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "stmmac.h" #include "stmmac_platform.h" @@ -144,13 +145,16 @@ static int stmmac_probe_config_dt(struct platform_device *pdev, /* Default to phy auto-detection */ plat->phy_addr = -1; + /* If we find a phy-handle property, use it as the PHY */ + plat->phy_node = of_parse_phandle(np, "phy-handle", 0); + /* "snps,phy-addr" is not a standard property. Mark it as deprecated * and warn of its use. Remove this when phy node support is added. */ if (of_property_read_u32(np, "snps,phy-addr", &plat->phy_addr) == 0) dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n"); - if (plat->phy_bus_name) + if (plat->phy_node || plat->phy_bus_name) plat->mdio_bus_data = NULL; else plat->mdio_bus_data = diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 7f484a239f53..c735f5c91eea 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -99,6 +99,7 @@ struct plat_stmmacenet_data { int phy_addr; int interface; struct stmmac_mdio_bus_data *mdio_bus_data; + struct device_node *phy_node; struct stmmac_dma_cfg *dma_cfg; int clk_csr; int has_gmac; -- cgit v1.2.3 From 37e82c2f974b72c9ab49c787ef7b5bb1aec12768 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 27 May 2015 15:30:39 -0700 Subject: bpf: allow BPF programs access skb->skb_iif and skb->dev->ifindex fields classic BPF already exposes skb->dev->ifindex via SKF_AD_IFINDEX extension. Allow eBPF program to access it as well. Note that classic aborts execution of the program if 'skb->dev == NULL' (which is inconvenient for program writers), whereas eBPF returns zero in such case. Also expose the 'skb_iif' field, since programs triggered by redirected packet need to known the original interface index. Summary: __skb->ifindex -> skb->dev->ifindex __skb->ingress_ifindex -> skb->skb_iif Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 2 ++ net/core/filter.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f0a9af8b4dae..72f3080afa1e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -236,6 +236,8 @@ struct __sk_buff { __u32 vlan_tci; __u32 vlan_proto; __u32 priority; + __u32 ingress_ifindex; + __u32 ifindex; }; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/net/core/filter.c b/net/core/filter.c index 3adcca6f17a4..2c30d6632d66 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1499,6 +1499,24 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, offsetof(struct sk_buff, priority)); break; + case offsetof(struct __sk_buff, ingress_ifindex): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4); + + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, skb_iif)); + break; + + case offsetof(struct __sk_buff, ifindex): + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); + + *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)), + dst_reg, src_reg, + offsetof(struct sk_buff, dev)); + *insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1); + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg, + offsetof(struct net_device, ifindex)); + break; + case offsetof(struct __sk_buff, mark): return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn); -- cgit v1.2.3 From f4fb874cf076f9eafdd15c0a88cd0f0397b95e43 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 27 May 2015 21:07:26 -0400 Subject: if_vlan: fix vlaue -> value typo Fixes "vlaue" for "value" in include/linux/if_vlan.h. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index b9ab677c0c0a..a40d29846ac2 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -416,7 +416,7 @@ static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb, /** * __vlan_get_tag - get the VLAN ID that is part of the payload * @skb: skbuff to query - * @vlan_tci: buffer to store vlaue + * @vlan_tci: buffer to store value * * Returns error if the skb is not of VLAN type */ @@ -435,7 +435,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) /** * __vlan_hwaccel_get_tag - get the VLAN ID that is in @skb->cb[] * @skb: skbuff to query - * @vlan_tci: buffer to store vlaue + * @vlan_tci: buffer to store value * * Returns error if @skb->vlan_tci is not set correctly */ @@ -456,7 +456,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, /** * vlan_get_tag - get the VLAN ID from the skb * @skb: skbuff to query - * @vlan_tci: buffer to store vlaue + * @vlan_tci: buffer to store value * * Returns error if the skb is not VLAN tagged */ -- cgit v1.2.3 From 64ffaa2159b752e6c263dc57eaaaed7367d37493 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 28 May 2015 22:28:38 +0300 Subject: net/mlx5_core,mlx5_ib: Do not use vmap() on coherent memory As David Daney pointed in mlx4_core driver [1], mlx5_core is also misusing the DMA-API. This patch is removing the code that vmap() memory allocated by dma_alloc_coherent(). After this patch, users of this drivers might fail allocating resources on memory fragmeneted systems. This will be fixed later on. [1] - https://patchwork.ozlabs.org/patch/458531/ CC: David Daney Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx5/cq.c | 3 +- drivers/infiniband/hw/mlx5/qp.c | 2 +- drivers/infiniband/hw/mlx5/srq.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/alloc.c | 96 +++++-------------------- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 3 +- include/linux/mlx5/driver.h | 9 +-- 6 files changed, 22 insertions(+), 93 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 2ee6b1051975..4e88b18cf62e 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -590,8 +590,7 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, { int err; - err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, - PAGE_SIZE * 2, &buf->buf); + err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, &buf->buf); if (err) return err; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d35f62d4f4c5..426eb88dfa49 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -768,7 +768,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift); - err = mlx5_buf_alloc(dev->mdev, qp->buf_size, PAGE_SIZE * 2, &qp->buf); + err = mlx5_buf_alloc(dev->mdev, qp->buf_size, &qp->buf); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); goto err_uuar; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 02d77a29764d..4242e1ded868 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -165,7 +165,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, return err; } - if (mlx5_buf_alloc(dev->mdev, buf_size, PAGE_SIZE * 2, &srq->buf)) { + if (mlx5_buf_alloc(dev->mdev, buf_size, &srq->buf)) { mlx5_ib_dbg(dev, "buf alloc failed\n"); err = -ENOMEM; goto err_db; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index ac0f7bf4be95..0715b497511f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -42,95 +42,36 @@ #include "mlx5_core.h" /* Handling for queue buffers -- we allocate a bunch of memory and - * register it in a memory region at HCA virtual address 0. If the - * requested size is > max_direct, we split the allocation into - * multiple pages, so we don't require too much contiguous memory. + * register it in a memory region at HCA virtual address 0. */ -int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct, - struct mlx5_buf *buf) +int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf) { dma_addr_t t; buf->size = size; - if (size <= max_direct) { - buf->nbufs = 1; - buf->npages = 1; - buf->page_shift = (u8)get_order(size) + PAGE_SHIFT; - buf->direct.buf = dma_zalloc_coherent(&dev->pdev->dev, - size, &t, GFP_KERNEL); - if (!buf->direct.buf) - return -ENOMEM; - - buf->direct.map = t; - - while (t & ((1 << buf->page_shift) - 1)) { - --buf->page_shift; - buf->npages *= 2; - } - } else { - int i; - - buf->direct.buf = NULL; - buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE; - buf->npages = buf->nbufs; - buf->page_shift = PAGE_SHIFT; - buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), - GFP_KERNEL); - if (!buf->page_list) - return -ENOMEM; - - for (i = 0; i < buf->nbufs; i++) { - buf->page_list[i].buf = - dma_zalloc_coherent(&dev->pdev->dev, PAGE_SIZE, - &t, GFP_KERNEL); - if (!buf->page_list[i].buf) - goto err_free; - - buf->page_list[i].map = t; - } - - if (BITS_PER_LONG == 64) { - struct page **pages; - pages = kmalloc(sizeof(*pages) * buf->nbufs, GFP_KERNEL); - if (!pages) - goto err_free; - for (i = 0; i < buf->nbufs; i++) - pages[i] = virt_to_page(buf->page_list[i].buf); - buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL); - kfree(pages); - if (!buf->direct.buf) - goto err_free; - } - } + buf->npages = 1; + buf->page_shift = (u8)get_order(size) + PAGE_SHIFT; + buf->direct.buf = dma_zalloc_coherent(&dev->pdev->dev, + size, &t, GFP_KERNEL); + if (!buf->direct.buf) + return -ENOMEM; - return 0; + buf->direct.map = t; -err_free: - mlx5_buf_free(dev, buf); + while (t & ((1 << buf->page_shift) - 1)) { + --buf->page_shift; + buf->npages *= 2; + } - return -ENOMEM; + return 0; } EXPORT_SYMBOL_GPL(mlx5_buf_alloc); void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf) { - int i; - - if (buf->nbufs == 1) - dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf, - buf->direct.map); - else { - if (BITS_PER_LONG == 64) - vunmap(buf->direct.buf); - - for (i = 0; i < buf->nbufs; i++) - if (buf->page_list[i].buf) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, - buf->page_list[i].buf, - buf->page_list[i].map); - kfree(buf->page_list); - } + dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf, + buf->direct.map); } EXPORT_SYMBOL_GPL(mlx5_buf_free); @@ -230,10 +171,7 @@ void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas) int i; for (i = 0; i < buf->npages; i++) { - if (buf->nbufs == 1) - addr = buf->direct.map + (i << buf->page_shift); - else - addr = buf->page_list[i].map; + addr = buf->direct.map + (i << buf->page_shift); pas[i] = cpu_to_be64(addr); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 58800e4f3958..3f511bd84489 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -346,8 +346,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int inlen; eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); - err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, 2 * PAGE_SIZE, - &eq->buf); + err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf); if (err) return err; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9a90e7523dc2..c4cf25ffcc16 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -334,8 +334,6 @@ struct mlx5_buf_list { struct mlx5_buf { struct mlx5_buf_list direct; - struct mlx5_buf_list *page_list; - int nbufs; int npages; int size; u8 page_shift; @@ -586,11 +584,7 @@ struct mlx5_pas { static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset) { - if (likely(BITS_PER_LONG == 64 || buf->nbufs == 1)) return buf->direct.buf + offset; - else - return buf->page_list[offset >> PAGE_SHIFT].buf + - (offset & (PAGE_SIZE - 1)); } extern struct workqueue_struct *mlx5_core_wq; @@ -669,8 +663,7 @@ void mlx5_health_cleanup(void); void __init mlx5_health_init(void); void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev); -int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct, - struct mlx5_buf *buf); +int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf); void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf); struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, gfp_t flags, int npages); -- cgit v1.2.3 From db058a186f98b057c19c42f7b10d9a96fd3b5d59 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 28 May 2015 22:28:39 +0300 Subject: net/mlx5_core: Set irq affinity hints Preparation for upcoming ethernet driver. - Move msix array from eq_table struct to priv since its not related to eq_table - Intorduce irq_info struct to hold all irq information - Move name from mlx5_eq to irq_info struct since it is irq property. - Set IRQ affinity hints Signed-off-by: Achiad Shochat Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 16 ++-- drivers/net/ethernet/mellanox/mlx5/core/main.c | 111 ++++++++++++++++++++++--- include/linux/mlx5/driver.h | 11 ++- 3 files changed, 117 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 3f511bd84489..516efc25fc4f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -339,7 +339,7 @@ static void init_eq_buf(struct mlx5_eq *eq) int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, struct mlx5_uar *uar) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_priv *priv = &dev->priv; struct mlx5_create_eq_mbox_in *in; struct mlx5_create_eq_mbox_out out; int err; @@ -377,14 +377,15 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, goto err_in; } - snprintf(eq->name, MLX5_MAX_EQ_NAME, "%s@pci:%s", + snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", name, pci_name(dev->pdev)); + eq->eqn = out.eq_number; eq->irqn = vecidx; eq->dev = dev; eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(table->msix_arr[vecidx].vector, mlx5_msix_handler, 0, - eq->name, eq); + err = request_irq(priv->msix_arr[vecidx].vector, mlx5_msix_handler, 0, + priv->irq_info[vecidx].name, eq); if (err) goto err_eq; @@ -400,7 +401,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, return 0; err_irq: - free_irq(table->msix_arr[vecidx].vector, eq); + free_irq(priv->msix_arr[vecidx].vector, eq); err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -416,16 +417,15 @@ EXPORT_SYMBOL_GPL(mlx5_create_map_eq); int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { - struct mlx5_eq_table *table = &dev->priv.eq_table; int err; mlx5_debug_eq_remove(dev, eq); - free_irq(table->msix_arr[eq->irqn].vector, eq); + free_irq(dev->priv.msix_arr[eq->irqn].vector, eq); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", eq->eqn); - synchronize_irq(table->msix_arr[eq->irqn].vector); + synchronize_irq(dev->priv.msix_arr[eq->irqn].vector); mlx5_buf_free(dev, &eq->buf); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 28425e5ea91f..55085b01b4ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -208,7 +209,8 @@ static void release_bar(struct pci_dev *pdev) static int mlx5_enable_msix(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_priv *priv = &dev->priv; + struct mlx5_eq_table *table = &priv->eq_table; int num_eqs = 1 << dev->caps.gen.log_max_eq; int nvec; int i; @@ -218,14 +220,16 @@ static int mlx5_enable_msix(struct mlx5_core_dev *dev) if (nvec <= MLX5_EQ_VEC_COMP_BASE) return -ENOMEM; - table->msix_arr = kzalloc(nvec * sizeof(*table->msix_arr), GFP_KERNEL); - if (!table->msix_arr) - return -ENOMEM; + priv->msix_arr = kcalloc(nvec, sizeof(*priv->msix_arr), GFP_KERNEL); + + priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL); + if (!priv->msix_arr || !priv->irq_info) + goto err_free_msix; for (i = 0; i < nvec; i++) - table->msix_arr[i].entry = i; + priv->msix_arr[i].entry = i; - nvec = pci_enable_msix_range(dev->pdev, table->msix_arr, + nvec = pci_enable_msix_range(dev->pdev, priv->msix_arr, MLX5_EQ_VEC_COMP_BASE + 1, nvec); if (nvec < 0) return nvec; @@ -233,14 +237,20 @@ static int mlx5_enable_msix(struct mlx5_core_dev *dev) table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; return 0; + +err_free_msix: + kfree(priv->irq_info); + kfree(priv->msix_arr); + return -ENOMEM; } static void mlx5_disable_msix(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_priv *priv = &dev->priv; pci_disable_msix(dev->pdev); - kfree(table->msix_arr); + kfree(priv->irq_info); + kfree(priv->msix_arr); } struct mlx5_reg_host_endianess { @@ -507,6 +517,77 @@ static int mlx5_core_disable_hca(struct mlx5_core_dev *dev) return 0; } +static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + struct mlx5_priv *priv = &mdev->priv; + struct msix_entry *msix = priv->msix_arr; + int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; + int numa_node = dev_to_node(&mdev->pdev->dev); + int err; + + if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { + mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); + return -ENOMEM; + } + + err = cpumask_set_cpu_local_first(i, numa_node, priv->irq_info[i].mask); + if (err) { + mlx5_core_warn(mdev, "cpumask_set_cpu_local_first failed"); + goto err_clear_mask; + } + + err = irq_set_affinity_hint(irq, priv->irq_info[i].mask); + if (err) { + mlx5_core_warn(mdev, "irq_set_affinity_hint failed,irq 0x%.4x", + irq); + goto err_clear_mask; + } + + return 0; + +err_clear_mask: + free_cpumask_var(priv->irq_info[i].mask); + return err; +} + +static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + struct mlx5_priv *priv = &mdev->priv; + struct msix_entry *msix = priv->msix_arr; + int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; + + irq_set_affinity_hint(irq, NULL); + free_cpumask_var(priv->irq_info[i].mask); +} + +static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) +{ + int err; + int i; + + for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) { + err = mlx5_irq_set_affinity_hint(mdev, i); + if (err) + goto err_out; + } + + return 0; + +err_out: + for (i--; i >= 0; i--) + mlx5_irq_clear_affinity_hint(mdev, i); + + return err; +} + +static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev) +{ + int i; + + for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) + mlx5_irq_clear_affinity_hint(mdev, i); +} + int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn) { struct mlx5_eq_table *table = &dev->priv.eq_table; @@ -549,7 +630,7 @@ static void free_comp_eqs(struct mlx5_core_dev *dev) static int alloc_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = &dev->priv.eq_table; - char name[MLX5_MAX_EQ_NAME]; + char name[MLX5_MAX_IRQ_NAME]; struct mlx5_eq *eq; int ncomp_vec; int nent; @@ -566,7 +647,7 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) goto clean; } - snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i); + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); err = mlx5_create_map_eq(dev, eq, i + MLX5_EQ_VEC_COMP_BASE, nent, 0, name, &dev->priv.uuari.uars[0]); @@ -730,6 +811,12 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) goto err_stop_eqs; } + err = mlx5_irq_set_affinity_hints(dev); + if (err) { + dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); + goto err_free_comp_eqs; + } + MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); mlx5_init_cq_table(dev); @@ -739,6 +826,9 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) return 0; +err_free_comp_eqs: + free_comp_eqs(dev); + err_stop_eqs: mlx5_stop_eqs(dev); @@ -793,6 +883,7 @@ static void mlx5_dev_cleanup(struct mlx5_core_dev *dev) mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cleanup_cq_table(dev); + mlx5_irq_clear_affinity_hints(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); mlx5_free_uuars(dev, &priv->uuari); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c4cf25ffcc16..9e8979502826 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -85,7 +85,7 @@ enum { }; enum { - MLX5_MAX_EQ_NAME = 32 + MLX5_MAX_IRQ_NAME = 32 }; enum { @@ -349,7 +349,6 @@ struct mlx5_eq { u8 eqn; int nent; u64 mask; - char name[MLX5_MAX_EQ_NAME]; struct list_head list; int index; struct mlx5_rsc_debug *dbg; @@ -412,7 +411,6 @@ struct mlx5_eq_table { struct mlx5_eq pages_eq; struct mlx5_eq async_eq; struct mlx5_eq cmd_eq; - struct msix_entry *msix_arr; int num_comp_vectors; /* protect EQs list */ @@ -465,9 +463,16 @@ struct mlx5_mr_table { struct radix_tree_root tree; }; +struct mlx5_irq_info { + cpumask_var_t mask; + char name[MLX5_MAX_IRQ_NAME]; +}; + struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; + struct msix_entry *msix_arr; + struct mlx5_irq_info *irq_info; struct mlx5_uuar_info uuari; MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock); -- cgit v1.2.3 From e281682bf29438848daac11627216bceb1507b71 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 28 May 2015 22:28:40 +0300 Subject: net/mlx5_core: HW data structs/types definitions cleanup mlx5_ifc.h was heavily modified here since it is now generated by a script from the device specification (PRM rev 0.25). This specification is backward compatible to existing hardware. Some structures/fields were added here in order to enable the Ethernet functionality of the driver. Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 17 +- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +- drivers/net/ethernet/mellanox/mlx5/core/mcg.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/uar.c | 37 + include/linux/mlx5/device.h | 113 +- include/linux/mlx5/driver.h | 4 +- include/linux/mlx5/mlx5_ifc.h | 6608 +++++++++++++++++++++++- include/linux/mlx5/qp.h | 25 + 9 files changed, 6705 insertions(+), 110 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index e3273faf4568..2f22cd2de2b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -390,8 +390,17 @@ const char *mlx5_command_str(int command) case MLX5_CMD_OP_ARM_RQ: return "ARM_RQ"; - case MLX5_CMD_OP_RESIZE_SRQ: - return "RESIZE_SRQ"; + case MLX5_CMD_OP_CREATE_XRC_SRQ: + return "CREATE_XRC_SRQ"; + + case MLX5_CMD_OP_DESTROY_XRC_SRQ: + return "DESTROY_XRC_SRQ"; + + case MLX5_CMD_OP_QUERY_XRC_SRQ: + return "QUERY_XRC_SRQ"; + + case MLX5_CMD_OP_ARM_XRC_SRQ: + return "ARM_XRC_SRQ"; case MLX5_CMD_OP_ALLOC_PD: return "ALLOC_PD"; @@ -408,8 +417,8 @@ const char *mlx5_command_str(int command) case MLX5_CMD_OP_ATTACH_TO_MCG: return "ATTACH_TO_MCG"; - case MLX5_CMD_OP_DETACH_FROM_MCG: - return "DETACH_FROM_MCG"; + case MLX5_CMD_OP_DETTACH_FROM_MCG: + return "DETTACH_FROM_MCG"; case MLX5_CMD_OP_ALLOC_XRCD: return "ALLOC_XRCD"; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 4b4cda3bcc5f..ef9b7695decd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -95,7 +95,7 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *caps) goto out; } - memcpy(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability_struct), + memcpy(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability), sizeof(*caps)); mlx5_core_dbg(dev, "on-demand paging capabilities:\nrc: %08x\nuc: %08x\nud: %08x\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 55085b01b4ce..a652cb93ceaa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -319,8 +319,7 @@ static void fw2drv_caps(struct mlx5_caps *caps, void *out) gen->max_srq_wqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_srq_sz); gen->max_wqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_qp_sz); gen->log_max_qp = MLX5_GET_PR(cmd_hca_cap, out, log_max_qp); - gen->log_max_strq = MLX5_GET_PR(cmd_hca_cap, out, log_max_strq_sz); - gen->log_max_srq = MLX5_GET_PR(cmd_hca_cap, out, log_max_srqs); + gen->log_max_srq = MLX5_GET_PR(cmd_hca_cap, out, log_max_srq); gen->max_cqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_cq_sz); gen->log_max_cq = MLX5_GET_PR(cmd_hca_cap, out, log_max_cq); gen->max_eqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_eq_sz); @@ -391,7 +390,7 @@ int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps, goto query_ex; } mlx5_core_dbg(dev, "%s\n", caps_opmod_str(opmod)); - fw2drv_caps(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability_struct)); + fw2drv_caps(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability)); query_ex: kfree(out); @@ -453,7 +452,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) /* disable checksum */ cur_caps->gen.flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM; - copy_rw_fields(MLX5_ADDR_OF(set_hca_cap_in, set_ctx, hca_capability_struct), + copy_rw_fields(MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability), cur_caps); err = set_caps(dev, set_ctx, set_sz); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c index d79fd85d1dd5..d5a0c2d61a18 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c @@ -91,7 +91,7 @@ int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) memset(&in, 0, sizeof(in)); memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DETACH_FROM_MCG); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DETTACH_FROM_MCG); memcpy(in.gid, mgid, sizeof(*mgid)); in.qpn = cpu_to_be32(qpn); err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index 5a89bb1d678a..ba58f6d7c761 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -223,3 +223,40 @@ int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari) return 0; } + +int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) +{ + phys_addr_t pfn; + phys_addr_t uar_bar_start; + int err; + + err = mlx5_cmd_alloc_uar(mdev, &uar->index); + if (err) { + mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err); + return err; + } + + uar_bar_start = pci_resource_start(mdev->pdev, 0); + pfn = (uar_bar_start >> PAGE_SHIFT) + uar->index; + uar->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!uar->map) { + mlx5_core_warn(mdev, "ioremap() failed, %d\n", err); + err = -ENOMEM; + goto err_free_uar; + } + + return 0; + +err_free_uar: + mlx5_cmd_free_uar(mdev, uar->index); + + return err; +} +EXPORT_SYMBOL(mlx5_alloc_map_uar); + +void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) +{ + iounmap(uar->map); + mlx5_cmd_free_uar(mdev, uar->index); +} +EXPORT_SYMBOL(mlx5_unmap_free_uar); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index abf65c790421..feebed7b392b 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -35,6 +35,7 @@ #include #include +#include #if defined(__LITTLE_ENDIAN) #define MLX5_SET_HOST_ENDIANNESS 0 @@ -70,6 +71,14 @@ << __mlx5_dw_bit_off(typ, fld))); \ } while (0) +#define MLX5_SET_TO_ONES(typ, p, fld) do { \ + BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32); \ + *((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \ + cpu_to_be32((be32_to_cpu(*((__be32 *)(p) + __mlx5_dw_off(typ, fld))) & \ + (~__mlx5_dw_mask(typ, fld))) | ((__mlx5_mask(typ, fld)) \ + << __mlx5_dw_bit_off(typ, fld))); \ +} while (0) + #define MLX5_GET(typ, p, fld) ((be32_to_cpu(*((__be32 *)(p) +\ __mlx5_dw_off(typ, fld))) >> __mlx5_dw_bit_off(typ, fld)) & \ __mlx5_mask(typ, fld)) @@ -264,6 +273,7 @@ enum { MLX5_OPCODE_RDMA_WRITE_IMM = 0x09, MLX5_OPCODE_SEND = 0x0a, MLX5_OPCODE_SEND_IMM = 0x0b, + MLX5_OPCODE_LSO = 0x0e, MLX5_OPCODE_RDMA_READ = 0x10, MLX5_OPCODE_ATOMIC_CS = 0x11, MLX5_OPCODE_ATOMIC_FA = 0x12, @@ -541,6 +551,10 @@ struct mlx5_cmd_prot_block { u8 sig; }; +enum { + MLX5_CQE_SYND_FLUSHED_IN_ERROR = 5, +}; + struct mlx5_err_cqe { u8 rsvd0[32]; __be32 srqn; @@ -554,13 +568,22 @@ struct mlx5_err_cqe { }; struct mlx5_cqe64 { - u8 rsvd0[17]; + u8 rsvd0[4]; + u8 lro_tcppsh_abort_dupack; + u8 lro_min_ttl; + __be16 lro_tcp_win; + __be32 lro_ack_seq_num; + __be32 rss_hash_result; + u8 rss_hash_type; u8 ml_path; - u8 rsvd20[4]; + u8 rsvd20[2]; + __be16 check_sum; __be16 slid; __be32 flags_rqpn; - u8 rsvd28[4]; - __be32 srqn; + u8 hds_ip_ext; + u8 l4_hdr_type_etc; + __be16 vlan_info; + __be32 srqn; /* [31:24]: lro_num_seg, [23:0]: srqn */ __be32 imm_inval_pkey; u8 rsvd40[4]; __be32 byte_cnt; @@ -571,6 +594,40 @@ struct mlx5_cqe64 { u8 op_own; }; +static inline int get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) +{ + return (cqe->lro_tcppsh_abort_dupack >> 6) & 1; +} + +static inline u8 get_cqe_l4_hdr_type(struct mlx5_cqe64 *cqe) +{ + return (cqe->l4_hdr_type_etc >> 4) & 0x7; +} + +static inline int cqe_has_vlan(struct mlx5_cqe64 *cqe) +{ + return !!(cqe->l4_hdr_type_etc & 0x1); +} + +enum { + CQE_L4_HDR_TYPE_NONE = 0x0, + CQE_L4_HDR_TYPE_TCP_NO_ACK = 0x1, + CQE_L4_HDR_TYPE_UDP = 0x2, + CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA = 0x3, + CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA = 0x4, +}; + +enum { + CQE_RSS_HTYPE_IP = 0x3 << 6, + CQE_RSS_HTYPE_L4 = 0x3 << 2, +}; + +enum { + CQE_L2_OK = 1 << 0, + CQE_L3_OK = 1 << 1, + CQE_L4_OK = 1 << 2, +}; + struct mlx5_sig_err_cqe { u8 rsvd0[16]; __be32 expected_trans_sig; @@ -996,4 +1053,52 @@ struct mlx5_destroy_psv_out { u8 rsvd[8]; }; +#define MLX5_CMD_OP_MAX 0x920 + +enum { + VPORT_STATE_DOWN = 0x0, + VPORT_STATE_UP = 0x1, +}; + +enum { + MLX5_L3_PROT_TYPE_IPV4 = 0, + MLX5_L3_PROT_TYPE_IPV6 = 1, +}; + +enum { + MLX5_L4_PROT_TYPE_TCP = 0, + MLX5_L4_PROT_TYPE_UDP = 1, +}; + +enum { + MLX5_HASH_FIELD_SEL_SRC_IP = 1 << 0, + MLX5_HASH_FIELD_SEL_DST_IP = 1 << 1, + MLX5_HASH_FIELD_SEL_L4_SPORT = 1 << 2, + MLX5_HASH_FIELD_SEL_L4_DPORT = 1 << 3, + MLX5_HASH_FIELD_SEL_IPSEC_SPI = 1 << 4, +}; + +enum { + MLX5_MATCH_OUTER_HEADERS = 1 << 0, + MLX5_MATCH_MISC_PARAMETERS = 1 << 1, + MLX5_MATCH_INNER_HEADERS = 1 << 2, + +}; + +enum { + MLX5_FLOW_TABLE_TYPE_NIC_RCV = 0, + MLX5_FLOW_TABLE_TYPE_ESWITCH = 4, +}; + +enum { + MLX5_FLOW_CONTEXT_DEST_TYPE_VPORT = 0, + MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE = 1, + MLX5_FLOW_CONTEXT_DEST_TYPE_TIR = 2, +}; + +enum { + MLX5_RQC_RQ_TYPE_MEMORY_RQ_INLINE = 0x0, + MLX5_RQC_RQ_TYPE_MEMORY_RQ_RPM = 0x1, +}; + #endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9e8979502826..3fd4fdc1ba16 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -44,7 +44,6 @@ #include #include -#include enum { MLX5_BOARD_ID_LEN = 64, @@ -278,7 +277,6 @@ struct mlx5_general_caps { u8 log_max_mkey; u8 log_max_pd; u8 log_max_srq; - u8 log_max_strq; u8 log_max_mrw_sz; u8 log_max_bsf_list_size; u8 log_max_klm_list_size; @@ -664,6 +662,8 @@ int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); +int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar); +void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar); void mlx5_health_cleanup(void); void __init mlx5_health_init(void); void mlx5_start_health_poll(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cb3ad17edd1f..b27e9f6e090a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -28,11 +28,44 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - */ - +*/ #ifndef MLX5_IFC_H #define MLX5_IFC_H +enum { + MLX5_EVENT_TYPE_CODING_COMPLETION_EVENTS = 0x0, + MLX5_EVENT_TYPE_CODING_PATH_MIGRATED_SUCCEEDED = 0x1, + MLX5_EVENT_TYPE_CODING_COMMUNICATION_ESTABLISHED = 0x2, + MLX5_EVENT_TYPE_CODING_SEND_QUEUE_DRAINED = 0x3, + MLX5_EVENT_TYPE_CODING_LAST_WQE_REACHED = 0x13, + MLX5_EVENT_TYPE_CODING_SRQ_LIMIT = 0x14, + MLX5_EVENT_TYPE_CODING_DCT_ALL_CONNECTIONS_CLOSED = 0x1c, + MLX5_EVENT_TYPE_CODING_DCT_ACCESS_KEY_VIOLATION = 0x1d, + MLX5_EVENT_TYPE_CODING_CQ_ERROR = 0x4, + MLX5_EVENT_TYPE_CODING_LOCAL_WQ_CATASTROPHIC_ERROR = 0x5, + MLX5_EVENT_TYPE_CODING_PATH_MIGRATION_FAILED = 0x7, + MLX5_EVENT_TYPE_CODING_PAGE_FAULT_EVENT = 0xc, + MLX5_EVENT_TYPE_CODING_INVALID_REQUEST_LOCAL_WQ_ERROR = 0x10, + MLX5_EVENT_TYPE_CODING_LOCAL_ACCESS_VIOLATION_WQ_ERROR = 0x11, + MLX5_EVENT_TYPE_CODING_LOCAL_SRQ_CATASTROPHIC_ERROR = 0x12, + MLX5_EVENT_TYPE_CODING_INTERNAL_ERROR = 0x8, + MLX5_EVENT_TYPE_CODING_PORT_STATE_CHANGE = 0x9, + MLX5_EVENT_TYPE_CODING_GPIO_EVENT = 0x15, + MLX5_EVENT_TYPE_CODING_REMOTE_CONFIGURATION_PROTOCOL_EVENT = 0x19, + MLX5_EVENT_TYPE_CODING_DOORBELL_BLUEFLAME_CONGESTION_EVENT = 0x1a, + MLX5_EVENT_TYPE_CODING_STALL_VL_EVENT = 0x1b, + MLX5_EVENT_TYPE_CODING_DROPPED_PACKET_LOGGED_EVENT = 0x1f, + MLX5_EVENT_TYPE_CODING_COMMAND_INTERFACE_COMPLETION = 0xa, + MLX5_EVENT_TYPE_CODING_PAGE_REQUEST = 0xb +}; + +enum { + MLX5_MODIFY_TIR_BITMASK_LRO = 0x0, + MLX5_MODIFY_TIR_BITMASK_INDIRECT_TABLE = 0x1, + MLX5_MODIFY_TIR_BITMASK_HASH = 0x2, + MLX5_MODIFY_TIR_BITMASK_TUNNELED_OFFLOAD_EN = 0x3 +}; + enum { MLX5_CMD_OP_QUERY_HCA_CAP = 0x100, MLX5_CMD_OP_QUERY_ADAPTER = 0x101, @@ -43,6 +76,8 @@ enum { MLX5_CMD_OP_QUERY_PAGES = 0x107, MLX5_CMD_OP_MANAGE_PAGES = 0x108, MLX5_CMD_OP_SET_HCA_CAP = 0x109, + MLX5_CMD_OP_QUERY_ISSI = 0x10a, + MLX5_CMD_OP_SET_ISSI = 0x10b, MLX5_CMD_OP_CREATE_MKEY = 0x200, MLX5_CMD_OP_QUERY_MKEY = 0x201, MLX5_CMD_OP_DESTROY_MKEY = 0x202, @@ -66,6 +101,7 @@ enum { MLX5_CMD_OP_2ERR_QP = 0x507, MLX5_CMD_OP_2RST_QP = 0x50a, MLX5_CMD_OP_QUERY_QP = 0x50b, + MLX5_CMD_OP_SQD_RTS_QP = 0x50c, MLX5_CMD_OP_INIT2INIT_QP = 0x50e, MLX5_CMD_OP_CREATE_PSV = 0x600, MLX5_CMD_OP_DESTROY_PSV = 0x601, @@ -73,7 +109,10 @@ enum { MLX5_CMD_OP_DESTROY_SRQ = 0x701, MLX5_CMD_OP_QUERY_SRQ = 0x702, MLX5_CMD_OP_ARM_RQ = 0x703, - MLX5_CMD_OP_RESIZE_SRQ = 0x704, + MLX5_CMD_OP_CREATE_XRC_SRQ = 0x705, + MLX5_CMD_OP_DESTROY_XRC_SRQ = 0x706, + MLX5_CMD_OP_QUERY_XRC_SRQ = 0x707, + MLX5_CMD_OP_ARM_XRC_SRQ = 0x708, MLX5_CMD_OP_CREATE_DCT = 0x710, MLX5_CMD_OP_DESTROY_DCT = 0x711, MLX5_CMD_OP_DRAIN_DCT = 0x712, @@ -85,8 +124,12 @@ enum { MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT = 0x753, MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT = 0x754, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT = 0x755, - MLX5_CMD_OP_QUERY_RCOE_ADDRESS = 0x760, + MLX5_CMD_OP_QUERY_ROCE_ADDRESS = 0x760, MLX5_CMD_OP_SET_ROCE_ADDRESS = 0x761, + MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT = 0x762, + MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT = 0x763, + MLX5_CMD_OP_QUERY_HCA_VPORT_GID = 0x764, + MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY = 0x765, MLX5_CMD_OP_QUERY_VPORT_COUNTER = 0x770, MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, @@ -98,7 +141,7 @@ enum { MLX5_CMD_OP_CONFIG_INT_MODERATION = 0x804, MLX5_CMD_OP_ACCESS_REG = 0x805, MLX5_CMD_OP_ATTACH_TO_MCG = 0x806, - MLX5_CMD_OP_DETACH_FROM_MCG = 0x807, + MLX5_CMD_OP_DETTACH_FROM_MCG = 0x807, MLX5_CMD_OP_GET_DROPPED_PACKET_LOG = 0x80a, MLX5_CMD_OP_MAD_IFC = 0x50d, MLX5_CMD_OP_QUERY_MAD_DEMUX = 0x80b, @@ -106,23 +149,22 @@ enum { MLX5_CMD_OP_NOP = 0x80d, MLX5_CMD_OP_ALLOC_XRCD = 0x80e, MLX5_CMD_OP_DEALLOC_XRCD = 0x80f, - MLX5_CMD_OP_SET_BURST_SIZE = 0x812, - MLX5_CMD_OP_QUERY_BURST_SZIE = 0x813, - MLX5_CMD_OP_ACTIVATE_TRACER = 0x814, - MLX5_CMD_OP_DEACTIVATE_TRACER = 0x815, - MLX5_CMD_OP_CREATE_SNIFFER_RULE = 0x820, - MLX5_CMD_OP_DESTROY_SNIFFER_RULE = 0x821, - MLX5_CMD_OP_QUERY_CONG_PARAMS = 0x822, - MLX5_CMD_OP_MODIFY_CONG_PARAMS = 0x823, - MLX5_CMD_OP_QUERY_CONG_STATISTICS = 0x824, + MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN = 0x816, + MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN = 0x817, + MLX5_CMD_OP_QUERY_CONG_STATUS = 0x822, + MLX5_CMD_OP_MODIFY_CONG_STATUS = 0x823, + MLX5_CMD_OP_QUERY_CONG_PARAMS = 0x824, + MLX5_CMD_OP_MODIFY_CONG_PARAMS = 0x825, + MLX5_CMD_OP_QUERY_CONG_STATISTICS = 0x826, + MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT = 0x827, + MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT = 0x828, + MLX5_CMD_OP_SET_L2_TABLE_ENTRY = 0x829, + MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY = 0x82a, + MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY = 0x82b, MLX5_CMD_OP_CREATE_TIR = 0x900, MLX5_CMD_OP_MODIFY_TIR = 0x901, MLX5_CMD_OP_DESTROY_TIR = 0x902, MLX5_CMD_OP_QUERY_TIR = 0x903, - MLX5_CMD_OP_CREATE_TIS = 0x912, - MLX5_CMD_OP_MODIFY_TIS = 0x913, - MLX5_CMD_OP_DESTROY_TIS = 0x914, - MLX5_CMD_OP_QUERY_TIS = 0x915, MLX5_CMD_OP_CREATE_SQ = 0x904, MLX5_CMD_OP_MODIFY_SQ = 0x905, MLX5_CMD_OP_DESTROY_SQ = 0x906, @@ -135,9 +177,430 @@ enum { MLX5_CMD_OP_MODIFY_RMP = 0x90d, MLX5_CMD_OP_DESTROY_RMP = 0x90e, MLX5_CMD_OP_QUERY_RMP = 0x90f, - MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY = 0x910, - MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY = 0x911, - MLX5_CMD_OP_MAX = 0x911 + MLX5_CMD_OP_CREATE_TIS = 0x912, + MLX5_CMD_OP_MODIFY_TIS = 0x913, + MLX5_CMD_OP_DESTROY_TIS = 0x914, + MLX5_CMD_OP_QUERY_TIS = 0x915, + MLX5_CMD_OP_CREATE_RQT = 0x916, + MLX5_CMD_OP_MODIFY_RQT = 0x917, + MLX5_CMD_OP_DESTROY_RQT = 0x918, + MLX5_CMD_OP_QUERY_RQT = 0x919, + MLX5_CMD_OP_CREATE_FLOW_TABLE = 0x930, + MLX5_CMD_OP_DESTROY_FLOW_TABLE = 0x931, + MLX5_CMD_OP_QUERY_FLOW_TABLE = 0x932, + MLX5_CMD_OP_CREATE_FLOW_GROUP = 0x933, + MLX5_CMD_OP_DESTROY_FLOW_GROUP = 0x934, + MLX5_CMD_OP_QUERY_FLOW_GROUP = 0x935, + MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY = 0x936, + MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY = 0x937, + MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY = 0x938 +}; + +struct mlx5_ifc_flow_table_fields_supported_bits { + u8 outer_dmac[0x1]; + u8 outer_smac[0x1]; + u8 outer_ether_type[0x1]; + u8 reserved_0[0x1]; + u8 outer_first_prio[0x1]; + u8 outer_first_cfi[0x1]; + u8 outer_first_vid[0x1]; + u8 reserved_1[0x1]; + u8 outer_second_prio[0x1]; + u8 outer_second_cfi[0x1]; + u8 outer_second_vid[0x1]; + u8 reserved_2[0x1]; + u8 outer_sip[0x1]; + u8 outer_dip[0x1]; + u8 outer_frag[0x1]; + u8 outer_ip_protocol[0x1]; + u8 outer_ip_ecn[0x1]; + u8 outer_ip_dscp[0x1]; + u8 outer_udp_sport[0x1]; + u8 outer_udp_dport[0x1]; + u8 outer_tcp_sport[0x1]; + u8 outer_tcp_dport[0x1]; + u8 outer_tcp_flags[0x1]; + u8 outer_gre_protocol[0x1]; + u8 outer_gre_key[0x1]; + u8 outer_vxlan_vni[0x1]; + u8 reserved_3[0x5]; + u8 source_eswitch_port[0x1]; + + u8 inner_dmac[0x1]; + u8 inner_smac[0x1]; + u8 inner_ether_type[0x1]; + u8 reserved_4[0x1]; + u8 inner_first_prio[0x1]; + u8 inner_first_cfi[0x1]; + u8 inner_first_vid[0x1]; + u8 reserved_5[0x1]; + u8 inner_second_prio[0x1]; + u8 inner_second_cfi[0x1]; + u8 inner_second_vid[0x1]; + u8 reserved_6[0x1]; + u8 inner_sip[0x1]; + u8 inner_dip[0x1]; + u8 inner_frag[0x1]; + u8 inner_ip_protocol[0x1]; + u8 inner_ip_ecn[0x1]; + u8 inner_ip_dscp[0x1]; + u8 inner_udp_sport[0x1]; + u8 inner_udp_dport[0x1]; + u8 inner_tcp_sport[0x1]; + u8 inner_tcp_dport[0x1]; + u8 inner_tcp_flags[0x1]; + u8 reserved_7[0x9]; + + u8 reserved_8[0x40]; +}; + +struct mlx5_ifc_flow_table_prop_layout_bits { + u8 ft_support[0x1]; + u8 reserved_0[0x1f]; + + u8 reserved_1[0x2]; + u8 log_max_ft_size[0x6]; + u8 reserved_2[0x10]; + u8 max_ft_level[0x8]; + + u8 reserved_3[0x20]; + + u8 reserved_4[0x18]; + u8 log_max_ft_num[0x8]; + + u8 reserved_5[0x18]; + u8 log_max_destination[0x8]; + + u8 reserved_6[0x18]; + u8 log_max_flow[0x8]; + + u8 reserved_7[0x40]; + + struct mlx5_ifc_flow_table_fields_supported_bits ft_field_support; + + struct mlx5_ifc_flow_table_fields_supported_bits ft_field_bitmask_support; +}; + +struct mlx5_ifc_odp_per_transport_service_cap_bits { + u8 send[0x1]; + u8 receive[0x1]; + u8 write[0x1]; + u8 read[0x1]; + u8 reserved_0[0x1]; + u8 srq_receive[0x1]; + u8 reserved_1[0x1a]; +}; + +struct mlx5_ifc_fte_match_set_lyr_2_4_bits { + u8 smac_47_16[0x20]; + + u8 smac_15_0[0x10]; + u8 ethertype[0x10]; + + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 first_prio[0x3]; + u8 first_cfi[0x1]; + u8 first_vid[0xc]; + + u8 ip_protocol[0x8]; + u8 ip_dscp[0x6]; + u8 ip_ecn[0x2]; + u8 vlan_tag[0x1]; + u8 reserved_0[0x1]; + u8 frag[0x1]; + u8 reserved_1[0x4]; + u8 tcp_flags[0x9]; + + u8 tcp_sport[0x10]; + u8 tcp_dport[0x10]; + + u8 reserved_2[0x20]; + + u8 udp_sport[0x10]; + u8 udp_dport[0x10]; + + u8 src_ip[4][0x20]; + + u8 dst_ip[4][0x20]; +}; + +struct mlx5_ifc_fte_match_set_misc_bits { + u8 reserved_0[0x20]; + + u8 reserved_1[0x10]; + u8 source_port[0x10]; + + u8 outer_second_prio[0x3]; + u8 outer_second_cfi[0x1]; + u8 outer_second_vid[0xc]; + u8 inner_second_prio[0x3]; + u8 inner_second_cfi[0x1]; + u8 inner_second_vid[0xc]; + + u8 outer_second_vlan_tag[0x1]; + u8 inner_second_vlan_tag[0x1]; + u8 reserved_2[0xe]; + u8 gre_protocol[0x10]; + + u8 gre_key_h[0x18]; + u8 gre_key_l[0x8]; + + u8 vxlan_vni[0x18]; + u8 reserved_3[0x8]; + + u8 reserved_4[0x20]; + + u8 reserved_5[0xc]; + u8 outer_ipv6_flow_label[0x14]; + + u8 reserved_6[0xc]; + u8 inner_ipv6_flow_label[0x14]; + + u8 reserved_7[0xe0]; +}; + +struct mlx5_ifc_cmd_pas_bits { + u8 pa_h[0x20]; + + u8 pa_l[0x14]; + u8 reserved_0[0xc]; +}; + +struct mlx5_ifc_uint64_bits { + u8 hi[0x20]; + + u8 lo[0x20]; +}; + +enum { + MLX5_ADS_STAT_RATE_NO_LIMIT = 0x0, + MLX5_ADS_STAT_RATE_2_5GBPS = 0x7, + MLX5_ADS_STAT_RATE_10GBPS = 0x8, + MLX5_ADS_STAT_RATE_30GBPS = 0x9, + MLX5_ADS_STAT_RATE_5GBPS = 0xa, + MLX5_ADS_STAT_RATE_20GBPS = 0xb, + MLX5_ADS_STAT_RATE_40GBPS = 0xc, + MLX5_ADS_STAT_RATE_60GBPS = 0xd, + MLX5_ADS_STAT_RATE_80GBPS = 0xe, + MLX5_ADS_STAT_RATE_120GBPS = 0xf, +}; + +struct mlx5_ifc_ads_bits { + u8 fl[0x1]; + u8 free_ar[0x1]; + u8 reserved_0[0xe]; + u8 pkey_index[0x10]; + + u8 reserved_1[0x8]; + u8 grh[0x1]; + u8 mlid[0x7]; + u8 rlid[0x10]; + + u8 ack_timeout[0x5]; + u8 reserved_2[0x3]; + u8 src_addr_index[0x8]; + u8 reserved_3[0x4]; + u8 stat_rate[0x4]; + u8 hop_limit[0x8]; + + u8 reserved_4[0x4]; + u8 tclass[0x8]; + u8 flow_label[0x14]; + + u8 rgid_rip[16][0x8]; + + u8 reserved_5[0x4]; + u8 f_dscp[0x1]; + u8 f_ecn[0x1]; + u8 reserved_6[0x1]; + u8 f_eth_prio[0x1]; + u8 ecn[0x2]; + u8 dscp[0x6]; + u8 udp_sport[0x10]; + + u8 dei_cfi[0x1]; + u8 eth_prio[0x3]; + u8 sl[0x4]; + u8 port[0x8]; + u8 rmac_47_32[0x10]; + + u8 rmac_31_0[0x20]; +}; + +struct mlx5_ifc_flow_table_nic_cap_bits { + u8 reserved_0[0x200]; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive; + + u8 reserved_1[0x200]; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive_sniffer; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit; + + u8 reserved_2[0x200]; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer; + + u8 reserved_3[0x7200]; +}; + +struct mlx5_ifc_per_protocol_networking_offload_caps_bits { + u8 csum_cap[0x1]; + u8 vlan_cap[0x1]; + u8 lro_cap[0x1]; + u8 lro_psh_flag[0x1]; + u8 lro_time_stamp[0x1]; + u8 reserved_0[0x6]; + u8 max_lso_cap[0x5]; + u8 reserved_1[0x4]; + u8 rss_ind_tbl_cap[0x4]; + u8 reserved_2[0x3]; + u8 tunnel_lso_const_out_ip_id[0x1]; + u8 reserved_3[0x2]; + u8 tunnel_statless_gre[0x1]; + u8 tunnel_stateless_vxlan[0x1]; + + u8 reserved_4[0x20]; + + u8 reserved_5[0x10]; + u8 lro_min_mss_size[0x10]; + + u8 reserved_6[0x120]; + + u8 lro_timer_supported_periods[4][0x20]; + + u8 reserved_7[0x600]; +}; + +struct mlx5_ifc_roce_cap_bits { + u8 roce_apm[0x1]; + u8 reserved_0[0x1f]; + + u8 reserved_1[0x60]; + + u8 reserved_2[0xc]; + u8 l3_type[0x4]; + u8 reserved_3[0x8]; + u8 roce_version[0x8]; + + u8 reserved_4[0x10]; + u8 r_roce_dest_udp_port[0x10]; + + u8 r_roce_max_src_udp_port[0x10]; + u8 r_roce_min_src_udp_port[0x10]; + + u8 reserved_5[0x10]; + u8 roce_address_table_size[0x10]; + + u8 reserved_6[0x700]; +}; + +enum { + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_1_BYTE = 0x0, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_2_BYTES = 0x2, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_4_BYTES = 0x4, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_8_BYTES = 0x8, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_16_BYTES = 0x10, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_32_BYTES = 0x20, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_64_BYTES = 0x40, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_128_BYTES = 0x80, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_256_BYTES = 0x100, +}; + +enum { + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_1_BYTE = 0x1, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_2_BYTES = 0x2, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_4_BYTES = 0x4, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_8_BYTES = 0x8, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_16_BYTES = 0x10, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_32_BYTES = 0x20, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_64_BYTES = 0x40, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_128_BYTES = 0x80, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_256_BYTES = 0x100, +}; + +struct mlx5_ifc_atomic_caps_bits { + u8 reserved_0[0x40]; + + u8 atomic_req_endianness[0x1]; + u8 reserved_1[0x1f]; + + u8 reserved_2[0x20]; + + u8 reserved_3[0x10]; + u8 atomic_operations[0x10]; + + u8 reserved_4[0x10]; + u8 atomic_size_qp[0x10]; + + u8 reserved_5[0x10]; + u8 atomic_size_dc[0x10]; + + u8 reserved_6[0x720]; +}; + +struct mlx5_ifc_odp_cap_bits { + u8 reserved_0[0x40]; + + u8 sig[0x1]; + u8 reserved_1[0x1f]; + + u8 reserved_2[0x20]; + + struct mlx5_ifc_odp_per_transport_service_cap_bits rc_odp_caps; + + struct mlx5_ifc_odp_per_transport_service_cap_bits uc_odp_caps; + + struct mlx5_ifc_odp_per_transport_service_cap_bits ud_odp_caps; + + u8 reserved_3[0x720]; +}; + +enum { + MLX5_WQ_TYPE_LINKED_LIST = 0x0, + MLX5_WQ_TYPE_CYCLIC = 0x1, + MLX5_WQ_TYPE_STRQ = 0x2, +}; + +enum { + MLX5_WQ_END_PAD_MODE_NONE = 0x0, + MLX5_WQ_END_PAD_MODE_ALIGN = 0x1, +}; + +enum { + MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_8_GID_ENTRIES = 0x0, + MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_16_GID_ENTRIES = 0x1, + MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_32_GID_ENTRIES = 0x2, + MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_64_GID_ENTRIES = 0x3, + MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_128_GID_ENTRIES = 0x4, +}; + +enum { + MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_128_ENTRIES = 0x0, + MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_256_ENTRIES = 0x1, + MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_512_ENTRIES = 0x2, + MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_1K_ENTRIES = 0x3, + MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_2K_ENTRIES = 0x4, + MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_4K_ENTRIES = 0x5, +}; + +enum { + MLX5_CMD_HCA_CAP_PORT_TYPE_IB = 0x0, + MLX5_CMD_HCA_CAP_PORT_TYPE_ETHERNET = 0x1, +}; + +enum { + MLX5_CMD_HCA_CAP_CMDIF_CHECKSUM_DISABLED = 0x0, + MLX5_CMD_HCA_CAP_CMDIF_CHECKSUM_INITIAL_STATE = 0x1, + MLX5_CMD_HCA_CAP_CMDIF_CHECKSUM_ENABLED = 0x3, +}; + +enum { + MLX5_CAP_PORT_TYPE_IB = 0x0, + MLX5_CAP_PORT_TYPE_ETH = 0x1, }; struct mlx5_ifc_cmd_hca_cap_bits { @@ -148,9 +611,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_1[0xb]; u8 log_max_qp[0x5]; - u8 log_max_strq_sz[0x8]; - u8 reserved_2[0x3]; - u8 log_max_srqs[0x5]; + u8 reserved_2[0xb]; + u8 log_max_srq[0x5]; u8 reserved_3[0x10]; u8 reserved_4[0x8]; @@ -185,165 +647,6123 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 pad_cap[0x1]; u8 cc_query_allowed[0x1]; u8 cc_modify_allowed[0x1]; - u8 reserved_15[0x1d]; + u8 reserved_15[0xd]; + u8 gid_table_size[0x10]; - u8 reserved_16[0x6]; + u8 out_of_seq_cnt[0x1]; + u8 vport_counters[0x1]; + u8 reserved_16[0x4]; u8 max_qp_cnt[0xa]; u8 pkey_table_size[0x10]; - u8 eswitch_owner[0x1]; - u8 reserved_17[0xa]; + u8 vport_group_manager[0x1]; + u8 vhca_group_manager[0x1]; + u8 ib_virt[0x1]; + u8 eth_virt[0x1]; + u8 reserved_17[0x1]; + u8 ets[0x1]; + u8 nic_flow_table[0x1]; + u8 reserved_18[0x4]; u8 local_ca_ack_delay[0x5]; - u8 reserved_18[0x8]; + u8 reserved_19[0x6]; + u8 port_type[0x2]; u8 num_ports[0x8]; - u8 reserved_19[0x3]; + u8 reserved_20[0x3]; u8 log_max_msg[0x5]; - u8 reserved_20[0x18]; + u8 reserved_21[0x18]; u8 stat_rate_support[0x10]; - u8 reserved_21[0x10]; + u8 reserved_22[0xc]; + u8 cqe_version[0x4]; - u8 reserved_22[0x10]; + u8 compact_address_vector[0x1]; + u8 reserved_23[0xe]; + u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; - u8 reserved_23[0x1]; + u8 reserved_24[0x1]; u8 wq_signature[0x1]; u8 sctr_data_cqe[0x1]; - u8 reserved_24[0x1]; + u8 reserved_25[0x1]; u8 sho[0x1]; u8 tph[0x1]; u8 rf[0x1]; - u8 dc[0x1]; - u8 reserved_25[0x2]; + u8 dct[0x1]; + u8 reserved_26[0x1]; + u8 eth_net_offloads[0x1]; u8 roce[0x1]; u8 atomic[0x1]; - u8 rsz_srq[0x1]; + u8 reserved_27[0x1]; u8 cq_oi[0x1]; u8 cq_resize[0x1]; u8 cq_moderation[0x1]; - u8 sniffer_rule_flow[0x1]; - u8 sniffer_rule_vport[0x1]; - u8 sniffer_rule_phy[0x1]; - u8 reserved_26[0x1]; + u8 reserved_28[0x3]; + u8 cq_eq_remap[0x1]; u8 pg[0x1]; u8 block_lb_mc[0x1]; - u8 reserved_27[0x3]; + u8 reserved_29[0x1]; + u8 scqe_break_moderation[0x1]; + u8 reserved_30[0x1]; u8 cd[0x1]; - u8 reserved_28[0x1]; + u8 reserved_31[0x1]; u8 apm[0x1]; - u8 reserved_29[0x7]; + u8 reserved_32[0x7]; u8 qkv[0x1]; u8 pkv[0x1]; - u8 reserved_30[0x4]; + u8 reserved_33[0x4]; u8 xrc[0x1]; u8 ud[0x1]; u8 uc[0x1]; u8 rc[0x1]; - u8 reserved_31[0xa]; + u8 reserved_34[0xa]; u8 uar_sz[0x6]; - u8 reserved_32[0x8]; + u8 reserved_35[0x8]; u8 log_pg_sz[0x8]; u8 bf[0x1]; - u8 reserved_33[0xa]; + u8 reserved_36[0x1]; + u8 pad_tx_eth_packet[0x1]; + u8 reserved_37[0x8]; u8 log_bf_reg_size[0x5]; - u8 reserved_34[0x10]; + u8 reserved_38[0x10]; - u8 reserved_35[0x10]; + u8 reserved_39[0x10]; u8 max_wqe_sz_sq[0x10]; - u8 reserved_36[0x10]; + u8 reserved_40[0x10]; u8 max_wqe_sz_rq[0x10]; - u8 reserved_37[0x10]; + u8 reserved_41[0x10]; u8 max_wqe_sz_sq_dc[0x10]; - u8 reserved_38[0x7]; + u8 reserved_42[0x7]; u8 max_qp_mcg[0x19]; - u8 reserved_39[0x18]; + u8 reserved_43[0x18]; u8 log_max_mcg[0x8]; - u8 reserved_40[0xb]; + u8 reserved_44[0x3]; + u8 log_max_transport_domain[0x5]; + u8 reserved_45[0x3]; u8 log_max_pd[0x5]; - u8 reserved_41[0xb]; + u8 reserved_46[0xb]; u8 log_max_xrcd[0x5]; - u8 reserved_42[0x20]; + u8 reserved_47[0x20]; - u8 reserved_43[0x3]; + u8 reserved_48[0x3]; u8 log_max_rq[0x5]; - u8 reserved_44[0x3]; + u8 reserved_49[0x3]; u8 log_max_sq[0x5]; - u8 reserved_45[0x3]; + u8 reserved_50[0x3]; u8 log_max_tir[0x5]; - u8 reserved_46[0x3]; + u8 reserved_51[0x3]; u8 log_max_tis[0x5]; - u8 reserved_47[0x13]; - u8 log_max_rq_per_tir[0x5]; - u8 reserved_48[0x3]; + u8 basic_cyclic_rcv_wqe[0x1]; + u8 reserved_52[0x2]; + u8 log_max_rmp[0x5]; + u8 reserved_53[0x3]; + u8 log_max_rqt[0x5]; + u8 reserved_54[0x3]; + u8 log_max_rqt_size[0x5]; + u8 reserved_55[0x3]; u8 log_max_tis_per_sq[0x5]; - u8 reserved_49[0xe0]; + u8 reserved_56[0x3]; + u8 log_max_stride_sz_rq[0x5]; + u8 reserved_57[0x3]; + u8 log_min_stride_sz_rq[0x5]; + u8 reserved_58[0x3]; + u8 log_max_stride_sz_sq[0x5]; + u8 reserved_59[0x3]; + u8 log_min_stride_sz_sq[0x5]; + + u8 reserved_60[0x1b]; + u8 log_max_wq_sz[0x5]; + + u8 reserved_61[0xa0]; - u8 reserved_50[0x10]; + u8 reserved_62[0x3]; + u8 log_max_l2_table[0x5]; + u8 reserved_63[0x8]; u8 log_uar_page_sz[0x10]; - u8 reserved_51[0x100]; + u8 reserved_64[0x100]; - u8 reserved_52[0x1f]; + u8 reserved_65[0x1f]; u8 cqe_zip[0x1]; u8 cqe_zip_timeout[0x10]; u8 cqe_zip_max_num[0x10]; - u8 reserved_53[0x220]; + u8 reserved_66[0x220]; }; -struct mlx5_ifc_set_hca_cap_in_bits { - u8 opcode[0x10]; - u8 reserved_0[0x10]; +enum { + MLX5_DEST_FORMAT_STRUCT_DESTINATION_TYPE_FLOW_TABLE_ = 0x1, + MLX5_DEST_FORMAT_STRUCT_DESTINATION_TYPE_TIR = 0x2, +}; - u8 reserved_1[0x10]; - u8 op_mod[0x10]; +struct mlx5_ifc_dest_format_struct_bits { + u8 destination_type[0x8]; + u8 destination_id[0x18]; - u8 reserved_2[0x40]; + u8 reserved_0[0x20]; +}; + +struct mlx5_ifc_fte_match_param_bits { + struct mlx5_ifc_fte_match_set_lyr_2_4_bits outer_headers; + + struct mlx5_ifc_fte_match_set_misc_bits misc_parameters; + + struct mlx5_ifc_fte_match_set_lyr_2_4_bits inner_headers; - struct mlx5_ifc_cmd_hca_cap_bits hca_capability_struct; + u8 reserved_0[0xa00]; }; -struct mlx5_ifc_query_hca_cap_in_bits { - u8 opcode[0x10]; - u8 reserved_0[0x10]; +enum { + MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_SRC_IP = 0x0, + MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_DST_IP = 0x1, + MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_L4_SPORT = 0x2, + MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_L4_DPORT = 0x3, + MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_IPSEC_SPI = 0x4, +}; - u8 reserved_1[0x10]; - u8 op_mod[0x10]; +struct mlx5_ifc_rx_hash_field_select_bits { + u8 l3_prot_type[0x1]; + u8 l4_prot_type[0x1]; + u8 selected_fields[0x1e]; +}; - u8 reserved_2[0x40]; +enum { + MLX5_WQ_WQ_TYPE_WQ_LINKED_LIST = 0x0, + MLX5_WQ_WQ_TYPE_WQ_CYCLIC = 0x1, }; -struct mlx5_ifc_query_hca_cap_out_bits { - u8 status[0x8]; +enum { + MLX5_WQ_END_PADDING_MODE_END_PAD_NONE = 0x0, + MLX5_WQ_END_PADDING_MODE_END_PAD_ALIGN = 0x1, +}; + +struct mlx5_ifc_wq_bits { + u8 wq_type[0x4]; + u8 wq_signature[0x1]; + u8 end_padding_mode[0x2]; + u8 cd_slave[0x1]; u8 reserved_0[0x18]; - u8 syndrome[0x20]; + u8 hds_skip_first_sge[0x1]; + u8 log2_hds_buf_size[0x3]; + u8 reserved_1[0x7]; + u8 page_offset[0x5]; + u8 lwm[0x10]; - u8 reserved_1[0x40]; + u8 reserved_2[0x8]; + u8 pd[0x18]; + + u8 reserved_3[0x8]; + u8 uar_page[0x18]; + + u8 dbr_addr[0x40]; + + u8 hw_counter[0x20]; + + u8 sw_counter[0x20]; + + u8 reserved_4[0xc]; + u8 log_wq_stride[0x4]; + u8 reserved_5[0x3]; + u8 log_wq_pg_sz[0x5]; + u8 reserved_6[0x3]; + u8 log_wq_sz[0x5]; + + u8 reserved_7[0x4e0]; - u8 capability_struct[256][0x8]; + struct mlx5_ifc_cmd_pas_bits pas[0]; }; -struct mlx5_ifc_set_hca_cap_out_bits { - u8 status[0x8]; - u8 reserved_0[0x18]; +struct mlx5_ifc_rq_num_bits { + u8 reserved_0[0x8]; + u8 rq_num[0x18]; +}; - u8 syndrome[0x20]; +struct mlx5_ifc_mac_address_layout_bits { + u8 reserved_0[0x10]; + u8 mac_addr_47_32[0x10]; - u8 reserved_1[0x40]; + u8 mac_addr_31_0[0x20]; +}; + +struct mlx5_ifc_cong_control_r_roce_ecn_np_bits { + u8 reserved_0[0xa0]; + + u8 min_time_between_cnps[0x20]; + + u8 reserved_1[0x12]; + u8 cnp_dscp[0x6]; + u8 reserved_2[0x5]; + u8 cnp_802p_prio[0x3]; + + u8 reserved_3[0x720]; +}; + +struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits { + u8 reserved_0[0x60]; + + u8 reserved_1[0x4]; + u8 clamp_tgt_rate[0x1]; + u8 reserved_2[0x3]; + u8 clamp_tgt_rate_after_time_inc[0x1]; + u8 reserved_3[0x17]; + + u8 reserved_4[0x20]; + + u8 rpg_time_reset[0x20]; + + u8 rpg_byte_reset[0x20]; + + u8 rpg_threshold[0x20]; + + u8 rpg_max_rate[0x20]; + + u8 rpg_ai_rate[0x20]; + + u8 rpg_hai_rate[0x20]; + + u8 rpg_gd[0x20]; + + u8 rpg_min_dec_fac[0x20]; + + u8 rpg_min_rate[0x20]; + + u8 reserved_5[0xe0]; + + u8 rate_to_set_on_first_cnp[0x20]; + + u8 dce_tcp_g[0x20]; + + u8 dce_tcp_rtt[0x20]; + + u8 rate_reduce_monitor_period[0x20]; + + u8 reserved_6[0x20]; + + u8 initial_alpha_value[0x20]; + + u8 reserved_7[0x4a0]; +}; + +struct mlx5_ifc_cong_control_802_1qau_rp_bits { + u8 reserved_0[0x80]; + + u8 rppp_max_rps[0x20]; + + u8 rpg_time_reset[0x20]; + + u8 rpg_byte_reset[0x20]; + + u8 rpg_threshold[0x20]; + + u8 rpg_max_rate[0x20]; + + u8 rpg_ai_rate[0x20]; + + u8 rpg_hai_rate[0x20]; + + u8 rpg_gd[0x20]; + + u8 rpg_min_dec_fac[0x20]; + + u8 rpg_min_rate[0x20]; + + u8 reserved_1[0x640]; +}; + +enum { + MLX5_RESIZE_FIELD_SELECT_RESIZE_FIELD_SELECT_LOG_CQ_SIZE = 0x1, + MLX5_RESIZE_FIELD_SELECT_RESIZE_FIELD_SELECT_PAGE_OFFSET = 0x2, + MLX5_RESIZE_FIELD_SELECT_RESIZE_FIELD_SELECT_LOG_PAGE_SIZE = 0x4, +}; + +struct mlx5_ifc_resize_field_select_bits { + u8 resize_field_select[0x20]; +}; + +enum { + MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_CQ_PERIOD = 0x1, + MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_CQ_MAX_COUNT = 0x2, + MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_OI = 0x4, + MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_C_EQN = 0x8, +}; + +struct mlx5_ifc_modify_field_select_bits { + u8 modify_field_select[0x20]; +}; + +struct mlx5_ifc_field_select_r_roce_np_bits { + u8 field_select_r_roce_np[0x20]; +}; + +struct mlx5_ifc_field_select_r_roce_rp_bits { + u8 field_select_r_roce_rp[0x20]; +}; + +enum { + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPPP_MAX_RPS = 0x4, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_TIME_RESET = 0x8, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_BYTE_RESET = 0x10, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_THRESHOLD = 0x20, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_MAX_RATE = 0x40, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_AI_RATE = 0x80, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_HAI_RATE = 0x100, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_GD = 0x200, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_MIN_DEC_FAC = 0x400, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_MIN_RATE = 0x800, +}; + +struct mlx5_ifc_field_select_802_1qau_rp_bits { + u8 field_select_8021qaurp[0x20]; +}; + +struct mlx5_ifc_phys_layer_cntrs_bits { + u8 time_since_last_clear_high[0x20]; + + u8 time_since_last_clear_low[0x20]; + + u8 symbol_errors_high[0x20]; + + u8 symbol_errors_low[0x20]; + + u8 sync_headers_errors_high[0x20]; + + u8 sync_headers_errors_low[0x20]; + + u8 edpl_bip_errors_lane0_high[0x20]; + + u8 edpl_bip_errors_lane0_low[0x20]; + + u8 edpl_bip_errors_lane1_high[0x20]; + + u8 edpl_bip_errors_lane1_low[0x20]; + + u8 edpl_bip_errors_lane2_high[0x20]; + + u8 edpl_bip_errors_lane2_low[0x20]; + + u8 edpl_bip_errors_lane3_high[0x20]; + + u8 edpl_bip_errors_lane3_low[0x20]; + + u8 fc_fec_corrected_blocks_lane0_high[0x20]; + + u8 fc_fec_corrected_blocks_lane0_low[0x20]; + + u8 fc_fec_corrected_blocks_lane1_high[0x20]; + + u8 fc_fec_corrected_blocks_lane1_low[0x20]; + + u8 fc_fec_corrected_blocks_lane2_high[0x20]; + + u8 fc_fec_corrected_blocks_lane2_low[0x20]; + + u8 fc_fec_corrected_blocks_lane3_high[0x20]; + + u8 fc_fec_corrected_blocks_lane3_low[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane0_high[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane0_low[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane1_high[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane1_low[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane2_high[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane2_low[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane3_high[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane3_low[0x20]; + + u8 rs_fec_corrected_blocks_high[0x20]; + + u8 rs_fec_corrected_blocks_low[0x20]; + + u8 rs_fec_uncorrectable_blocks_high[0x20]; + + u8 rs_fec_uncorrectable_blocks_low[0x20]; + + u8 rs_fec_no_errors_blocks_high[0x20]; + + u8 rs_fec_no_errors_blocks_low[0x20]; + + u8 rs_fec_single_error_blocks_high[0x20]; + + u8 rs_fec_single_error_blocks_low[0x20]; + + u8 rs_fec_corrected_symbols_total_high[0x20]; + + u8 rs_fec_corrected_symbols_total_low[0x20]; + + u8 rs_fec_corrected_symbols_lane0_high[0x20]; + + u8 rs_fec_corrected_symbols_lane0_low[0x20]; + + u8 rs_fec_corrected_symbols_lane1_high[0x20]; + + u8 rs_fec_corrected_symbols_lane1_low[0x20]; + + u8 rs_fec_corrected_symbols_lane2_high[0x20]; + + u8 rs_fec_corrected_symbols_lane2_low[0x20]; + + u8 rs_fec_corrected_symbols_lane3_high[0x20]; + + u8 rs_fec_corrected_symbols_lane3_low[0x20]; + + u8 link_down_events[0x20]; + + u8 successful_recovery_events[0x20]; + + u8 reserved_0[0x180]; +}; + +struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits { + u8 transmit_queue_high[0x20]; + + u8 transmit_queue_low[0x20]; + + u8 reserved_0[0x780]; +}; + +struct mlx5_ifc_eth_per_prio_grp_data_layout_bits { + u8 rx_octets_high[0x20]; + + u8 rx_octets_low[0x20]; + + u8 reserved_0[0xc0]; + + u8 rx_frames_high[0x20]; + + u8 rx_frames_low[0x20]; + + u8 tx_octets_high[0x20]; + + u8 tx_octets_low[0x20]; + + u8 reserved_1[0xc0]; + + u8 tx_frames_high[0x20]; + + u8 tx_frames_low[0x20]; + + u8 rx_pause_high[0x20]; + + u8 rx_pause_low[0x20]; + + u8 rx_pause_duration_high[0x20]; + + u8 rx_pause_duration_low[0x20]; + + u8 tx_pause_high[0x20]; + + u8 tx_pause_low[0x20]; + + u8 tx_pause_duration_high[0x20]; + + u8 tx_pause_duration_low[0x20]; + + u8 rx_pause_transition_high[0x20]; + + u8 rx_pause_transition_low[0x20]; + + u8 reserved_2[0x400]; +}; + +struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits { + u8 port_transmit_wait_high[0x20]; + + u8 port_transmit_wait_low[0x20]; + + u8 reserved_0[0x780]; +}; + +struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits { + u8 dot3stats_alignment_errors_high[0x20]; + + u8 dot3stats_alignment_errors_low[0x20]; + + u8 dot3stats_fcs_errors_high[0x20]; + + u8 dot3stats_fcs_errors_low[0x20]; + + u8 dot3stats_single_collision_frames_high[0x20]; + + u8 dot3stats_single_collision_frames_low[0x20]; + + u8 dot3stats_multiple_collision_frames_high[0x20]; + + u8 dot3stats_multiple_collision_frames_low[0x20]; + + u8 dot3stats_sqe_test_errors_high[0x20]; + + u8 dot3stats_sqe_test_errors_low[0x20]; + + u8 dot3stats_deferred_transmissions_high[0x20]; + + u8 dot3stats_deferred_transmissions_low[0x20]; + + u8 dot3stats_late_collisions_high[0x20]; + + u8 dot3stats_late_collisions_low[0x20]; + + u8 dot3stats_excessive_collisions_high[0x20]; + + u8 dot3stats_excessive_collisions_low[0x20]; + + u8 dot3stats_internal_mac_transmit_errors_high[0x20]; + + u8 dot3stats_internal_mac_transmit_errors_low[0x20]; + + u8 dot3stats_carrier_sense_errors_high[0x20]; + + u8 dot3stats_carrier_sense_errors_low[0x20]; + + u8 dot3stats_frame_too_longs_high[0x20]; + + u8 dot3stats_frame_too_longs_low[0x20]; + + u8 dot3stats_internal_mac_receive_errors_high[0x20]; + + u8 dot3stats_internal_mac_receive_errors_low[0x20]; + + u8 dot3stats_symbol_errors_high[0x20]; + + u8 dot3stats_symbol_errors_low[0x20]; + + u8 dot3control_in_unknown_opcodes_high[0x20]; + + u8 dot3control_in_unknown_opcodes_low[0x20]; + + u8 dot3in_pause_frames_high[0x20]; + + u8 dot3in_pause_frames_low[0x20]; + + u8 dot3out_pause_frames_high[0x20]; + + u8 dot3out_pause_frames_low[0x20]; + + u8 reserved_0[0x3c0]; +}; + +struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits { + u8 ether_stats_drop_events_high[0x20]; + + u8 ether_stats_drop_events_low[0x20]; + + u8 ether_stats_octets_high[0x20]; + + u8 ether_stats_octets_low[0x20]; + + u8 ether_stats_pkts_high[0x20]; + + u8 ether_stats_pkts_low[0x20]; + + u8 ether_stats_broadcast_pkts_high[0x20]; + + u8 ether_stats_broadcast_pkts_low[0x20]; + + u8 ether_stats_multicast_pkts_high[0x20]; + + u8 ether_stats_multicast_pkts_low[0x20]; + + u8 ether_stats_crc_align_errors_high[0x20]; + + u8 ether_stats_crc_align_errors_low[0x20]; + + u8 ether_stats_undersize_pkts_high[0x20]; + + u8 ether_stats_undersize_pkts_low[0x20]; + + u8 ether_stats_oversize_pkts_high[0x20]; + + u8 ether_stats_oversize_pkts_low[0x20]; + + u8 ether_stats_fragments_high[0x20]; + + u8 ether_stats_fragments_low[0x20]; + + u8 ether_stats_jabbers_high[0x20]; + + u8 ether_stats_jabbers_low[0x20]; + + u8 ether_stats_collisions_high[0x20]; + + u8 ether_stats_collisions_low[0x20]; + + u8 ether_stats_pkts64octets_high[0x20]; + + u8 ether_stats_pkts64octets_low[0x20]; + + u8 ether_stats_pkts65to127octets_high[0x20]; + + u8 ether_stats_pkts65to127octets_low[0x20]; + + u8 ether_stats_pkts128to255octets_high[0x20]; + + u8 ether_stats_pkts128to255octets_low[0x20]; + + u8 ether_stats_pkts256to511octets_high[0x20]; + + u8 ether_stats_pkts256to511octets_low[0x20]; + + u8 ether_stats_pkts512to1023octets_high[0x20]; + + u8 ether_stats_pkts512to1023octets_low[0x20]; + + u8 ether_stats_pkts1024to1518octets_high[0x20]; + + u8 ether_stats_pkts1024to1518octets_low[0x20]; + + u8 ether_stats_pkts1519to2047octets_high[0x20]; + + u8 ether_stats_pkts1519to2047octets_low[0x20]; + + u8 ether_stats_pkts2048to4095octets_high[0x20]; + + u8 ether_stats_pkts2048to4095octets_low[0x20]; + + u8 ether_stats_pkts4096to8191octets_high[0x20]; + + u8 ether_stats_pkts4096to8191octets_low[0x20]; + + u8 ether_stats_pkts8192to10239octets_high[0x20]; + + u8 ether_stats_pkts8192to10239octets_low[0x20]; + + u8 reserved_0[0x280]; +}; + +struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits { + u8 if_in_octets_high[0x20]; + + u8 if_in_octets_low[0x20]; + + u8 if_in_ucast_pkts_high[0x20]; + + u8 if_in_ucast_pkts_low[0x20]; + + u8 if_in_discards_high[0x20]; + + u8 if_in_discards_low[0x20]; + + u8 if_in_errors_high[0x20]; + + u8 if_in_errors_low[0x20]; + + u8 if_in_unknown_protos_high[0x20]; + + u8 if_in_unknown_protos_low[0x20]; + + u8 if_out_octets_high[0x20]; + + u8 if_out_octets_low[0x20]; + + u8 if_out_ucast_pkts_high[0x20]; + + u8 if_out_ucast_pkts_low[0x20]; + + u8 if_out_discards_high[0x20]; + + u8 if_out_discards_low[0x20]; + + u8 if_out_errors_high[0x20]; + + u8 if_out_errors_low[0x20]; + + u8 if_in_multicast_pkts_high[0x20]; + + u8 if_in_multicast_pkts_low[0x20]; + + u8 if_in_broadcast_pkts_high[0x20]; + + u8 if_in_broadcast_pkts_low[0x20]; + + u8 if_out_multicast_pkts_high[0x20]; + + u8 if_out_multicast_pkts_low[0x20]; + + u8 if_out_broadcast_pkts_high[0x20]; + + u8 if_out_broadcast_pkts_low[0x20]; + + u8 reserved_0[0x480]; +}; + +struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits { + u8 a_frames_transmitted_ok_high[0x20]; + + u8 a_frames_transmitted_ok_low[0x20]; + + u8 a_frames_received_ok_high[0x20]; + + u8 a_frames_received_ok_low[0x20]; + + u8 a_frame_check_sequence_errors_high[0x20]; + + u8 a_frame_check_sequence_errors_low[0x20]; + + u8 a_alignment_errors_high[0x20]; + + u8 a_alignment_errors_low[0x20]; + + u8 a_octets_transmitted_ok_high[0x20]; + + u8 a_octets_transmitted_ok_low[0x20]; + + u8 a_octets_received_ok_high[0x20]; + + u8 a_octets_received_ok_low[0x20]; + + u8 a_multicast_frames_xmitted_ok_high[0x20]; + + u8 a_multicast_frames_xmitted_ok_low[0x20]; + + u8 a_broadcast_frames_xmitted_ok_high[0x20]; + + u8 a_broadcast_frames_xmitted_ok_low[0x20]; + + u8 a_multicast_frames_received_ok_high[0x20]; + + u8 a_multicast_frames_received_ok_low[0x20]; + + u8 a_broadcast_frames_received_ok_high[0x20]; + + u8 a_broadcast_frames_received_ok_low[0x20]; + + u8 a_in_range_length_errors_high[0x20]; + + u8 a_in_range_length_errors_low[0x20]; + + u8 a_out_of_range_length_field_high[0x20]; + + u8 a_out_of_range_length_field_low[0x20]; + + u8 a_frame_too_long_errors_high[0x20]; + + u8 a_frame_too_long_errors_low[0x20]; + + u8 a_symbol_error_during_carrier_high[0x20]; + + u8 a_symbol_error_during_carrier_low[0x20]; + + u8 a_mac_control_frames_transmitted_high[0x20]; + + u8 a_mac_control_frames_transmitted_low[0x20]; + + u8 a_mac_control_frames_received_high[0x20]; + + u8 a_mac_control_frames_received_low[0x20]; + + u8 a_unsupported_opcodes_received_high[0x20]; + + u8 a_unsupported_opcodes_received_low[0x20]; + + u8 a_pause_mac_ctrl_frames_received_high[0x20]; + + u8 a_pause_mac_ctrl_frames_received_low[0x20]; + + u8 a_pause_mac_ctrl_frames_transmitted_high[0x20]; + + u8 a_pause_mac_ctrl_frames_transmitted_low[0x20]; + + u8 reserved_0[0x300]; +}; + +struct mlx5_ifc_cmd_inter_comp_event_bits { + u8 command_completion_vector[0x20]; + + u8 reserved_0[0xc0]; +}; + +struct mlx5_ifc_stall_vl_event_bits { + u8 reserved_0[0x18]; + u8 port_num[0x1]; + u8 reserved_1[0x3]; + u8 vl[0x4]; + + u8 reserved_2[0xa0]; +}; + +struct mlx5_ifc_db_bf_congestion_event_bits { + u8 event_subtype[0x8]; + u8 reserved_0[0x8]; + u8 congestion_level[0x8]; + u8 reserved_1[0x8]; + + u8 reserved_2[0xa0]; +}; + +struct mlx5_ifc_gpio_event_bits { + u8 reserved_0[0x60]; + + u8 gpio_event_hi[0x20]; + + u8 gpio_event_lo[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_port_state_change_event_bits { + u8 reserved_0[0x40]; + + u8 port_num[0x4]; + u8 reserved_1[0x1c]; + + u8 reserved_2[0x80]; +}; + +struct mlx5_ifc_dropped_packet_logged_bits { + u8 reserved_0[0xe0]; +}; + +enum { + MLX5_CQ_ERROR_SYNDROME_CQ_OVERRUN = 0x1, + MLX5_CQ_ERROR_SYNDROME_CQ_ACCESS_VIOLATION_ERROR = 0x2, +}; + +struct mlx5_ifc_cq_error_bits { + u8 reserved_0[0x8]; + u8 cqn[0x18]; + + u8 reserved_1[0x20]; + + u8 reserved_2[0x18]; + u8 syndrome[0x8]; + + u8 reserved_3[0x80]; +}; + +struct mlx5_ifc_rdma_page_fault_event_bits { + u8 bytes_committed[0x20]; + + u8 r_key[0x20]; + + u8 reserved_0[0x10]; + u8 packet_len[0x10]; + + u8 rdma_op_len[0x20]; + + u8 rdma_va[0x40]; + + u8 reserved_1[0x5]; + u8 rdma[0x1]; + u8 write[0x1]; + u8 requestor[0x1]; + u8 qp_number[0x18]; +}; + +struct mlx5_ifc_wqe_associated_page_fault_event_bits { + u8 bytes_committed[0x20]; + + u8 reserved_0[0x10]; + u8 wqe_index[0x10]; + + u8 reserved_1[0x10]; + u8 len[0x10]; + + u8 reserved_2[0x60]; + + u8 reserved_3[0x5]; + u8 rdma[0x1]; + u8 write_read[0x1]; + u8 requestor[0x1]; + u8 qpn[0x18]; +}; + +struct mlx5_ifc_qp_events_bits { + u8 reserved_0[0xa0]; + + u8 type[0x8]; + u8 reserved_1[0x18]; + + u8 reserved_2[0x8]; + u8 qpn_rqn_sqn[0x18]; +}; + +struct mlx5_ifc_dct_events_bits { + u8 reserved_0[0xc0]; + + u8 reserved_1[0x8]; + u8 dct_number[0x18]; +}; + +struct mlx5_ifc_comp_event_bits { + u8 reserved_0[0xc0]; + + u8 reserved_1[0x8]; + u8 cq_number[0x18]; +}; + +enum { + MLX5_QPC_STATE_RST = 0x0, + MLX5_QPC_STATE_INIT = 0x1, + MLX5_QPC_STATE_RTR = 0x2, + MLX5_QPC_STATE_RTS = 0x3, + MLX5_QPC_STATE_SQER = 0x4, + MLX5_QPC_STATE_ERR = 0x6, + MLX5_QPC_STATE_SQD = 0x7, + MLX5_QPC_STATE_SUSPENDED = 0x9, +}; + +enum { + MLX5_QPC_ST_RC = 0x0, + MLX5_QPC_ST_UC = 0x1, + MLX5_QPC_ST_UD = 0x2, + MLX5_QPC_ST_XRC = 0x3, + MLX5_QPC_ST_DCI = 0x5, + MLX5_QPC_ST_QP0 = 0x7, + MLX5_QPC_ST_QP1 = 0x8, + MLX5_QPC_ST_RAW_DATAGRAM = 0x9, + MLX5_QPC_ST_REG_UMR = 0xc, +}; + +enum { + MLX5_QPC_PM_STATE_ARMED = 0x0, + MLX5_QPC_PM_STATE_REARM = 0x1, + MLX5_QPC_PM_STATE_RESERVED = 0x2, + MLX5_QPC_PM_STATE_MIGRATED = 0x3, +}; + +enum { + MLX5_QPC_END_PADDING_MODE_SCATTER_AS_IS = 0x0, + MLX5_QPC_END_PADDING_MODE_PAD_TO_CACHE_LINE_ALIGNMENT = 0x1, +}; + +enum { + MLX5_QPC_MTU_256_BYTES = 0x1, + MLX5_QPC_MTU_512_BYTES = 0x2, + MLX5_QPC_MTU_1K_BYTES = 0x3, + MLX5_QPC_MTU_2K_BYTES = 0x4, + MLX5_QPC_MTU_4K_BYTES = 0x5, + MLX5_QPC_MTU_RAW_ETHERNET_QP = 0x7, +}; + +enum { + MLX5_QPC_ATOMIC_MODE_IB_SPEC = 0x1, + MLX5_QPC_ATOMIC_MODE_ONLY_8B = 0x2, + MLX5_QPC_ATOMIC_MODE_UP_TO_8B = 0x3, + MLX5_QPC_ATOMIC_MODE_UP_TO_16B = 0x4, + MLX5_QPC_ATOMIC_MODE_UP_TO_32B = 0x5, + MLX5_QPC_ATOMIC_MODE_UP_TO_64B = 0x6, + MLX5_QPC_ATOMIC_MODE_UP_TO_128B = 0x7, + MLX5_QPC_ATOMIC_MODE_UP_TO_256B = 0x8, +}; + +enum { + MLX5_QPC_CS_REQ_DISABLE = 0x0, + MLX5_QPC_CS_REQ_UP_TO_32B = 0x11, + MLX5_QPC_CS_REQ_UP_TO_64B = 0x22, +}; + +enum { + MLX5_QPC_CS_RES_DISABLE = 0x0, + MLX5_QPC_CS_RES_UP_TO_32B = 0x1, + MLX5_QPC_CS_RES_UP_TO_64B = 0x2, +}; + +struct mlx5_ifc_qpc_bits { + u8 state[0x4]; + u8 reserved_0[0x4]; + u8 st[0x8]; + u8 reserved_1[0x3]; + u8 pm_state[0x2]; + u8 reserved_2[0x7]; + u8 end_padding_mode[0x2]; + u8 reserved_3[0x2]; + + u8 wq_signature[0x1]; + u8 block_lb_mc[0x1]; + u8 atomic_like_write_en[0x1]; + u8 latency_sensitive[0x1]; + u8 reserved_4[0x1]; + u8 drain_sigerr[0x1]; + u8 reserved_5[0x2]; + u8 pd[0x18]; + + u8 mtu[0x3]; + u8 log_msg_max[0x5]; + u8 reserved_6[0x1]; + u8 log_rq_size[0x4]; + u8 log_rq_stride[0x3]; + u8 no_sq[0x1]; + u8 log_sq_size[0x4]; + u8 reserved_7[0x6]; + u8 rlky[0x1]; + u8 reserved_8[0x4]; + + u8 counter_set_id[0x8]; + u8 uar_page[0x18]; + + u8 reserved_9[0x8]; + u8 user_index[0x18]; + + u8 reserved_10[0x3]; + u8 log_page_size[0x5]; + u8 remote_qpn[0x18]; + + struct mlx5_ifc_ads_bits primary_address_path; + + struct mlx5_ifc_ads_bits secondary_address_path; + + u8 log_ack_req_freq[0x4]; + u8 reserved_11[0x4]; + u8 log_sra_max[0x3]; + u8 reserved_12[0x2]; + u8 retry_count[0x3]; + u8 rnr_retry[0x3]; + u8 reserved_13[0x1]; + u8 fre[0x1]; + u8 cur_rnr_retry[0x3]; + u8 cur_retry_count[0x3]; + u8 reserved_14[0x5]; + + u8 reserved_15[0x20]; + + u8 reserved_16[0x8]; + u8 next_send_psn[0x18]; + + u8 reserved_17[0x8]; + u8 cqn_snd[0x18]; + + u8 reserved_18[0x40]; + + u8 reserved_19[0x8]; + u8 last_acked_psn[0x18]; + + u8 reserved_20[0x8]; + u8 ssn[0x18]; + + u8 reserved_21[0x8]; + u8 log_rra_max[0x3]; + u8 reserved_22[0x1]; + u8 atomic_mode[0x4]; + u8 rre[0x1]; + u8 rwe[0x1]; + u8 rae[0x1]; + u8 reserved_23[0x1]; + u8 page_offset[0x6]; + u8 reserved_24[0x3]; + u8 cd_slave_receive[0x1]; + u8 cd_slave_send[0x1]; + u8 cd_master[0x1]; + + u8 reserved_25[0x3]; + u8 min_rnr_nak[0x5]; + u8 next_rcv_psn[0x18]; + + u8 reserved_26[0x8]; + u8 xrcd[0x18]; + + u8 reserved_27[0x8]; + u8 cqn_rcv[0x18]; + + u8 dbr_addr[0x40]; + + u8 q_key[0x20]; + + u8 reserved_28[0x5]; + u8 rq_type[0x3]; + u8 srqn_rmpn[0x18]; + + u8 reserved_29[0x8]; + u8 rmsn[0x18]; + + u8 hw_sq_wqebb_counter[0x10]; + u8 sw_sq_wqebb_counter[0x10]; + + u8 hw_rq_counter[0x20]; + + u8 sw_rq_counter[0x20]; + + u8 reserved_30[0x20]; + + u8 reserved_31[0xf]; + u8 cgs[0x1]; + u8 cs_req[0x8]; + u8 cs_res[0x8]; + + u8 dc_access_key[0x40]; + + u8 reserved_32[0xc0]; +}; + +struct mlx5_ifc_roce_addr_layout_bits { + u8 source_l3_address[16][0x8]; + + u8 reserved_0[0x3]; + u8 vlan_valid[0x1]; + u8 vlan_id[0xc]; + u8 source_mac_47_32[0x10]; + + u8 source_mac_31_0[0x20]; + + u8 reserved_1[0x14]; + u8 roce_l3_type[0x4]; + u8 roce_version[0x8]; + + u8 reserved_2[0x20]; +}; + +union mlx5_ifc_hca_cap_union_bits { + struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap; + struct mlx5_ifc_odp_cap_bits odp_cap; + struct mlx5_ifc_atomic_caps_bits atomic_caps; + struct mlx5_ifc_roce_cap_bits roce_cap; + struct mlx5_ifc_per_protocol_networking_offload_caps_bits per_protocol_networking_offload_caps; + struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap; + u8 reserved_0[0x8000]; +}; + +enum { + MLX5_FLOW_CONTEXT_ACTION_ALLOW = 0x1, + MLX5_FLOW_CONTEXT_ACTION_DROP = 0x2, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST = 0x4, +}; + +struct mlx5_ifc_flow_context_bits { + u8 reserved_0[0x20]; + + u8 group_id[0x20]; + + u8 reserved_1[0x8]; + u8 flow_tag[0x18]; + + u8 reserved_2[0x10]; + u8 action[0x10]; + + u8 reserved_3[0x8]; + u8 destination_list_size[0x18]; + + u8 reserved_4[0x160]; + + struct mlx5_ifc_fte_match_param_bits match_value; + + u8 reserved_5[0x600]; + + struct mlx5_ifc_dest_format_struct_bits destination[0]; +}; + +enum { + MLX5_XRC_SRQC_STATE_GOOD = 0x0, + MLX5_XRC_SRQC_STATE_ERROR = 0x1, +}; + +struct mlx5_ifc_xrc_srqc_bits { + u8 state[0x4]; + u8 log_xrc_srq_size[0x4]; + u8 reserved_0[0x18]; + + u8 wq_signature[0x1]; + u8 cont_srq[0x1]; + u8 reserved_1[0x1]; + u8 rlky[0x1]; + u8 basic_cyclic_rcv_wqe[0x1]; + u8 log_rq_stride[0x3]; + u8 xrcd[0x18]; + + u8 page_offset[0x6]; + u8 reserved_2[0x2]; + u8 cqn[0x18]; + + u8 reserved_3[0x20]; + + u8 user_index_equal_xrc_srqn[0x1]; + u8 reserved_4[0x1]; + u8 log_page_size[0x6]; + u8 user_index[0x18]; + + u8 reserved_5[0x20]; + + u8 reserved_6[0x8]; + u8 pd[0x18]; + + u8 lwm[0x10]; + u8 wqe_cnt[0x10]; + + u8 reserved_7[0x40]; + + u8 db_record_addr_h[0x20]; + + u8 db_record_addr_l[0x1e]; + u8 reserved_8[0x2]; + + u8 reserved_9[0x80]; +}; + +struct mlx5_ifc_traffic_counter_bits { + u8 packets[0x40]; + + u8 octets[0x40]; +}; + +struct mlx5_ifc_tisc_bits { + u8 reserved_0[0xc]; + u8 prio[0x4]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x100]; + + u8 reserved_3[0x8]; + u8 transport_domain[0x18]; + + u8 reserved_4[0x3c0]; +}; + +enum { + MLX5_TIRC_DISP_TYPE_DIRECT = 0x0, + MLX5_TIRC_DISP_TYPE_INDIRECT = 0x1, +}; + +enum { + MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO = 0x1, + MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO = 0x2, +}; + +enum { + MLX5_TIRC_RX_HASH_FN_HASH_NONE = 0x0, + MLX5_TIRC_RX_HASH_FN_HASH_INVERTED_XOR8 = 0x1, + MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ = 0x2, +}; + +enum { + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_ = 0x1, + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST_ = 0x2, +}; + +struct mlx5_ifc_tirc_bits { + u8 reserved_0[0x20]; + + u8 disp_type[0x4]; + u8 reserved_1[0x1c]; + + u8 reserved_2[0x40]; + + u8 reserved_3[0x4]; + u8 lro_timeout_period_usecs[0x10]; + u8 lro_enable_mask[0x4]; + u8 lro_max_ip_payload_size[0x8]; + + u8 reserved_4[0x40]; + + u8 reserved_5[0x8]; + u8 inline_rqn[0x18]; + + u8 rx_hash_symmetric[0x1]; + u8 reserved_6[0x1]; + u8 tunneled_offload_en[0x1]; + u8 reserved_7[0x5]; + u8 indirect_table[0x18]; + + u8 rx_hash_fn[0x4]; + u8 reserved_8[0x2]; + u8 self_lb_block[0x2]; + u8 transport_domain[0x18]; + + u8 rx_hash_toeplitz_key[10][0x20]; + + struct mlx5_ifc_rx_hash_field_select_bits rx_hash_field_selector_outer; + + struct mlx5_ifc_rx_hash_field_select_bits rx_hash_field_selector_inner; + + u8 reserved_9[0x4c0]; +}; + +enum { + MLX5_SRQC_STATE_GOOD = 0x0, + MLX5_SRQC_STATE_ERROR = 0x1, +}; + +struct mlx5_ifc_srqc_bits { + u8 state[0x4]; + u8 log_srq_size[0x4]; + u8 reserved_0[0x18]; + + u8 wq_signature[0x1]; + u8 cont_srq[0x1]; + u8 reserved_1[0x1]; + u8 rlky[0x1]; + u8 reserved_2[0x1]; + u8 log_rq_stride[0x3]; + u8 xrcd[0x18]; + + u8 page_offset[0x6]; + u8 reserved_3[0x2]; + u8 cqn[0x18]; + + u8 reserved_4[0x20]; + + u8 reserved_5[0x2]; + u8 log_page_size[0x6]; + u8 reserved_6[0x18]; + + u8 reserved_7[0x20]; + + u8 reserved_8[0x8]; + u8 pd[0x18]; + + u8 lwm[0x10]; + u8 wqe_cnt[0x10]; + + u8 reserved_9[0x40]; + + u8 db_record_addr_h[0x20]; + + u8 db_record_addr_l[0x1e]; + u8 reserved_10[0x2]; + + u8 reserved_11[0x80]; +}; + +enum { + MLX5_SQC_STATE_RST = 0x0, + MLX5_SQC_STATE_RDY = 0x1, + MLX5_SQC_STATE_ERR = 0x3, +}; + +struct mlx5_ifc_sqc_bits { + u8 rlky[0x1]; + u8 cd_master[0x1]; + u8 fre[0x1]; + u8 flush_in_error_en[0x1]; + u8 reserved_0[0x4]; + u8 state[0x4]; + u8 reserved_1[0x14]; + + u8 reserved_2[0x8]; + u8 user_index[0x18]; + + u8 reserved_3[0x8]; + u8 cqn[0x18]; + + u8 reserved_4[0xa0]; + + u8 tis_lst_sz[0x10]; + u8 reserved_5[0x10]; + + u8 reserved_6[0x40]; + + u8 reserved_7[0x8]; + u8 tis_num_0[0x18]; + + struct mlx5_ifc_wq_bits wq; +}; + +struct mlx5_ifc_rqtc_bits { + u8 reserved_0[0xa0]; + + u8 reserved_1[0x10]; + u8 rqt_max_size[0x10]; + + u8 reserved_2[0x10]; + u8 rqt_actual_size[0x10]; + + u8 reserved_3[0x6a0]; + + struct mlx5_ifc_rq_num_bits rq_num[0]; +}; + +enum { + MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE = 0x0, + MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_RMP = 0x1, +}; + +enum { + MLX5_RQC_STATE_RST = 0x0, + MLX5_RQC_STATE_RDY = 0x1, + MLX5_RQC_STATE_ERR = 0x3, +}; + +struct mlx5_ifc_rqc_bits { + u8 rlky[0x1]; + u8 reserved_0[0x2]; + u8 vsd[0x1]; + u8 mem_rq_type[0x4]; + u8 state[0x4]; + u8 reserved_1[0x1]; + u8 flush_in_error_en[0x1]; + u8 reserved_2[0x12]; + + u8 reserved_3[0x8]; + u8 user_index[0x18]; + + u8 reserved_4[0x8]; + u8 cqn[0x18]; + + u8 counter_set_id[0x8]; + u8 reserved_5[0x18]; + + u8 reserved_6[0x8]; + u8 rmpn[0x18]; + + u8 reserved_7[0xe0]; + + struct mlx5_ifc_wq_bits wq; +}; + +enum { + MLX5_RMPC_STATE_RDY = 0x1, + MLX5_RMPC_STATE_ERR = 0x3, +}; + +struct mlx5_ifc_rmpc_bits { + u8 reserved_0[0x8]; + u8 state[0x4]; + u8 reserved_1[0x14]; + + u8 basic_cyclic_rcv_wqe[0x1]; + u8 reserved_2[0x1f]; + + u8 reserved_3[0x140]; + + struct mlx5_ifc_wq_bits wq; +}; + +enum { + MLX5_NIC_VPORT_CONTEXT_ALLOWED_LIST_TYPE_CURRENT_UC_MAC_ADDRESS = 0x0, +}; + +struct mlx5_ifc_nic_vport_context_bits { + u8 reserved_0[0x1f]; + u8 roce_en[0x1]; + + u8 reserved_1[0x760]; + + u8 reserved_2[0x5]; + u8 allowed_list_type[0x3]; + u8 reserved_3[0xc]; + u8 allowed_list_size[0xc]; + + struct mlx5_ifc_mac_address_layout_bits permanent_address; + + u8 reserved_4[0x20]; + + u8 current_uc_mac_address[0][0x40]; +}; + +enum { + MLX5_MKC_ACCESS_MODE_PA = 0x0, + MLX5_MKC_ACCESS_MODE_MTT = 0x1, + MLX5_MKC_ACCESS_MODE_KLMS = 0x2, +}; + +struct mlx5_ifc_mkc_bits { + u8 reserved_0[0x1]; + u8 free[0x1]; + u8 reserved_1[0xd]; + u8 small_fence_on_rdma_read_response[0x1]; + u8 umr_en[0x1]; + u8 a[0x1]; + u8 rw[0x1]; + u8 rr[0x1]; + u8 lw[0x1]; + u8 lr[0x1]; + u8 access_mode[0x2]; + u8 reserved_2[0x8]; + + u8 qpn[0x18]; + u8 mkey_7_0[0x8]; + + u8 reserved_3[0x20]; + + u8 length64[0x1]; + u8 bsf_en[0x1]; + u8 sync_umr[0x1]; + u8 reserved_4[0x2]; + u8 expected_sigerr_count[0x1]; + u8 reserved_5[0x1]; + u8 en_rinval[0x1]; + u8 pd[0x18]; + + u8 start_addr[0x40]; + + u8 len[0x40]; + + u8 bsf_octword_size[0x20]; + + u8 reserved_6[0x80]; + + u8 translations_octword_size[0x20]; + + u8 reserved_7[0x1b]; + u8 log_page_size[0x5]; + + u8 reserved_8[0x20]; +}; + +struct mlx5_ifc_pkey_bits { + u8 reserved_0[0x10]; + u8 pkey[0x10]; +}; + +struct mlx5_ifc_array128_auto_bits { + u8 array128_auto[16][0x8]; +}; + +struct mlx5_ifc_hca_vport_context_bits { + u8 field_select[0x20]; + + u8 reserved_0[0xe0]; + + u8 sm_virt_aware[0x1]; + u8 has_smi[0x1]; + u8 has_raw[0x1]; + u8 grh_required[0x1]; + u8 reserved_1[0x10]; + u8 port_state_policy[0x4]; + u8 phy_port_state[0x4]; + u8 vport_state[0x4]; + + u8 reserved_2[0x60]; + + u8 port_guid[0x40]; + + u8 node_guid[0x40]; + + u8 cap_mask1[0x20]; + + u8 cap_mask1_field_select[0x20]; + + u8 cap_mask2[0x20]; + + u8 cap_mask2_field_select[0x20]; + + u8 reserved_3[0x80]; + + u8 lid[0x10]; + u8 reserved_4[0x4]; + u8 init_type_reply[0x4]; + u8 lmc[0x3]; + u8 subnet_timeout[0x5]; + + u8 sm_lid[0x10]; + u8 sm_sl[0x4]; + u8 reserved_5[0xc]; + + u8 qkey_violation_counter[0x10]; + u8 pkey_violation_counter[0x10]; + + u8 reserved_6[0xca0]; +}; + +enum { + MLX5_EQC_STATUS_OK = 0x0, + MLX5_EQC_STATUS_EQ_WRITE_FAILURE = 0xa, +}; + +enum { + MLX5_EQC_ST_ARMED = 0x9, + MLX5_EQC_ST_FIRED = 0xa, +}; + +struct mlx5_ifc_eqc_bits { + u8 status[0x4]; + u8 reserved_0[0x9]; + u8 ec[0x1]; + u8 oi[0x1]; + u8 reserved_1[0x5]; + u8 st[0x4]; + u8 reserved_2[0x8]; + + u8 reserved_3[0x20]; + + u8 reserved_4[0x14]; + u8 page_offset[0x6]; + u8 reserved_5[0x6]; + + u8 reserved_6[0x3]; + u8 log_eq_size[0x5]; + u8 uar_page[0x18]; + + u8 reserved_7[0x20]; + + u8 reserved_8[0x18]; + u8 intr[0x8]; + + u8 reserved_9[0x3]; + u8 log_page_size[0x5]; + u8 reserved_10[0x18]; + + u8 reserved_11[0x60]; + + u8 reserved_12[0x8]; + u8 consumer_counter[0x18]; + + u8 reserved_13[0x8]; + u8 producer_counter[0x18]; + + u8 reserved_14[0x80]; +}; + +enum { + MLX5_DCTC_STATE_ACTIVE = 0x0, + MLX5_DCTC_STATE_DRAINING = 0x1, + MLX5_DCTC_STATE_DRAINED = 0x2, +}; + +enum { + MLX5_DCTC_CS_RES_DISABLE = 0x0, + MLX5_DCTC_CS_RES_NA = 0x1, + MLX5_DCTC_CS_RES_UP_TO_64B = 0x2, +}; + +enum { + MLX5_DCTC_MTU_256_BYTES = 0x1, + MLX5_DCTC_MTU_512_BYTES = 0x2, + MLX5_DCTC_MTU_1K_BYTES = 0x3, + MLX5_DCTC_MTU_2K_BYTES = 0x4, + MLX5_DCTC_MTU_4K_BYTES = 0x5, +}; + +struct mlx5_ifc_dctc_bits { + u8 reserved_0[0x4]; + u8 state[0x4]; + u8 reserved_1[0x18]; + + u8 reserved_2[0x8]; + u8 user_index[0x18]; + + u8 reserved_3[0x8]; + u8 cqn[0x18]; + + u8 counter_set_id[0x8]; + u8 atomic_mode[0x4]; + u8 rre[0x1]; + u8 rwe[0x1]; + u8 rae[0x1]; + u8 atomic_like_write_en[0x1]; + u8 latency_sensitive[0x1]; + u8 rlky[0x1]; + u8 free_ar[0x1]; + u8 reserved_4[0xd]; + + u8 reserved_5[0x8]; + u8 cs_res[0x8]; + u8 reserved_6[0x3]; + u8 min_rnr_nak[0x5]; + u8 reserved_7[0x8]; + + u8 reserved_8[0x8]; + u8 srqn[0x18]; + + u8 reserved_9[0x8]; + u8 pd[0x18]; + + u8 tclass[0x8]; + u8 reserved_10[0x4]; + u8 flow_label[0x14]; + + u8 dc_access_key[0x40]; + + u8 reserved_11[0x5]; + u8 mtu[0x3]; + u8 port[0x8]; + u8 pkey_index[0x10]; + + u8 reserved_12[0x8]; + u8 my_addr_index[0x8]; + u8 reserved_13[0x8]; + u8 hop_limit[0x8]; + + u8 dc_access_key_violation_count[0x20]; + + u8 reserved_14[0x14]; + u8 dei_cfi[0x1]; + u8 eth_prio[0x3]; + u8 ecn[0x2]; + u8 dscp[0x6]; + + u8 reserved_15[0x40]; +}; + +enum { + MLX5_CQC_STATUS_OK = 0x0, + MLX5_CQC_STATUS_CQ_OVERFLOW = 0x9, + MLX5_CQC_STATUS_CQ_WRITE_FAIL = 0xa, +}; + +enum { + MLX5_CQC_CQE_SZ_64_BYTES = 0x0, + MLX5_CQC_CQE_SZ_128_BYTES = 0x1, +}; + +enum { + MLX5_CQC_ST_SOLICITED_NOTIFICATION_REQUEST_ARMED = 0x6, + MLX5_CQC_ST_NOTIFICATION_REQUEST_ARMED = 0x9, + MLX5_CQC_ST_FIRED = 0xa, +}; + +struct mlx5_ifc_cqc_bits { + u8 status[0x4]; + u8 reserved_0[0x4]; + u8 cqe_sz[0x3]; + u8 cc[0x1]; + u8 reserved_1[0x1]; + u8 scqe_break_moderation_en[0x1]; + u8 oi[0x1]; + u8 reserved_2[0x2]; + u8 cqe_zip_en[0x1]; + u8 mini_cqe_res_format[0x2]; + u8 st[0x4]; + u8 reserved_3[0x8]; + + u8 reserved_4[0x20]; + + u8 reserved_5[0x14]; + u8 page_offset[0x6]; + u8 reserved_6[0x6]; + + u8 reserved_7[0x3]; + u8 log_cq_size[0x5]; + u8 uar_page[0x18]; + + u8 reserved_8[0x4]; + u8 cq_period[0xc]; + u8 cq_max_count[0x10]; + + u8 reserved_9[0x18]; + u8 c_eqn[0x8]; + + u8 reserved_10[0x3]; + u8 log_page_size[0x5]; + u8 reserved_11[0x18]; + + u8 reserved_12[0x20]; + + u8 reserved_13[0x8]; + u8 last_notified_index[0x18]; + + u8 reserved_14[0x8]; + u8 last_solicit_index[0x18]; + + u8 reserved_15[0x8]; + u8 consumer_counter[0x18]; + + u8 reserved_16[0x8]; + u8 producer_counter[0x18]; + + u8 reserved_17[0x40]; + + u8 dbr_addr[0x40]; +}; + +union mlx5_ifc_cong_control_roce_ecn_auto_bits { + struct mlx5_ifc_cong_control_802_1qau_rp_bits cong_control_802_1qau_rp; + struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits cong_control_r_roce_ecn_rp; + struct mlx5_ifc_cong_control_r_roce_ecn_np_bits cong_control_r_roce_ecn_np; + u8 reserved_0[0x800]; +}; + +struct mlx5_ifc_query_adapter_param_block_bits { + u8 reserved_0[0xe0]; + + u8 reserved_1[0x10]; + u8 vsd_vendor_id[0x10]; + + u8 vsd[208][0x8]; + + u8 vsd_contd_psid[16][0x8]; +}; + +union mlx5_ifc_modify_field_select_resize_field_select_auto_bits { + struct mlx5_ifc_modify_field_select_bits modify_field_select; + struct mlx5_ifc_resize_field_select_bits resize_field_select; + u8 reserved_0[0x20]; +}; + +union mlx5_ifc_field_select_802_1_r_roce_auto_bits { + struct mlx5_ifc_field_select_802_1qau_rp_bits field_select_802_1qau_rp; + struct mlx5_ifc_field_select_r_roce_rp_bits field_select_r_roce_rp; + struct mlx5_ifc_field_select_r_roce_np_bits field_select_r_roce_np; + u8 reserved_0[0x20]; +}; + +union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { + struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits eth_802_3_cntrs_grp_data_layout; + struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits eth_2863_cntrs_grp_data_layout; + struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; + struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits eth_3635_cntrs_grp_data_layout; + struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout; + struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout; + struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout; + struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; + u8 reserved_0[0x7c0]; +}; + +union mlx5_ifc_event_auto_bits { + struct mlx5_ifc_comp_event_bits comp_event; + struct mlx5_ifc_dct_events_bits dct_events; + struct mlx5_ifc_qp_events_bits qp_events; + struct mlx5_ifc_wqe_associated_page_fault_event_bits wqe_associated_page_fault_event; + struct mlx5_ifc_rdma_page_fault_event_bits rdma_page_fault_event; + struct mlx5_ifc_cq_error_bits cq_error; + struct mlx5_ifc_dropped_packet_logged_bits dropped_packet_logged; + struct mlx5_ifc_port_state_change_event_bits port_state_change_event; + struct mlx5_ifc_gpio_event_bits gpio_event; + struct mlx5_ifc_db_bf_congestion_event_bits db_bf_congestion_event; + struct mlx5_ifc_stall_vl_event_bits stall_vl_event; + struct mlx5_ifc_cmd_inter_comp_event_bits cmd_inter_comp_event; + u8 reserved_0[0xe0]; +}; + +struct mlx5_ifc_health_buffer_bits { + u8 reserved_0[0x100]; + + u8 assert_existptr[0x20]; + + u8 assert_callra[0x20]; + + u8 reserved_1[0x40]; + + u8 fw_version[0x20]; + + u8 hw_id[0x20]; + + u8 reserved_2[0x20]; + + u8 irisc_index[0x8]; + u8 synd[0x8]; + u8 ext_synd[0x10]; +}; + +struct mlx5_ifc_register_loopback_control_bits { + u8 no_lb[0x1]; + u8 reserved_0[0x7]; + u8 port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x60]; +}; + +struct mlx5_ifc_teardown_hca_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +enum { + MLX5_TEARDOWN_HCA_IN_PROFILE_GRACEFUL_CLOSE = 0x0, + MLX5_TEARDOWN_HCA_IN_PROFILE_PANIC_CLOSE = 0x1, +}; + +struct mlx5_ifc_teardown_hca_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 profile[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_sqerr2rts_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_sqerr2rts_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_sqd2rts_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_sqd2rts_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_set_roce_address_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_roce_address_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 roce_address_index[0x10]; + u8 reserved_2[0x10]; + + u8 reserved_3[0x20]; + + struct mlx5_ifc_roce_addr_layout_bits roce_address; +}; + +struct mlx5_ifc_set_mad_demux_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +enum { + MLX5_SET_MAD_DEMUX_IN_DEMUX_MODE_PASS_ALL = 0x0, + MLX5_SET_MAD_DEMUX_IN_DEMUX_MODE_SELECTIVE = 0x2, +}; + +struct mlx5_ifc_set_mad_demux_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x20]; + + u8 reserved_3[0x6]; + u8 demux_mode[0x2]; + u8 reserved_4[0x18]; +}; + +struct mlx5_ifc_set_l2_table_entry_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_l2_table_entry_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x60]; + + u8 reserved_3[0x8]; + u8 table_index[0x18]; + + u8 reserved_4[0x20]; + + u8 reserved_5[0x13]; + u8 vlan_valid[0x1]; + u8 vlan[0xc]; + + struct mlx5_ifc_mac_address_layout_bits mac_address; + + u8 reserved_6[0xc0]; +}; + +struct mlx5_ifc_set_issi_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_issi_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 current_issi[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_set_hca_cap_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_hca_cap_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + union mlx5_ifc_hca_cap_union_bits capability; +}; + +struct mlx5_ifc_set_fte_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_fte_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x40]; + + u8 flow_index[0x20]; + + u8 reserved_6[0xe0]; + + struct mlx5_ifc_flow_context_bits flow_context; +}; + +struct mlx5_ifc_rts2rts_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_rts2rts_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_rtr2rts_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_rtr2rts_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_rst2init_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_rst2init_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_query_xrc_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry; + + u8 reserved_2[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_query_xrc_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 xrc_srqn[0x18]; + + u8 reserved_3[0x20]; +}; + +enum { + MLX5_QUERY_VPORT_STATE_OUT_STATE_DOWN = 0x0, + MLX5_QUERY_VPORT_STATE_OUT_STATE_UP = 0x1, +}; + +struct mlx5_ifc_query_vport_state_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x20]; + + u8 reserved_2[0x18]; + u8 admin_state[0x4]; + u8 state[0x4]; +}; + +enum { + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT = 0x0, +}; + +struct mlx5_ifc_query_vport_state_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_vport_counter_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_traffic_counter_bits received_errors; + + struct mlx5_ifc_traffic_counter_bits transmit_errors; + + struct mlx5_ifc_traffic_counter_bits received_ib_unicast; + + struct mlx5_ifc_traffic_counter_bits transmitted_ib_unicast; + + struct mlx5_ifc_traffic_counter_bits received_ib_multicast; + + struct mlx5_ifc_traffic_counter_bits transmitted_ib_multicast; + + struct mlx5_ifc_traffic_counter_bits received_eth_broadcast; + + struct mlx5_ifc_traffic_counter_bits transmitted_eth_broadcast; + + struct mlx5_ifc_traffic_counter_bits received_eth_unicast; + + struct mlx5_ifc_traffic_counter_bits transmitted_eth_unicast; + + struct mlx5_ifc_traffic_counter_bits received_eth_multicast; + + struct mlx5_ifc_traffic_counter_bits transmitted_eth_multicast; + + u8 reserved_2[0xa00]; +}; + +enum { + MLX5_QUERY_VPORT_COUNTER_IN_OP_MOD_VPORT_COUNTERS = 0x0, +}; + +struct mlx5_ifc_query_vport_counter_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x60]; + + u8 clear[0x1]; + u8 reserved_4[0x1f]; + + u8 reserved_5[0x20]; +}; + +struct mlx5_ifc_query_tis_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_tisc_bits tis_context; +}; + +struct mlx5_ifc_query_tis_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 tisn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_tir_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xc0]; + + struct mlx5_ifc_tirc_bits tir_context; +}; + +struct mlx5_ifc_query_tir_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 tirn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_srqc_bits srq_context_entry; + + u8 reserved_2[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_query_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 srqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_sq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xc0]; + + struct mlx5_ifc_sqc_bits sq_context; +}; + +struct mlx5_ifc_query_sq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 sqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_special_contexts_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x20]; + + u8 resd_lkey[0x20]; +}; + +struct mlx5_ifc_query_special_contexts_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_query_rqt_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xc0]; + + struct mlx5_ifc_rqtc_bits rqt_context; +}; + +struct mlx5_ifc_query_rqt_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rqtn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_rq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xc0]; + + struct mlx5_ifc_rqc_bits rq_context; +}; + +struct mlx5_ifc_query_rq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_roce_address_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_roce_addr_layout_bits roce_address; +}; + +struct mlx5_ifc_query_roce_address_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 roce_address_index[0x10]; + u8 reserved_2[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_rmp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xc0]; + + struct mlx5_ifc_rmpc_bits rmp_context; +}; + +struct mlx5_ifc_query_rmp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rmpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 opt_param_mask[0x20]; + + u8 reserved_2[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_3[0x80]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_query_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_q_counter_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 rx_write_requests[0x20]; + + u8 reserved_2[0x20]; + + u8 rx_read_requests[0x20]; + + u8 reserved_3[0x20]; + + u8 rx_atomic_requests[0x20]; + + u8 reserved_4[0x20]; + + u8 rx_dct_connect[0x20]; + + u8 reserved_5[0x20]; + + u8 out_of_buffer[0x20]; + + u8 reserved_6[0x20]; + + u8 out_of_sequence[0x20]; + + u8 reserved_7[0x620]; +}; + +struct mlx5_ifc_query_q_counter_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x80]; + + u8 clear[0x1]; + u8 reserved_3[0x1f]; + + u8 reserved_4[0x18]; + u8 counter_set_id[0x8]; +}; + +struct mlx5_ifc_query_pages_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x10]; + u8 function_id[0x10]; + + u8 num_pages[0x20]; +}; + +enum { + MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES = 0x1, + MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES = 0x2, + MLX5_QUERY_PAGES_IN_OP_MOD_REGULAR_PAGES = 0x3, +}; + +struct mlx5_ifc_query_pages_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 function_id[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_nic_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_nic_vport_context_bits nic_vport_context; +}; + +struct mlx5_ifc_query_nic_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x5]; + u8 allowed_list_type[0x3]; + u8 reserved_4[0x18]; +}; + +struct mlx5_ifc_query_mkey_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_mkc_bits memory_key_mkey_entry; + + u8 reserved_2[0x600]; + + u8 bsf0_klm0_pas_mtt0_1[16][0x8]; + + u8 bsf1_klm1_pas_mtt2_3[16][0x8]; +}; + +struct mlx5_ifc_query_mkey_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 mkey_index[0x18]; + + u8 pg_access[0x1]; + u8 reserved_3[0x1f]; +}; + +struct mlx5_ifc_query_mad_demux_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 mad_dumux_parameters_block[0x20]; +}; + +struct mlx5_ifc_query_mad_demux_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_query_l2_table_entry_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xa0]; + + u8 reserved_2[0x13]; + u8 vlan_valid[0x1]; + u8 vlan[0xc]; + + struct mlx5_ifc_mac_address_layout_bits mac_address; + + u8 reserved_3[0xc0]; +}; + +struct mlx5_ifc_query_l2_table_entry_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x60]; + + u8 reserved_3[0x8]; + u8 table_index[0x18]; + + u8 reserved_4[0x140]; +}; + +struct mlx5_ifc_query_issi_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x10]; + u8 current_issi[0x10]; + + u8 reserved_2[0xa0]; + + u8 supported_issi_reserved[76][0x8]; + u8 supported_issi_dw0[0x20]; +}; + +struct mlx5_ifc_query_issi_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_query_hca_vport_pkey_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_pkey_bits pkey[0]; +}; + +struct mlx5_ifc_query_hca_vport_pkey_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x10]; + u8 pkey_index[0x10]; +}; + +struct mlx5_ifc_query_hca_vport_gid_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x20]; + + u8 gids_num[0x10]; + u8 reserved_2[0x10]; + + struct mlx5_ifc_array128_auto_bits gid[0]; +}; + +struct mlx5_ifc_query_hca_vport_gid_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x10]; + u8 gid_index[0x10]; +}; + +struct mlx5_ifc_query_hca_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_hca_vport_context_bits hca_vport_context; +}; + +struct mlx5_ifc_query_hca_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_hca_cap_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + union mlx5_ifc_hca_cap_union_bits capability; +}; + +struct mlx5_ifc_query_hca_cap_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_query_flow_table_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x80]; + + u8 reserved_2[0x8]; + u8 level[0x8]; + u8 reserved_3[0x8]; + u8 log_size[0x8]; + + u8 reserved_4[0x120]; +}; + +struct mlx5_ifc_query_flow_table_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x140]; +}; + +struct mlx5_ifc_query_fte_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x1c0]; + + struct mlx5_ifc_flow_context_bits flow_context; +}; + +struct mlx5_ifc_query_fte_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x40]; + + u8 flow_index[0x20]; + + u8 reserved_6[0xe0]; +}; + +enum { + MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0, + MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, + MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_INNER_HEADERS = 0x2, +}; + +struct mlx5_ifc_query_flow_group_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xa0]; + + u8 start_flow_index[0x20]; + + u8 reserved_2[0x20]; + + u8 end_flow_index[0x20]; + + u8 reserved_3[0xa0]; + + u8 reserved_4[0x18]; + u8 match_criteria_enable[0x8]; + + struct mlx5_ifc_fte_match_param_bits match_criteria; + + u8 reserved_5[0xe00]; +}; + +struct mlx5_ifc_query_flow_group_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 group_id[0x20]; + + u8 reserved_5[0x120]; +}; + +struct mlx5_ifc_query_eq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_eqc_bits eq_context_entry; + + u8 reserved_2[0x40]; + + u8 event_bitmask[0x40]; + + u8 reserved_3[0x580]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_query_eq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x18]; + u8 eq_number[0x8]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_dct_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_dctc_bits dct_context_entry; + + u8 reserved_2[0x180]; +}; + +struct mlx5_ifc_query_dct_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 dctn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_cq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_cqc_bits cq_context; + + u8 reserved_2[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_query_cq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 cqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_cong_status_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x20]; + + u8 enable[0x1]; + u8 tag_enable[0x1]; + u8 reserved_2[0x1e]; +}; + +struct mlx5_ifc_query_cong_status_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x18]; + u8 priority[0x4]; + u8 cong_protocol[0x4]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_cong_statistics_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 cur_flows[0x20]; + + u8 sum_flows[0x20]; + + u8 cnp_ignored_high[0x20]; + + u8 cnp_ignored_low[0x20]; + + u8 cnp_handled_high[0x20]; + + u8 cnp_handled_low[0x20]; + + u8 reserved_2[0x100]; + + u8 time_stamp_high[0x20]; + + u8 time_stamp_low[0x20]; + + u8 accumulators_period[0x20]; + + u8 ecn_marked_roce_packets_high[0x20]; + + u8 ecn_marked_roce_packets_low[0x20]; + + u8 cnps_sent_high[0x20]; + + u8 cnps_sent_low[0x20]; + + u8 reserved_3[0x560]; +}; + +struct mlx5_ifc_query_cong_statistics_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 clear[0x1]; + u8 reserved_2[0x1f]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_cong_params_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + union mlx5_ifc_cong_control_roce_ecn_auto_bits congestion_parameters; +}; + +struct mlx5_ifc_query_cong_params_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x1c]; + u8 cong_protocol[0x4]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_adapter_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_query_adapter_param_block_bits query_adapter_struct; +}; + +struct mlx5_ifc_query_adapter_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_qp_2rst_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_qp_2rst_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_qp_2err_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_qp_2err_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_page_fault_resume_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_page_fault_resume_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 error[0x1]; + u8 reserved_2[0x4]; + u8 rdma[0x1]; + u8 read_write[0x1]; + u8 req_res[0x1]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_nop_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_nop_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_modify_vport_state_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_vport_state_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x18]; + u8 admin_state[0x4]; + u8 reserved_4[0x4]; +}; + +struct mlx5_ifc_modify_tis_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_tis_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 tisn[0x18]; + + u8 reserved_3[0x20]; + + u8 modify_bitmask[0x40]; + + u8 reserved_4[0x40]; + + struct mlx5_ifc_tisc_bits ctx; +}; + +struct mlx5_ifc_modify_tir_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_tir_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 tirn[0x18]; + + u8 reserved_3[0x20]; + + u8 modify_bitmask[0x40]; + + u8 reserved_4[0x40]; + + struct mlx5_ifc_tirc_bits ctx; +}; + +struct mlx5_ifc_modify_sq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_sq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 sq_state[0x4]; + u8 reserved_2[0x4]; + u8 sqn[0x18]; + + u8 reserved_3[0x20]; + + u8 modify_bitmask[0x40]; + + u8 reserved_4[0x40]; + + struct mlx5_ifc_sqc_bits ctx; +}; + +struct mlx5_ifc_modify_rqt_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_rqt_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rqtn[0x18]; + + u8 reserved_3[0x20]; + + u8 modify_bitmask[0x40]; + + u8 reserved_4[0x40]; + + struct mlx5_ifc_rqtc_bits ctx; +}; + +struct mlx5_ifc_modify_rq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_rq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 rq_state[0x4]; + u8 reserved_2[0x4]; + u8 rqn[0x18]; + + u8 reserved_3[0x20]; + + u8 modify_bitmask[0x40]; + + u8 reserved_4[0x40]; + + struct mlx5_ifc_rqc_bits ctx; +}; + +struct mlx5_ifc_modify_rmp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_rmp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 rmp_state[0x4]; + u8 reserved_2[0x4]; + u8 rmpn[0x18]; + + u8 reserved_3[0x20]; + + u8 modify_bitmask[0x40]; + + u8 reserved_4[0x40]; + + struct mlx5_ifc_rmpc_bits ctx; +}; + +struct mlx5_ifc_modify_nic_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_nic_vport_field_select_bits { + u8 reserved_0[0x1c]; + u8 permanent_address[0x1]; + u8 addresses_list[0x1]; + u8 roce_en[0x1]; + u8 reserved_1[0x1]; +}; + +struct mlx5_ifc_modify_nic_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + struct mlx5_ifc_modify_nic_vport_field_select_bits field_select; + + u8 reserved_3[0x780]; + + struct mlx5_ifc_nic_vport_context_bits nic_vport_context; +}; + +struct mlx5_ifc_modify_hca_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_hca_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x20]; + + struct mlx5_ifc_hca_vport_context_bits hca_vport_context; +}; + +struct mlx5_ifc_modify_cq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +enum { + MLX5_MODIFY_CQ_IN_OP_MOD_MODIFY_CQ = 0x0, + MLX5_MODIFY_CQ_IN_OP_MOD_RESIZE_CQ = 0x1, +}; + +struct mlx5_ifc_modify_cq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 cqn[0x18]; + + union mlx5_ifc_modify_field_select_resize_field_select_auto_bits modify_field_select_resize_field_select; + + struct mlx5_ifc_cqc_bits cq_context; + + u8 reserved_3[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_modify_cong_status_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_cong_status_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x18]; + u8 priority[0x4]; + u8 cong_protocol[0x4]; + + u8 enable[0x1]; + u8 tag_enable[0x1]; + u8 reserved_3[0x1e]; +}; + +struct mlx5_ifc_modify_cong_params_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_cong_params_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x1c]; + u8 cong_protocol[0x4]; + + union mlx5_ifc_field_select_802_1_r_roce_auto_bits field_select; + + u8 reserved_3[0x80]; + + union mlx5_ifc_cong_control_roce_ecn_auto_bits congestion_parameters; +}; + +struct mlx5_ifc_manage_pages_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 output_num_entries[0x20]; + + u8 reserved_1[0x20]; + + u8 pas[0][0x40]; +}; + +enum { + MLX5_MANAGE_PAGES_IN_OP_MOD_ALLOCATION_FAIL = 0x0, + MLX5_MANAGE_PAGES_IN_OP_MOD_ALLOCATION_SUCCESS = 0x1, + MLX5_MANAGE_PAGES_IN_OP_MOD_HCA_RETURN_PAGES = 0x2, +}; + +struct mlx5_ifc_manage_pages_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 function_id[0x10]; + + u8 input_num_entries[0x20]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_mad_ifc_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 response_mad_packet[256][0x8]; +}; + +struct mlx5_ifc_mad_ifc_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 remote_lid[0x10]; + u8 reserved_2[0x8]; + u8 port[0x8]; + + u8 reserved_3[0x20]; + + u8 mad[256][0x8]; +}; + +struct mlx5_ifc_init_hca_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_init_hca_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_init2rtr_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_init2rtr_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_init2init_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_init2init_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_get_dropped_packet_log_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 packet_headers_log[128][0x8]; + + u8 packet_syndrome[64][0x8]; +}; + +struct mlx5_ifc_get_dropped_packet_log_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_gen_eqe_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x18]; + u8 eq_number[0x8]; + + u8 reserved_3[0x20]; + + u8 eqe[64][0x8]; +}; + +struct mlx5_ifc_gen_eq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_enable_hca_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x20]; +}; + +struct mlx5_ifc_enable_hca_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 function_id[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_drain_dct_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_drain_dct_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 dctn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_disable_hca_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x20]; +}; + +struct mlx5_ifc_disable_hca_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 function_id[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_detach_from_mcg_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_detach_from_mcg_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 multicast_gid[16][0x8]; +}; + +struct mlx5_ifc_destroy_xrc_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_xrc_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 xrc_srqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_tis_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_tis_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 tisn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_tir_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_tir_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 tirn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 srqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_sq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_sq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 sqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_rqt_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_rqt_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rqtn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_rq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_rq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_rmp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_rmp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rmpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_psv_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_psv_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 psvn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_mkey_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_mkey_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 mkey_index[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_flow_table_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_flow_table_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x140]; +}; + +struct mlx5_ifc_destroy_flow_group_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_flow_group_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 group_id[0x20]; + + u8 reserved_5[0x120]; +}; + +struct mlx5_ifc_destroy_eq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_eq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x18]; + u8 eq_number[0x8]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_dct_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_dct_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 dctn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_cq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_cq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 cqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_delete_vxlan_udp_dport_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_delete_vxlan_udp_dport_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x20]; + + u8 reserved_3[0x10]; + u8 vxlan_udp_port[0x10]; +}; + +struct mlx5_ifc_delete_l2_table_entry_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_delete_l2_table_entry_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x60]; + + u8 reserved_3[0x8]; + u8 table_index[0x18]; + + u8 reserved_4[0x140]; +}; + +struct mlx5_ifc_delete_fte_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_delete_fte_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x40]; + + u8 flow_index[0x20]; + + u8 reserved_6[0xe0]; +}; + +struct mlx5_ifc_dealloc_xrcd_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_dealloc_xrcd_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 xrcd[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_dealloc_uar_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_dealloc_uar_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 uar[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_dealloc_transport_domain_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_dealloc_transport_domain_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 transport_domain[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_dealloc_q_counter_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_dealloc_q_counter_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x18]; + u8 counter_set_id[0x8]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_dealloc_pd_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_dealloc_pd_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 pd[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_create_xrc_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 xrc_srqn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_xrc_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry; + + u8 reserved_3[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_create_tis_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 tisn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_tis_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0xc0]; + + struct mlx5_ifc_tisc_bits ctx; +}; + +struct mlx5_ifc_create_tir_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 tirn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_tir_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0xc0]; + + struct mlx5_ifc_tirc_bits ctx; +}; + +struct mlx5_ifc_create_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 srqn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + struct mlx5_ifc_srqc_bits srq_context_entry; + + u8 reserved_3[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_create_sq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 sqn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_sq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0xc0]; + + struct mlx5_ifc_sqc_bits ctx; +}; + +struct mlx5_ifc_create_rqt_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 rqtn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_rqt_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0xc0]; + + struct mlx5_ifc_rqtc_bits rqt_context; +}; + +struct mlx5_ifc_create_rq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 rqn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_rq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0xc0]; + + struct mlx5_ifc_rqc_bits ctx; +}; + +struct mlx5_ifc_create_rmp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 rmpn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_rmp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0xc0]; + + struct mlx5_ifc_rmpc_bits ctx; +}; + +struct mlx5_ifc_create_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 qpn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 opt_param_mask[0x20]; + + u8 reserved_3[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_4[0x80]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_create_psv_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 reserved_2[0x8]; + u8 psv0_index[0x18]; + + u8 reserved_3[0x8]; + u8 psv1_index[0x18]; + + u8 reserved_4[0x8]; + u8 psv2_index[0x18]; + + u8 reserved_5[0x8]; + u8 psv3_index[0x18]; +}; + +struct mlx5_ifc_create_psv_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 num_psv[0x4]; + u8 reserved_2[0x4]; + u8 pd[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_create_mkey_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 mkey_index[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_mkey_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x20]; + + u8 pg_access[0x1]; + u8 reserved_3[0x1f]; + + struct mlx5_ifc_mkc_bits memory_key_mkey_entry; + + u8 reserved_4[0x80]; + + u8 translations_octword_actual_size[0x20]; + + u8 reserved_5[0x560]; + + u8 klm_pas_mtt[0][0x20]; +}; + +struct mlx5_ifc_create_flow_table_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 table_id[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_flow_table_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x20]; + + u8 reserved_5[0x8]; + u8 level[0x8]; + u8 reserved_6[0x8]; + u8 log_size[0x8]; + + u8 reserved_7[0x120]; +}; + +struct mlx5_ifc_create_flow_group_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 group_id[0x18]; + + u8 reserved_2[0x20]; +}; + +enum { + MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0, + MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, + MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS = 0x2, +}; + +struct mlx5_ifc_create_flow_group_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x20]; + + u8 start_flow_index[0x20]; + + u8 reserved_6[0x20]; + + u8 end_flow_index[0x20]; + + u8 reserved_7[0xa0]; + + u8 reserved_8[0x18]; + u8 match_criteria_enable[0x8]; + + struct mlx5_ifc_fte_match_param_bits match_criteria; + + u8 reserved_9[0xe00]; +}; + +struct mlx5_ifc_create_eq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x18]; + u8 eq_number[0x8]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_eq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + struct mlx5_ifc_eqc_bits eq_context_entry; + + u8 reserved_3[0x40]; + + u8 event_bitmask[0x40]; + + u8 reserved_4[0x580]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_create_dct_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 dctn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_dct_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + struct mlx5_ifc_dctc_bits dct_context_entry; + + u8 reserved_3[0x180]; +}; + +struct mlx5_ifc_create_cq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 cqn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_cq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + struct mlx5_ifc_cqc_bits cq_context; + + u8 reserved_3[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_config_int_moderation_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x4]; + u8 min_delay[0xc]; + u8 int_vector[0x10]; + + u8 reserved_2[0x20]; +}; + +enum { + MLX5_CONFIG_INT_MODERATION_IN_OP_MOD_WRITE = 0x0, + MLX5_CONFIG_INT_MODERATION_IN_OP_MOD_READ = 0x1, +}; + +struct mlx5_ifc_config_int_moderation_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x4]; + u8 min_delay[0xc]; + u8 int_vector[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_attach_to_mcg_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_attach_to_mcg_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 multicast_gid[16][0x8]; +}; + +struct mlx5_ifc_arm_xrc_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +enum { + MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ = 0x1, +}; + +struct mlx5_ifc_arm_xrc_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 xrc_srqn[0x18]; + + u8 reserved_3[0x10]; + u8 lwm[0x10]; +}; + +struct mlx5_ifc_arm_rq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +enum { + MLX5_ARM_RQ_IN_OP_MOD_SRQ_ = 0x1, +}; + +struct mlx5_ifc_arm_rq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 srq_number[0x18]; + + u8 reserved_3[0x10]; + u8 lwm[0x10]; +}; + +struct mlx5_ifc_arm_dct_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_arm_dct_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 dct_number[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_alloc_xrcd_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 xrcd[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_alloc_xrcd_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_alloc_uar_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 uar[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_alloc_uar_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_alloc_transport_domain_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 transport_domain[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_alloc_transport_domain_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_alloc_q_counter_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x18]; + u8 counter_set_id[0x8]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_alloc_q_counter_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_alloc_pd_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 pd[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_alloc_pd_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_add_vxlan_udp_dport_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_add_vxlan_udp_dport_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x20]; + + u8 reserved_3[0x10]; + u8 vxlan_udp_port[0x10]; +}; + +struct mlx5_ifc_access_register_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 register_data[0][0x20]; +}; + +enum { + MLX5_ACCESS_REGISTER_IN_OP_MOD_WRITE = 0x0, + MLX5_ACCESS_REGISTER_IN_OP_MOD_READ = 0x1, +}; + +struct mlx5_ifc_access_register_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 register_id[0x10]; + + u8 argument[0x20]; + + u8 register_data[0][0x20]; +}; + +struct mlx5_ifc_sltp_reg_bits { + u8 status[0x4]; + u8 version[0x4]; + u8 local_port[0x8]; + u8 pnat[0x2]; + u8 reserved_0[0x2]; + u8 lane[0x4]; + u8 reserved_1[0x8]; + + u8 reserved_2[0x20]; + + u8 reserved_3[0x7]; + u8 polarity[0x1]; + u8 ob_tap0[0x8]; + u8 ob_tap1[0x8]; + u8 ob_tap2[0x8]; + + u8 reserved_4[0xc]; + u8 ob_preemp_mode[0x4]; + u8 ob_reg[0x8]; + u8 ob_bias[0x8]; + + u8 reserved_5[0x20]; +}; + +struct mlx5_ifc_slrg_reg_bits { + u8 status[0x4]; + u8 version[0x4]; + u8 local_port[0x8]; + u8 pnat[0x2]; + u8 reserved_0[0x2]; + u8 lane[0x4]; + u8 reserved_1[0x8]; + + u8 time_to_link_up[0x10]; + u8 reserved_2[0xc]; + u8 grade_lane_speed[0x4]; + + u8 grade_version[0x8]; + u8 grade[0x18]; + + u8 reserved_3[0x4]; + u8 height_grade_type[0x4]; + u8 height_grade[0x18]; + + u8 height_dz[0x10]; + u8 height_dv[0x10]; + + u8 reserved_4[0x10]; + u8 height_sigma[0x10]; + + u8 reserved_5[0x20]; + + u8 reserved_6[0x4]; + u8 phase_grade_type[0x4]; + u8 phase_grade[0x18]; + + u8 reserved_7[0x8]; + u8 phase_eo_pos[0x8]; + u8 reserved_8[0x8]; + u8 phase_eo_neg[0x8]; + + u8 ffe_set_tested[0x10]; + u8 test_errors_per_lane[0x10]; +}; + +struct mlx5_ifc_pvlc_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x1c]; + u8 vl_hw_cap[0x4]; + + u8 reserved_3[0x1c]; + u8 vl_admin[0x4]; + + u8 reserved_4[0x1c]; + u8 vl_operational[0x4]; +}; + +struct mlx5_ifc_pude_reg_bits { + u8 swid[0x8]; + u8 local_port[0x8]; + u8 reserved_0[0x4]; + u8 admin_status[0x4]; + u8 reserved_1[0x4]; + u8 oper_status[0x4]; + + u8 reserved_2[0x60]; +}; + +struct mlx5_ifc_ptys_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0xd]; + u8 proto_mask[0x3]; + + u8 reserved_2[0x40]; + + u8 eth_proto_capability[0x20]; + + u8 ib_link_width_capability[0x10]; + u8 ib_proto_capability[0x10]; + + u8 reserved_3[0x20]; + + u8 eth_proto_admin[0x20]; + + u8 ib_link_width_admin[0x10]; + u8 ib_proto_admin[0x10]; + + u8 reserved_4[0x20]; + + u8 eth_proto_oper[0x20]; + + u8 ib_link_width_oper[0x10]; + u8 ib_proto_oper[0x10]; + + u8 reserved_5[0x20]; + + u8 eth_proto_lp_advertise[0x20]; + + u8 reserved_6[0x60]; +}; + +struct mlx5_ifc_ptas_reg_bits { + u8 reserved_0[0x20]; + + u8 algorithm_options[0x10]; + u8 reserved_1[0x4]; + u8 repetitions_mode[0x4]; + u8 num_of_repetitions[0x8]; + + u8 grade_version[0x8]; + u8 height_grade_type[0x4]; + u8 phase_grade_type[0x4]; + u8 height_grade_weight[0x8]; + u8 phase_grade_weight[0x8]; + + u8 gisim_measure_bits[0x10]; + u8 adaptive_tap_measure_bits[0x10]; + + u8 ber_bath_high_error_threshold[0x10]; + u8 ber_bath_mid_error_threshold[0x10]; + + u8 ber_bath_low_error_threshold[0x10]; + u8 one_ratio_high_threshold[0x10]; + + u8 one_ratio_high_mid_threshold[0x10]; + u8 one_ratio_low_mid_threshold[0x10]; + + u8 one_ratio_low_threshold[0x10]; + u8 ndeo_error_threshold[0x10]; + + u8 mixer_offset_step_size[0x10]; + u8 reserved_2[0x8]; + u8 mix90_phase_for_voltage_bath[0x8]; + + u8 mixer_offset_start[0x10]; + u8 mixer_offset_end[0x10]; + + u8 reserved_3[0x15]; + u8 ber_test_time[0xb]; +}; + +struct mlx5_ifc_pspa_reg_bits { + u8 swid[0x8]; + u8 local_port[0x8]; + u8 sub_port[0x8]; + u8 reserved_0[0x8]; + + u8 reserved_1[0x20]; +}; + +struct mlx5_ifc_pqdr_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x5]; + u8 prio[0x3]; + u8 reserved_2[0x6]; + u8 mode[0x2]; + + u8 reserved_3[0x20]; + + u8 reserved_4[0x10]; + u8 min_threshold[0x10]; + + u8 reserved_5[0x10]; + u8 max_threshold[0x10]; + + u8 reserved_6[0x10]; + u8 mark_probability_denominator[0x10]; + + u8 reserved_7[0x60]; +}; + +struct mlx5_ifc_ppsc_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x60]; + + u8 reserved_3[0x1c]; + u8 wrps_admin[0x4]; + + u8 reserved_4[0x1c]; + u8 wrps_status[0x4]; + + u8 reserved_5[0x8]; + u8 up_threshold[0x8]; + u8 reserved_6[0x8]; + u8 down_threshold[0x8]; + + u8 reserved_7[0x20]; + + u8 reserved_8[0x1c]; + u8 srps_admin[0x4]; + + u8 reserved_9[0x1c]; + u8 srps_status[0x4]; + + u8 reserved_10[0x40]; +}; + +struct mlx5_ifc_pplr_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x8]; + u8 lb_cap[0x8]; + u8 reserved_3[0x8]; + u8 lb_en[0x8]; +}; + +struct mlx5_ifc_pplm_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x20]; + + u8 port_profile_mode[0x8]; + u8 static_port_profile[0x8]; + u8 active_port_profile[0x8]; + u8 reserved_3[0x8]; + + u8 retransmission_active[0x8]; + u8 fec_mode_active[0x18]; + + u8 reserved_4[0x20]; +}; + +struct mlx5_ifc_ppcnt_reg_bits { + u8 swid[0x8]; + u8 local_port[0x8]; + u8 pnat[0x2]; + u8 reserved_0[0x8]; + u8 grp[0x6]; + + u8 clr[0x1]; + u8 reserved_1[0x1c]; + u8 prio_tc[0x3]; + + union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set; +}; + +struct mlx5_ifc_ppad_reg_bits { + u8 reserved_0[0x3]; + u8 single_mac[0x1]; + u8 reserved_1[0x4]; + u8 local_port[0x8]; + u8 mac_47_32[0x10]; + + u8 mac_31_0[0x20]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_pmtu_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 max_mtu[0x10]; + u8 reserved_2[0x10]; + + u8 admin_mtu[0x10]; + u8 reserved_3[0x10]; + + u8 oper_mtu[0x10]; + u8 reserved_4[0x10]; +}; + +struct mlx5_ifc_pmpr_reg_bits { + u8 reserved_0[0x8]; + u8 module[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x18]; + u8 attenuation_5g[0x8]; + + u8 reserved_3[0x18]; + u8 attenuation_7g[0x8]; + + u8 reserved_4[0x18]; + u8 attenuation_12g[0x8]; +}; + +struct mlx5_ifc_pmpe_reg_bits { + u8 reserved_0[0x8]; + u8 module[0x8]; + u8 reserved_1[0xc]; + u8 module_status[0x4]; + + u8 reserved_2[0x60]; +}; + +struct mlx5_ifc_pmpc_reg_bits { + u8 module_state_updated[32][0x8]; +}; + +struct mlx5_ifc_pmlpn_reg_bits { + u8 reserved_0[0x4]; + u8 mlpn_status[0x4]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 e[0x1]; + u8 reserved_2[0x1f]; +}; + +struct mlx5_ifc_pmlp_reg_bits { + u8 rxtx[0x1]; + u8 reserved_0[0x7]; + u8 local_port[0x8]; + u8 reserved_1[0x8]; + u8 width[0x8]; + + u8 lane0_module_mapping[0x20]; + + u8 lane1_module_mapping[0x20]; + + u8 lane2_module_mapping[0x20]; + + u8 lane3_module_mapping[0x20]; + + u8 reserved_2[0x160]; +}; + +struct mlx5_ifc_pmaos_reg_bits { + u8 reserved_0[0x8]; + u8 module[0x8]; + u8 reserved_1[0x4]; + u8 admin_status[0x4]; + u8 reserved_2[0x4]; + u8 oper_status[0x4]; + + u8 ase[0x1]; + u8 ee[0x1]; + u8 reserved_3[0x1c]; + u8 e[0x2]; + + u8 reserved_4[0x40]; +}; + +struct mlx5_ifc_plpc_reg_bits { + u8 reserved_0[0x4]; + u8 profile_id[0xc]; + u8 reserved_1[0x4]; + u8 proto_mask[0x4]; + u8 reserved_2[0x8]; + + u8 reserved_3[0x10]; + u8 lane_speed[0x10]; + + u8 reserved_4[0x17]; + u8 lpbf[0x1]; + u8 fec_mode_policy[0x8]; + + u8 retransmission_capability[0x8]; + u8 fec_mode_capability[0x18]; + + u8 retransmission_support_admin[0x8]; + u8 fec_mode_support_admin[0x18]; + + u8 retransmission_request_admin[0x8]; + u8 fec_mode_request_admin[0x18]; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_plib_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x8]; + u8 ib_port[0x8]; + + u8 reserved_2[0x60]; +}; + +struct mlx5_ifc_plbf_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0xd]; + u8 lbf_mode[0x3]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_pipg_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 dic[0x1]; + u8 reserved_2[0x19]; + u8 ipg[0x4]; + u8 reserved_3[0x2]; +}; + +struct mlx5_ifc_pifr_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0xe0]; + + u8 port_filter[8][0x20]; + + u8 port_filter_update_en[8][0x20]; +}; + +struct mlx5_ifc_pfcc_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 ppan[0x4]; + u8 reserved_2[0x4]; + u8 prio_mask_tx[0x8]; + u8 reserved_3[0x8]; + u8 prio_mask_rx[0x8]; + + u8 pptx[0x1]; + u8 aptx[0x1]; + u8 reserved_4[0x6]; + u8 pfctx[0x8]; + u8 reserved_5[0x10]; + + u8 pprx[0x1]; + u8 aprx[0x1]; + u8 reserved_6[0x6]; + u8 pfcrx[0x8]; + u8 reserved_7[0x10]; + + u8 reserved_8[0x80]; +}; + +struct mlx5_ifc_pelc_reg_bits { + u8 op[0x4]; + u8 reserved_0[0x4]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 op_admin[0x8]; + u8 op_capability[0x8]; + u8 op_request[0x8]; + u8 op_active[0x8]; + + u8 admin[0x40]; + + u8 capability[0x40]; + + u8 request[0x40]; + + u8 active[0x40]; + + u8 reserved_2[0x80]; +}; + +struct mlx5_ifc_peir_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0xc]; + u8 error_count[0x4]; + u8 reserved_3[0x10]; + + u8 reserved_4[0xc]; + u8 lane[0x4]; + u8 reserved_5[0x8]; + u8 error_type[0x8]; +}; + +struct mlx5_ifc_pcap_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 port_capability_mask[4][0x20]; +}; + +struct mlx5_ifc_paos_reg_bits { + u8 swid[0x8]; + u8 local_port[0x8]; + u8 reserved_0[0x4]; + u8 admin_status[0x4]; + u8 reserved_1[0x4]; + u8 oper_status[0x4]; + + u8 ase[0x1]; + u8 ee[0x1]; + u8 reserved_2[0x1c]; + u8 e[0x2]; + + u8 reserved_3[0x40]; +}; + +struct mlx5_ifc_pamp_reg_bits { + u8 reserved_0[0x8]; + u8 opamp_group[0x8]; + u8 reserved_1[0xc]; + u8 opamp_group_type[0x4]; + + u8 start_index[0x10]; + u8 reserved_2[0x4]; + u8 num_of_indices[0xc]; + + u8 index_data[18][0x10]; +}; + +struct mlx5_ifc_lane_2_module_mapping_bits { + u8 reserved_0[0x6]; + u8 rx_lane[0x2]; + u8 reserved_1[0x6]; + u8 tx_lane[0x2]; + u8 reserved_2[0x8]; + u8 module[0x8]; +}; + +struct mlx5_ifc_bufferx_reg_bits { + u8 reserved_0[0x6]; + u8 lossy[0x1]; + u8 epsb[0x1]; + u8 reserved_1[0xc]; + u8 size[0xc]; + + u8 xoff_threshold[0x10]; + u8 xon_threshold[0x10]; +}; + +struct mlx5_ifc_set_node_in_bits { + u8 node_description[64][0x8]; +}; + +struct mlx5_ifc_register_power_settings_bits { + u8 reserved_0[0x18]; + u8 power_settings_level[0x8]; + + u8 reserved_1[0x60]; +}; + +struct mlx5_ifc_register_host_endianness_bits { + u8 he[0x1]; + u8 reserved_0[0x1f]; + + u8 reserved_1[0x60]; +}; + +struct mlx5_ifc_umr_pointer_desc_argument_bits { + u8 reserved_0[0x20]; + + u8 mkey[0x20]; + + u8 addressh_63_32[0x20]; + + u8 addressl_31_0[0x20]; +}; + +struct mlx5_ifc_ud_adrs_vector_bits { + u8 dc_key[0x40]; + + u8 ext[0x1]; + u8 reserved_0[0x7]; + u8 destination_qp_dct[0x18]; + + u8 static_rate[0x4]; + u8 sl_eth_prio[0x4]; + u8 fl[0x1]; + u8 mlid[0x7]; + u8 rlid_udp_sport[0x10]; + + u8 reserved_1[0x20]; + + u8 rmac_47_16[0x20]; + + u8 rmac_15_0[0x10]; + u8 tclass[0x8]; + u8 hop_limit[0x8]; + + u8 reserved_2[0x1]; + u8 grh[0x1]; + u8 reserved_3[0x2]; + u8 src_addr_index[0x8]; + u8 flow_label[0x14]; + + u8 rgid_rip[16][0x8]; +}; + +struct mlx5_ifc_pages_req_event_bits { + u8 reserved_0[0x10]; + u8 function_id[0x10]; + + u8 num_pages[0x20]; + + u8 reserved_1[0xa0]; +}; + +struct mlx5_ifc_eqe_bits { + u8 reserved_0[0x8]; + u8 event_type[0x8]; + u8 reserved_1[0x8]; + u8 event_sub_type[0x8]; + + u8 reserved_2[0xe0]; + + union mlx5_ifc_event_auto_bits event_data; + + u8 reserved_3[0x10]; + u8 signature[0x8]; + u8 reserved_4[0x7]; + u8 owner[0x1]; +}; + +enum { + MLX5_CMD_QUEUE_ENTRY_TYPE_PCIE_CMD_IF_TRANSPORT = 0x7, +}; + +struct mlx5_ifc_cmd_queue_entry_bits { + u8 type[0x8]; + u8 reserved_0[0x18]; + + u8 input_length[0x20]; + + u8 input_mailbox_pointer_63_32[0x20]; + + u8 input_mailbox_pointer_31_9[0x17]; + u8 reserved_1[0x9]; + + u8 command_input_inline_data[16][0x8]; + + u8 command_output_inline_data[16][0x8]; + + u8 output_mailbox_pointer_63_32[0x20]; + + u8 output_mailbox_pointer_31_9[0x17]; + u8 reserved_2[0x9]; + + u8 output_length[0x20]; + + u8 token[0x8]; + u8 signature[0x8]; + u8 reserved_3[0x8]; + u8 status[0x7]; + u8 ownership[0x1]; +}; + +struct mlx5_ifc_cmd_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 command_output[0x20]; +}; + +struct mlx5_ifc_cmd_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 command[0][0x20]; +}; + +struct mlx5_ifc_cmd_if_box_bits { + u8 mailbox_data[512][0x8]; + + u8 reserved_0[0x180]; + + u8 next_pointer_63_32[0x20]; + + u8 next_pointer_31_10[0x16]; + u8 reserved_1[0xa]; + + u8 block_number[0x20]; + + u8 reserved_2[0x8]; + u8 token[0x8]; + u8 ctrl_signature[0x8]; + u8 signature[0x8]; +}; + +struct mlx5_ifc_mtt_bits { + u8 ptag_63_32[0x20]; + + u8 ptag_31_8[0x18]; + u8 reserved_0[0x6]; + u8 wr_en[0x1]; + u8 rd_en[0x1]; +}; + +enum { + MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER = 0x0, + MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED = 0x1, + MLX5_INITIAL_SEG_NIC_INTERFACE_NO_DRAM_NIC = 0x2, +}; + +enum { + MLX5_INITIAL_SEG_NIC_INTERFACE_SUPPORTED_FULL_DRIVER = 0x0, + MLX5_INITIAL_SEG_NIC_INTERFACE_SUPPORTED_DISABLED = 0x1, + MLX5_INITIAL_SEG_NIC_INTERFACE_SUPPORTED_NO_DRAM_NIC = 0x2, +}; + +enum { + MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_INTERNAL_ERR = 0x1, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_DEAD_IRISC = 0x7, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_HW_FATAL_ERR = 0x8, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_CRC_ERR = 0x9, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_FETCH_PCI_ERR = 0xa, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PAGE_ERR = 0xb, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_ASYNCHRONOUS_EQ_BUF_OVERRUN = 0xc, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_IN_ERR = 0xd, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_INV = 0xe, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR = 0xf, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR = 0x10, +}; + +struct mlx5_ifc_initial_seg_bits { + u8 fw_rev_minor[0x10]; + u8 fw_rev_major[0x10]; + + u8 cmd_interface_rev[0x10]; + u8 fw_rev_subminor[0x10]; + + u8 reserved_0[0x40]; + + u8 cmdq_phy_addr_63_32[0x20]; + + u8 cmdq_phy_addr_31_12[0x14]; + u8 reserved_1[0x2]; + u8 nic_interface[0x2]; + u8 log_cmdq_size[0x4]; + u8 log_cmdq_stride[0x4]; + + u8 command_doorbell_vector[0x20]; + + u8 reserved_2[0xf00]; + + u8 initializing[0x1]; + u8 reserved_3[0x4]; + u8 nic_interface_supported[0x3]; + u8 reserved_4[0x18]; + + struct mlx5_ifc_health_buffer_bits health_buffer; + + u8 no_dram_nic_offset[0x20]; + + u8 reserved_5[0x6e40]; + + u8 reserved_6[0x1f]; + u8 clear_int[0x1]; + + u8 health_syndrome[0x8]; + u8 health_counter[0x18]; + + u8 reserved_7[0x17fc0]; +}; + +union mlx5_ifc_ports_control_registers_document_bits { + struct mlx5_ifc_bufferx_reg_bits bufferx_reg; + struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; + struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits eth_2863_cntrs_grp_data_layout; + struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits eth_3635_cntrs_grp_data_layout; + struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits eth_802_3_cntrs_grp_data_layout; + struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout; + struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout; + struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout; + struct mlx5_ifc_lane_2_module_mapping_bits lane_2_module_mapping; + struct mlx5_ifc_pamp_reg_bits pamp_reg; + struct mlx5_ifc_paos_reg_bits paos_reg; + struct mlx5_ifc_pcap_reg_bits pcap_reg; + struct mlx5_ifc_peir_reg_bits peir_reg; + struct mlx5_ifc_pelc_reg_bits pelc_reg; + struct mlx5_ifc_pfcc_reg_bits pfcc_reg; + struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; + struct mlx5_ifc_pifr_reg_bits pifr_reg; + struct mlx5_ifc_pipg_reg_bits pipg_reg; + struct mlx5_ifc_plbf_reg_bits plbf_reg; + struct mlx5_ifc_plib_reg_bits plib_reg; + struct mlx5_ifc_plpc_reg_bits plpc_reg; + struct mlx5_ifc_pmaos_reg_bits pmaos_reg; + struct mlx5_ifc_pmlp_reg_bits pmlp_reg; + struct mlx5_ifc_pmlpn_reg_bits pmlpn_reg; + struct mlx5_ifc_pmpc_reg_bits pmpc_reg; + struct mlx5_ifc_pmpe_reg_bits pmpe_reg; + struct mlx5_ifc_pmpr_reg_bits pmpr_reg; + struct mlx5_ifc_pmtu_reg_bits pmtu_reg; + struct mlx5_ifc_ppad_reg_bits ppad_reg; + struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg; + struct mlx5_ifc_pplm_reg_bits pplm_reg; + struct mlx5_ifc_pplr_reg_bits pplr_reg; + struct mlx5_ifc_ppsc_reg_bits ppsc_reg; + struct mlx5_ifc_pqdr_reg_bits pqdr_reg; + struct mlx5_ifc_pspa_reg_bits pspa_reg; + struct mlx5_ifc_ptas_reg_bits ptas_reg; + struct mlx5_ifc_ptys_reg_bits ptys_reg; + struct mlx5_ifc_pude_reg_bits pude_reg; + struct mlx5_ifc_pvlc_reg_bits pvlc_reg; + struct mlx5_ifc_slrg_reg_bits slrg_reg; + struct mlx5_ifc_sltp_reg_bits sltp_reg; + u8 reserved_0[0x60e0]; +}; + +union mlx5_ifc_debug_enhancements_document_bits { + struct mlx5_ifc_health_buffer_bits health_buffer; + u8 reserved_0[0x200]; +}; + +union mlx5_ifc_uplink_pci_interface_document_bits { + struct mlx5_ifc_initial_seg_bits initial_seg; + u8 reserved_0[0x20060]; }; #endif /* MLX5_IFC_H */ diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 310b5f7fd6ae..f079fb1a31f7 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -134,13 +134,21 @@ enum { enum { MLX5_WQE_CTRL_CQ_UPDATE = 2 << 2, + MLX5_WQE_CTRL_CQ_UPDATE_AND_EQE = 3 << 2, MLX5_WQE_CTRL_SOLICITED = 1 << 1, }; enum { + MLX5_SEND_WQE_DS = 16, MLX5_SEND_WQE_BB = 64, }; +#define MLX5_SEND_WQEBB_NUM_DS (MLX5_SEND_WQE_BB / MLX5_SEND_WQE_DS) + +enum { + MLX5_SEND_WQE_MAX_WQEBBS = 16, +}; + enum { MLX5_WQE_FMR_PERM_LOCAL_READ = 1 << 27, MLX5_WQE_FMR_PERM_LOCAL_WRITE = 1 << 28, @@ -200,6 +208,23 @@ struct mlx5_wqe_ctrl_seg { #define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00 #define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8 +enum { + MLX5_ETH_WQE_L3_INNER_CSUM = 1 << 4, + MLX5_ETH_WQE_L4_INNER_CSUM = 1 << 5, + MLX5_ETH_WQE_L3_CSUM = 1 << 6, + MLX5_ETH_WQE_L4_CSUM = 1 << 7, +}; + +struct mlx5_wqe_eth_seg { + u8 rsvd0[4]; + u8 cs_flags; + u8 rsvd1; + __be16 mss; + __be32 rsvd2; + __be16 inline_hdr_sz; + u8 inline_hdr_start[2]; +}; + struct mlx5_wqe_xrc_seg { __be32 xrc_srqn; u8 rsvd[12]; -- cgit v1.2.3 From 938fe83c8dcbbf294d167e6163200a8540ae43c4 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 28 May 2015 22:28:41 +0300 Subject: net/mlx5_core: New device capabilities handling - Query all supported types of dev caps on driver load. - Store the Cap data outbox per cap type into driver private data. - Introduce new Macros to access/dump stored caps (using the auto generated data types). - Obsolete SW representation of dev caps (no need for SW copy for each cap). - Modify IB driver to use new macros for checking caps. Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx5/cq.c | 8 +- drivers/infiniband/hw/mlx5/mad.c | 2 +- drivers/infiniband/hw/mlx5/main.c | 113 ++++++++------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 +- drivers/infiniband/hw/mlx5/mr.c | 3 +- drivers/infiniband/hw/mlx5/odp.c | 47 +++---- drivers/infiniband/hw/mlx5/qp.c | 84 +++++------ drivers/infiniband/hw/mlx5/srq.c | 7 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 4 +- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 90 +++++++----- drivers/net/ethernet/mellanox/mlx5/core/main.c | 154 +++++++-------------- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 10 +- drivers/net/ethernet/mellanox/mlx5/core/uar.c | 7 +- include/linux/mlx5/device.h | 66 ++++++++- include/linux/mlx5/driver.h | 58 +------- 15 files changed, 310 insertions(+), 349 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 4e88b18cf62e..e2bea9ab93b3 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -753,7 +753,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, return ERR_PTR(-EINVAL); entries = roundup_pow_of_two(entries + 1); - if (entries > dev->mdev->caps.gen.max_cqes) + if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) return ERR_PTR(-EINVAL); cq = kzalloc(sizeof(*cq), GFP_KERNEL); @@ -920,7 +920,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) int err; u32 fsel; - if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_CQ_MODER)) + if (!MLX5_CAP_GEN(dev->mdev, cq_moderation)) return -ENOSYS; in = kzalloc(sizeof(*in), GFP_KERNEL); @@ -1075,7 +1075,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) int uninitialized_var(cqe_size); unsigned long flags; - if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) { + if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) { pr_info("Firmware does not support resize CQ\n"); return -ENOSYS; } @@ -1084,7 +1084,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) return -EINVAL; entries = roundup_pow_of_two(entries + 1); - if (entries > dev->mdev->caps.gen.max_cqes + 1) + if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1) return -EINVAL; if (entries == ibcq->cqe + 1) diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 9cf9a37bb5ff..f2d9e70818d7 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -129,7 +129,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) packet_error = be16_to_cpu(out_mad->status); - dev->mdev->caps.gen.ext_port_cap[port - 1] = (!err && !packet_error) ? + dev->mdev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ? MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0; out: diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 57c9809e8b87..9075649f30fc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -66,15 +66,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; - struct mlx5_general_caps *gen; int err = -ENOMEM; int max_rq_sg; int max_sq_sg; - u64 flags; - gen = &dev->mdev->caps.gen; in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); if (!in_mad || !out_mad) @@ -96,18 +94,18 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN; - flags = gen->flags; - if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR) + + if (MLX5_CAP_GEN(mdev, pkv)) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; - if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR) + if (MLX5_CAP_GEN(mdev, qkv)) props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; - if (flags & MLX5_DEV_CAP_FLAG_APM) + if (MLX5_CAP_GEN(mdev, apm)) props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; - if (flags & MLX5_DEV_CAP_FLAG_XRC) + if (MLX5_CAP_GEN(mdev, xrc)) props->device_cap_flags |= IB_DEVICE_XRC; props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; - if (flags & MLX5_DEV_CAP_FLAG_SIG_HAND_OVER) { + if (MLX5_CAP_GEN(mdev, sho)) { props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; /* At this stage no support for signature handover */ props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | @@ -116,7 +114,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->sig_guard_cap = IB_GUARD_T10DIF_CRC | IB_GUARD_T10DIF_CSUM; } - if (flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST) + if (MLX5_CAP_GEN(mdev, block_lb_mc)) props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) & @@ -126,37 +124,38 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, memcpy(&props->sys_image_guid, out_mad->data + 4, 8); props->max_mr_size = ~0ull; - props->page_size_cap = gen->min_page_sz; - props->max_qp = 1 << gen->log_max_qp; - props->max_qp_wr = gen->max_wqes; - max_rq_sg = gen->max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg); - max_sq_sg = (gen->max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) / - sizeof(struct mlx5_wqe_data_seg); + props->page_size_cap = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz); + props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp); + props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); + max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) / + sizeof(struct mlx5_wqe_data_seg); + max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) - + sizeof(struct mlx5_wqe_ctrl_seg)) / + sizeof(struct mlx5_wqe_data_seg); props->max_sge = min(max_rq_sg, max_sq_sg); - props->max_cq = 1 << gen->log_max_cq; - props->max_cqe = gen->max_cqes - 1; - props->max_mr = 1 << gen->log_max_mkey; - props->max_pd = 1 << gen->log_max_pd; - props->max_qp_rd_atom = 1 << gen->log_max_ra_req_qp; - props->max_qp_init_rd_atom = 1 << gen->log_max_ra_res_qp; - props->max_srq = 1 << gen->log_max_srq; - props->max_srq_wr = gen->max_srq_wqes - 1; - props->local_ca_ack_delay = gen->local_ca_ack_delay; + props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); + props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_eq_sz)) - 1; + props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); + props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd); + props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp); + props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp); + props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq); + props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1; + props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay); props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; props->max_srq_sge = max_rq_sg - 1; props->max_fast_reg_page_list_len = (unsigned int)-1; - props->local_ca_ack_delay = gen->local_ca_ack_delay; props->atomic_cap = IB_ATOMIC_NONE; props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28)); - props->max_mcast_grp = 1 << gen->log_max_mcg; - props->max_mcast_qp_attach = gen->max_qp_mcg; + props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); + props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg); props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG) + if (MLX5_CAP_GEN(mdev, pg)) props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; props->odp_caps = dev->odp_caps; #endif @@ -172,14 +171,13 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; - struct mlx5_general_caps *gen; int ext_active_speed; int err = -ENOMEM; - gen = &dev->mdev->caps.gen; - if (port < 1 || port > gen->num_ports) { + if (port < 1 || port > MLX5_CAP_GEN(mdev, num_ports)) { mlx5_ib_warn(dev, "invalid port number %d\n", port); return -EINVAL; } @@ -210,8 +208,8 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, props->phys_state = out_mad->data[33] >> 4; props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20)); props->gid_tbl_len = out_mad->data[50]; - props->max_msg_sz = 1 << gen->log_max_msg; - props->pkey_tbl_len = gen->port[port - 1].pkey_table_len; + props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg); + props->pkey_tbl_len = mdev->port_caps[port - 1].pkey_table_len; props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; @@ -238,7 +236,7 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, /* If reported active speed is QDR, check if is FDR-10 */ if (props->active_speed == 4) { - if (gen->ext_port_cap[port - 1] & + if (mdev->port_caps[port - 1].ext_port_cap & MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { init_query_mad(in_mad); in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; @@ -392,7 +390,6 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct mlx5_ib_alloc_ucontext_req_v2 req; struct mlx5_ib_alloc_ucontext_resp resp; struct mlx5_ib_ucontext *context; - struct mlx5_general_caps *gen; struct mlx5_uuar_info *uuari; struct mlx5_uar *uars; int gross_uuars; @@ -403,7 +400,6 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, int i; size_t reqlen; - gen = &dev->mdev->caps.gen; if (!dev->ib_active) return ERR_PTR(-EAGAIN); @@ -436,14 +432,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE; - resp.qp_tab_size = 1 << gen->log_max_qp; - resp.bf_reg_size = gen->bf_reg_size; - resp.cache_line_size = L1_CACHE_BYTES; - resp.max_sq_desc_sz = gen->max_sq_desc_sz; - resp.max_rq_desc_sz = gen->max_rq_desc_sz; - resp.max_send_wqebb = gen->max_wqes; - resp.max_recv_wr = gen->max_wqes; - resp.max_srq_recv_wr = gen->max_srq_wqes; + resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); + resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); + resp.cache_line_size = L1_CACHE_BYTES; + resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); + resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); + resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); + resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); + resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) @@ -493,7 +489,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, mutex_init(&context->db_page_mutex); resp.tot_uuars = req.total_num_uuars; - resp.num_ports = gen->num_ports; + resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports); err = ib_copy_to_udata(udata, &resp, sizeof(resp) - sizeof(resp.reserved)); if (err) @@ -895,11 +891,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, static void get_ext_port_caps(struct mlx5_ib_dev *dev) { - struct mlx5_general_caps *gen; int port; - gen = &dev->mdev->caps.gen; - for (port = 1; port <= gen->num_ports; port++) + for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) mlx5_query_ext_port_caps(dev, port); } @@ -907,11 +901,9 @@ static int get_port_caps(struct mlx5_ib_dev *dev) { struct ib_device_attr *dprops = NULL; struct ib_port_attr *pprops = NULL; - struct mlx5_general_caps *gen; int err = -ENOMEM; int port; - gen = &dev->mdev->caps.gen; pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); if (!pprops) goto out; @@ -926,14 +918,17 @@ static int get_port_caps(struct mlx5_ib_dev *dev) goto out; } - for (port = 1; port <= gen->num_ports; port++) { + for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) { err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); if (err) { - mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err); + mlx5_ib_warn(dev, "query_port %d failed %d\n", + port, err); break; } - gen->port[port - 1].pkey_table_len = dprops->max_pkeys; - gen->port[port - 1].gid_table_len = pprops->gid_tbl_len; + dev->mdev->port_caps[port - 1].pkey_table_len = + dprops->max_pkeys; + dev->mdev->port_caps[port - 1].gid_table_len = + pprops->gid_tbl_len; mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n", dprops->max_pkeys, pprops->gid_tbl_len); } @@ -1207,8 +1202,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.node_type = RDMA_NODE_IB_CA; - dev->ib_dev.local_dma_lkey = mdev->caps.gen.reserved_lkey; - dev->num_ports = mdev->caps.gen.num_ports; + dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; + dev->num_ports = MLX5_CAP_GEN(mdev, num_ports); dev->ib_dev.phys_port_cnt = dev->num_ports; dev->ib_dev.num_comp_vectors = dev->mdev->priv.eq_table.num_comp_vectors; @@ -1286,9 +1281,9 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; - mlx5_ib_internal_query_odp_caps(dev); + mlx5_ib_internal_fill_odp_caps(dev); - if (mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_XRC) { + if (MLX5_CAP_GEN(mdev, xrc)) { dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; dev->ib_dev.uverbs_cmd_mask |= diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index dff1cfcdf476..0c441add0464 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -617,7 +617,7 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING extern struct workqueue_struct *mlx5_ib_page_fault_wq; -int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev); +void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev); void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault); void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp); @@ -631,9 +631,9 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, unsigned long end); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ -static inline int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev) +static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { - return 0; + return; } static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) {} diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 71c593583864..bc9a0de897cb 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -975,8 +975,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, struct mlx5_ib_mr *mr; int inlen; int err; - bool pg_cap = !!(dev->mdev->caps.gen.flags & - MLX5_DEV_CAP_FLAG_ON_DMND_PG); + bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 5099db08afd2..aa8391e75385 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -109,40 +109,33 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, ib_umem_odp_unmap_dma_pages(umem, start, end); } -#define COPY_ODP_BIT_MLX_TO_IB(reg, ib_caps, field_name, bit_name) do { \ - if (be32_to_cpu(reg.field_name) & MLX5_ODP_SUPPORT_##bit_name) \ - ib_caps->field_name |= IB_ODP_SUPPORT_##bit_name; \ -} while (0) - -int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev) +void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { - int err; - struct mlx5_odp_caps hw_caps; struct ib_odp_caps *caps = &dev->odp_caps; memset(caps, 0, sizeof(*caps)); - if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG)) - return 0; - - err = mlx5_query_odp_caps(dev->mdev, &hw_caps); - if (err) - goto out; + if (!MLX5_CAP_GEN(dev->mdev, pg)) + return; caps->general_caps = IB_ODP_SUPPORT; - COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.ud_odp_caps, - SEND); - COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps, - SEND); - COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps, - RECV); - COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps, - WRITE); - COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps, - READ); - -out: - return err; + + if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.send)) + caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND; + + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.send)) + caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND; + + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.receive)) + caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_RECV; + + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.write)) + caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_WRITE; + + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.read)) + caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ; + + return; } static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev, diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 426eb88dfa49..15fd485d1ad9 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -220,13 +220,11 @@ static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type) static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd) { - struct mlx5_general_caps *gen; int wqe_size; int wq_size; - gen = &dev->mdev->caps.gen; /* Sanity check RQ size before proceeding */ - if (cap->max_recv_wr > gen->max_wqes) + if (cap->max_recv_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) return -EINVAL; if (!has_rq) { @@ -246,10 +244,11 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size; wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB); qp->rq.wqe_cnt = wq_size / wqe_size; - if (wqe_size > gen->max_rq_desc_sz) { + if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq)) { mlx5_ib_dbg(dev, "wqe_size %d, max %d\n", wqe_size, - gen->max_rq_desc_sz); + MLX5_CAP_GEN(dev->mdev, + max_wqe_sz_rq)); return -EINVAL; } qp->rq.wqe_shift = ilog2(wqe_size); @@ -330,11 +329,9 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, struct mlx5_ib_qp *qp) { - struct mlx5_general_caps *gen; int wqe_size; int wq_size; - gen = &dev->mdev->caps.gen; if (!attr->cap.max_send_wr) return 0; @@ -343,9 +340,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, if (wqe_size < 0) return wqe_size; - if (wqe_size > gen->max_sq_desc_sz) { + if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) { mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n", - wqe_size, gen->max_sq_desc_sz); + wqe_size, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)); return -EINVAL; } @@ -358,9 +355,10 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; - if (qp->sq.wqe_cnt > gen->max_wqes) { + if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) { mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n", - qp->sq.wqe_cnt, gen->max_wqes); + qp->sq.wqe_cnt, + 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)); return -ENOMEM; } qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); @@ -375,13 +373,11 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd) { - struct mlx5_general_caps *gen; int desc_sz = 1 << qp->sq.wqe_shift; - gen = &dev->mdev->caps.gen; - if (desc_sz > gen->max_sq_desc_sz) { + if (desc_sz > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) { mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n", - desc_sz, gen->max_sq_desc_sz); + desc_sz, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)); return -EINVAL; } @@ -393,9 +389,10 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, qp->sq.wqe_cnt = ucmd->sq_wqe_count; - if (qp->sq.wqe_cnt > gen->max_wqes) { + if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) { mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n", - qp->sq.wqe_cnt, gen->max_wqes); + qp->sq.wqe_cnt, + 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)); return -EINVAL; } @@ -866,22 +863,21 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct ib_udata *udata, struct mlx5_ib_qp *qp) { struct mlx5_ib_resources *devr = &dev->devr; + struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_create_qp_resp resp; struct mlx5_create_qp_mbox_in *in; - struct mlx5_general_caps *gen; struct mlx5_ib_create_qp ucmd; int inlen = sizeof(*in); int err; mlx5_ib_odp_create_qp(qp); - gen = &dev->mdev->caps.gen; mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { - if (!(gen->flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) { + if (!MLX5_CAP_GEN(mdev, block_lb_mc)) { mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); return -EINVAL; } else { @@ -914,15 +910,17 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (pd) { if (pd->uobject) { + __u32 max_wqes = + 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count); if (ucmd.rq_wqe_shift != qp->rq.wqe_shift || ucmd.rq_wqe_count != qp->rq.wqe_cnt) { mlx5_ib_dbg(dev, "invalid rq params\n"); return -EINVAL; } - if (ucmd.sq_wqe_count > gen->max_wqes) { + if (ucmd.sq_wqe_count > max_wqes) { mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n", - ucmd.sq_wqe_count, gen->max_wqes); + ucmd.sq_wqe_count, max_wqes); return -EINVAL; } err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen); @@ -1226,7 +1224,6 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { - struct mlx5_general_caps *gen; struct mlx5_ib_dev *dev; struct mlx5_ib_qp *qp; u16 xrcdn = 0; @@ -1244,12 +1241,11 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, } dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device); } - gen = &dev->mdev->caps.gen; switch (init_attr->qp_type) { case IB_QPT_XRC_TGT: case IB_QPT_XRC_INI: - if (!(gen->flags & MLX5_DEV_CAP_FLAG_XRC)) { + if (!MLX5_CAP_GEN(dev->mdev, xrc)) { mlx5_ib_dbg(dev, "XRC not supported\n"); return ERR_PTR(-ENOSYS); } @@ -1356,9 +1352,6 @@ enum { static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) { - struct mlx5_general_caps *gen; - - gen = &dev->mdev->caps.gen; if (rate == IB_RATE_PORT_CURRENT) { return 0; } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) { @@ -1366,7 +1359,7 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) } else { while (rate != IB_RATE_2_5_GBPS && !(1 << (rate + MLX5_STAT_RATE_OFFSET) & - gen->stat_rate_support)) + MLX5_CAP_GEN(dev->mdev, stat_rate_support))) --rate; } @@ -1377,10 +1370,8 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, struct mlx5_qp_path *path, u8 port, int attr_mask, u32 path_flags, const struct ib_qp_attr *attr) { - struct mlx5_general_caps *gen; int err; - gen = &dev->mdev->caps.gen; path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0; @@ -1391,9 +1382,11 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, path->rlid = cpu_to_be16(ah->dlid); if (ah->ah_flags & IB_AH_GRH) { - if (ah->grh.sgid_index >= gen->port[port - 1].gid_table_len) { + if (ah->grh.sgid_index >= + dev->mdev->port_caps[port - 1].gid_table_len) { pr_err("sgid_index (%u) too large. max is %d\n", - ah->grh.sgid_index, gen->port[port - 1].gid_table_len); + ah->grh.sgid_index, + dev->mdev->port_caps[port - 1].gid_table_len); return -EINVAL; } path->grh_mlid |= 1 << 7; @@ -1570,7 +1563,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_qp_context *context; - struct mlx5_general_caps *gen; struct mlx5_modify_qp_mbox_in *in; struct mlx5_ib_pd *pd; enum mlx5_qp_state mlx5_cur, mlx5_new; @@ -1579,7 +1571,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, int mlx5_st; int err; - gen = &dev->mdev->caps.gen; in = kzalloc(sizeof(*in), GFP_KERNEL); if (!in) return -ENOMEM; @@ -1619,7 +1610,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, err = -EINVAL; goto out; } - context->mtu_msgmax = (attr->path_mtu << 5) | gen->log_max_msg; + context->mtu_msgmax = (attr->path_mtu << 5) | + (u8)MLX5_CAP_GEN(dev->mdev, log_max_msg); } if (attr_mask & IB_QP_DEST_QPN) @@ -1777,11 +1769,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_qp *qp = to_mqp(ibqp); enum ib_qp_state cur_state, new_state; - struct mlx5_general_caps *gen; int err = -EINVAL; int port; - gen = &dev->mdev->caps.gen; mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; @@ -1793,21 +1783,25 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; if ((attr_mask & IB_QP_PORT) && - (attr->port_num == 0 || attr->port_num > gen->num_ports)) + (attr->port_num == 0 || + attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) goto out; if (attr_mask & IB_QP_PKEY_INDEX) { port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - if (attr->pkey_index >= gen->port[port - 1].pkey_table_len) + if (attr->pkey_index >= + dev->mdev->port_caps[port - 1].pkey_table_len) goto out; } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && - attr->max_rd_atomic > (1 << gen->log_max_ra_res_qp)) + attr->max_rd_atomic > + (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) goto out; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && - attr->max_dest_rd_atomic > (1 << gen->log_max_ra_req_qp)) + attr->max_dest_rd_atomic > + (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) goto out; if (cur_state == new_state && cur_state == IB_QPS_RESET) { @@ -3009,7 +3003,7 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at ib_ah_attr->port_num = path->port; if (ib_ah_attr->port_num == 0 || - ib_ah_attr->port_num > dev->caps.gen.num_ports) + ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports)) return; ib_ah_attr->sl = path->sl & 0xf; @@ -3135,12 +3129,10 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct mlx5_general_caps *gen; struct mlx5_ib_xrcd *xrcd; int err; - gen = &dev->mdev->caps.gen; - if (!(gen->flags & MLX5_DEV_CAP_FLAG_XRC)) + if (!MLX5_CAP_GEN(dev->mdev, xrc)) return ERR_PTR(-ENOSYS); xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 4242e1ded868..e8e8e942fa4a 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -236,7 +236,6 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_general_caps *gen; struct mlx5_ib_srq *srq; int desc_size; int buf_size; @@ -245,13 +244,13 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, int uninitialized_var(inlen); int is_xrc; u32 flgs, xrcdn; + __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); - gen = &dev->mdev->caps.gen; /* Sanity check SRQ size before proceeding */ - if (init_attr->attr.max_wr >= gen->max_srq_wqes) { + if (init_attr->attr.max_wr >= max_srq_wqes) { mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", init_attr->attr.max_wr, - gen->max_srq_wqes); + max_srq_wqes); return ERR_PTR(-EINVAL); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 516efc25fc4f..a40b96d4c662 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -455,7 +455,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) u32 async_event_mask = MLX5_ASYNC_EVENT_MASK; int err; - if (dev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG) + if (MLX5_CAP_GEN(dev, pg)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT); err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, @@ -478,7 +478,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) err = mlx5_create_map_eq(dev, &table->pages_eq, MLX5_EQ_VEC_PAGES, - dev->caps.gen.max_vf + 1, + /* TODO: sriov max_vf + */ 1, 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq", &dev->priv.uuari.uars[0]); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index ef9b7695decd..801ccadd709a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -64,50 +64,74 @@ out_out: return err; } -int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev, struct mlx5_caps *caps) +int mlx5_query_hca_caps(struct mlx5_core_dev *dev) { - return mlx5_core_get_caps(dev, caps, HCA_CAP_OPMOD_GET_CUR); -} - -int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *caps) -{ - u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)]; - int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); - void *out; int err; - if (!(dev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG)) - return -ENOTSUPP; + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; - memset(in, 0, sizeof(in)); - out = kzalloc(out_sz, GFP_KERNEL); - if (!out) - return -ENOMEM; - MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); - MLX5_SET(query_hca_cap_in, in, op_mod, HCA_CAP_OPMOD_GET_ODP_CUR); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_MAX); if (err) - goto out; + return err; - err = mlx5_cmd_status_to_err_v2(out); - if (err) { - mlx5_core_warn(dev, "query cur hca ODP caps failed, %d\n", err); - goto out; + if (MLX5_CAP_GEN(dev, eth_net_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; } - memcpy(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability), - sizeof(*caps)); + if (MLX5_CAP_GEN(dev, pg)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } - mlx5_core_dbg(dev, "on-demand paging capabilities:\nrc: %08x\nuc: %08x\nud: %08x\n", - be32_to_cpu(caps->per_transport_caps.rc_odp_caps), - be32_to_cpu(caps->per_transport_caps.uc_odp_caps), - be32_to_cpu(caps->per_transport_caps.ud_odp_caps)); + if (MLX5_CAP_GEN(dev, atomic)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } -out: - kfree(out); - return err; + if (MLX5_CAP_GEN(dev, roce)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, nic_flow_table)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } + return 0; } -EXPORT_SYMBOL(mlx5_query_odp_caps); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a652cb93ceaa..e7b7b123a128 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -211,11 +211,12 @@ static int mlx5_enable_msix(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; struct mlx5_eq_table *table = &priv->eq_table; - int num_eqs = 1 << dev->caps.gen.log_max_eq; + int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq); int nvec; int i; - nvec = dev->caps.gen.num_ports * num_online_cpus() + MLX5_EQ_VEC_COMP_BASE; + nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + + MLX5_EQ_VEC_COMP_BASE; nvec = min_t(int, nvec, num_eqs); if (nvec <= MLX5_EQ_VEC_COMP_BASE) return -ENOMEM; @@ -287,97 +288,28 @@ static u16 to_fw_pkey_sz(u32 size) } } -/* selectively copy writable fields clearing any reserved area - */ -static void copy_rw_fields(void *to, struct mlx5_caps *from) -{ - __be64 *flags_off = (__be64 *)MLX5_ADDR_OF(cmd_hca_cap, to, reserved_22); - u64 v64; - - MLX5_SET(cmd_hca_cap, to, log_max_qp, from->gen.log_max_qp); - MLX5_SET(cmd_hca_cap, to, log_max_ra_req_qp, from->gen.log_max_ra_req_qp); - MLX5_SET(cmd_hca_cap, to, log_max_ra_res_qp, from->gen.log_max_ra_res_qp); - MLX5_SET(cmd_hca_cap, to, pkey_table_size, from->gen.pkey_table_size); - MLX5_SET(cmd_hca_cap, to, pkey_table_size, to_fw_pkey_sz(from->gen.pkey_table_size)); - MLX5_SET(cmd_hca_cap, to, log_uar_page_sz, PAGE_SHIFT - 12); - v64 = from->gen.flags & MLX5_CAP_BITS_RW_MASK; - *flags_off = cpu_to_be64(v64); -} - -static u16 get_pkey_table_size(int pkey) +static u16 to_sw_pkey_sz(int pkey_sz) { - if (pkey > MLX5_MAX_LOG_PKEY_TABLE) + if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE) return 0; - return MLX5_MIN_PKEY_TABLE_SIZE << pkey; -} - -static void fw2drv_caps(struct mlx5_caps *caps, void *out) -{ - struct mlx5_general_caps *gen = &caps->gen; - - gen->max_srq_wqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_srq_sz); - gen->max_wqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_qp_sz); - gen->log_max_qp = MLX5_GET_PR(cmd_hca_cap, out, log_max_qp); - gen->log_max_srq = MLX5_GET_PR(cmd_hca_cap, out, log_max_srq); - gen->max_cqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_cq_sz); - gen->log_max_cq = MLX5_GET_PR(cmd_hca_cap, out, log_max_cq); - gen->max_eqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_eq_sz); - gen->log_max_mkey = MLX5_GET_PR(cmd_hca_cap, out, log_max_mkey); - gen->log_max_eq = MLX5_GET_PR(cmd_hca_cap, out, log_max_eq); - gen->max_indirection = MLX5_GET_PR(cmd_hca_cap, out, max_indirection); - gen->log_max_mrw_sz = MLX5_GET_PR(cmd_hca_cap, out, log_max_mrw_sz); - gen->log_max_bsf_list_size = MLX5_GET_PR(cmd_hca_cap, out, log_max_bsf_list_size); - gen->log_max_klm_list_size = MLX5_GET_PR(cmd_hca_cap, out, log_max_klm_list_size); - gen->log_max_ra_req_dc = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_req_dc); - gen->log_max_ra_res_dc = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_res_dc); - gen->log_max_ra_req_qp = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_req_qp); - gen->log_max_ra_res_qp = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_res_qp); - gen->max_qp_counters = MLX5_GET_PR(cmd_hca_cap, out, max_qp_cnt); - gen->pkey_table_size = get_pkey_table_size(MLX5_GET_PR(cmd_hca_cap, out, pkey_table_size)); - gen->local_ca_ack_delay = MLX5_GET_PR(cmd_hca_cap, out, local_ca_ack_delay); - gen->num_ports = MLX5_GET_PR(cmd_hca_cap, out, num_ports); - gen->log_max_msg = MLX5_GET_PR(cmd_hca_cap, out, log_max_msg); - gen->stat_rate_support = MLX5_GET_PR(cmd_hca_cap, out, stat_rate_support); - gen->flags = be64_to_cpu(*(__be64 *)MLX5_ADDR_OF(cmd_hca_cap, out, reserved_22)); - pr_debug("flags = 0x%llx\n", gen->flags); - gen->uar_sz = MLX5_GET_PR(cmd_hca_cap, out, uar_sz); - gen->min_log_pg_sz = MLX5_GET_PR(cmd_hca_cap, out, log_pg_sz); - gen->bf_reg_size = MLX5_GET_PR(cmd_hca_cap, out, bf); - gen->bf_reg_size = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_bf_reg_size); - gen->max_sq_desc_sz = MLX5_GET_PR(cmd_hca_cap, out, max_wqe_sz_sq); - gen->max_rq_desc_sz = MLX5_GET_PR(cmd_hca_cap, out, max_wqe_sz_rq); - gen->max_dc_sq_desc_sz = MLX5_GET_PR(cmd_hca_cap, out, max_wqe_sz_sq_dc); - gen->max_qp_mcg = MLX5_GET_PR(cmd_hca_cap, out, max_qp_mcg); - gen->log_max_pd = MLX5_GET_PR(cmd_hca_cap, out, log_max_pd); - gen->log_max_xrcd = MLX5_GET_PR(cmd_hca_cap, out, log_max_xrcd); - gen->log_uar_page_sz = MLX5_GET_PR(cmd_hca_cap, out, log_uar_page_sz); -} - -static const char *caps_opmod_str(u16 opmod) -{ - switch (opmod) { - case HCA_CAP_OPMOD_GET_MAX: - return "GET_MAX"; - case HCA_CAP_OPMOD_GET_CUR: - return "GET_CUR"; - default: - return "Invalid"; - } + return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; } -int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps, - u16 opmod) +int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, + enum mlx5_cap_mode cap_mode) { u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)]; int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); - void *out; + void *out, *hca_caps; + u16 opmod = (cap_type << 1) | (cap_mode & 0x01); int err; memset(in, 0, sizeof(in)); out = kzalloc(out_sz, GFP_KERNEL); if (!out) return -ENOMEM; + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); MLX5_SET(query_hca_cap_in, in, op_mod, opmod); err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); @@ -386,12 +318,30 @@ int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps, err = mlx5_cmd_status_to_err_v2(out); if (err) { - mlx5_core_warn(dev, "query max hca cap failed, %d\n", err); + mlx5_core_warn(dev, + "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n", + cap_type, cap_mode, err); goto query_ex; } - mlx5_core_dbg(dev, "%s\n", caps_opmod_str(opmod)); - fw2drv_caps(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability)); + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability); + + switch (cap_mode) { + case HCA_CAP_OPMOD_GET_MAX: + memcpy(dev->hca_caps_max[cap_type], hca_caps, + MLX5_UN_SZ_BYTES(hca_cap_union)); + break; + case HCA_CAP_OPMOD_GET_CUR: + memcpy(dev->hca_caps_cur[cap_type], hca_caps, + MLX5_UN_SZ_BYTES(hca_cap_union)); + break; + default: + mlx5_core_warn(dev, + "Tried to query dev cap type(%x) with wrong opmode(%x)\n", + cap_type, cap_mode); + err = -EINVAL; + break; + } query_ex: kfree(out); return err; @@ -418,49 +368,45 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) { void *set_ctx = NULL; struct mlx5_profile *prof = dev->profile; - struct mlx5_caps *cur_caps = NULL; - struct mlx5_caps *max_caps = NULL; int err = -ENOMEM; int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + void *set_hca_cap; set_ctx = kzalloc(set_sz, GFP_KERNEL); if (!set_ctx) goto query_ex; - max_caps = kzalloc(sizeof(*max_caps), GFP_KERNEL); - if (!max_caps) - goto query_ex; - - cur_caps = kzalloc(sizeof(*cur_caps), GFP_KERNEL); - if (!cur_caps) - goto query_ex; - - err = mlx5_core_get_caps(dev, max_caps, HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_MAX); if (err) goto query_ex; - err = mlx5_core_get_caps(dev, cur_caps, HCA_CAP_OPMOD_GET_CUR); + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR); if (err) goto query_ex; + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, + capability); + memcpy(set_hca_cap, dev->hca_caps_cur[MLX5_CAP_GENERAL], + MLX5_ST_SZ_BYTES(cmd_hca_cap)); + + mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n", + to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)), + 128); /* we limit the size of the pkey table to 128 entries for now */ - cur_caps->gen.pkey_table_size = 128; + MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, + to_fw_pkey_sz(128)); if (prof->mask & MLX5_PROF_MASK_QP_SIZE) - cur_caps->gen.log_max_qp = prof->log_max_qp; + MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, + prof->log_max_qp); - /* disable checksum */ - cur_caps->gen.flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM; + /* disable cmdif checksum */ + MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0); - copy_rw_fields(MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability), - cur_caps); err = set_caps(dev, set_ctx, set_sz); query_ex: - kfree(cur_caps); - kfree(max_caps); kfree(set_ctx); - return err; } @@ -768,7 +714,7 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) mlx5_start_health_poll(dev); - err = mlx5_cmd_query_hca_cap(dev, &dev->caps); + err = mlx5_query_hca_caps(dev); if (err) { dev_err(&pdev->dev, "query hca failed\n"); goto err_stop_poll; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index a051b906afdf..b986f1c258bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -65,9 +65,15 @@ enum { MLX5_CMD_TIME, /* print command execution time */ }; +static inline int mlx5_cmd_exec_check_status(struct mlx5_core_dev *dev, u32 *in, + int in_size, u32 *out, + int out_size) +{ + mlx5_cmd_exec(dev, in, in_size, out, out_size); + return mlx5_cmd_status_to_err((struct mlx5_outbox_hdr *)out); +} -int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev, - struct mlx5_caps *caps); +int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index ba58f6d7c761..9ef85873ceea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -175,12 +175,13 @@ int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari) for (i = 0; i < tot_uuars; i++) { bf = &uuari->bfs[i]; - bf->buf_size = dev->caps.gen.bf_reg_size / 2; + bf->buf_size = (1 << MLX5_CAP_GEN(dev, log_bf_reg_size)) / 2; bf->uar = &uuari->uars[i / MLX5_BF_REGS_PER_PAGE]; bf->regreg = uuari->uars[i / MLX5_BF_REGS_PER_PAGE].map; bf->reg = NULL; /* Add WC support */ - bf->offset = (i % MLX5_BF_REGS_PER_PAGE) * dev->caps.gen.bf_reg_size + - MLX5_BF_OFFSET; + bf->offset = (i % MLX5_BF_REGS_PER_PAGE) * + (1 << MLX5_CAP_GEN(dev, log_bf_reg_size)) + + MLX5_BF_OFFSET; bf->need_lock = need_uuar_lock(i); spin_lock_init(&bf->lock); spin_lock_init(&bf->lock32); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index feebed7b392b..4ee52bf1f959 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -59,6 +59,8 @@ #define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8) #define MLX5_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8) #define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32) +#define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8) +#define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32) #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8) #define MLX5_ADDR_OF(typ, p, fld) ((char *)(p) + MLX5_BYTE_OFF(typ, fld)) @@ -322,13 +324,6 @@ enum { MLX5_CAP_OFF_CMDIF_CSUM = 46, }; -enum { - HCA_CAP_OPMOD_GET_MAX = 0, - HCA_CAP_OPMOD_GET_CUR = 1, - HCA_CAP_OPMOD_GET_ODP_MAX = 4, - HCA_CAP_OPMOD_GET_ODP_CUR = 5 -}; - struct mlx5_inbox_hdr { __be16 opcode; u8 rsvd[4]; @@ -1101,4 +1096,61 @@ enum { MLX5_RQC_RQ_TYPE_MEMORY_RQ_RPM = 0x1, }; +/* MLX5 DEV CAPs */ + +/* TODO: EAT.ME */ +enum mlx5_cap_mode { + HCA_CAP_OPMOD_GET_MAX = 0, + HCA_CAP_OPMOD_GET_CUR = 1, +}; + +enum mlx5_cap_type { + MLX5_CAP_GENERAL = 0, + MLX5_CAP_ETHERNET_OFFLOADS, + MLX5_CAP_ODP, + MLX5_CAP_ATOMIC, + MLX5_CAP_ROCE, + MLX5_CAP_IPOIB_OFFLOADS, + MLX5_CAP_EOIB_OFFLOADS, + MLX5_CAP_FLOW_TABLE, + /* NUM OF CAP Types */ + MLX5_CAP_NUM +}; + +/* GET Dev Caps macros */ +#define MLX5_CAP_GEN(mdev, cap) \ + MLX5_GET(cmd_hca_cap, mdev->hca_caps_cur[MLX5_CAP_GENERAL], cap) + +#define MLX5_CAP_GEN_MAX(mdev, cap) \ + MLX5_GET(cmd_hca_cap, mdev->hca_caps_max[MLX5_CAP_GENERAL], cap) + +#define MLX5_CAP_ETH(mdev, cap) \ + MLX5_GET(per_protocol_networking_offload_caps,\ + mdev->hca_caps_cur[MLX5_CAP_ETHERNET_OFFLOADS], cap) + +#define MLX5_CAP_ETH_MAX(mdev, cap) \ + MLX5_GET(per_protocol_networking_offload_caps,\ + mdev->hca_caps_max[MLX5_CAP_ETHERNET_OFFLOADS], cap) + +#define MLX5_CAP_ROCE(mdev, cap) \ + MLX5_GET(roce_cap, mdev->hca_caps_cur[MLX5_CAP_ROCE], cap) + +#define MLX5_CAP_ROCE_MAX(mdev, cap) \ + MLX5_GET(roce_cap, mdev->hca_caps_max[MLX5_CAP_ROCE], cap) + +#define MLX5_CAP_ATOMIC(mdev, cap) \ + MLX5_GET(atomic_caps, mdev->hca_caps_cur[MLX5_CAP_ATOMIC], cap) + +#define MLX5_CAP_ATOMIC_MAX(mdev, cap) \ + MLX5_GET(atomic_caps, mdev->hca_caps_max[MLX5_CAP_ATOMIC], cap) + +#define MLX5_CAP_FLOWTABLE(mdev, cap) \ + MLX5_GET(flow_table_nic_cap, mdev->hca_caps_cur[MLX5_CAP_FLOW_TABLE], cap) + +#define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \ + MLX5_GET(flow_table_nic_cap, mdev->hca_caps_max[MLX5_CAP_FLOW_TABLE], cap) + +#define MLX5_CAP_ODP(mdev, cap)\ + MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap) + #endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3fd4fdc1ba16..6b9199163633 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -268,55 +268,7 @@ struct mlx5_cmd { struct mlx5_port_caps { int gid_table_len; int pkey_table_len; -}; - -struct mlx5_general_caps { - u8 log_max_eq; - u8 log_max_cq; - u8 log_max_qp; - u8 log_max_mkey; - u8 log_max_pd; - u8 log_max_srq; - u8 log_max_mrw_sz; - u8 log_max_bsf_list_size; - u8 log_max_klm_list_size; - u32 max_cqes; - int max_wqes; - u32 max_eqes; - u32 max_indirection; - int max_sq_desc_sz; - int max_rq_desc_sz; - int max_dc_sq_desc_sz; - u64 flags; - u16 stat_rate_support; - int log_max_msg; - int num_ports; - u8 log_max_ra_res_qp; - u8 log_max_ra_req_qp; - int max_srq_wqes; - int bf_reg_size; - int bf_regs_per_page; - struct mlx5_port_caps port[MLX5_MAX_PORTS]; - u8 ext_port_cap[MLX5_MAX_PORTS]; - int max_vf; - u32 reserved_lkey; - u8 local_ca_ack_delay; - u8 log_max_mcg; - u32 max_qp_mcg; - int min_page_sz; - int pd_cap; - u32 max_qp_counters; - u32 pkey_table_size; - u8 log_max_ra_req_dc; - u8 log_max_ra_res_dc; - u32 uar_sz; - u8 min_log_pg_sz; - u8 log_max_xrcd; - u16 log_uar_page_sz; -}; - -struct mlx5_caps { - struct mlx5_general_caps gen; + u8 ext_port_cap; }; struct mlx5_cmd_mailbox { @@ -521,7 +473,9 @@ struct mlx5_core_dev { u8 rev_id; char board_id[MLX5_BOARD_ID_LEN]; struct mlx5_cmd cmd; - struct mlx5_caps caps; + struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; + u32 hca_caps_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; + u32 hca_caps_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; void (*event) (struct mlx5_core_dev *dev, @@ -651,8 +605,8 @@ void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr); int mlx5_cmd_status_to_err_v2(void *ptr); -int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps, - u16 opmod); +int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, + enum mlx5_cap_mode cap_mode); int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, -- cgit v1.2.3 From adb0c9545bce6f1b1d563e988e6ee5531861d449 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 28 May 2015 22:28:42 +0300 Subject: net/mlx5_core: Implement access functions of ptys register fields Those registers will be used by the ethtool to set/get settings. Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 77 ++++++++++++++++++++++++++ include/linux/mlx5/driver.h | 14 +++++ 2 files changed, 91 insertions(+) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 49e90f2612d8..6e2d99cc3b61 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -102,3 +102,80 @@ int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps) return err; } EXPORT_SYMBOL_GPL(mlx5_set_port_caps); + +int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, + int ptys_size, int proto_mask) +{ + u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + memset(in, 0, sizeof(in)); + MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, proto_mask, proto_mask); + + err = mlx5_core_access_reg(dev, in, sizeof(in), ptys, + ptys_size, MLX5_REG_PTYS, 0, 0); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_ptys); + +int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, + u32 *proto_cap, int proto_mask) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask); + if (err) + return err; + + if (proto_mask == MLX5_PTYS_EN) + *proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); + else + *proto_cap = MLX5_GET(ptys_reg, out, ib_proto_capability); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_proto_cap); + +int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, + u32 *proto_admin, int proto_mask) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask); + if (err) + return err; + + if (proto_mask == MLX5_PTYS_EN) + *proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); + else + *proto_admin = MLX5_GET(ptys_reg, out, ib_proto_admin); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_proto_admin); + +int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, + int proto_mask) +{ + u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, proto_mask, proto_mask); + if (proto_mask == MLX5_PTYS_EN) + MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); + else + MLX5_SET(ptys_reg, in, ib_proto_admin, proto_admin); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PTYS, 0, 1); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_set_port_proto); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6b9199163633..266d5498a270 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -504,6 +504,11 @@ enum { MLX5_COMP_EQ_SIZE = 1024, }; +enum { + MLX5_PTYS_IB = 1 << 0, + MLX5_PTYS_EN = 1 << 2, +}; + struct mlx5_db_pgdir { struct list_head list; DECLARE_BITMAP(bitmap, MLX5_DB_PER_PAGE); @@ -686,7 +691,16 @@ void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, void *data_out, int size_out, u16 reg_num, int arg, int write); + int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); +int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, + int ptys_size, int proto_mask); +int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, + u32 *proto_cap, int proto_mask); +int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, + u32 *proto_admin, int proto_mask); +int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, + int proto_mask); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -- cgit v1.2.3 From 4c916a798058c1acf5a980438416020932c24aca Mon Sep 17 00:00:00 2001 From: Rana Shahout Date: Thu, 28 May 2015 22:28:43 +0300 Subject: net/mlx5_core: Implement get/set port status Implemet get/set port status low level functions to be exposed by the netdev. Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 32 ++++++++++++++++++++++++++ include/linux/mlx5/driver.h | 8 +++++++ 2 files changed, 40 insertions(+) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 6e2d99cc3b61..742a6fb8debe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -179,3 +179,35 @@ int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, return err; } EXPORT_SYMBOL_GPL(mlx5_set_port_proto); + +int mlx5_set_port_status(struct mlx5_core_dev *dev, + enum mlx5_port_status status) +{ + u32 in[MLX5_ST_SZ_DW(paos_reg)]; + u32 out[MLX5_ST_SZ_DW(paos_reg)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(paos_reg, in, admin_status, status); + MLX5_SET(paos_reg, in, ase, 1); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PAOS, 0, 1); +} + +int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status) +{ + u32 in[MLX5_ST_SZ_DW(paos_reg)]; + u32 out[MLX5_ST_SZ_DW(paos_reg)]; + int err; + + memset(in, 0, sizeof(in)); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PAOS, 0, 0); + if (err) + return err; + + *status = MLX5_GET(paos_reg, out, oper_status); + return err; +} diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 266d5498a270..6438444ab361 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -149,6 +149,11 @@ enum mlx5_dev_event { MLX5_DEV_EVENT_CLIENT_REREG, }; +enum mlx5_port_status { + MLX5_PORT_UP = 1 << 1, + MLX5_PORT_DOWN = 1 << 2, +}; + struct mlx5_uuar_info { struct mlx5_uar *uars; int num_uars; @@ -701,6 +706,9 @@ int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, u32 *proto_admin, int proto_mask); int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, int proto_mask); +int mlx5_set_port_status(struct mlx5_core_dev *dev, + enum mlx5_port_status status); +int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -- cgit v1.2.3 From 90b3e38d048f09b22fb50bcd460cea65fd00b2d7 Mon Sep 17 00:00:00 2001 From: Rana Shahout Date: Thu, 28 May 2015 22:28:44 +0300 Subject: net/mlx5_core: Modify CQ moderation parameters Introduce mlx5_core_modify_cq_moderation() to be used by the netdev, to set hardware coalescing. Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 18 ++++++++++++++++++ include/linux/mlx5/cq.h | 3 +++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index eb0cf81f5f45..04ab7e445eae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -219,6 +219,24 @@ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, } EXPORT_SYMBOL(mlx5_core_modify_cq); +int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, + struct mlx5_core_cq *cq, + u16 cq_period, + u16 cq_max_count) +{ + struct mlx5_modify_cq_mbox_in in; + + memset(&in, 0, sizeof(in)); + + in.cqn = cpu_to_be32(cq->cqn); + in.ctx.cq_period = cpu_to_be16(cq_period); + in.ctx.cq_max_count = cpu_to_be16(cq_max_count); + in.field_select = cpu_to_be32(MLX5_CQ_MODIFY_PERIOD | + MLX5_CQ_MODIFY_COUNT); + + return mlx5_core_modify_cq(dev, cq, &in, sizeof(in)); +} + int mlx5_init_cq_table(struct mlx5_core_dev *dev) { struct mlx5_cq_table *table = &dev->priv.cq_table; diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 2695ced222df..abc4767695e4 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -169,6 +169,9 @@ int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, struct mlx5_query_cq_mbox_out *out); int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, struct mlx5_modify_cq_mbox_in *in, int in_sz); +int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, + struct mlx5_core_cq *cq, u16 cq_period, + u16 cq_max_count); int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); -- cgit v1.2.3 From e725440e75da8c4d617a31c4e38216acc55c24e3 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 28 May 2015 22:28:45 +0300 Subject: net/mlx5_core: Set/Query port MTU commands Introduce set/Query low level functions to access MTU in hardware. To be used by the netdev. Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 53 ++++++++++++++++++++++++++ include/linux/mlx5/driver.h | 4 ++ 2 files changed, 57 insertions(+) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 742a6fb8debe..7d3d0f9f328d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -211,3 +211,56 @@ int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status) *status = MLX5_GET(paos_reg, out, oper_status); return err; } + +static int mlx5_query_port_mtu(struct mlx5_core_dev *dev, + int *admin_mtu, int *max_mtu, int *oper_mtu) +{ + u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; + u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(pmtu_reg, in, local_port, 1); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PMTU, 0, 0); + if (err) + return err; + + if (max_mtu) + *max_mtu = MLX5_GET(pmtu_reg, out, max_mtu); + if (oper_mtu) + *oper_mtu = MLX5_GET(pmtu_reg, out, oper_mtu); + if (admin_mtu) + *admin_mtu = MLX5_GET(pmtu_reg, out, admin_mtu); + + return 0; +} + +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu) +{ + u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; + u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(pmtu_reg, in, admin_mtu, mtu); + MLX5_SET(pmtu_reg, in, local_port, 1); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_PMTU, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_mtu); + +int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu) +{ + return mlx5_query_port_mtu(dev, NULL, max_mtu, NULL); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_max_mtu); + +int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu) +{ + return mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6438444ab361..51738472657e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -710,6 +710,10 @@ int mlx5_set_port_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status); +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu); +int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu); +int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu); + int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, -- cgit v1.2.3 From afb736e9330ad6b2b6935d2f53ded784eb73f12d Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 28 May 2015 22:28:47 +0300 Subject: net/mlx5: Ethernet resource handling files This patch contains the resource handling files: - flow_table.c: This file contains the code to handle the low level API to configure hardware flow table. It is separated from the flow_table_en.c, because it will be used in the future by Raw Ethernet QP in mlx5_ib too. - en_flow_table.[ch]: Ethernet flow steering handling. The flow table object contain a mapping between flow specs and TIRs. This mechanism will be used also to configure e-switch in the future, when SR-IOV support will be added. - transobj.[ch] - Low level functions to create/modify/destroy the transport objects: RQ/SQ/TIR/TIS - vport.[ch] - Handle attributes of a virtual port (vPort) in the embedded switch. Currently this switch is a passthrough, until SR-IOV support will be added. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlx5/core/en_flow_table.c | 858 +++++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/flow_table.c | 422 ++++++++++ drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 169 ++++ drivers/net/ethernet/mellanox/mlx5/core/transobj.h | 47 ++ drivers/net/ethernet/mellanox/mlx5/core/vport.c | 84 ++ drivers/net/ethernet/mellanox/mlx5/core/vport.h | 41 + include/linux/mlx5/flow_table.h | 54 ++ 7 files changed, 1675 insertions(+) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/flow_table.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/transobj.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/transobj.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/vport.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/vport.h create mode 100644 include/linux/mlx5/flow_table.h (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c new file mode 100644 index 000000000000..6feebda4b3e4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c @@ -0,0 +1,858 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "en.h" + +enum { + MLX5E_FULLMATCH = 0, + MLX5E_ALLMULTI = 1, + MLX5E_PROMISC = 2, +}; + +enum { + MLX5E_UC = 0, + MLX5E_MC_IPV4 = 1, + MLX5E_MC_IPV6 = 2, + MLX5E_MC_OTHER = 3, +}; + +enum { + MLX5E_ACTION_NONE = 0, + MLX5E_ACTION_ADD = 1, + MLX5E_ACTION_DEL = 2, +}; + +struct mlx5e_eth_addr_hash_node { + struct hlist_node hlist; + u8 action; + struct mlx5e_eth_addr_info ai; +}; + +static inline int mlx5e_hash_eth_addr(u8 *addr) +{ + return addr[5]; +} + +static void mlx5e_add_eth_addr_to_hash(struct hlist_head *hash, u8 *addr) +{ + struct mlx5e_eth_addr_hash_node *hn; + int ix = mlx5e_hash_eth_addr(addr); + int found = 0; + + hlist_for_each_entry(hn, &hash[ix], hlist) + if (ether_addr_equal_64bits(hn->ai.addr, addr)) { + found = 1; + break; + } + + if (found) { + hn->action = MLX5E_ACTION_NONE; + return; + } + + hn = kzalloc(sizeof(*hn), GFP_ATOMIC); + if (!hn) + return; + + ether_addr_copy(hn->ai.addr, addr); + hn->action = MLX5E_ACTION_ADD; + + hlist_add_head(&hn->hlist, &hash[ix]); +} + +static void mlx5e_del_eth_addr_from_hash(struct mlx5e_eth_addr_hash_node *hn) +{ + hlist_del(&hn->hlist); + kfree(hn); +} + +static void mlx5e_del_eth_addr_from_flow_table(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_info *ai) +{ + void *ft = priv->ft.main; + + if (ai->tt_vec & (1 << MLX5E_TT_IPV6_TCP)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_TCP]); + + if (ai->tt_vec & (1 << MLX5E_TT_IPV4_TCP)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_TCP]); + + if (ai->tt_vec & (1 << MLX5E_TT_IPV6_UDP)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_UDP]); + + if (ai->tt_vec & (1 << MLX5E_TT_IPV4_UDP)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_UDP]); + + if (ai->tt_vec & (1 << MLX5E_TT_IPV6)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6]); + + if (ai->tt_vec & (1 << MLX5E_TT_IPV4)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4]); + + if (ai->tt_vec & (1 << MLX5E_TT_ANY)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_ANY]); +} + +static int mlx5e_get_eth_addr_type(u8 *addr) +{ + if (is_unicast_ether_addr(addr)) + return MLX5E_UC; + + if ((addr[0] == 0x01) && + (addr[1] == 0x00) && + (addr[2] == 0x5e) && + !(addr[3] & 0x80)) + return MLX5E_MC_IPV4; + + if ((addr[0] == 0x33) && + (addr[1] == 0x33)) + return MLX5E_MC_IPV6; + + return MLX5E_MC_OTHER; +} + +static u32 mlx5e_get_tt_vec(struct mlx5e_eth_addr_info *ai, int type) +{ + int eth_addr_type; + u32 ret; + + switch (type) { + case MLX5E_FULLMATCH: + eth_addr_type = mlx5e_get_eth_addr_type(ai->addr); + switch (eth_addr_type) { + case MLX5E_UC: + ret = + (1 << MLX5E_TT_IPV4_TCP) | + (1 << MLX5E_TT_IPV6_TCP) | + (1 << MLX5E_TT_IPV4_UDP) | + (1 << MLX5E_TT_IPV6_UDP) | + (1 << MLX5E_TT_IPV4) | + (1 << MLX5E_TT_IPV6) | + (1 << MLX5E_TT_ANY) | + 0; + break; + + case MLX5E_MC_IPV4: + ret = + (1 << MLX5E_TT_IPV4_UDP) | + (1 << MLX5E_TT_IPV4) | + 0; + break; + + case MLX5E_MC_IPV6: + ret = + (1 << MLX5E_TT_IPV6_UDP) | + (1 << MLX5E_TT_IPV6) | + 0; + break; + + case MLX5E_MC_OTHER: + ret = + (1 << MLX5E_TT_ANY) | + 0; + break; + } + + break; + + case MLX5E_ALLMULTI: + ret = + (1 << MLX5E_TT_IPV4_UDP) | + (1 << MLX5E_TT_IPV6_UDP) | + (1 << MLX5E_TT_IPV4) | + (1 << MLX5E_TT_IPV6) | + (1 << MLX5E_TT_ANY) | + 0; + break; + + default: /* MLX5E_PROMISC */ + ret = + (1 << MLX5E_TT_IPV4_TCP) | + (1 << MLX5E_TT_IPV6_TCP) | + (1 << MLX5E_TT_IPV4_UDP) | + (1 << MLX5E_TT_IPV6_UDP) | + (1 << MLX5E_TT_IPV4) | + (1 << MLX5E_TT_IPV6) | + (1 << MLX5E_TT_ANY) | + 0; + break; + } + + return ret; +} + +static int __mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_info *ai, int type, + void *flow_context, void *match_criteria) +{ + u8 match_criteria_enable = 0; + void *match_value; + void *dest; + u8 *dmac; + u8 *match_criteria_dmac; + void *ft = priv->ft.main; + u32 *tirn = priv->tirn; + u32 tt_vec; + int err; + + match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); + dmac = MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.dmac_47_16); + match_criteria_dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers.dmac_47_16); + dest = MLX5_ADDR_OF(flow_context, flow_context, destination); + + MLX5_SET(flow_context, flow_context, action, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + MLX5_SET(flow_context, flow_context, destination_list_size, 1); + MLX5_SET(dest_format_struct, dest, destination_type, + MLX5_FLOW_CONTEXT_DEST_TYPE_TIR); + + switch (type) { + case MLX5E_FULLMATCH: + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + memset(match_criteria_dmac, 0xff, ETH_ALEN); + ether_addr_copy(dmac, ai->addr); + break; + + case MLX5E_ALLMULTI: + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + match_criteria_dmac[0] = 0x01; + dmac[0] = 0x01; + break; + + case MLX5E_PROMISC: + break; + } + + tt_vec = mlx5e_get_tt_vec(ai, type); + + if (tt_vec & (1 << MLX5E_TT_ANY)) { + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_ANY]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + &ai->ft_ix[MLX5E_TT_ANY]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return err; + } + ai->tt_vec |= (1 << MLX5E_TT_ANY); + } + + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.ethertype); + + if (tt_vec & (1 << MLX5E_TT_IPV4)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IP); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV4]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + &ai->ft_ix[MLX5E_TT_IPV4]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return err; + } + ai->tt_vec |= (1 << MLX5E_TT_IPV4); + } + + if (tt_vec & (1 << MLX5E_TT_IPV6)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IPV6); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV6]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + &ai->ft_ix[MLX5E_TT_IPV6]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return err; + } + ai->tt_vec |= (1 << MLX5E_TT_IPV6); + } + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.ip_protocol); + MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, + IPPROTO_UDP); + + if (tt_vec & (1 << MLX5E_TT_IPV4_UDP)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IP); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV4_UDP]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + &ai->ft_ix[MLX5E_TT_IPV4_UDP]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return err; + } + ai->tt_vec |= (1 << MLX5E_TT_IPV4_UDP); + } + + if (tt_vec & (1 << MLX5E_TT_IPV6_UDP)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IPV6); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV6_UDP]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + &ai->ft_ix[MLX5E_TT_IPV6_UDP]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return err; + } + ai->tt_vec |= (1 << MLX5E_TT_IPV6_UDP); + } + + MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, + IPPROTO_TCP); + + if (tt_vec & (1 << MLX5E_TT_IPV4_TCP)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IP); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV4_TCP]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + &ai->ft_ix[MLX5E_TT_IPV4_TCP]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return err; + } + ai->tt_vec |= (1 << MLX5E_TT_IPV4_TCP); + } + + if (tt_vec & (1 << MLX5E_TT_IPV6_TCP)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IPV6); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV6_TCP]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + &ai->ft_ix[MLX5E_TT_IPV6_TCP]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return err; + } + ai->tt_vec |= (1 << MLX5E_TT_IPV6_TCP); + } + + return 0; +} + +static int mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_info *ai, int type) +{ + u32 *flow_context; + u32 *match_criteria; + int err; + + flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + + MLX5_ST_SZ_BYTES(dest_format_struct)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!flow_context || !match_criteria) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto add_eth_addr_rule_out; + } + + err = __mlx5e_add_eth_addr_rule(priv, ai, type, flow_context, + match_criteria); + if (err) + netdev_err(priv->netdev, "%s: failed\n", __func__); + +add_eth_addr_rule_out: + kvfree(match_criteria); + kvfree(flow_context); + return err; +} + +enum mlx5e_vlan_rule_type { + MLX5E_VLAN_RULE_TYPE_UNTAGGED, + MLX5E_VLAN_RULE_TYPE_ANY_VID, + MLX5E_VLAN_RULE_TYPE_MATCH_VID, +}; + +static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + u8 match_criteria_enable = 0; + u32 *flow_context; + void *match_value; + void *dest; + u32 *match_criteria; + u32 *ft_ix; + int err; + + flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + + MLX5_ST_SZ_BYTES(dest_format_struct)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!flow_context || !match_criteria) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto add_vlan_rule_out; + } + match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); + dest = MLX5_ADDR_OF(flow_context, flow_context, destination); + + MLX5_SET(flow_context, flow_context, action, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + MLX5_SET(flow_context, flow_context, destination_list_size, 1); + MLX5_SET(dest_format_struct, dest, destination_type, + MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE); + MLX5_SET(dest_format_struct, dest, destination_id, + mlx5_get_flow_table_id(priv->ft.main)); + + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.vlan_tag); + + switch (rule_type) { + case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + ft_ix = &priv->vlan.untagged_rule_ft_ix; + break; + case MLX5E_VLAN_RULE_TYPE_ANY_VID: + ft_ix = &priv->vlan.any_vlan_rule_ft_ix; + MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag, + 1); + break; + default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ + ft_ix = &priv->vlan.active_vlans_ft_ix[vid]; + MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag, + 1); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, match_value, outer_headers.first_vid, + vid); + break; + } + + err = mlx5_add_flow_table_entry(priv->ft.vlan, match_criteria_enable, + match_criteria, flow_context, ft_ix); + if (err) + netdev_err(priv->netdev, "%s: failed\n", __func__); + +add_vlan_rule_out: + kvfree(match_criteria); + kvfree(flow_context); + return err; +} + +static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + switch (rule_type) { + case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + mlx5_del_flow_table_entry(priv->ft.vlan, + priv->vlan.untagged_rule_ft_ix); + break; + case MLX5E_VLAN_RULE_TYPE_ANY_VID: + mlx5_del_flow_table_entry(priv->ft.vlan, + priv->vlan.any_vlan_rule_ft_ix); + break; + case MLX5E_VLAN_RULE_TYPE_MATCH_VID: + mlx5_del_flow_table_entry(priv->ft.vlan, + priv->vlan.active_vlans_ft_ix[vid]); + break; + } +} + +void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) +{ + WARN_ON(!mutex_is_locked(&priv->state_lock)); + + if (priv->vlan.filter_disabled) { + priv->vlan.filter_disabled = false; + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, + 0); + } +} + +void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) +{ + WARN_ON(!mutex_is_locked(&priv->state_lock)); + + if (!priv->vlan.filter_disabled) { + priv->vlan.filter_disabled = true; + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, + 0); + } +} + +int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err = 0; + + mutex_lock(&priv->state_lock); + + set_bit(vid, priv->vlan.active_vlans); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, + vid); + + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mutex_lock(&priv->state_lock); + + clear_bit(vid, priv->vlan.active_vlans); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); + + mutex_unlock(&priv->state_lock); + + return 0; +} + +int mlx5e_add_all_vlan_rules(struct mlx5e_priv *priv) +{ + u16 vid; + int err; + + for_each_set_bit(vid, priv->vlan.active_vlans, VLAN_N_VID) { + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, + vid); + if (err) + return err; + } + + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + if (err) + return err; + + if (priv->vlan.filter_disabled) { + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, + 0); + if (err) + return err; + } + + return 0; +} + +void mlx5e_del_all_vlan_rules(struct mlx5e_priv *priv) +{ + u16 vid; + + if (priv->vlan.filter_disabled) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); + + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + + for_each_set_bit(vid, priv->vlan.active_vlans, VLAN_N_VID) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); +} + +#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5E_ETH_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) + +static void mlx5e_execute_action(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_hash_node *hn) +{ + switch (hn->action) { + case MLX5E_ACTION_ADD: + mlx5e_add_eth_addr_rule(priv, &hn->ai, MLX5E_FULLMATCH); + hn->action = MLX5E_ACTION_NONE; + break; + + case MLX5E_ACTION_DEL: + mlx5e_del_eth_addr_from_flow_table(priv, &hn->ai); + mlx5e_del_eth_addr_from_hash(hn); + break; + } +} + +static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct netdev_hw_addr *ha; + + netif_addr_lock_bh(netdev); + + mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, + priv->netdev->dev_addr); + + netdev_for_each_uc_addr(ha, netdev) + mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, ha->addr); + + netdev_for_each_mc_addr(ha, netdev) + mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_mc, ha->addr); + + netif_addr_unlock_bh(netdev); +} + +static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_hash_node *hn; + struct hlist_node *tmp; + int i; + + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) + mlx5e_execute_action(priv, hn); + + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) + mlx5e_execute_action(priv, hn); +} + +static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_hash_node *hn; + struct hlist_node *tmp; + int i; + + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) + hn->action = MLX5E_ACTION_DEL; + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) + hn->action = MLX5E_ACTION_DEL; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_sync_netdev_addr(priv); + + mlx5e_apply_netdev_addr(priv); +} + +void mlx5e_set_rx_mode_core(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + struct net_device *ndev = priv->netdev; + + bool rx_mode_enable = test_bit(MLX5E_STATE_OPENED, &priv->state); + bool promisc_enabled = rx_mode_enable && (ndev->flags & IFF_PROMISC); + bool allmulti_enabled = rx_mode_enable && (ndev->flags & IFF_ALLMULTI); + bool broadcast_enabled = rx_mode_enable; + + bool enable_promisc = !ea->promisc_enabled && promisc_enabled; + bool disable_promisc = ea->promisc_enabled && !promisc_enabled; + bool enable_allmulti = !ea->allmulti_enabled && allmulti_enabled; + bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled; + bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled; + bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; + + if (enable_promisc) + mlx5e_add_eth_addr_rule(priv, &ea->promisc, MLX5E_PROMISC); + if (enable_allmulti) + mlx5e_add_eth_addr_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); + if (enable_broadcast) + mlx5e_add_eth_addr_rule(priv, &ea->broadcast, MLX5E_FULLMATCH); + + mlx5e_handle_netdev_addr(priv); + + if (disable_broadcast) + mlx5e_del_eth_addr_from_flow_table(priv, &ea->broadcast); + if (disable_allmulti) + mlx5e_del_eth_addr_from_flow_table(priv, &ea->allmulti); + if (disable_promisc) + mlx5e_del_eth_addr_from_flow_table(priv, &ea->promisc); + + ea->promisc_enabled = promisc_enabled; + ea->allmulti_enabled = allmulti_enabled; + ea->broadcast_enabled = broadcast_enabled; +} + +void mlx5e_set_rx_mode_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + set_rx_mode_work); + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_set_rx_mode_core(priv); + mutex_unlock(&priv->state_lock); +} + +void mlx5e_init_eth_addr(struct mlx5e_priv *priv) +{ + ether_addr_copy(priv->eth_addr.broadcast.addr, priv->netdev->broadcast); +} + +static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv) +{ + struct mlx5_flow_table_group *g; + u8 *dmac; + + g = kcalloc(9, sizeof(*g), GFP_KERNEL); + + g[0].log_sz = 2; + g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, + outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, + outer_headers.ip_protocol); + + g[1].log_sz = 1; + g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria, + outer_headers.ethertype); + + g[2].log_sz = 0; + + g[3].log_sz = 14; + g[3].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[3].match_criteria, + outer_headers.dmac_47_16); + memset(dmac, 0xff, ETH_ALEN); + MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria, + outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria, + outer_headers.ip_protocol); + + g[4].log_sz = 13; + g[4].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[4].match_criteria, + outer_headers.dmac_47_16); + memset(dmac, 0xff, ETH_ALEN); + MLX5_SET_TO_ONES(fte_match_param, g[4].match_criteria, + outer_headers.ethertype); + + g[5].log_sz = 11; + g[5].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[5].match_criteria, + outer_headers.dmac_47_16); + memset(dmac, 0xff, ETH_ALEN); + + g[6].log_sz = 2; + g[6].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[6].match_criteria, + outer_headers.dmac_47_16); + dmac[0] = 0x01; + MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria, + outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria, + outer_headers.ip_protocol); + + g[7].log_sz = 1; + g[7].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[7].match_criteria, + outer_headers.dmac_47_16); + dmac[0] = 0x01; + MLX5_SET_TO_ONES(fte_match_param, g[7].match_criteria, + outer_headers.ethertype); + + g[8].log_sz = 0; + g[8].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[8].match_criteria, + outer_headers.dmac_47_16); + dmac[0] = 0x01; + priv->ft.main = mlx5_create_flow_table(priv->mdev, 1, + MLX5_FLOW_TABLE_TYPE_NIC_RCV, + 9, g); + kfree(g); + + return priv->ft.main ? 0 : -ENOMEM; +} + +static void mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv) +{ + mlx5_destroy_flow_table(priv->ft.main); +} + +static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) +{ + struct mlx5_flow_table_group *g; + + g = kcalloc(2, sizeof(*g), GFP_KERNEL); + if (!g) + return -ENOMEM; + + g[0].log_sz = 12; + g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, + outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, + outer_headers.first_vid); + + /* untagged + any vlan id */ + g[1].log_sz = 1; + g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria, + outer_headers.vlan_tag); + + priv->ft.vlan = mlx5_create_flow_table(priv->mdev, 0, + MLX5_FLOW_TABLE_TYPE_NIC_RCV, + 2, g); + + kfree(g); + return priv->ft.vlan ? 0 : -ENOMEM; +} + +static void mlx5e_destroy_vlan_flow_table(struct mlx5e_priv *priv) +{ + mlx5_destroy_flow_table(priv->ft.vlan); +} + +int mlx5e_open_flow_table(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_create_main_flow_table(priv); + if (err) + return err; + + err = mlx5e_create_vlan_flow_table(priv); + if (err) + goto err_destroy_main_flow_table; + + return 0; + +err_destroy_main_flow_table: + mlx5e_destroy_main_flow_table(priv); + + return err; +} + +void mlx5e_close_flow_table(struct mlx5e_priv *priv) +{ + mlx5e_destroy_vlan_flow_table(priv); + mlx5e_destroy_main_flow_table(priv); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c b/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c new file mode 100644 index 000000000000..ca90b9bc3b95 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include "mlx5_core.h" + +struct mlx5_ftg { + struct mlx5_flow_table_group g; + u32 id; + u32 start_ix; +}; + +struct mlx5_flow_table { + struct mlx5_core_dev *dev; + u8 level; + u8 type; + u32 id; + struct mutex mutex; /* sync bitmap alloc */ + u16 num_groups; + struct mlx5_ftg *group; + unsigned long *bitmap; + u32 size; +}; + +static int mlx5_set_flow_entry_cmd(struct mlx5_flow_table *ft, u32 group_ix, + u32 flow_index, void *flow_context) +{ + u32 out[MLX5_ST_SZ_DW(set_fte_out)]; + u32 *in; + void *in_flow_context; + int fcdls = + MLX5_GET(flow_context, flow_context, destination_list_size) * + MLX5_ST_SZ_BYTES(dest_format_struct); + int inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fcdls; + int err; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(ft->dev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(set_fte_in, in, table_type, ft->type); + MLX5_SET(set_fte_in, in, table_id, ft->id); + MLX5_SET(set_fte_in, in, flow_index, flow_index); + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + memcpy(in_flow_context, flow_context, + MLX5_ST_SZ_BYTES(flow_context) + fcdls); + + MLX5_SET(flow_context, in_flow_context, group_id, + ft->group[group_ix].id); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(ft->dev, in, inlen, out, + sizeof(out)); + kvfree(in); + + return err; +} + +static void mlx5_del_flow_entry_cmd(struct mlx5_flow_table *ft, u32 flow_index) +{ + u32 in[MLX5_ST_SZ_DW(delete_fte_in)]; + u32 out[MLX5_ST_SZ_DW(delete_fte_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + +#define MLX5_SET_DFTEI(p, x, v) MLX5_SET(delete_fte_in, p, x, v) + MLX5_SET_DFTEI(in, table_type, ft->type); + MLX5_SET_DFTEI(in, table_id, ft->id); + MLX5_SET_DFTEI(in, flow_index, flow_index); + MLX5_SET_DFTEI(in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + + mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out)); +} + +static void mlx5_destroy_flow_group_cmd(struct mlx5_flow_table *ft, int i) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + +#define MLX5_SET_DFGI(p, x, v) MLX5_SET(destroy_flow_group_in, p, x, v) + MLX5_SET_DFGI(in, table_type, ft->type); + MLX5_SET_DFGI(in, table_id, ft->id); + MLX5_SET_DFGI(in, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP); + MLX5_SET_DFGI(in, group_id, ft->group[i].id); + mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_create_flow_group_cmd(struct mlx5_flow_table *ft, int i) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_group_out)]; + u32 *in; + void *in_match_criteria; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_group *g = &ft->group[i].g; + u32 start_ix = ft->group[i].start_ix; + u32 end_ix = start_ix + (1 << g->log_sz) - 1; + int err; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(ft->dev, "failed to allocate inbox\n"); + return -ENOMEM; + } + in_match_criteria = MLX5_ADDR_OF(create_flow_group_in, in, + match_criteria); + + memset(out, 0, sizeof(out)); + +#define MLX5_SET_CFGI(p, x, v) MLX5_SET(create_flow_group_in, p, x, v) + MLX5_SET_CFGI(in, table_type, ft->type); + MLX5_SET_CFGI(in, table_id, ft->id); + MLX5_SET_CFGI(in, opcode, MLX5_CMD_OP_CREATE_FLOW_GROUP); + MLX5_SET_CFGI(in, start_flow_index, start_ix); + MLX5_SET_CFGI(in, end_flow_index, end_ix); + MLX5_SET_CFGI(in, match_criteria_enable, g->match_criteria_enable); + + memcpy(in_match_criteria, g->match_criteria, + MLX5_ST_SZ_BYTES(fte_match_param)); + + err = mlx5_cmd_exec_check_status(ft->dev, in, inlen, out, + sizeof(out)); + if (!err) + ft->group[i].id = MLX5_GET(create_flow_group_out, out, + group_id); + + kvfree(in); + + return err; +} + +static void mlx5_destroy_flow_table_groups(struct mlx5_flow_table *ft) +{ + int i; + + for (i = 0; i < ft->num_groups; i++) + mlx5_destroy_flow_group_cmd(ft, i); +} + +static int mlx5_create_flow_table_groups(struct mlx5_flow_table *ft) +{ + int err; + int i; + + for (i = 0; i < ft->num_groups; i++) { + err = mlx5_create_flow_group_cmd(ft, i); + if (err) + goto err_destroy_flow_table_groups; + } + + return 0; + +err_destroy_flow_table_groups: + for (i--; i >= 0; i--) + mlx5_destroy_flow_group_cmd(ft, i); + + return err; +} + +static int mlx5_create_flow_table_cmd(struct mlx5_flow_table *ft) +{ + u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]; + u32 out[MLX5_ST_SZ_DW(create_flow_table_out)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(create_flow_table_in, in, table_type, ft->type); + MLX5_SET(create_flow_table_in, in, level, ft->level); + MLX5_SET(create_flow_table_in, in, log_size, order_base_2(ft->size)); + + MLX5_SET(create_flow_table_in, in, opcode, + MLX5_CMD_OP_CREATE_FLOW_TABLE); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, + sizeof(out)); + if (err) + return err; + + ft->id = MLX5_GET(create_flow_table_out, out, table_id); + + return 0; +} + +static void mlx5_destroy_flow_table_cmd(struct mlx5_flow_table *ft) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + +#define MLX5_SET_DFTI(p, x, v) MLX5_SET(destroy_flow_table_in, p, x, v) + MLX5_SET_DFTI(in, table_type, ft->type); + MLX5_SET_DFTI(in, table_id, ft->id); + MLX5_SET_DFTI(in, opcode, MLX5_CMD_OP_DESTROY_FLOW_TABLE); + + mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_find_group(struct mlx5_flow_table *ft, u8 match_criteria_enable, + u32 *match_criteria, int *group_ix) +{ + void *mc_outer = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers); + void *mc_misc = MLX5_ADDR_OF(fte_match_param, match_criteria, + misc_parameters); + void *mc_inner = MLX5_ADDR_OF(fte_match_param, match_criteria, + inner_headers); + int mc_outer_sz = MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4); + int mc_misc_sz = MLX5_ST_SZ_BYTES(fte_match_set_misc); + int mc_inner_sz = MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4); + int i; + + for (i = 0; i < ft->num_groups; i++) { + struct mlx5_flow_table_group *g = &ft->group[i].g; + void *gmc_outer = MLX5_ADDR_OF(fte_match_param, + g->match_criteria, + outer_headers); + void *gmc_misc = MLX5_ADDR_OF(fte_match_param, + g->match_criteria, + misc_parameters); + void *gmc_inner = MLX5_ADDR_OF(fte_match_param, + g->match_criteria, + inner_headers); + + if (g->match_criteria_enable != match_criteria_enable) + continue; + + if (match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) + if (memcmp(mc_outer, gmc_outer, mc_outer_sz)) + continue; + + if (match_criteria_enable & MLX5_MATCH_MISC_PARAMETERS) + if (memcmp(mc_misc, gmc_misc, mc_misc_sz)) + continue; + + if (match_criteria_enable & MLX5_MATCH_INNER_HEADERS) + if (memcmp(mc_inner, gmc_inner, mc_inner_sz)) + continue; + + *group_ix = i; + return 0; + } + + return -EINVAL; +} + +static int alloc_flow_index(struct mlx5_flow_table *ft, int group_ix, u32 *ix) +{ + struct mlx5_ftg *g = &ft->group[group_ix]; + int err = 0; + + mutex_lock(&ft->mutex); + + *ix = find_next_zero_bit(ft->bitmap, ft->size, g->start_ix); + if (*ix >= (g->start_ix + (1 << g->g.log_sz))) + err = -ENOSPC; + else + __set_bit(*ix, ft->bitmap); + + mutex_unlock(&ft->mutex); + + return err; +} + +static void mlx5_free_flow_index(struct mlx5_flow_table *ft, u32 ix) +{ + __clear_bit(ix, ft->bitmap); +} + +int mlx5_add_flow_table_entry(void *flow_table, u8 match_criteria_enable, + void *match_criteria, void *flow_context, + u32 *flow_index) +{ + struct mlx5_flow_table *ft = flow_table; + int group_ix; + int err; + + err = mlx5_find_group(ft, match_criteria_enable, match_criteria, + &group_ix); + if (err) { + mlx5_core_warn(ft->dev, "mlx5_find_group failed\n"); + return err; + } + + err = alloc_flow_index(ft, group_ix, flow_index); + if (err) { + mlx5_core_warn(ft->dev, "alloc_flow_index failed\n"); + return err; + } + + return mlx5_set_flow_entry_cmd(ft, group_ix, *flow_index, flow_context); +} +EXPORT_SYMBOL(mlx5_add_flow_table_entry); + +void mlx5_del_flow_table_entry(void *flow_table, u32 flow_index) +{ + struct mlx5_flow_table *ft = flow_table; + + mlx5_del_flow_entry_cmd(ft, flow_index); + mlx5_free_flow_index(ft, flow_index); +} +EXPORT_SYMBOL(mlx5_del_flow_table_entry); + +void *mlx5_create_flow_table(struct mlx5_core_dev *dev, u8 level, u8 table_type, + u16 num_groups, + struct mlx5_flow_table_group *group) +{ + struct mlx5_flow_table *ft; + u32 start_ix = 0; + u32 ft_size = 0; + void *gr; + void *bm; + int err; + int i; + + for (i = 0; i < num_groups; i++) + ft_size += (1 << group[i].log_sz); + + ft = kzalloc(sizeof(*ft), GFP_KERNEL); + gr = kcalloc(num_groups, sizeof(struct mlx5_ftg), GFP_KERNEL); + bm = kcalloc(BITS_TO_LONGS(ft_size), sizeof(uintptr_t), GFP_KERNEL); + if (!ft || !gr || !bm) + goto err_free_ft; + + ft->group = gr; + ft->bitmap = bm; + ft->num_groups = num_groups; + ft->level = level; + ft->type = table_type; + ft->size = ft_size; + ft->dev = dev; + mutex_init(&ft->mutex); + + for (i = 0; i < ft->num_groups; i++) { + memcpy(&ft->group[i].g, &group[i], sizeof(*group)); + ft->group[i].start_ix = start_ix; + start_ix += 1 << group[i].log_sz; + } + + err = mlx5_create_flow_table_cmd(ft); + if (err) + goto err_free_ft; + + err = mlx5_create_flow_table_groups(ft); + if (err) + goto err_destroy_flow_table_cmd; + + return ft; + +err_destroy_flow_table_cmd: + mlx5_destroy_flow_table_cmd(ft); + +err_free_ft: + mlx5_core_warn(dev, "failed to alloc flow table\n"); + kfree(bm); + kfree(gr); + kfree(ft); + + return NULL; +} +EXPORT_SYMBOL(mlx5_create_flow_table); + +void mlx5_destroy_flow_table(void *flow_table) +{ + struct mlx5_flow_table *ft = flow_table; + + mlx5_destroy_flow_table_groups(ft); + mlx5_destroy_flow_table_cmd(ft); + kfree(ft->bitmap); + kfree(ft->group); + kfree(ft); +} +EXPORT_SYMBOL(mlx5_destroy_flow_table); + +u32 mlx5_get_flow_table_id(void *flow_table) +{ + struct mlx5_flow_table *ft = flow_table; + + return ft->id; +} +EXPORT_SYMBOL(mlx5_get_flow_table_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c new file mode 100644 index 000000000000..3c555d708af1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include "mlx5_core.h" +#include "transobj.h" + +int mlx5_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn) +{ + u32 out[MLX5_ST_SZ_DW(create_rq_out)]; + int err; + + MLX5_SET(create_rq_in, in, opcode, MLX5_CMD_OP_CREATE_RQ); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *rqn = MLX5_GET(create_rq_out, out, rqn); + + return err; +} + +int mlx5_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_rq_out)]; + + MLX5_SET(modify_rq_in, in, rqn, rqn); + MLX5_SET(modify_rq_in, in, opcode, MLX5_CMD_OP_MODIFY_RQ); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); +} + +void mlx5_destroy_rq(struct mlx5_core_dev *dev, u32 rqn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rq_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_rq_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, in, rqn, rqn); + + mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn) +{ + u32 out[MLX5_ST_SZ_DW(create_sq_out)]; + int err; + + MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *sqn = MLX5_GET(create_sq_out, out, sqn); + + return err; +} + +int mlx5_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_sq_out)]; + + MLX5_SET(modify_sq_in, in, sqn, sqn); + MLX5_SET(modify_sq_in, in, opcode, MLX5_CMD_OP_MODIFY_SQ); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); +} + +void mlx5_destroy_sq(struct mlx5_core_dev *dev, u32 sqn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_sq_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_sq_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, in, sqn, sqn); + + mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tirn) +{ + u32 out[MLX5_ST_SZ_DW(create_tir_out)]; + int err; + + MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *tirn = MLX5_GET(create_tir_out, out, tirn); + + return err; +} + +void mlx5_destroy_tir(struct mlx5_core_dev *dev, u32 tirn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tir_out)]; + u32 out[MLX5_ST_SZ_DW(destroy_tir_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, in, tirn, tirn); + + mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tisn) +{ + u32 out[MLX5_ST_SZ_DW(create_tis_out)]; + int err; + + MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *tisn = MLX5_GET(create_tis_out, out, tisn); + + return err; +} + +void mlx5_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tis_out)]; + u32 out[MLX5_ST_SZ_DW(destroy_tis_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, in, tisn, tisn); + + mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h b/drivers/net/ethernet/mellanox/mlx5/core/transobj.h new file mode 100644 index 000000000000..1bc898cc4933 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __TRANSOBJ_H__ +#define __TRANSOBJ_H__ + +int mlx5_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn); +int mlx5_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen); +void mlx5_destroy_rq(struct mlx5_core_dev *dev, u32 rqn); +int mlx5_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn); +int mlx5_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen); +void mlx5_destroy_sq(struct mlx5_core_dev *dev, u32 sqn); +int mlx5_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tirn); +void mlx5_destroy_tir(struct mlx5_core_dev *dev, u32 tirn); +int mlx5_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tisn); +void mlx5_destroy_tis(struct mlx5_core_dev *dev, u32 tisn); + +#endif /* __TRANSOBJ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c new file mode 100644 index 000000000000..ba374b9a6c87 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include "vport.h" +#include "mlx5_core.h" + +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod) +{ + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)]; + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_vport_state_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_STATE); + MLX5_SET(query_vport_state_in, in, op_mod, opmod); + + err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, + sizeof(out)); + if (err) + mlx5_core_warn(mdev, "MLX5_CMD_OP_QUERY_VPORT_STATE failed\n"); + + return MLX5_GET(query_vport_state_out, out, state); +} + +void mlx5_query_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u8 *out_addr; + + out = mlx5_vzalloc(outlen); + if (!out) + return; + + out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context.permanent_address); + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + + memset(out, 0, outlen); + mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); + + ether_addr_copy(addr, &out_addr[2]); + + kvfree(out); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.h b/drivers/net/ethernet/mellanox/mlx5/core/vport.h new file mode 100644 index 000000000000..c05ca2c3419d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_VPORT_H__ +#define __MLX5_VPORT_H__ + +#include + +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); +void mlx5_query_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr); + +#endif /* __MLX5_VPORT_H__ */ diff --git a/include/linux/mlx5/flow_table.h b/include/linux/mlx5/flow_table.h new file mode 100644 index 000000000000..5f922c6d4fc2 --- /dev/null +++ b/include/linux/mlx5/flow_table.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_FLOW_TABLE_H +#define MLX5_FLOW_TABLE_H + +#include + +struct mlx5_flow_table_group { + u8 log_sz; + u8 match_criteria_enable; + u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; +}; + +void *mlx5_create_flow_table(struct mlx5_core_dev *dev, u8 level, u8 table_type, + u16 num_groups, + struct mlx5_flow_table_group *group); +void mlx5_destroy_flow_table(void *flow_table); +int mlx5_add_flow_table_entry(void *flow_table, u8 match_criteria_enable, + void *match_criteria, void *flow_context, + u32 *flow_index); +void mlx5_del_flow_table_entry(void *flow_table, u32 flow_index); +u32 mlx5_get_flow_table_id(void *flow_table); + +#endif /* MLX5_FLOW_TABLE_H */ -- cgit v1.2.3 From f62b8bb8f2d30582f30f51e85a8c0e1260125d7e Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 28 May 2015 22:28:48 +0300 Subject: net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality This is the Ethernet part of the driver for the Mellanox ConnectX(R)-4 Single/Dual-Port Adapter supporting 100Gb/s with VPI. The driver extends the existing mlx5 driver with Ethernet functionality. This patch contains the driver entry points but does not include transmit and receive (see the previous patch in the series) routines. It also adds the option MLX5_CORE_EN to Kconfig to enable/disable the Ethernet functionality. Currently, Kconfig is programmed to make Ethernet and Infiniband functionality mutally exclusive. Also changed MLX5_INFINIBAND to be depandant on MLX5_CORE instead of selecting it, since MLX5_CORE could be selected without MLX5_INFINIBAND being selected. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx5/Kconfig | 4 +- drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 14 +- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 3 + drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 19 - drivers/net/ethernet/mellanox/mlx5/core/en.h | 520 ++++++ .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 679 +++++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1899 ++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 74 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 9 +- include/linux/mlx5/device.h | 19 + include/linux/mlx5/driver.h | 1 + 11 files changed, 3213 insertions(+), 28 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_main.c (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig index 10df386c6344..bce263b92821 100644 --- a/drivers/infiniband/hw/mlx5/Kconfig +++ b/drivers/infiniband/hw/mlx5/Kconfig @@ -1,8 +1,6 @@ config MLX5_INFINIBAND tristate "Mellanox Connect-IB HCA support" - depends on NETDEVICES && ETHERNET && PCI - select NET_VENDOR_MELLANOX - select MLX5_CORE + depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE ---help--- This driver provides low-level InfiniBand support for Mellanox Connect-IB PCI Express host channel adapters (HCAs). diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 8ff57e8e3e91..0d7aef040fb0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -3,6 +3,18 @@ # config MLX5_CORE - tristate + tristate "Mellanox Technologies ConnectX-4 and Connect-IB core driver" depends on PCI default n + ---help--- + Core driver for low level functionality of the ConnectX-4 and + Connect-IB cards by Mellanox Technologies. + +config MLX5_CORE_EN + bool "Mellanox Technologies ConnectX-4 Ethernet support" + depends on MLX5_INFINIBAND=n && NETDEVICES && ETHERNET && PCI && MLX5_CORE + default n + ---help--- + Ethernet support in Mellanox Technologies ConnectX-4 NIC. + Ethernet and Infiniband support in ConnectX-4 are currently mutually + exclusive. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 105780bb980b..87e9e606596a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -3,3 +3,6 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o vport.o transobj.o \ + en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \ + en_txrx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 2f22cd2de2b0..75ff58dc1ff5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -75,25 +75,6 @@ enum { MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10, }; -enum { - MLX5_CMD_STAT_OK = 0x0, - MLX5_CMD_STAT_INT_ERR = 0x1, - MLX5_CMD_STAT_BAD_OP_ERR = 0x2, - MLX5_CMD_STAT_BAD_PARAM_ERR = 0x3, - MLX5_CMD_STAT_BAD_SYS_STATE_ERR = 0x4, - MLX5_CMD_STAT_BAD_RES_ERR = 0x5, - MLX5_CMD_STAT_RES_BUSY = 0x6, - MLX5_CMD_STAT_LIM_ERR = 0x8, - MLX5_CMD_STAT_BAD_RES_STATE_ERR = 0x9, - MLX5_CMD_STAT_IX_ERR = 0xa, - MLX5_CMD_STAT_NO_RES_ERR = 0xf, - MLX5_CMD_STAT_BAD_INP_LEN_ERR = 0x50, - MLX5_CMD_STAT_BAD_OUTP_LEN_ERR = 0x51, - MLX5_CMD_STAT_BAD_QP_STATE_ERR = 0x10, - MLX5_CMD_STAT_BAD_PKT_ERR = 0x30, - MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR = 0x40, -}; - static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in, struct mlx5_cmd_msg *out, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h new file mode 100644 index 000000000000..cbb3c7cb53f7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -0,0 +1,520 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "vport.h" +#include "wq.h" +#include "transobj.h" +#include "mlx5_core.h" + +#define MLX5E_MAX_NUM_TC 8 + +#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x7 +#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa +#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xd + +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE 0x7 +#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa +#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd + +#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (16 * 1024) +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 +#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 +#define MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ 0x7 +#define MLX5E_PARAMS_MIN_MTU 46 + +#define MLX5E_TX_CQ_POLL_BUDGET 128 +#define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ + +static const char vport_strings[][ETH_GSTRING_LEN] = { + /* vport statistics */ + "rx_packets", + "rx_bytes", + "tx_packets", + "tx_bytes", + "rx_error_packets", + "rx_error_bytes", + "tx_error_packets", + "tx_error_bytes", + "rx_unicast_packets", + "rx_unicast_bytes", + "tx_unicast_packets", + "tx_unicast_bytes", + "rx_multicast_packets", + "rx_multicast_bytes", + "tx_multicast_packets", + "tx_multicast_bytes", + "rx_broadcast_packets", + "rx_broadcast_bytes", + "tx_broadcast_packets", + "tx_broadcast_bytes", + + /* SW counters */ + "tso_packets", + "tso_bytes", + "lro_packets", + "lro_bytes", + "rx_csum_good", + "rx_csum_none", + "tx_csum_offload", + "tx_queue_stopped", + "tx_queue_wake", + "tx_queue_dropped", + "rx_wqe_err", +}; + +struct mlx5e_vport_stats { + /* HW counters */ + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + u64 rx_error_packets; + u64 rx_error_bytes; + u64 tx_error_packets; + u64 tx_error_bytes; + u64 rx_unicast_packets; + u64 rx_unicast_bytes; + u64 tx_unicast_packets; + u64 tx_unicast_bytes; + u64 rx_multicast_packets; + u64 rx_multicast_bytes; + u64 tx_multicast_packets; + u64 tx_multicast_bytes; + u64 rx_broadcast_packets; + u64 rx_broadcast_bytes; + u64 tx_broadcast_packets; + u64 tx_broadcast_bytes; + + /* SW counters */ + u64 tso_packets; + u64 tso_bytes; + u64 lro_packets; + u64 lro_bytes; + u64 rx_csum_good; + u64 rx_csum_none; + u64 tx_csum_offload; + u64 tx_queue_stopped; + u64 tx_queue_wake; + u64 tx_queue_dropped; + u64 rx_wqe_err; + +#define NUM_VPORT_COUNTERS 31 +}; + +static const char rq_stats_strings[][ETH_GSTRING_LEN] = { + "packets", + "csum_none", + "lro_packets", + "lro_bytes", + "wqe_err" +}; + +struct mlx5e_rq_stats { + u64 packets; + u64 csum_none; + u64 lro_packets; + u64 lro_bytes; + u64 wqe_err; +#define NUM_RQ_STATS 5 +}; + +static const char sq_stats_strings[][ETH_GSTRING_LEN] = { + "packets", + "tso_packets", + "tso_bytes", + "csum_offload_none", + "stopped", + "wake", + "dropped", + "nop" +}; + +struct mlx5e_sq_stats { + u64 packets; + u64 tso_packets; + u64 tso_bytes; + u64 csum_offload_none; + u64 stopped; + u64 wake; + u64 dropped; + u64 nop; +#define NUM_SQ_STATS 8 +}; + +struct mlx5e_stats { + struct mlx5e_vport_stats vport; +}; + +struct mlx5e_params { + u8 log_sq_size; + u8 log_rq_size; + u16 num_channels; + u8 default_vlan_prio; + u8 num_tc; + u16 rx_cq_moderation_usec; + u16 rx_cq_moderation_pkts; + u16 tx_cq_moderation_usec; + u16 tx_cq_moderation_pkts; + u16 min_rx_wqes; + u16 rx_hash_log_tbl_sz; + bool lro_en; + u32 lro_wqe_sz; +}; + +enum { + MLX5E_RQ_STATE_POST_WQES_ENABLE, +}; + +enum cq_flags { + MLX5E_CQ_HAS_CQES = 1, +}; + +struct mlx5e_cq { + /* data path - accessed per cqe */ + struct mlx5_cqwq wq; + void *sqrq; + unsigned long flags; + + /* data path - accessed per napi poll */ + struct napi_struct *napi; + struct mlx5_core_cq mcq; + struct mlx5e_channel *channel; + + /* control */ + struct mlx5_wq_ctrl wq_ctrl; +} ____cacheline_aligned_in_smp; + +struct mlx5e_rq { + /* data path */ + struct mlx5_wq_ll wq; + u32 wqe_sz; + struct sk_buff **skb; + + struct device *pdev; + struct net_device *netdev; + struct mlx5e_rq_stats stats; + struct mlx5e_cq cq; + + unsigned long state; + int ix; + + /* control */ + struct mlx5_wq_ctrl wq_ctrl; + u32 rqn; + struct mlx5e_channel *channel; +} ____cacheline_aligned_in_smp; + +struct mlx5e_tx_skb_cb { + u32 num_bytes; + u8 num_wqebbs; + u8 num_dma; +}; + +#define MLX5E_TX_SKB_CB(__skb) ((struct mlx5e_tx_skb_cb *)__skb->cb) + +struct mlx5e_sq_dma { + dma_addr_t addr; + u32 size; +}; + +enum { + MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, +}; + +struct mlx5e_sq { + /* data path */ + + /* dirtied @completion */ + u16 cc; + u32 dma_fifo_cc; + + /* dirtied @xmit */ + u16 pc ____cacheline_aligned_in_smp; + u32 dma_fifo_pc; + u32 bf_offset; + struct mlx5e_sq_stats stats; + + struct mlx5e_cq cq; + + /* pointers to per packet info: write@xmit, read@completion */ + struct sk_buff **skb; + struct mlx5e_sq_dma *dma_fifo; + + /* read only */ + struct mlx5_wq_cyc wq; + u32 dma_fifo_mask; + void __iomem *uar_map; + struct netdev_queue *txq; + u32 sqn; + u32 bf_buf_size; + struct device *pdev; + __be32 mkey_be; + unsigned long state; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5_uar uar; + struct mlx5e_channel *channel; + int tc; +} ____cacheline_aligned_in_smp; + +static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) +{ + return (((sq->wq.sz_m1 & (sq->cc - sq->pc)) >= n) || + (sq->cc == sq->pc)); +} + +enum channel_flags { + MLX5E_CHANNEL_NAPI_SCHED = 1, +}; + +struct mlx5e_channel { + /* data path */ + struct mlx5e_rq rq; + struct mlx5e_sq sq[MLX5E_MAX_NUM_TC]; + struct napi_struct napi; + struct device *pdev; + struct net_device *netdev; + __be32 mkey_be; + u8 num_tc; + unsigned long flags; + + /* control */ + struct mlx5e_priv *priv; + int ix; + int cpu; +}; + +enum mlx5e_traffic_types { + MLX5E_TT_IPV4_TCP = 0, + MLX5E_TT_IPV6_TCP = 1, + MLX5E_TT_IPV4_UDP = 2, + MLX5E_TT_IPV6_UDP = 3, + MLX5E_TT_IPV4 = 4, + MLX5E_TT_IPV6 = 5, + MLX5E_TT_ANY = 6, + MLX5E_NUM_TT = 7, +}; + +enum { + MLX5E_RQT_SPREADING = 0, + MLX5E_RQT_DEFAULT_RQ = 1, + MLX5E_NUM_RQT = 2, +}; + +struct mlx5e_eth_addr_info { + u8 addr[ETH_ALEN + 2]; + u32 tt_vec; + u32 ft_ix[MLX5E_NUM_TT]; /* flow table index per traffic type */ +}; + +#define MLX5E_ETH_ADDR_HASH_SIZE (1 << BITS_PER_BYTE) + +struct mlx5e_eth_addr_db { + struct hlist_head netdev_uc[MLX5E_ETH_ADDR_HASH_SIZE]; + struct hlist_head netdev_mc[MLX5E_ETH_ADDR_HASH_SIZE]; + struct mlx5e_eth_addr_info broadcast; + struct mlx5e_eth_addr_info allmulti; + struct mlx5e_eth_addr_info promisc; + bool broadcast_enabled; + bool allmulti_enabled; + bool promisc_enabled; +}; + +enum { + MLX5E_STATE_ASYNC_EVENTS_ENABLE, + MLX5E_STATE_OPENED, +}; + +struct mlx5e_vlan_db { + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + u32 active_vlans_ft_ix[VLAN_N_VID]; + u32 untagged_rule_ft_ix; + u32 any_vlan_rule_ft_ix; + bool filter_disabled; +}; + +struct mlx5e_flow_table { + void *vlan; + void *main; +}; + +struct mlx5e_priv { + /* priv data path fields - start */ + int order_base_2_num_channels; + int queue_mapping_channel_mask; + int num_tc; + int default_vlan_prio; + /* priv data path fields - end */ + + unsigned long state; + struct mutex state_lock; /* Protects Interface state */ + struct mlx5_uar cq_uar; + u32 pdn; + struct mlx5_core_mr mr; + + struct mlx5e_channel **channel; + u32 tisn[MLX5E_MAX_NUM_TC]; + u32 rqtn; + u32 tirn[MLX5E_NUM_TT]; + + struct mlx5e_flow_table ft; + struct mlx5e_eth_addr_db eth_addr; + struct mlx5e_vlan_db vlan; + + struct mlx5e_params params; + spinlock_t async_events_spinlock; /* sync hw events */ + struct work_struct update_carrier_work; + struct work_struct set_rx_mode_work; + struct delayed_work update_stats_work; + + struct mlx5_core_dev *mdev; + struct net_device *netdev; + struct mlx5e_stats stats; +}; + +#define MLX5E_NET_IP_ALIGN 2 + +struct mlx5e_tx_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_eth_seg eth; +}; + +struct mlx5e_rx_wqe { + struct mlx5_wqe_srq_next_seg next; + struct mlx5_wqe_data_seg data; +}; + +enum mlx5e_link_mode { + MLX5E_1000BASE_CX_SGMII = 0, + MLX5E_1000BASE_KX = 1, + MLX5E_10GBASE_CX4 = 2, + MLX5E_10GBASE_KX4 = 3, + MLX5E_10GBASE_KR = 4, + MLX5E_20GBASE_KR2 = 5, + MLX5E_40GBASE_CR4 = 6, + MLX5E_40GBASE_KR4 = 7, + MLX5E_56GBASE_R4 = 8, + MLX5E_10GBASE_CR = 12, + MLX5E_10GBASE_SR = 13, + MLX5E_10GBASE_ER = 14, + MLX5E_40GBASE_SR4 = 15, + MLX5E_40GBASE_LR4 = 16, + MLX5E_100GBASE_CR4 = 20, + MLX5E_100GBASE_SR4 = 21, + MLX5E_100GBASE_KR4 = 22, + MLX5E_100GBASE_LR4 = 23, + MLX5E_100BASE_TX = 24, + MLX5E_100BASE_T = 25, + MLX5E_10GBASE_T = 26, + MLX5E_25GBASE_CR = 27, + MLX5E_25GBASE_KR = 28, + MLX5E_25GBASE_SR = 29, + MLX5E_50GBASE_CR2 = 30, + MLX5E_50GBASE_KR2 = 31, + MLX5E_LINK_MODES_NUMBER, +}; + +#define MLX5E_PROT_MASK(link_mode) (1 << link_mode) + +u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback); +netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t mlx5e_xmit_multi_tc(struct sk_buff *skb, struct net_device *dev); + +void mlx5e_completion_event(struct mlx5_core_cq *mcq); +void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); +int mlx5e_napi_poll(struct napi_struct *napi, int budget); +bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq); +bool mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); +bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); +struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); + +void mlx5e_update_stats(struct mlx5e_priv *priv); + +int mlx5e_open_flow_table(struct mlx5e_priv *priv); +void mlx5e_close_flow_table(struct mlx5e_priv *priv); +void mlx5e_init_eth_addr(struct mlx5e_priv *priv); +void mlx5e_set_rx_mode_core(struct mlx5e_priv *priv); +void mlx5e_set_rx_mode_work(struct work_struct *work); + +int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid); +int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid); +void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); +void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); +int mlx5e_add_all_vlan_rules(struct mlx5e_priv *priv); +void mlx5e_del_all_vlan_rules(struct mlx5e_priv *priv); + +int mlx5e_open_locked(struct net_device *netdev); +int mlx5e_close_locked(struct net_device *netdev); +int mlx5e_update_priv_params(struct mlx5e_priv *priv, + struct mlx5e_params *new_params); + +static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, + struct mlx5e_tx_wqe *wqe) +{ + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + + *sq->wq.db = cpu_to_be32(sq->pc); + + /* ensure doorbell record is visible to device before ringing the + * doorbell + */ + wmb(); + + mlx5_write64((__be32 *)&wqe->ctrl, + sq->uar_map + MLX5_BF_OFFSET + sq->bf_offset, + NULL); + + sq->bf_offset ^= sq->bf_buf_size; +} + +static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) +{ + struct mlx5_core_cq *mcq; + + mcq = &cq->mcq; + mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, NULL, cq->wq.cc); +} + +extern const struct ethtool_ops mlx5e_ethtool_ops; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c new file mode 100644 index 000000000000..de7aec8abca1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -0,0 +1,679 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" + +static void mlx5e_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + strlcpy(drvinfo->driver, DRIVER_NAME, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, DRIVER_VERSION " (" DRIVER_RELDATE ")", + sizeof(drvinfo->version)); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%d", + fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev)); + strlcpy(drvinfo->bus_info, pci_name(mdev->pdev), + sizeof(drvinfo->bus_info)); +} + +static const struct { + u32 supported; + u32 advertised; + u32 speed; +} ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER] = { + [MLX5E_1000BASE_CX_SGMII] = { + .supported = SUPPORTED_1000baseKX_Full, + .advertised = ADVERTISED_1000baseKX_Full, + .speed = 1000, + }, + [MLX5E_1000BASE_KX] = { + .supported = SUPPORTED_1000baseKX_Full, + .advertised = ADVERTISED_1000baseKX_Full, + .speed = 1000, + }, + [MLX5E_10GBASE_CX4] = { + .supported = SUPPORTED_10000baseKX4_Full, + .advertised = ADVERTISED_10000baseKX4_Full, + .speed = 10000, + }, + [MLX5E_10GBASE_KX4] = { + .supported = SUPPORTED_10000baseKX4_Full, + .advertised = ADVERTISED_10000baseKX4_Full, + .speed = 10000, + }, + [MLX5E_10GBASE_KR] = { + .supported = SUPPORTED_10000baseKR_Full, + .advertised = ADVERTISED_10000baseKR_Full, + .speed = 10000, + }, + [MLX5E_20GBASE_KR2] = { + .supported = SUPPORTED_20000baseKR2_Full, + .advertised = ADVERTISED_20000baseKR2_Full, + .speed = 20000, + }, + [MLX5E_40GBASE_CR4] = { + .supported = SUPPORTED_40000baseCR4_Full, + .advertised = ADVERTISED_40000baseCR4_Full, + .speed = 40000, + }, + [MLX5E_40GBASE_KR4] = { + .supported = SUPPORTED_40000baseKR4_Full, + .advertised = ADVERTISED_40000baseKR4_Full, + .speed = 40000, + }, + [MLX5E_56GBASE_R4] = { + .supported = SUPPORTED_56000baseKR4_Full, + .advertised = ADVERTISED_56000baseKR4_Full, + .speed = 56000, + }, + [MLX5E_10GBASE_CR] = { + .supported = SUPPORTED_10000baseKR_Full, + .advertised = ADVERTISED_10000baseKR_Full, + .speed = 10000, + }, + [MLX5E_10GBASE_SR] = { + .supported = SUPPORTED_10000baseKR_Full, + .advertised = ADVERTISED_10000baseKR_Full, + .speed = 10000, + }, + [MLX5E_10GBASE_ER] = { + .supported = SUPPORTED_10000baseKR_Full, + .advertised = ADVERTISED_10000baseKR_Full, + .speed = 10000, + }, + [MLX5E_40GBASE_SR4] = { + .supported = SUPPORTED_40000baseSR4_Full, + .advertised = ADVERTISED_40000baseSR4_Full, + .speed = 40000, + }, + [MLX5E_40GBASE_LR4] = { + .supported = SUPPORTED_40000baseLR4_Full, + .advertised = ADVERTISED_40000baseLR4_Full, + .speed = 40000, + }, + [MLX5E_100GBASE_CR4] = { + .speed = 100000, + }, + [MLX5E_100GBASE_SR4] = { + .speed = 100000, + }, + [MLX5E_100GBASE_KR4] = { + .speed = 100000, + }, + [MLX5E_100GBASE_LR4] = { + .speed = 100000, + }, + [MLX5E_100BASE_TX] = { + .speed = 100, + }, + [MLX5E_100BASE_T] = { + .supported = SUPPORTED_100baseT_Full, + .advertised = ADVERTISED_100baseT_Full, + .speed = 100, + }, + [MLX5E_10GBASE_T] = { + .supported = SUPPORTED_10000baseT_Full, + .advertised = ADVERTISED_10000baseT_Full, + .speed = 1000, + }, + [MLX5E_25GBASE_CR] = { + .speed = 25000, + }, + [MLX5E_25GBASE_KR] = { + .speed = 25000, + }, + [MLX5E_25GBASE_SR] = { + .speed = 25000, + }, + [MLX5E_50GBASE_CR2] = { + .speed = 50000, + }, + [MLX5E_50GBASE_KR2] = { + .speed = 50000, + }, +}; + +static int mlx5e_get_sset_count(struct net_device *dev, int sset) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (sset) { + case ETH_SS_STATS: + return NUM_VPORT_COUNTERS + + priv->params.num_channels * NUM_RQ_STATS + + priv->params.num_channels * priv->num_tc * + NUM_SQ_STATS; + /* fallthrough */ + default: + return -EOPNOTSUPP; + } +} + +static void mlx5e_get_strings(struct net_device *dev, + uint32_t stringset, uint8_t *data) +{ + int i, j, tc, idx = 0; + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (stringset) { + case ETH_SS_PRIV_FLAGS: + break; + + case ETH_SS_TEST: + break; + + case ETH_SS_STATS: + /* VPORT counters */ + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vport_strings[i]); + + /* per channel counters */ + for (i = 0; i < priv->params.num_channels; i++) + for (j = 0; j < NUM_RQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + "rx%d_%s", i, rq_stats_strings[j]); + + for (i = 0; i < priv->params.num_channels; i++) + for (tc = 0; tc < priv->num_tc; tc++) + for (j = 0; j < NUM_SQ_STATS; j++) + sprintf(data + + (idx++) * ETH_GSTRING_LEN, + "tx%d_%d_%s", i, tc, + sq_stats_strings[j]); + break; + } +} + +static void mlx5e_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int i, j, tc, idx = 0; + + if (!data) + return; + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_update_stats(priv); + mutex_unlock(&priv->state_lock); + + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + data[idx++] = ((u64 *)&priv->stats.vport)[i]; + + /* per channel counters */ + for (i = 0; i < priv->params.num_channels; i++) + for (j = 0; j < NUM_RQ_STATS; j++) + data[idx++] = !test_bit(MLX5E_STATE_OPENED, + &priv->state) ? 0 : + ((u64 *)&priv->channel[i]->rq.stats)[j]; + + for (i = 0; i < priv->params.num_channels; i++) + for (tc = 0; tc < priv->num_tc; tc++) + for (j = 0; j < NUM_SQ_STATS; j++) + data[idx++] = !test_bit(MLX5E_STATE_OPENED, + &priv->state) ? 0 : + ((u64 *)&priv->channel[i]->sq[tc].stats)[j]; +} + +static void mlx5e_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + param->rx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; + param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; + param->rx_pending = 1 << priv->params.log_rq_size; + param->tx_pending = 1 << priv->params.log_sq_size; +} + +static int mlx5e_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params new_params; + u16 min_rx_wqes; + u8 log_rq_size; + u8 log_sq_size; + int err = 0; + + if (param->rx_jumbo_pending) { + netdev_info(dev, "%s: rx_jumbo_pending not supported\n", + __func__); + return -EINVAL; + } + if (param->rx_mini_pending) { + netdev_info(dev, "%s: rx_mini_pending not supported\n", + __func__); + return -EINVAL; + } + if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) { + netdev_info(dev, "%s: rx_pending (%d) < min (%d)\n", + __func__, param->rx_pending, + 1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE); + return -EINVAL; + } + if (param->rx_pending > (1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE)) { + netdev_info(dev, "%s: rx_pending (%d) > max (%d)\n", + __func__, param->rx_pending, + 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE); + return -EINVAL; + } + if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) { + netdev_info(dev, "%s: tx_pending (%d) < min (%d)\n", + __func__, param->tx_pending, + 1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); + return -EINVAL; + } + if (param->tx_pending > (1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE)) { + netdev_info(dev, "%s: tx_pending (%d) > max (%d)\n", + __func__, param->tx_pending, + 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE); + return -EINVAL; + } + + log_rq_size = order_base_2(param->rx_pending); + log_sq_size = order_base_2(param->tx_pending); + min_rx_wqes = min_t(u16, param->rx_pending - 1, + MLX5E_PARAMS_DEFAULT_MIN_RX_WQES); + + if (log_rq_size == priv->params.log_rq_size && + log_sq_size == priv->params.log_sq_size && + min_rx_wqes == priv->params.min_rx_wqes) + return 0; + + mutex_lock(&priv->state_lock); + new_params = priv->params; + new_params.log_rq_size = log_rq_size; + new_params.log_sq_size = log_sq_size; + new_params.min_rx_wqes = min_rx_wqes; + err = mlx5e_update_priv_params(priv, &new_params); + mutex_unlock(&priv->state_lock); + + return err; +} + +static void mlx5e_get_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int ncv = priv->mdev->priv.eq_table.num_comp_vectors; + + ch->max_combined = ncv; + ch->combined_count = priv->params.num_channels; +} + +static int mlx5e_set_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int ncv = priv->mdev->priv.eq_table.num_comp_vectors; + unsigned int count = ch->combined_count; + struct mlx5e_params new_params; + int err = 0; + + if (!count) { + netdev_info(dev, "%s: combined_count=0 not supported\n", + __func__); + return -EINVAL; + } + if (ch->rx_count || ch->tx_count) { + netdev_info(dev, "%s: separate rx/tx count not supported\n", + __func__); + return -EINVAL; + } + if (count > ncv) { + netdev_info(dev, "%s: count (%d) > max (%d)\n", + __func__, count, ncv); + return -EINVAL; + } + + if (priv->params.num_channels == count) + return 0; + + mutex_lock(&priv->state_lock); + new_params = priv->params; + new_params.num_channels = count; + err = mlx5e_update_priv_params(priv, &new_params); + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + coal->rx_coalesce_usecs = priv->params.rx_cq_moderation_usec; + coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation_pkts; + coal->tx_coalesce_usecs = priv->params.tx_cq_moderation_usec; + coal->tx_max_coalesced_frames = priv->params.tx_cq_moderation_pkts; + + return 0; +} + +static int mlx5e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_channel *c; + int tc; + int i; + + priv->params.tx_cq_moderation_usec = coal->tx_coalesce_usecs; + priv->params.tx_cq_moderation_pkts = coal->tx_max_coalesced_frames; + priv->params.rx_cq_moderation_usec = coal->rx_coalesce_usecs; + priv->params.rx_cq_moderation_pkts = coal->rx_max_coalesced_frames; + + for (i = 0; i < priv->params.num_channels; ++i) { + c = priv->channel[i]; + + for (tc = 0; tc < c->num_tc; tc++) { + mlx5_core_modify_cq_moderation(mdev, + &c->sq[tc].cq.mcq, + coal->tx_coalesce_usecs, + coal->tx_max_coalesced_frames); + } + + mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq, + coal->rx_coalesce_usecs, + coal->rx_max_coalesced_frames); + } + + return 0; +} + +static u32 ptys2ethtool_supported_link(u32 eth_proto_cap) +{ + int i; + u32 supported_modes = 0; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (eth_proto_cap & MLX5E_PROT_MASK(i)) + supported_modes |= ptys2ethtool_table[i].supported; + } + return supported_modes; +} + +static u32 ptys2ethtool_adver_link(u32 eth_proto_cap) +{ + int i; + u32 advertising_modes = 0; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (eth_proto_cap & MLX5E_PROT_MASK(i)) + advertising_modes |= ptys2ethtool_table[i].advertised; + } + return advertising_modes; +} + +static u32 ptys2ethtool_supported_port(u32 eth_proto_cap) +{ + if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR) + | MLX5E_PROT_MASK(MLX5E_10GBASE_SR) + | MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) + | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { + return SUPPORTED_FIBRE; + } + + if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_100GBASE_KR4) + | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) + | MLX5E_PROT_MASK(MLX5E_10GBASE_KR) + | MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) + | MLX5E_PROT_MASK(MLX5E_1000BASE_KX))) { + return SUPPORTED_Backplane; + } + return 0; +} + +static void get_speed_duplex(struct net_device *netdev, + u32 eth_proto_oper, + struct ethtool_cmd *cmd) +{ + int i; + u32 speed = SPEED_UNKNOWN; + u8 duplex = DUPLEX_UNKNOWN; + + if (!netif_carrier_ok(netdev)) + goto out; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (eth_proto_oper & MLX5E_PROT_MASK(i)) { + speed = ptys2ethtool_table[i].speed; + duplex = DUPLEX_FULL; + break; + } + } +out: + ethtool_cmd_speed_set(cmd, speed); + cmd->duplex = duplex; +} + +static void get_supported(u32 eth_proto_cap, u32 *supported) +{ + *supported |= ptys2ethtool_supported_port(eth_proto_cap); + *supported |= ptys2ethtool_supported_link(eth_proto_cap); + *supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; +} + +static void get_advertising(u32 eth_proto_cap, u8 tx_pause, + u8 rx_pause, u32 *advertising) +{ + *advertising |= ptys2ethtool_adver_link(eth_proto_cap); + *advertising |= tx_pause ? ADVERTISED_Pause : 0; + *advertising |= (tx_pause ^ rx_pause) ? ADVERTISED_Asym_Pause : 0; +} + +static u8 get_connector_port(u32 eth_proto) +{ + if (eth_proto & (MLX5E_PROT_MASK(MLX5E_10GBASE_SR) + | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { + return PORT_FIBRE; + } + + if (eth_proto & (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) + | MLX5E_PROT_MASK(MLX5E_10GBASE_CR) + | MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) { + return PORT_DA; + } + + if (eth_proto & (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) + | MLX5E_PROT_MASK(MLX5E_10GBASE_KR) + | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) + | MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) { + return PORT_NONE; + } + + return PORT_OTHER; +} + +static void get_lp_advertising(u32 eth_proto_lp, u32 *lp_advertising) +{ + *lp_advertising = ptys2ethtool_adver_link(eth_proto_lp); +} + +static int mlx5e_get_settings(struct net_device *netdev, + struct ethtool_cmd *cmd) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + u32 eth_proto_cap; + u32 eth_proto_admin; + u32 eth_proto_lp; + u32 eth_proto_oper; + int err; + + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN); + + if (err) { + netdev_err(netdev, "%s: query port ptys failed: %d\n", + __func__, err); + goto err_query_ptys; + } + + eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); + eth_proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); + eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); + + cmd->supported = 0; + cmd->advertising = 0; + + get_supported(eth_proto_cap, &cmd->supported); + get_advertising(eth_proto_admin, 0, 0, &cmd->advertising); + get_speed_duplex(netdev, eth_proto_oper, cmd); + + eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; + + cmd->port = get_connector_port(eth_proto_oper); + get_lp_advertising(eth_proto_lp, &cmd->lp_advertising); + + cmd->transceiver = XCVR_INTERNAL; + +err_query_ptys: + return err; +} + +static u32 mlx5e_ethtool2ptys_adver_link(u32 link_modes) +{ + u32 i, ptys_modes = 0; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (ptys2ethtool_table[i].advertised & link_modes) + ptys_modes |= MLX5E_PROT_MASK(i); + } + + return ptys_modes; +} + +static u32 mlx5e_ethtool2ptys_speed_link(u32 speed) +{ + u32 i, speed_links = 0; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (ptys2ethtool_table[i].speed == speed) + speed_links |= MLX5E_PROT_MASK(i); + } + + return speed_links; +} + +static int mlx5e_set_settings(struct net_device *netdev, + struct ethtool_cmd *cmd) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u32 link_modes; + u32 speed; + u32 eth_proto_cap, eth_proto_admin; + u8 port_status; + int err; + + speed = ethtool_cmd_speed(cmd); + + link_modes = cmd->autoneg == AUTONEG_ENABLE ? + mlx5e_ethtool2ptys_adver_link(cmd->advertising) : + mlx5e_ethtool2ptys_speed_link(speed); + + err = mlx5_query_port_proto_cap(mdev, ð_proto_cap, MLX5_PTYS_EN); + if (err) { + netdev_err(netdev, "%s: query port eth proto cap failed: %d\n", + __func__, err); + goto out; + } + + link_modes = link_modes & eth_proto_cap; + if (!link_modes) { + netdev_err(netdev, "%s: Not supported link mode(s) requested", + __func__); + err = -EINVAL; + goto out; + } + + err = mlx5_query_port_proto_admin(mdev, ð_proto_admin, MLX5_PTYS_EN); + if (err) { + netdev_err(netdev, "%s: query port eth proto admin failed: %d\n", + __func__, err); + goto out; + } + + if (link_modes == eth_proto_admin) + goto out; + + err = mlx5_set_port_proto(mdev, link_modes, MLX5_PTYS_EN); + if (err) { + netdev_err(netdev, "%s: set port eth proto admin failed: %d\n", + __func__, err); + goto out; + } + + err = mlx5_query_port_status(mdev, &port_status); + if (err) + goto out; + + if (port_status == MLX5_PORT_DOWN) + return 0; + + err = mlx5_set_port_status(mdev, MLX5_PORT_DOWN); + if (err) + goto out; + err = mlx5_set_port_status(mdev, MLX5_PORT_UP); +out: + return err; +} + +const struct ethtool_ops mlx5e_ethtool_ops = { + .get_drvinfo = mlx5e_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = mlx5e_get_strings, + .get_sset_count = mlx5e_get_sset_count, + .get_ethtool_stats = mlx5e_get_ethtool_stats, + .get_ringparam = mlx5e_get_ringparam, + .set_ringparam = mlx5e_set_ringparam, + .get_channels = mlx5e_get_channels, + .set_channels = mlx5e_set_channels, + .get_coalesce = mlx5e_get_coalesce, + .set_coalesce = mlx5e_set_coalesce, + .get_settings = mlx5e_get_settings, + .set_settings = mlx5e_set_settings, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c new file mode 100644 index 000000000000..eee829d119f9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -0,0 +1,1899 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include "en.h" + +struct mlx5e_rq_param { + u32 rqc[MLX5_ST_SZ_DW(rqc)]; + struct mlx5_wq_param wq; +}; + +struct mlx5e_sq_param { + u32 sqc[MLX5_ST_SZ_DW(sqc)]; + struct mlx5_wq_param wq; +}; + +struct mlx5e_cq_param { + u32 cqc[MLX5_ST_SZ_DW(cqc)]; + struct mlx5_wq_param wq; + u16 eq_ix; +}; + +struct mlx5e_channel_param { + struct mlx5e_rq_param rq; + struct mlx5e_sq_param sq; + struct mlx5e_cq_param rx_cq; + struct mlx5e_cq_param tx_cq; +}; + +static void mlx5e_update_carrier(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 port_state; + + port_state = mlx5_query_vport_state(mdev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT); + + if (port_state == VPORT_STATE_UP) + netif_carrier_on(priv->netdev); + else + netif_carrier_off(priv->netdev); +} + +static void mlx5e_update_carrier_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + update_carrier_work); + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_update_carrier(priv); + mutex_unlock(&priv->state_lock); +} + +void mlx5e_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_vport_stats *s = &priv->stats.vport; + struct mlx5e_rq_stats *rq_stats; + struct mlx5e_sq_stats *sq_stats; + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u64 tx_offload_none; + int i, j; + + out = mlx5_vzalloc(outlen); + if (!out) + return; + + /* Collect firts the SW counters and then HW for consistency */ + s->tso_packets = 0; + s->tso_bytes = 0; + s->tx_queue_stopped = 0; + s->tx_queue_wake = 0; + s->tx_queue_dropped = 0; + tx_offload_none = 0; + s->lro_packets = 0; + s->lro_bytes = 0; + s->rx_csum_none = 0; + s->rx_wqe_err = 0; + for (i = 0; i < priv->params.num_channels; i++) { + rq_stats = &priv->channel[i]->rq.stats; + + s->lro_packets += rq_stats->lro_packets; + s->lro_bytes += rq_stats->lro_bytes; + s->rx_csum_none += rq_stats->csum_none; + s->rx_wqe_err += rq_stats->wqe_err; + + for (j = 0; j < priv->num_tc; j++) { + sq_stats = &priv->channel[i]->sq[j].stats; + + s->tso_packets += sq_stats->tso_packets; + s->tso_bytes += sq_stats->tso_bytes; + s->tx_queue_stopped += sq_stats->stopped; + s->tx_queue_wake += sq_stats->wake; + s->tx_queue_dropped += sq_stats->dropped; + tx_offload_none += sq_stats->csum_offload_none; + } + } + + /* HW counters */ + memset(in, 0, sizeof(in)); + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + MLX5_SET(query_vport_counter_in, in, op_mod, 0); + MLX5_SET(query_vport_counter_in, in, other_vport, 0); + + memset(out, 0, outlen); + + if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen)) + goto free_out; + +#define MLX5_GET_CTR(p, x) \ + MLX5_GET64(query_vport_counter_out, p, x) + + s->rx_error_packets = + MLX5_GET_CTR(out, received_errors.packets); + s->rx_error_bytes = + MLX5_GET_CTR(out, received_errors.octets); + s->tx_error_packets = + MLX5_GET_CTR(out, transmit_errors.packets); + s->tx_error_bytes = + MLX5_GET_CTR(out, transmit_errors.octets); + + s->rx_unicast_packets = + MLX5_GET_CTR(out, received_eth_unicast.packets); + s->rx_unicast_bytes = + MLX5_GET_CTR(out, received_eth_unicast.octets); + s->tx_unicast_packets = + MLX5_GET_CTR(out, transmitted_eth_unicast.packets); + s->tx_unicast_bytes = + MLX5_GET_CTR(out, transmitted_eth_unicast.octets); + + s->rx_multicast_packets = + MLX5_GET_CTR(out, received_eth_multicast.packets); + s->rx_multicast_bytes = + MLX5_GET_CTR(out, received_eth_multicast.octets); + s->tx_multicast_packets = + MLX5_GET_CTR(out, transmitted_eth_multicast.packets); + s->tx_multicast_bytes = + MLX5_GET_CTR(out, transmitted_eth_multicast.octets); + + s->rx_broadcast_packets = + MLX5_GET_CTR(out, received_eth_broadcast.packets); + s->rx_broadcast_bytes = + MLX5_GET_CTR(out, received_eth_broadcast.octets); + s->tx_broadcast_packets = + MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); + s->tx_broadcast_bytes = + MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + + s->rx_packets = + s->rx_unicast_packets + + s->rx_multicast_packets + + s->rx_broadcast_packets; + s->rx_bytes = + s->rx_unicast_bytes + + s->rx_multicast_bytes + + s->rx_broadcast_bytes; + s->tx_packets = + s->tx_unicast_packets + + s->tx_multicast_packets + + s->tx_broadcast_packets; + s->tx_bytes = + s->tx_unicast_bytes + + s->tx_multicast_bytes + + s->tx_broadcast_bytes; + + /* Update calculated offload counters */ + s->tx_csum_offload = s->tx_packets - tx_offload_none; + s->rx_csum_good = s->rx_packets - s->rx_csum_none; + +free_out: + kvfree(out); +} + +static void mlx5e_update_stats_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlx5e_priv *priv = container_of(dwork, struct mlx5e_priv, + update_stats_work); + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mlx5e_update_stats(priv); + schedule_delayed_work(dwork, + msecs_to_jiffies( + MLX5E_UPDATE_STATS_INTERVAL)); + } + mutex_unlock(&priv->state_lock); +} + +static void __mlx5e_async_event(struct mlx5e_priv *priv, + enum mlx5_dev_event event) +{ + switch (event) { + case MLX5_DEV_EVENT_PORT_UP: + case MLX5_DEV_EVENT_PORT_DOWN: + schedule_work(&priv->update_carrier_work); + break; + + default: + break; + } +} + +static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, + enum mlx5_dev_event event, unsigned long param) +{ + struct mlx5e_priv *priv = vpriv; + + spin_lock(&priv->async_events_spinlock); + if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state)) + __mlx5e_async_event(priv, event); + spin_unlock(&priv->async_events_spinlock); +} + +static void mlx5e_enable_async_events(struct mlx5e_priv *priv) +{ + set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); +} + +static void mlx5e_disable_async_events(struct mlx5e_priv *priv) +{ + spin_lock_irq(&priv->async_events_spinlock); + clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); + spin_unlock_irq(&priv->async_events_spinlock); +} + +static void mlx5e_send_nop(struct mlx5e_sq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + + u16 pi = sq->pc & wq->sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | 0x01); + cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + + sq->skb[pi] = NULL; + sq->pc++; + mlx5e_tx_notify_hw(sq, wqe); +} + +static int mlx5e_create_rq(struct mlx5e_channel *c, + struct mlx5e_rq_param *param, + struct mlx5e_rq *rq) +{ + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + void *rqc = param->rqc; + void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + int wq_sz; + int err; + int i; + + err = mlx5_wq_ll_create(mdev, ¶m->wq, rqc_wq, &rq->wq, + &rq->wq_ctrl); + if (err) + return err; + + rq->wq.db = &rq->wq.db[MLX5_RCV_DBR]; + + wq_sz = mlx5_wq_ll_get_size(&rq->wq); + rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, + cpu_to_node(c->cpu)); + if (!rq->skb) { + err = -ENOMEM; + goto err_rq_wq_destroy; + } + + rq->wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz : + priv->netdev->mtu + ETH_HLEN + VLAN_HLEN; + + for (i = 0; i < wq_sz; i++) { + struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); + + wqe->data.lkey = c->mkey_be; + wqe->data.byte_count = cpu_to_be32(rq->wqe_sz); + } + + rq->pdev = c->pdev; + rq->netdev = c->netdev; + rq->channel = c; + rq->ix = c->ix; + + return 0; + +err_rq_wq_destroy: + mlx5_wq_destroy(&rq->wq_ctrl); + + return err; +} + +static void mlx5e_destroy_rq(struct mlx5e_rq *rq) +{ + kfree(rq->skb); + mlx5_wq_destroy(&rq->wq_ctrl); +} + +static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *rqc; + void *wq; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_rq_in) + + sizeof(u64) * rq->wq_ctrl.buf.npages; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); + wq = MLX5_ADDR_OF(rqc, rqc, wq); + + memcpy(rqc, param->rqc, sizeof(param->rqc)); + + MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(rqc, rqc, flush_in_error_en, 1); + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); + MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - + PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); + + mlx5_fill_page_array(&rq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_create_rq(mdev, in, inlen, &rq->rqn); + + kvfree(in); + + return err; +} + +static int mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + MLX5_SET(modify_rq_in, in, rq_state, curr_state); + MLX5_SET(rqc, rqc, state, next_state); + + err = mlx5_modify_rq(mdev, rq->rqn, in, inlen); + + kvfree(in); + + return err; +} + +static void mlx5e_disable_rq(struct mlx5e_rq *rq) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + mlx5_destroy_rq(mdev, rq->rqn); +} + +static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_wq_ll *wq = &rq->wq; + int i; + + for (i = 0; i < 1000; i++) { + if (wq->cur_sz >= priv->params.min_rx_wqes) + return 0; + + msleep(20); + } + + return -ETIMEDOUT; +} + +static int mlx5e_open_rq(struct mlx5e_channel *c, + struct mlx5e_rq_param *param, + struct mlx5e_rq *rq) +{ + int err; + + err = mlx5e_create_rq(c, param, rq); + if (err) + return err; + + err = mlx5e_enable_rq(rq, param); + if (err) + goto err_destroy_rq; + + err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + if (err) + goto err_disable_rq; + + set_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); + mlx5e_send_nop(&c->sq[0]); /* trigger mlx5e_post_rx_wqes() */ + + return 0; + +err_disable_rq: + mlx5e_disable_rq(rq); +err_destroy_rq: + mlx5e_destroy_rq(rq); + + return err; +} + +static void mlx5e_close_rq(struct mlx5e_rq *rq) +{ + clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); + napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ + + mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); + while (!mlx5_wq_ll_is_empty(&rq->wq)) + msleep(20); + + /* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */ + napi_synchronize(&rq->channel->napi); + + mlx5e_disable_rq(rq); + mlx5e_destroy_rq(rq); +} + +static void mlx5e_free_sq_db(struct mlx5e_sq *sq) +{ + kfree(sq->dma_fifo); + kfree(sq->skb); +} + +static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + + sq->skb = kzalloc_node(wq_sz * sizeof(*sq->skb), GFP_KERNEL, numa); + sq->dma_fifo = kzalloc_node(df_sz * sizeof(*sq->dma_fifo), GFP_KERNEL, + numa); + + if (!sq->skb || !sq->dma_fifo) { + mlx5e_free_sq_db(sq); + return -ENOMEM; + } + + sq->dma_fifo_mask = df_sz - 1; + + return 0; +} + +static int mlx5e_create_sq(struct mlx5e_channel *c, + int tc, + struct mlx5e_sq_param *param, + struct mlx5e_sq *sq) +{ + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *sqc = param->sqc; + void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); + int err; + + err = mlx5_alloc_map_uar(mdev, &sq->uar); + if (err) + return err; + + err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, + &sq->wq_ctrl); + if (err) + goto err_unmap_free_uar; + + sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; + sq->uar_map = sq->uar.map; + sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; + + if (mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu))) + goto err_sq_wq_destroy; + + sq->txq = netdev_get_tx_queue(priv->netdev, + c->ix + tc * priv->params.num_channels); + + sq->pdev = c->pdev; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->tc = tc; + + return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + +err_unmap_free_uar: + mlx5_unmap_free_uar(mdev, &sq->uar); + + return err; +} + +static void mlx5e_destroy_sq(struct mlx5e_sq *sq) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5e_priv *priv = c->priv; + + mlx5e_free_sq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); + mlx5_unmap_free_uar(priv->mdev, &sq->uar); +} + +static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *sqc; + void *wq; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + + sizeof(u64) * sq->wq_ctrl.buf.npages; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + wq = MLX5_ADDR_OF(sqc, sqc, wq); + + memcpy(sqc, param->sqc, sizeof(param->sqc)); + + MLX5_SET(sqc, sqc, user_index, sq->tc); + MLX5_SET(sqc, sqc, tis_num_0, priv->tisn[sq->tc]); + MLX5_SET(sqc, sqc, cqn, c->sq[sq->tc].cq.mcq.cqn); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, tis_lst_sz, 1); + MLX5_SET(sqc, sqc, flush_in_error_en, 1); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, uar_page, sq->uar.index); + MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - + PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); + + mlx5_fill_page_array(&sq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_create_sq(mdev, in, inlen, &sq->sqn); + + kvfree(in); + + return err; +} + +static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *sqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_sq_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + + MLX5_SET(modify_sq_in, in, sq_state, curr_state); + MLX5_SET(sqc, sqc, state, next_state); + + err = mlx5_modify_sq(mdev, sq->sqn, in, inlen); + + kvfree(in); + + return err; +} + +static void mlx5e_disable_sq(struct mlx5e_sq *sq) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + mlx5_destroy_sq(mdev, sq->sqn); +} + +static int mlx5e_open_sq(struct mlx5e_channel *c, + int tc, + struct mlx5e_sq_param *param, + struct mlx5e_sq *sq) +{ + int err; + + err = mlx5e_create_sq(c, tc, param, sq); + if (err) + return err; + + err = mlx5e_enable_sq(sq, param); + if (err) + goto err_destroy_sq; + + err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); + if (err) + goto err_disable_sq; + + set_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); + netdev_tx_reset_queue(sq->txq); + netif_tx_start_queue(sq->txq); + + return 0; + +err_disable_sq: + mlx5e_disable_sq(sq); +err_destroy_sq: + mlx5e_destroy_sq(sq); + + return err; +} + +static inline void netif_tx_disable_queue(struct netdev_queue *txq) +{ + __netif_tx_lock_bh(txq); + netif_tx_stop_queue(txq); + __netif_tx_unlock_bh(txq); +} + +static void mlx5e_close_sq(struct mlx5e_sq *sq) +{ + clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); + napi_synchronize(&sq->channel->napi); /* prevent netif_tx_wake_queue */ + netif_tx_disable_queue(sq->txq); + + /* ensure hw is notified of all pending wqes */ + if (mlx5e_sq_has_room_for(sq, 1)) + mlx5e_send_nop(sq); + + mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); + while (sq->cc != sq->pc) /* wait till sq is empty */ + msleep(20); + + /* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */ + napi_synchronize(&sq->channel->napi); + + mlx5e_disable_sq(sq); + mlx5e_destroy_sq(sq); +} + +static int mlx5e_create_cq(struct mlx5e_channel *c, + struct mlx5e_cq_param *param, + struct mlx5e_cq *cq) +{ + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_cq *mcq = &cq->mcq; + int eqn_not_used; + int irqn; + int err; + u32 i; + + param->wq.numa = cpu_to_node(c->cpu); + param->eq_ix = c->ix; + + err = mlx5_cqwq_create(mdev, ¶m->wq, param->cqc, &cq->wq, + &cq->wq_ctrl); + if (err) + return err; + + mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn); + + cq->napi = &c->napi; + + mcq->cqe_sz = 64; + mcq->set_ci_db = cq->wq_ctrl.db.db; + mcq->arm_db = cq->wq_ctrl.db.db + 1; + *mcq->set_ci_db = 0; + *mcq->arm_db = 0; + mcq->vector = param->eq_ix; + mcq->comp = mlx5e_completion_event; + mcq->event = mlx5e_cq_error_event; + mcq->irqn = irqn; + mcq->uar = &priv->cq_uar; + + for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); + + cqe->op_own = 0xf1; + } + + cq->channel = c; + + return 0; +} + +static void mlx5e_destroy_cq(struct mlx5e_cq *cq) +{ + mlx5_wq_destroy(&cq->wq_ctrl); +} + +static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) +{ + struct mlx5e_channel *c = cq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_cq *mcq = &cq->mcq; + + void *in; + void *cqc; + int inlen; + int irqn_not_used; + int eqn; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * cq->wq_ctrl.buf.npages; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + + memcpy(cqc, param->cqc, sizeof(param->cqc)); + + mlx5_fill_page_array(&cq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); + + mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used); + + MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, uar_page, mcq->uar->index); + MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - + PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); + + err = mlx5_core_create_cq(mdev, mcq, in, inlen); + + kvfree(in); + + if (err) + return err; + + mlx5e_cq_arm(cq); + + return 0; +} + +static void mlx5e_disable_cq(struct mlx5e_cq *cq) +{ + struct mlx5e_channel *c = cq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + mlx5_core_destroy_cq(mdev, &cq->mcq); +} + +static int mlx5e_open_cq(struct mlx5e_channel *c, + struct mlx5e_cq_param *param, + struct mlx5e_cq *cq, + u16 moderation_usecs, + u16 moderation_frames) +{ + int err; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + err = mlx5e_create_cq(c, param, cq); + if (err) + return err; + + err = mlx5e_enable_cq(cq, param); + if (err) + goto err_destroy_cq; + + err = mlx5_core_modify_cq_moderation(mdev, &cq->mcq, + moderation_usecs, + moderation_frames); + if (err) + goto err_destroy_cq; + + return 0; + +err_destroy_cq: + mlx5e_destroy_cq(cq); + + return err; +} + +static void mlx5e_close_cq(struct mlx5e_cq *cq) +{ + mlx5e_disable_cq(cq); + mlx5e_destroy_cq(cq); +} + +static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) +{ + return cpumask_first(priv->mdev->priv.irq_info[ix].mask); +} + +static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, + struct mlx5e_channel_param *cparam) +{ + struct mlx5e_priv *priv = c->priv; + int err; + int tc; + + for (tc = 0; tc < c->num_tc; tc++) { + err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq, + priv->params.tx_cq_moderation_usec, + priv->params.tx_cq_moderation_pkts); + if (err) + goto err_close_tx_cqs; + + c->sq[tc].cq.sqrq = &c->sq[tc]; + } + + return 0; + +err_close_tx_cqs: + for (tc--; tc >= 0; tc--) + mlx5e_close_cq(&c->sq[tc].cq); + + return err; +} + +static void mlx5e_close_tx_cqs(struct mlx5e_channel *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_close_cq(&c->sq[tc].cq); +} + +static int mlx5e_open_sqs(struct mlx5e_channel *c, + struct mlx5e_channel_param *cparam) +{ + int err; + int tc; + + for (tc = 0; tc < c->num_tc; tc++) { + err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]); + if (err) + goto err_close_sqs; + } + + return 0; + +err_close_sqs: + for (tc--; tc >= 0; tc--) + mlx5e_close_sq(&c->sq[tc]); + + return err; +} + +static void mlx5e_close_sqs(struct mlx5e_channel *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_close_sq(&c->sq[tc]); +} + +static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, + struct mlx5e_channel_param *cparam, + struct mlx5e_channel **cp) +{ + struct net_device *netdev = priv->netdev; + int cpu = mlx5e_get_cpu(priv, ix); + struct mlx5e_channel *c; + int err; + + c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); + if (!c) + return -ENOMEM; + + c->priv = priv; + c->ix = ix; + c->cpu = cpu; + c->pdev = &priv->mdev->pdev->dev; + c->netdev = priv->netdev; + c->mkey_be = cpu_to_be32(priv->mr.key); + c->num_tc = priv->num_tc; + + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); + + err = mlx5e_open_tx_cqs(c, cparam); + if (err) + goto err_napi_del; + + err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq, + priv->params.rx_cq_moderation_usec, + priv->params.rx_cq_moderation_pkts); + if (err) + goto err_close_tx_cqs; + c->rq.cq.sqrq = &c->rq; + + napi_enable(&c->napi); + + err = mlx5e_open_sqs(c, cparam); + if (err) + goto err_disable_napi; + + err = mlx5e_open_rq(c, &cparam->rq, &c->rq); + if (err) + goto err_close_sqs; + + netif_set_xps_queue(netdev, get_cpu_mask(c->cpu), ix); + *cp = c; + + return 0; + +err_close_sqs: + mlx5e_close_sqs(c); + +err_disable_napi: + napi_disable(&c->napi); + mlx5e_close_cq(&c->rq.cq); + +err_close_tx_cqs: + mlx5e_close_tx_cqs(c); + +err_napi_del: + netif_napi_del(&c->napi); + kfree(c); + + return err; +} + +static void mlx5e_close_channel(struct mlx5e_channel *c) +{ + mlx5e_close_rq(&c->rq); + mlx5e_close_sqs(c); + napi_disable(&c->napi); + mlx5e_close_cq(&c->rq.cq); + mlx5e_close_tx_cqs(c); + netif_napi_del(&c->napi); + kfree(c); +} + +static void mlx5e_build_rq_param(struct mlx5e_priv *priv, + struct mlx5e_rq_param *param) +{ + void *rqc = param->rqc; + void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); + MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); + MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); + MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); + MLX5_SET(wq, wq, pd, priv->pdn); + + param->wq.numa = dev_to_node(&priv->mdev->pdev->dev); + param->wq.linear = 1; +} + +static void mlx5e_build_sq_param(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); + MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); + MLX5_SET(wq, wq, pd, priv->pdn); + + param->wq.numa = dev_to_node(&priv->mdev->pdev->dev); +} + +static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index); +} + +static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size); + + mlx5e_build_common_cq_param(priv, param); +} + +static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); + + mlx5e_build_common_cq_param(priv, param); +} + +static void mlx5e_build_channel_param(struct mlx5e_priv *priv, + struct mlx5e_channel_param *cparam) +{ + memset(cparam, 0, sizeof(*cparam)); + + mlx5e_build_rq_param(priv, &cparam->rq); + mlx5e_build_sq_param(priv, &cparam->sq); + mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); + mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); +} + +static int mlx5e_open_channels(struct mlx5e_priv *priv) +{ + struct mlx5e_channel_param cparam; + int err; + int i; + int j; + + priv->channel = kcalloc(priv->params.num_channels, + sizeof(struct mlx5e_channel *), GFP_KERNEL); + if (!priv->channel) + return -ENOMEM; + + mlx5e_build_channel_param(priv, &cparam); + for (i = 0; i < priv->params.num_channels; i++) { + err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]); + if (err) + goto err_close_channels; + } + + for (j = 0; j < priv->params.num_channels; j++) { + err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq); + if (err) + goto err_close_channels; + } + + return 0; + +err_close_channels: + for (i--; i >= 0; i--) + mlx5e_close_channel(priv->channel[i]); + + kfree(priv->channel); + + return err; +} + +static void mlx5e_close_channels(struct mlx5e_priv *priv) +{ + int i; + + for (i = 0; i < priv->params.num_channels; i++) + mlx5e_close_channel(priv->channel[i]); + + kfree(priv->channel); +} + +static int mlx5e_open_tis(struct mlx5e_priv *priv, int tc) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(create_tis_in)]; + void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + memset(in, 0, sizeof(in)); + + MLX5_SET(tisc, tisc, prio, tc); + + return mlx5_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]); +} + +static void mlx5e_close_tis(struct mlx5e_priv *priv, int tc) +{ + mlx5_destroy_tis(priv->mdev, priv->tisn[tc]); +} + +static int mlx5e_open_tises(struct mlx5e_priv *priv) +{ + int num_tc = priv->num_tc; + int err; + int tc; + + for (tc = 0; tc < num_tc; tc++) { + err = mlx5e_open_tis(priv, tc); + if (err) + goto err_close_tises; + } + + return 0; + +err_close_tises: + for (tc--; tc >= 0; tc--) + mlx5e_close_tis(priv, tc); + + return err; +} + +static void mlx5e_close_tises(struct mlx5e_priv *priv) +{ + int num_tc = priv->num_tc; + int tc; + + for (tc = 0; tc < num_tc; tc++) + mlx5e_close_tis(priv, tc); +} + +static int mlx5e_open_rqt(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 *in; + u32 out[MLX5_ST_SZ_DW(create_rqt_out)]; + void *rqtc; + int inlen; + int err; + int sz; + int i; + + sz = 1 << priv->params.rx_hash_log_tbl_sz; + + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); + + MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); + MLX5_SET(rqtc, rqtc, rqt_max_size, sz); + + for (i = 0; i < sz; i++) { + int ix = i % priv->params.num_channels; + + MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn); + } + + MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out)); + if (!err) + priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn); + + kvfree(in); + + return err; +} + +static void mlx5e_close_rqt(struct mlx5e_priv *priv) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn); + + mlx5_cmd_exec_check_status(priv->mdev, in, sizeof(in), out, + sizeof(out)); +} + +static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, int tt) +{ + void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + +#define ROUGH_MAX_L2_L3_HDR_SZ 256 + +#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP) + +#define MLX5_HASH_ALL (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_L4_SPORT |\ + MLX5_HASH_FIELD_SEL_L4_DPORT) + + if (priv->params.lro_en) { + MLX5_SET(tirc, tirc, lro_enable_mask, + MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | + MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); + MLX5_SET(tirc, tirc, lro_max_ip_payload_size, + (priv->params.lro_wqe_sz - + ROUGH_MAX_L2_L3_HDR_SZ) >> 8); + MLX5_SET(tirc, tirc, lro_timeout_period_usecs, + MLX5_CAP_ETH(priv->mdev, + lro_timer_supported_periods[3])); + } + + switch (tt) { + case MLX5E_TT_ANY: + MLX5_SET(tirc, tirc, disp_type, + MLX5_TIRC_DISP_TYPE_DIRECT); + MLX5_SET(tirc, tirc, inline_rqn, + priv->channel[0]->rq.rqn); + break; + default: + MLX5_SET(tirc, tirc, disp_type, + MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, + priv->rqtn); + MLX5_SET(tirc, tirc, rx_hash_fn, + MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ); + MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + netdev_rss_key_fill(MLX5_ADDR_OF(tirc, tirc, + rx_hash_toeplitz_key), + MLX5_FLD_SZ_BYTES(tirc, + rx_hash_toeplitz_key)); + break; + } + + switch (tt) { + case MLX5E_TT_IPV4_TCP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_TCP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_ALL); + break; + + case MLX5E_TT_IPV6_TCP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_TCP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_ALL); + break; + + case MLX5E_TT_IPV4_UDP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_UDP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_ALL); + break; + + case MLX5E_TT_IPV6_UDP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_UDP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_ALL); + break; + + case MLX5E_TT_IPV4: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP); + break; + + case MLX5E_TT_IPV6: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP); + break; + } +} + +static int mlx5e_open_tir(struct mlx5e_priv *priv, int tt) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 *in; + void *tirc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + + mlx5e_build_tir_ctx(priv, tirc, tt); + + err = mlx5_create_tir(mdev, in, inlen, &priv->tirn[tt]); + + kvfree(in); + + return err; +} + +static void mlx5e_close_tir(struct mlx5e_priv *priv, int tt) +{ + mlx5_destroy_tir(priv->mdev, priv->tirn[tt]); +} + +static int mlx5e_open_tirs(struct mlx5e_priv *priv) +{ + int err; + int i; + + for (i = 0; i < MLX5E_NUM_TT; i++) { + err = mlx5e_open_tir(priv, i); + if (err) + goto err_close_tirs; + } + + return 0; + +err_close_tirs: + for (i--; i >= 0; i--) + mlx5e_close_tir(priv, i); + + return err; +} + +static void mlx5e_close_tirs(struct mlx5e_priv *priv) +{ + int i; + + for (i = 0; i < MLX5E_NUM_TT; i++) + mlx5e_close_tir(priv, i); +} + +int mlx5e_open_locked(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int actual_mtu; + int num_txqs; + int err; + + num_txqs = roundup_pow_of_two(priv->params.num_channels) * + priv->params.num_tc; + netif_set_real_num_tx_queues(netdev, num_txqs); + netif_set_real_num_rx_queues(netdev, priv->params.num_channels); + + err = mlx5_set_port_mtu(mdev, netdev->mtu); + if (err) { + netdev_err(netdev, "%s: mlx5_set_port_mtu failed %d\n", + __func__, err); + return err; + } + + err = mlx5_query_port_oper_mtu(mdev, &actual_mtu); + if (err) { + netdev_err(netdev, "%s: mlx5_query_port_oper_mtu failed %d\n", + __func__, err); + return err; + } + + if (actual_mtu != netdev->mtu) + netdev_warn(netdev, "%s: Failed to set MTU to %d\n", + __func__, netdev->mtu); + + netdev->mtu = actual_mtu; + + err = mlx5e_open_tises(priv); + if (err) { + netdev_err(netdev, "%s: mlx5e_open_tises failed, %d\n", + __func__, err); + return err; + } + + err = mlx5e_open_channels(priv); + if (err) { + netdev_err(netdev, "%s: mlx5e_open_channels failed, %d\n", + __func__, err); + goto err_close_tises; + } + + err = mlx5e_open_rqt(priv); + if (err) { + netdev_err(netdev, "%s: mlx5e_open_rqt failed, %d\n", + __func__, err); + goto err_close_channels; + } + + err = mlx5e_open_tirs(priv); + if (err) { + netdev_err(netdev, "%s: mlx5e_open_tir failed, %d\n", + __func__, err); + goto err_close_rqls; + } + + err = mlx5e_open_flow_table(priv); + if (err) { + netdev_err(netdev, "%s: mlx5e_open_flow_table failed, %d\n", + __func__, err); + goto err_close_tirs; + } + + err = mlx5e_add_all_vlan_rules(priv); + if (err) { + netdev_err(netdev, "%s: mlx5e_add_all_vlan_rules failed, %d\n", + __func__, err); + goto err_close_flow_table; + } + + mlx5e_init_eth_addr(priv); + + set_bit(MLX5E_STATE_OPENED, &priv->state); + + mlx5e_update_carrier(priv); + mlx5e_set_rx_mode_core(priv); + + schedule_delayed_work(&priv->update_stats_work, 0); + return 0; + +err_close_flow_table: + mlx5e_close_flow_table(priv); + +err_close_tirs: + mlx5e_close_tirs(priv); + +err_close_rqls: + mlx5e_close_rqt(priv); + +err_close_channels: + mlx5e_close_channels(priv); + +err_close_tises: + mlx5e_close_tises(priv); + + return err; +} + +static int mlx5e_open(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_open_locked(netdev); + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_close_locked(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + clear_bit(MLX5E_STATE_OPENED, &priv->state); + + mlx5e_set_rx_mode_core(priv); + mlx5e_del_all_vlan_rules(priv); + netif_carrier_off(priv->netdev); + mlx5e_close_flow_table(priv); + mlx5e_close_tirs(priv); + mlx5e_close_rqt(priv); + mlx5e_close_channels(priv); + mlx5e_close_tises(priv); + + return 0; +} + +static int mlx5e_close(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_close_locked(netdev); + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_update_priv_params(struct mlx5e_priv *priv, + struct mlx5e_params *new_params) +{ + int err = 0; + int was_opened; + + WARN_ON(!mutex_is_locked(&priv->state_lock)); + + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (was_opened) + mlx5e_close_locked(priv->netdev); + + priv->params = *new_params; + + if (was_opened) + err = mlx5e_open_locked(priv->netdev); + + return err; +} + +static struct rtnl_link_stats64 * +mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_vport_stats *vstats = &priv->stats.vport; + + stats->rx_packets = vstats->rx_packets; + stats->rx_bytes = vstats->rx_bytes; + stats->tx_packets = vstats->tx_packets; + stats->tx_bytes = vstats->tx_bytes; + stats->multicast = vstats->rx_multicast_packets + + vstats->tx_multicast_packets; + stats->tx_errors = vstats->tx_error_packets; + stats->rx_errors = vstats->rx_error_packets; + stats->tx_dropped = vstats->tx_queue_dropped; + stats->rx_crc_errors = 0; + stats->rx_length_errors = 0; + + return stats; +} + +static void mlx5e_set_rx_mode(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + schedule_work(&priv->set_rx_mode_work); +} + +static int mlx5e_set_mac(struct net_device *netdev, void *addr) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct sockaddr *saddr = addr; + + if (!is_valid_ether_addr(saddr->sa_data)) + return -EADDRNOTAVAIL; + + netif_addr_lock_bh(netdev); + ether_addr_copy(netdev->dev_addr, saddr->sa_data); + netif_addr_unlock_bh(netdev); + + schedule_work(&priv->set_rx_mode_work); + + return 0; +} + +static int mlx5e_set_features(struct net_device *netdev, + netdev_features_t features) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + netdev_features_t changes = features ^ netdev->features; + struct mlx5e_params new_params; + bool update_params = false; + + mutex_lock(&priv->state_lock); + new_params = priv->params; + + if (changes & NETIF_F_LRO) { + new_params.lro_en = !!(features & NETIF_F_LRO); + update_params = true; + } + + if (update_params) + mlx5e_update_priv_params(priv, &new_params); + + if (changes & NETIF_F_HW_VLAN_CTAG_FILTER) { + if (features & NETIF_F_HW_VLAN_CTAG_FILTER) + mlx5e_enable_vlan_filter(priv); + else + mlx5e_disable_vlan_filter(priv); + } + + mutex_unlock(&priv->state_lock); + + return 0; +} + +static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int max_mtu; + int err = 0; + + err = mlx5_query_port_max_mtu(mdev, &max_mtu); + if (err) + return err; + + if (new_mtu > max_mtu || new_mtu < MLX5E_PARAMS_MIN_MTU) { + netdev_err(netdev, "%s: Bad MTU size, mtu must be [%d-%d]\n", + __func__, MLX5E_PARAMS_MIN_MTU, max_mtu); + return -EINVAL; + } + + mutex_lock(&priv->state_lock); + netdev->mtu = new_mtu; + err = mlx5e_update_priv_params(priv, &priv->params); + mutex_unlock(&priv->state_lock); + + return err; +} + +static struct net_device_ops mlx5e_netdev_ops = { + .ndo_open = mlx5e_open, + .ndo_stop = mlx5e_close, + .ndo_start_xmit = mlx5e_xmit, + .ndo_get_stats64 = mlx5e_get_stats, + .ndo_set_rx_mode = mlx5e_set_rx_mode, + .ndo_set_mac_address = mlx5e_set_mac, + .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, + .ndo_set_features = mlx5e_set_features, + .ndo_change_mtu = mlx5e_change_mtu, +}; + +static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) +{ + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return -ENOTSUPP; + if (!MLX5_CAP_GEN(mdev, eth_net_offloads) || + !MLX5_CAP_GEN(mdev, nic_flow_table) || + !MLX5_CAP_ETH(mdev, csum_cap) || + !MLX5_CAP_ETH(mdev, max_lso_cap) || + !MLX5_CAP_ETH(mdev, vlan_cap) || + !MLX5_CAP_ETH(mdev, rss_ind_tbl_cap)) { + mlx5_core_warn(mdev, + "Not creating net device, some required device capabilities are missing\n"); + return -ENOTSUPP; + } + return 0; +} + +static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, + struct net_device *netdev, + int num_comp_vectors) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + priv->params.log_sq_size = + MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + priv->params.log_rq_size = + MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + priv->params.rx_cq_moderation_usec = + MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; + priv->params.rx_cq_moderation_pkts = + MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; + priv->params.tx_cq_moderation_usec = + MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; + priv->params.tx_cq_moderation_pkts = + MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; + priv->params.min_rx_wqes = + MLX5E_PARAMS_DEFAULT_MIN_RX_WQES; + priv->params.rx_hash_log_tbl_sz = + (order_base_2(num_comp_vectors) > + MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ? + order_base_2(num_comp_vectors) : + MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ; + priv->params.num_tc = 1; + priv->params.default_vlan_prio = 0; + + priv->params.lro_en = false && !!MLX5_CAP_ETH(priv->mdev, lro_cap); + priv->params.lro_wqe_sz = + MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + + priv->mdev = mdev; + priv->netdev = netdev; + priv->params.num_channels = num_comp_vectors; + priv->order_base_2_num_channels = order_base_2(num_comp_vectors); + priv->queue_mapping_channel_mask = + roundup_pow_of_two(num_comp_vectors) - 1; + priv->num_tc = priv->params.num_tc; + priv->default_vlan_prio = priv->params.default_vlan_prio; + + spin_lock_init(&priv->async_events_spinlock); + mutex_init(&priv->state_lock); + + INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); + INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); + INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); +} + +static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5_query_vport_mac_address(priv->mdev, netdev->dev_addr); +} + +static void mlx5e_build_netdev(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + SET_NETDEV_DEV(netdev, &mdev->pdev->dev); + + if (priv->num_tc > 1) { + mlx5e_netdev_ops.ndo_select_queue = mlx5e_select_queue; + mlx5e_netdev_ops.ndo_start_xmit = mlx5e_xmit_multi_tc; + } + + netdev->netdev_ops = &mlx5e_netdev_ops; + netdev->watchdog_timeo = 15 * HZ; + + netdev->ethtool_ops = &mlx5e_ethtool_ops; + + netdev->vlan_features |= NETIF_F_IP_CSUM; + netdev->vlan_features |= NETIF_F_IPV6_CSUM; + netdev->vlan_features |= NETIF_F_GRO; + netdev->vlan_features |= NETIF_F_TSO; + netdev->vlan_features |= NETIF_F_TSO6; + netdev->vlan_features |= NETIF_F_RXCSUM; + netdev->vlan_features |= NETIF_F_RXHASH; + + if (!!MLX5_CAP_ETH(mdev, lro_cap)) + netdev->vlan_features |= NETIF_F_LRO; + + netdev->hw_features = netdev->vlan_features; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + + netdev->features = netdev->hw_features; + if (!priv->params.lro_en) + netdev->features &= ~NETIF_F_LRO; + + netdev->features |= NETIF_F_HIGHDMA; + + netdev->priv_flags |= IFF_UNICAST_FLT; + + mlx5e_set_netdev_dev_addr(netdev); +} + +static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, + struct mlx5_core_mr *mr) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_create_mkey_mbox_in *in; + int err; + + in = mlx5_vzalloc(sizeof(*in)); + if (!in) + return -ENOMEM; + + in->seg.flags = MLX5_PERM_LOCAL_WRITE | + MLX5_PERM_LOCAL_READ | + MLX5_ACCESS_MODE_PA; + in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + + err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL, + NULL); + + kvfree(in); + + return err; +} + +static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) +{ + struct net_device *netdev; + struct mlx5e_priv *priv; + int ncv = mdev->priv.eq_table.num_comp_vectors; + int err; + + if (mlx5e_check_required_hca_cap(mdev)) + return NULL; + + netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), + roundup_pow_of_two(ncv) * MLX5E_MAX_NUM_TC, + ncv); + if (!netdev) { + mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); + return NULL; + } + + mlx5e_build_netdev_priv(mdev, netdev, ncv); + mlx5e_build_netdev(netdev); + + netif_carrier_off(netdev); + + priv = netdev_priv(netdev); + + err = mlx5_alloc_map_uar(mdev, &priv->cq_uar); + if (err) { + netdev_err(netdev, "%s: mlx5_alloc_map_uar failed, %d\n", + __func__, err); + goto err_free_netdev; + } + + err = mlx5_core_alloc_pd(mdev, &priv->pdn); + if (err) { + netdev_err(netdev, "%s: mlx5_core_alloc_pd failed, %d\n", + __func__, err); + goto err_unmap_free_uar; + } + + err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr); + if (err) { + netdev_err(netdev, "%s: mlx5e_create_mkey failed, %d\n", + __func__, err); + goto err_dealloc_pd; + } + + err = register_netdev(netdev); + if (err) { + netdev_err(netdev, "%s: register_netdev failed, %d\n", + __func__, err); + goto err_destroy_mkey; + } + + mlx5e_enable_async_events(priv); + + return priv; + +err_destroy_mkey: + mlx5_core_destroy_mkey(mdev, &priv->mr); + +err_dealloc_pd: + mlx5_core_dealloc_pd(mdev, priv->pdn); + +err_unmap_free_uar: + mlx5_unmap_free_uar(mdev, &priv->cq_uar); + +err_free_netdev: + free_netdev(netdev); + + return NULL; +} + +static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + struct net_device *netdev = priv->netdev; + + unregister_netdev(netdev); + mlx5_core_destroy_mkey(priv->mdev, &priv->mr); + mlx5_core_dealloc_pd(priv->mdev, priv->pdn); + mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); + mlx5e_disable_async_events(priv); + flush_scheduled_work(); + free_netdev(netdev); +} + +static void *mlx5e_get_netdev(void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + + return priv->netdev; +} + +static struct mlx5_interface mlx5e_interface = { + .add = mlx5e_create_netdev, + .remove = mlx5e_destroy_netdev, + .event = mlx5e_async_event, + .protocol = MLX5_INTERFACE_PROTOCOL_ETH, + .get_dev = mlx5e_get_netdev, +}; + +void mlx5e_init(void) +{ + mlx5_register_interface(&mlx5e_interface); +} + +void mlx5e_cleanup(void) +{ + mlx5_unregister_interface(&mlx5e_interface); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e7b7b123a128..1c37f587426d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -48,10 +48,6 @@ #include #include "mlx5_core.h" -#define DRIVER_NAME "mlx5_core" -#define DRIVER_VERSION "3.0" -#define DRIVER_RELDATE "January 2015" - MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver"); MODULE_LICENSE("Dual BSD/GPL"); @@ -614,6 +610,61 @@ clean: return err; } +#ifdef CONFIG_MLX5_CORE_EN +static int mlx5_core_set_issi(struct mlx5_core_dev *dev) +{ + u32 query_in[MLX5_ST_SZ_DW(query_issi_in)]; + u32 query_out[MLX5_ST_SZ_DW(query_issi_out)]; + u32 set_in[MLX5_ST_SZ_DW(set_issi_in)]; + u32 set_out[MLX5_ST_SZ_DW(set_issi_out)]; + int err; + u32 sup_issi; + + memset(query_in, 0, sizeof(query_in)); + memset(query_out, 0, sizeof(query_out)); + + MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI); + + err = mlx5_cmd_exec_check_status(dev, query_in, sizeof(query_in), + query_out, sizeof(query_out)); + if (err) { + if (((struct mlx5_outbox_hdr *)query_out)->status == + MLX5_CMD_STAT_BAD_OP_ERR) { + pr_debug("Only ISSI 0 is supported\n"); + return 0; + } + + pr_err("failed to query ISSI\n"); + return err; + } + + sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0); + + if (sup_issi & (1 << 1)) { + memset(set_in, 0, sizeof(set_in)); + memset(set_out, 0, sizeof(set_out)); + + MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI); + MLX5_SET(set_issi_in, set_in, current_issi, 1); + + err = mlx5_cmd_exec_check_status(dev, set_in, sizeof(set_in), + set_out, sizeof(set_out)); + if (err) { + pr_err("failed to set ISSI=1\n"); + return err; + } + + dev->issi = 1; + + return 0; + } else if (sup_issi & (1 << 0)) { + return 0; + } + + return -ENOTSUPP; +} +#endif + static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) { struct mlx5_priv *priv = &dev->priv; @@ -676,6 +727,14 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) goto err_pagealloc_cleanup; } +#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_core_set_issi(dev); + if (err) { + dev_err(&pdev->dev, "failed to set issi\n"); + goto err_disable_hca; + } +#endif + err = mlx5_satisfy_startup_pages(dev, 1); if (err) { dev_err(&pdev->dev, "failed to allocate boot pages\n"); @@ -1084,6 +1143,10 @@ static int __init init(void) if (err) goto err_health; +#ifdef CONFIG_MLX5_CORE_EN + mlx5e_init(); +#endif + return 0; err_health: @@ -1096,6 +1159,9 @@ err_debug: static void __exit cleanup(void) { +#ifdef CONFIG_MLX5_CORE_EN + mlx5e_cleanup(); +#endif pci_unregister_driver(&mlx5_core_driver); mlx5_health_cleanup(); destroy_workqueue(mlx5_core_wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index b986f1c258bc..6983c1047255 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -37,6 +37,10 @@ #include #include +#define DRIVER_NAME "mlx5_core" +#define DRIVER_VERSION "3.0-1" +#define DRIVER_RELDATE "January 2015" + extern int mlx5_core_debug_mask; #define mlx5_core_dbg(dev, format, ...) \ @@ -78,4 +82,7 @@ int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); +void mlx5e_init(void); +void mlx5e_cleanup(void); + #endif /* __MLX5_CORE_H__ */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 4ee52bf1f959..b288c538347a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1153,4 +1153,23 @@ enum mlx5_cap_type { #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap) +enum { + MLX5_CMD_STAT_OK = 0x0, + MLX5_CMD_STAT_INT_ERR = 0x1, + MLX5_CMD_STAT_BAD_OP_ERR = 0x2, + MLX5_CMD_STAT_BAD_PARAM_ERR = 0x3, + MLX5_CMD_STAT_BAD_SYS_STATE_ERR = 0x4, + MLX5_CMD_STAT_BAD_RES_ERR = 0x5, + MLX5_CMD_STAT_RES_BUSY = 0x6, + MLX5_CMD_STAT_LIM_ERR = 0x8, + MLX5_CMD_STAT_BAD_RES_STATE_ERR = 0x9, + MLX5_CMD_STAT_IX_ERR = 0xa, + MLX5_CMD_STAT_NO_RES_ERR = 0xf, + MLX5_CMD_STAT_BAD_INP_LEN_ERR = 0x50, + MLX5_CMD_STAT_BAD_OUTP_LEN_ERR = 0x51, + MLX5_CMD_STAT_BAD_QP_STATE_ERR = 0x10, + MLX5_CMD_STAT_BAD_PKT_ERR = 0x30, + MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR = 0x40, +}; + #endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 51738472657e..7fa26f03acc1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -489,6 +489,7 @@ struct mlx5_core_dev { struct mlx5_priv priv; struct mlx5_profile *profile; atomic_t num_qps; + u32 issi; }; struct mlx5_db { -- cgit v1.2.3 From c66fa19c405a36673d4aab13658c8246413d5c0f Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 31 May 2015 09:30:16 +0300 Subject: net/mlx4: Add EQ pool Previously, mlx4_en allocated EQs and used them exclusively. This affected RoCE performance, as applications which are events sensitive were limited to use only the legacy EQs. Change that by introducing an EQ pool. This pool is managed by mlx4_core. EQs are assigned to ports (when there are limited number of EQs, multiple ports could be assigned to the same EQs). An exception to this rule is the ASYNC EQ which handles various events. Legacy EQs are completely removed as all EQs could be shared. When a consumer (mlx4_ib/mlx4_en) requests an EQ, it asks for EQ serving on a specific port. The core driver calculates which EQ should be assigned to that request. Because IRQs are shared between IB and Ethernet modules, their names only include the PCI device BDF address. Signed-off-by: Matan Barak Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 71 ++--- drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 - drivers/net/ethernet/mellanox/mlx4/cq.c | 10 +- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 48 ++-- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 7 +- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 13 +- drivers/net/ethernet/mellanox/mlx4/eq.c | 353 +++++++++++++++---------- drivers/net/ethernet/mellanox/mlx4/main.c | 74 ++++-- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 11 +- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2 +- include/linux/mlx4/device.h | 11 +- 11 files changed, 342 insertions(+), 259 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 8c96c71e7bab..024b0f745035 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2041,77 +2041,52 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev) static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { - char name[80]; - int eq_per_port = 0; - int added_eqs = 0; - int total_eqs = 0; - int i, j, eq; - - /* Legacy mode or comp_pool is not large enough */ - if (dev->caps.comp_pool == 0 || - dev->caps.num_ports > dev->caps.comp_pool) - return; - - eq_per_port = dev->caps.comp_pool / dev->caps.num_ports; - - /* Init eq table */ - added_eqs = 0; - mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) - added_eqs += eq_per_port; - - total_eqs = dev->caps.num_comp_vectors + added_eqs; + int i, j, eq = 0, total_eqs = 0; - ibdev->eq_table = kzalloc(total_eqs * sizeof(int), GFP_KERNEL); + ibdev->eq_table = kcalloc(dev->caps.num_comp_vectors, + sizeof(ibdev->eq_table[0]), GFP_KERNEL); if (!ibdev->eq_table) return; - ibdev->eq_added = added_eqs; - - eq = 0; - mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) { - for (j = 0; j < eq_per_port; j++) { - snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s", - i, j, dev->persist->pdev->bus->name); - /* Set IRQ for specific name (per ring) */ - if (mlx4_assign_eq(dev, name, NULL, - &ibdev->eq_table[eq])) { - /* Use legacy (same as mlx4_en driver) */ - pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq); - ibdev->eq_table[eq] = - (eq % dev->caps.num_comp_vectors); - } - eq++; + for (i = 1; i <= dev->caps.num_ports; i++) { + for (j = 0; j < mlx4_get_eqs_per_port(dev, i); + j++, total_eqs++) { + if (i > 1 && mlx4_is_eq_shared(dev, total_eqs)) + continue; + ibdev->eq_table[eq] = total_eqs; + if (!mlx4_assign_eq(dev, i, + &ibdev->eq_table[eq])) + eq++; + else + ibdev->eq_table[eq] = -1; } } - /* Fill the reset of the vector with legacy EQ */ - for (i = 0, eq = added_eqs; i < dev->caps.num_comp_vectors; i++) - ibdev->eq_table[eq++] = i; + for (i = eq; i < dev->caps.num_comp_vectors; + ibdev->eq_table[i++] = -1) + ; /* Advertise the new number of EQs to clients */ - ibdev->ib_dev.num_comp_vectors = total_eqs; + ibdev->ib_dev.num_comp_vectors = eq; } static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { int i; + int total_eqs = ibdev->ib_dev.num_comp_vectors; - /* no additional eqs were added */ + /* no eqs were allocated */ if (!ibdev->eq_table) return; /* Reset the advertised EQ number */ - ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; + ibdev->ib_dev.num_comp_vectors = 0; - /* Free only the added eqs */ - for (i = 0; i < ibdev->eq_added; i++) { - /* Don't free legacy eqs if used */ - if (ibdev->eq_table[i] <= dev->caps.num_comp_vectors) - continue; + for (i = 0; i < total_eqs; i++) mlx4_release_eq(dev, ibdev->eq_table[i]); - } kfree(ibdev->eq_table); + ibdev->eq_table = NULL; } static void *mlx4_ib_add(struct mlx4_dev *dev) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index fce3934372a1..ef80e6c99a68 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -523,7 +523,6 @@ struct mlx4_ib_dev { struct mlx4_ib_iboe iboe; int counters[MLX4_MAX_PORTS]; int *eq_table; - int eq_added; struct kobject *iov_parent; struct kobject *ports_parent; struct kobject *dev_ports_parent[MLX4_MFUNC_MAX]; diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index e71f31387ac6..7431cd4d7390 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -292,7 +292,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, u64 mtt_addr; int err; - if (vector > dev->caps.num_comp_vectors + dev->caps.comp_pool) + if (vector >= dev->caps.num_comp_vectors) return -EINVAL; cq->vector = vector; @@ -319,7 +319,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, cq_context->flags |= cpu_to_be32(1 << 19); cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index); - cq_context->comp_eqn = priv->eq_table.eq[vector].eqn; + cq_context->comp_eqn = priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].eqn; cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; mtt_addr = mlx4_mtt_addr(dev, mtt); @@ -339,11 +339,11 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, init_completion(&cq->free); cq->comp = mlx4_add_cq_to_tasklet; cq->tasklet_ctx.priv = - &priv->eq_table.eq[cq->vector].tasklet_ctx; + &priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].tasklet_ctx; INIT_LIST_HEAD(&cq->tasklet_ctx.list); - cq->irq = priv->eq_table.eq[cq->vector].irq; + cq->irq = priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].irq; return 0; err_radix: @@ -368,7 +368,7 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) if (err) mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn); - synchronize_irq(priv->eq_table.eq[cq->vector].irq); + synchronize_irq(priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq); spin_lock_irq(&cq_table->lock); radix_tree_delete(&cq_table->tree, cq->cqn); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 22da4d0d0f05..d71c567eb076 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -66,6 +66,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, cq->ring = ring; cq->is_tx = mode; + cq->vector = mdev->dev->caps.num_comp_vectors; /* Allocate HW buffers on provided NUMA node. * dev->numa_node is used in mtt range allocation flow. @@ -101,12 +102,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, int err = 0; char name[25]; int timestamp_en = 0; - struct cpu_rmap *rmap = -#ifdef CONFIG_RFS_ACCEL - priv->dev->rx_cpu_rmap; -#else - NULL; -#endif + bool assigned_eq = false; cq->dev = mdev->pndev[priv->port]; cq->mcq.set_ci_db = cq->wqres.db.db; @@ -116,23 +112,19 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, memset(cq->buf, 0, cq->buf_size); if (cq->is_tx == RX) { - if (mdev->dev->caps.comp_pool) { - if (!cq->vector) { - sprintf(name, "%s-%d", priv->dev->name, - cq->ring); - /* Set IRQ for specific name (per ring) */ - if (mlx4_assign_eq(mdev->dev, name, rmap, - &cq->vector)) { - cq->vector = (cq->ring + 1 + priv->port) - % mdev->dev->caps.num_comp_vectors; - mlx4_warn(mdev, "Failed assigning an EQ to %s, falling back to legacy EQ's\n", - name); - } - + if (!mlx4_is_eq_vector_valid(mdev->dev, priv->port, + cq->vector)) { + cq->vector = cq_idx; + + err = mlx4_assign_eq(mdev->dev, priv->port, + &cq->vector); + if (err) { + mlx4_err(mdev, "Failed assigning an EQ to %s\n", + name); + goto free_eq; } - } else { - cq->vector = (cq->ring + 1 + priv->port) % - mdev->dev->caps.num_comp_vectors; + + assigned_eq = true; } cq->irq_desc = @@ -159,7 +151,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, &mdev->priv_uar, cq->wqres.db.dma, &cq->mcq, cq->vector, 0, timestamp_en); if (err) - return err; + goto free_eq; cq->mcq.comp = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq; cq->mcq.event = mlx4_en_cq_event; @@ -182,6 +174,12 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, napi_enable(&cq->napi); return 0; + +free_eq: + if (assigned_eq) + mlx4_release_eq(mdev->dev, cq->vector); + cq->vector = mdev->dev->caps.num_comp_vectors; + return err; } void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq) @@ -191,9 +189,9 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq) mlx4_en_unmap_buffer(&cq->wqres.buf); mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); - if (priv->mdev->dev->caps.comp_pool && cq->vector) { + if (mlx4_is_eq_vector_valid(mdev->dev, priv->port, cq->vector) && + cq->is_tx == RX) mlx4_release_eq(priv->mdev->dev, cq->vector); - } cq->vector = 0; cq->buf_size = 0; cq->buf = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 32f5ec737472..455cecae5aa4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1958,7 +1958,6 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv) int i; #ifdef CONFIG_RFS_ACCEL - free_irq_cpu_rmap(priv->dev->rx_cpu_rmap); priv->dev->rx_cpu_rmap = NULL; #endif @@ -2016,11 +2015,7 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) } #ifdef CONFIG_RFS_ACCEL - if (priv->mdev->dev->caps.comp_pool) { - priv->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->mdev->dev->caps.comp_pool); - if (!priv->dev->rx_cpu_rmap) - goto err; - } + priv->dev->rx_cpu_rmap = mlx4_get_cpu_rmap(priv->mdev->dev, priv->port); #endif return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 2a77a6b19121..35f726c17e48 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -337,15 +337,10 @@ void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev) struct mlx4_dev *dev = mdev->dev; mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { - if (!dev->caps.comp_pool) - num_of_eqs = max_t(int, MIN_RX_RINGS, - min_t(int, - dev->caps.num_comp_vectors, - DEF_RX_RINGS)); - else - num_of_eqs = min_t(int, MAX_MSIX_P_PORT, - dev->caps.comp_pool/ - dev->caps.num_ports) - 1; + num_of_eqs = max_t(int, MIN_RX_RINGS, + min_t(int, + mlx4_get_eqs_per_port(mdev->dev, i), + DEF_RX_RINGS)); num_rx_rings = mlx4_low_memory_profile() ? MIN_RX_RINGS : min_t(int, num_of_eqs, diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 80bcd648c5e0..2e6fc6a860a7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -895,8 +895,8 @@ static int mlx4_num_eq_uar(struct mlx4_dev *dev) * we need to map, take the difference of highest index and * the lowest index we'll use and add 1. */ - return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs + - dev->caps.comp_pool)/4 - dev->caps.reserved_eqs/4 + 1; + return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs) / 4 - + dev->caps.reserved_eqs / 4 + 1; } static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq) @@ -1085,8 +1085,7 @@ static void mlx4_free_eq(struct mlx4_dev *dev, static void mlx4_free_irqs(struct mlx4_dev *dev) { struct mlx4_eq_table *eq_table = &mlx4_priv(dev)->eq_table; - struct mlx4_priv *priv = mlx4_priv(dev); - int i, vec; + int i; if (eq_table->have_irq) free_irq(dev->persist->pdev->irq, dev); @@ -1097,20 +1096,6 @@ static void mlx4_free_irqs(struct mlx4_dev *dev) eq_table->eq[i].have_irq = 0; } - for (i = 0; i < dev->caps.comp_pool; i++) { - /* - * Freeing the assigned irq's - * all bits should be 0, but we need to validate - */ - if (priv->msix_ctl.pool_bm & 1ULL << i) { - /* NO need protecting*/ - vec = dev->caps.num_comp_vectors + 1 + i; - free_irq(priv->eq_table.eq[vec].irq, - &priv->eq_table.eq[vec]); - } - } - - kfree(eq_table->irq_names); } @@ -1191,76 +1176,73 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) } priv->eq_table.irq_names = - kmalloc(MLX4_IRQNAME_SIZE * (dev->caps.num_comp_vectors + 1 + - dev->caps.comp_pool), + kmalloc(MLX4_IRQNAME_SIZE * (dev->caps.num_comp_vectors + 1), GFP_KERNEL); if (!priv->eq_table.irq_names) { err = -ENOMEM; - goto err_out_bitmap; + goto err_out_clr_int; } - for (i = 0; i < dev->caps.num_comp_vectors; ++i) { - err = mlx4_create_eq(dev, dev->caps.num_cqs - - dev->caps.reserved_cqs + - MLX4_NUM_SPARE_EQE, - (dev->flags & MLX4_FLAG_MSI_X) ? i : 0, - &priv->eq_table.eq[i]); - if (err) { - --i; - goto err_out_unmap; - } - } - - err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE, - (dev->flags & MLX4_FLAG_MSI_X) ? dev->caps.num_comp_vectors : 0, - &priv->eq_table.eq[dev->caps.num_comp_vectors]); - if (err) - goto err_out_comp; - - /*if additional completion vectors poolsize is 0 this loop will not run*/ - for (i = dev->caps.num_comp_vectors + 1; - i < dev->caps.num_comp_vectors + dev->caps.comp_pool + 1; ++i) { + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) { + if (i == MLX4_EQ_ASYNC) { + err = mlx4_create_eq(dev, + MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE, + 0, &priv->eq_table.eq[MLX4_EQ_ASYNC]); + } else { +#ifdef CONFIG_RFS_ACCEL + struct mlx4_eq *eq = &priv->eq_table.eq[i]; + int port = find_first_bit(eq->actv_ports.ports, + dev->caps.num_ports) + 1; + + if (port <= dev->caps.num_ports) { + struct mlx4_port_info *info = + &mlx4_priv(dev)->port[port]; + + if (!info->rmap) { + info->rmap = alloc_irq_cpu_rmap( + mlx4_get_eqs_per_port(dev, port)); + if (!info->rmap) { + mlx4_warn(dev, "Failed to allocate cpu rmap\n"); + err = -ENOMEM; + goto err_out_unmap; + } + } - err = mlx4_create_eq(dev, dev->caps.num_cqs - - dev->caps.reserved_cqs + - MLX4_NUM_SPARE_EQE, - (dev->flags & MLX4_FLAG_MSI_X) ? i : 0, - &priv->eq_table.eq[i]); - if (err) { - --i; - goto err_out_unmap; + err = irq_cpu_rmap_add( + info->rmap, eq->irq); + if (err) + mlx4_warn(dev, "Failed adding irq rmap\n"); + } +#endif + err = mlx4_create_eq(dev, dev->caps.num_cqs - + dev->caps.reserved_cqs + + MLX4_NUM_SPARE_EQE, + (dev->flags & MLX4_FLAG_MSI_X) ? + i + 1 - !!(i > MLX4_EQ_ASYNC) : 0, + eq); } + if (err) + goto err_out_unmap; } - if (dev->flags & MLX4_FLAG_MSI_X) { const char *eq_name; - for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) { - if (i < dev->caps.num_comp_vectors) { - snprintf(priv->eq_table.irq_names + - i * MLX4_IRQNAME_SIZE, - MLX4_IRQNAME_SIZE, - "mlx4-comp-%d@pci:%s", i, - pci_name(dev->persist->pdev)); - } else { - snprintf(priv->eq_table.irq_names + - i * MLX4_IRQNAME_SIZE, - MLX4_IRQNAME_SIZE, - "mlx4-async@pci:%s", - pci_name(dev->persist->pdev)); - } + snprintf(priv->eq_table.irq_names + + MLX4_EQ_ASYNC * MLX4_IRQNAME_SIZE, + MLX4_IRQNAME_SIZE, + "mlx4-async@pci:%s", + pci_name(dev->persist->pdev)); + eq_name = priv->eq_table.irq_names + + MLX4_EQ_ASYNC * MLX4_IRQNAME_SIZE; - eq_name = priv->eq_table.irq_names + - i * MLX4_IRQNAME_SIZE; - err = request_irq(priv->eq_table.eq[i].irq, - mlx4_msi_x_interrupt, 0, eq_name, - priv->eq_table.eq + i); - if (err) - goto err_out_async; + err = request_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq, + mlx4_msi_x_interrupt, 0, eq_name, + priv->eq_table.eq + MLX4_EQ_ASYNC); + if (err) + goto err_out_unmap; - priv->eq_table.eq[i].have_irq = 1; - } + priv->eq_table.eq[MLX4_EQ_ASYNC].have_irq = 1; } else { snprintf(priv->eq_table.irq_names, MLX4_IRQNAME_SIZE, @@ -1269,36 +1251,38 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) err = request_irq(dev->persist->pdev->irq, mlx4_interrupt, IRQF_SHARED, priv->eq_table.irq_names, dev); if (err) - goto err_out_async; + goto err_out_unmap; priv->eq_table.have_irq = 1; } err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, - priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); if (err) mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", - priv->eq_table.eq[dev->caps.num_comp_vectors].eqn, err); + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err); - for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) - eq_set_ci(&priv->eq_table.eq[i], 1); + /* arm ASYNC eq */ + eq_set_ci(&priv->eq_table.eq[MLX4_EQ_ASYNC], 1); return 0; -err_out_async: - mlx4_free_eq(dev, &priv->eq_table.eq[dev->caps.num_comp_vectors]); - -err_out_comp: - i = dev->caps.num_comp_vectors - 1; - err_out_unmap: - while (i >= 0) { - mlx4_free_eq(dev, &priv->eq_table.eq[i]); - --i; + while (i >= 0) + mlx4_free_eq(dev, &priv->eq_table.eq[i--]); +#ifdef CONFIG_RFS_ACCEL + for (i = 1; i <= dev->caps.num_ports; i++) { + if (mlx4_priv(dev)->port[i].rmap) { + free_irq_cpu_rmap(mlx4_priv(dev)->port[i].rmap); + mlx4_priv(dev)->port[i].rmap = NULL; + } } +#endif + mlx4_free_irqs(dev); + +err_out_clr_int: if (!mlx4_is_slave(dev)) mlx4_unmap_clr_int(dev); - mlx4_free_irqs(dev); err_out_bitmap: mlx4_unmap_uar(dev); @@ -1316,11 +1300,19 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev) int i; mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 1, - priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); +#ifdef CONFIG_RFS_ACCEL + for (i = 1; i <= dev->caps.num_ports; i++) { + if (mlx4_priv(dev)->port[i].rmap) { + free_irq_cpu_rmap(mlx4_priv(dev)->port[i].rmap); + mlx4_priv(dev)->port[i].rmap = NULL; + } + } +#endif mlx4_free_irqs(dev); - for (i = 0; i < dev->caps.num_comp_vectors + dev->caps.comp_pool + 1; ++i) + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) mlx4_free_eq(dev, &priv->eq_table.eq[i]); if (!mlx4_is_slave(dev)) @@ -1371,87 +1363,166 @@ int mlx4_test_interrupts(struct mlx4_dev *dev) /* Return to default */ mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, - priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); return err; } EXPORT_SYMBOL(mlx4_test_interrupts); -int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, - int *vector) +bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector) { + struct mlx4_priv *priv = mlx4_priv(dev); + vector = MLX4_CQ_TO_EQ_VECTOR(vector); + if (vector < 0 || (vector >= dev->caps.num_comp_vectors + 1) || + (vector == MLX4_EQ_ASYNC)) + return false; + + return test_bit(port - 1, priv->eq_table.eq[vector].actv_ports.ports); +} +EXPORT_SYMBOL(mlx4_is_eq_vector_valid); + +u32 mlx4_get_eqs_per_port(struct mlx4_dev *dev, u8 port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + unsigned int i; + unsigned int sum = 0; + + for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) + sum += !!test_bit(port - 1, + priv->eq_table.eq[i].actv_ports.ports); + + return sum; +} +EXPORT_SYMBOL(mlx4_get_eqs_per_port); + +int mlx4_is_eq_shared(struct mlx4_dev *dev, int vector) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + vector = MLX4_CQ_TO_EQ_VECTOR(vector); + if (vector <= 0 || (vector >= dev->caps.num_comp_vectors + 1)) + return -EINVAL; + + return !!(bitmap_weight(priv->eq_table.eq[vector].actv_ports.ports, + dev->caps.num_ports) > 1); +} +EXPORT_SYMBOL(mlx4_is_eq_shared); + +struct cpu_rmap *mlx4_get_cpu_rmap(struct mlx4_dev *dev, int port) +{ + return mlx4_priv(dev)->port[port].rmap; +} +EXPORT_SYMBOL(mlx4_get_cpu_rmap); + +int mlx4_assign_eq(struct mlx4_dev *dev, u8 port, int *vector) +{ struct mlx4_priv *priv = mlx4_priv(dev); - int vec = 0, err = 0, i; + int err = 0, i = 0; + u32 min_ref_count_val = (u32)-1; + int requested_vector = MLX4_CQ_TO_EQ_VECTOR(*vector); + int *prequested_vector = NULL; + mutex_lock(&priv->msix_ctl.pool_lock); - for (i = 0; !vec && i < dev->caps.comp_pool; i++) { - if (~priv->msix_ctl.pool_bm & 1ULL << i) { - priv->msix_ctl.pool_bm |= 1ULL << i; - vec = dev->caps.num_comp_vectors + 1 + i; - snprintf(priv->eq_table.irq_names + - vec * MLX4_IRQNAME_SIZE, - MLX4_IRQNAME_SIZE, "%s", name); -#ifdef CONFIG_RFS_ACCEL - if (rmap) { - err = irq_cpu_rmap_add(rmap, - priv->eq_table.eq[vec].irq); - if (err) - mlx4_warn(dev, "Failed adding irq rmap\n"); + if (requested_vector < (dev->caps.num_comp_vectors + 1) && + (requested_vector >= 0) && + (requested_vector != MLX4_EQ_ASYNC)) { + if (test_bit(port - 1, + priv->eq_table.eq[requested_vector].actv_ports.ports)) { + prequested_vector = &requested_vector; + } else { + struct mlx4_eq *eq; + + for (i = 1; i < port; + requested_vector += mlx4_get_eqs_per_port(dev, i++)) + ; + + eq = &priv->eq_table.eq[requested_vector]; + if (requested_vector < dev->caps.num_comp_vectors + 1 && + test_bit(port - 1, eq->actv_ports.ports)) { + prequested_vector = &requested_vector; } -#endif - err = request_irq(priv->eq_table.eq[vec].irq, - mlx4_msi_x_interrupt, 0, - &priv->eq_table.irq_names[vec<<5], - priv->eq_table.eq + vec); - if (err) { - /*zero out bit by fliping it*/ - priv->msix_ctl.pool_bm ^= 1 << i; - vec = 0; - continue; - /*we dont want to break here*/ + } + } + + if (!prequested_vector) { + requested_vector = -1; + for (i = 0; min_ref_count_val && i < dev->caps.num_comp_vectors + 1; + i++) { + struct mlx4_eq *eq = &priv->eq_table.eq[i]; + + if (min_ref_count_val > eq->ref_count && + test_bit(port - 1, eq->actv_ports.ports)) { + min_ref_count_val = eq->ref_count; + requested_vector = i; } + } - eq_set_ci(&priv->eq_table.eq[vec], 1); + if (requested_vector < 0) { + err = -ENOSPC; + goto err_unlock; } + + prequested_vector = &requested_vector; } + + if (!test_bit(*prequested_vector, priv->msix_ctl.pool_bm) && + dev->flags & MLX4_FLAG_MSI_X) { + set_bit(*prequested_vector, priv->msix_ctl.pool_bm); + snprintf(priv->eq_table.irq_names + + *prequested_vector * MLX4_IRQNAME_SIZE, + MLX4_IRQNAME_SIZE, "mlx4-%d@%s", + *prequested_vector, dev_name(&dev->persist->pdev->dev)); + + err = request_irq(priv->eq_table.eq[*prequested_vector].irq, + mlx4_msi_x_interrupt, 0, + &priv->eq_table.irq_names[*prequested_vector << 5], + priv->eq_table.eq + *prequested_vector); + + if (err) { + clear_bit(*prequested_vector, priv->msix_ctl.pool_bm); + *prequested_vector = -1; + } else { + eq_set_ci(&priv->eq_table.eq[*prequested_vector], 1); + priv->eq_table.eq[*prequested_vector].have_irq = 1; + } + } + + if (!err && *prequested_vector >= 0) + priv->eq_table.eq[*prequested_vector].ref_count++; + +err_unlock: mutex_unlock(&priv->msix_ctl.pool_lock); - if (vec) { - *vector = vec; - } else { + if (!err && *prequested_vector >= 0) + *vector = MLX4_EQ_TO_CQ_VECTOR(*prequested_vector); + else *vector = 0; - err = (i == dev->caps.comp_pool) ? -ENOSPC : err; - } + return err; } EXPORT_SYMBOL(mlx4_assign_eq); -int mlx4_eq_get_irq(struct mlx4_dev *dev, int vec) +int mlx4_eq_get_irq(struct mlx4_dev *dev, int cq_vec) { struct mlx4_priv *priv = mlx4_priv(dev); - return priv->eq_table.eq[vec].irq; + return priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq_vec)].irq; } EXPORT_SYMBOL(mlx4_eq_get_irq); void mlx4_release_eq(struct mlx4_dev *dev, int vec) { struct mlx4_priv *priv = mlx4_priv(dev); - /*bm index*/ - int i = vec - dev->caps.num_comp_vectors - 1; - - if (likely(i >= 0)) { - /*sanity check , making sure were not trying to free irq's - Belonging to a legacy EQ*/ - mutex_lock(&priv->msix_ctl.pool_lock); - if (priv->msix_ctl.pool_bm & 1ULL << i) { - free_irq(priv->eq_table.eq[vec].irq, - &priv->eq_table.eq[vec]); - priv->msix_ctl.pool_bm &= ~(1ULL << i); - } - mutex_unlock(&priv->msix_ctl.pool_lock); - } + int eq_vec = MLX4_CQ_TO_EQ_VECTOR(vec); + + mutex_lock(&priv->msix_ctl.pool_lock); + priv->eq_table.eq[eq_vec].ref_count--; + /* once we allocated EQ, we don't release it because it might be binded + * to cpu_rmap. + */ + mutex_unlock(&priv->msix_ctl.pool_lock); } EXPORT_SYMBOL(mlx4_release_eq); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 70d33f6e2a41..3ec5113c5a33 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2364,11 +2364,11 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) if (err) { if (dev->flags & MLX4_FLAG_MSI_X) { mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt IRQ %d)\n", - priv->eq_table.eq[dev->caps.num_comp_vectors].irq); + priv->eq_table.eq[MLX4_EQ_ASYNC].irq); mlx4_warn(dev, "Trying again without MSI-X\n"); } else { mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n", - priv->eq_table.eq[dev->caps.num_comp_vectors].irq); + priv->eq_table.eq[MLX4_EQ_ASYNC].irq); mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n"); } @@ -2486,9 +2486,10 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct msix_entry *entries; int i; + int port = 0; if (msi_x) { - int nreq = dev->caps.num_ports * num_online_cpus() + MSIX_LEGACY_SZ; + int nreq = dev->caps.num_ports * num_online_cpus() + 1; nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, nreq); @@ -2503,20 +2504,49 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2, nreq); - if (nreq < 0) { + if (nreq < 0 || nreq < MLX4_EQ_ASYNC) { kfree(entries); goto no_msi; - } else if (nreq < MSIX_LEGACY_SZ + - dev->caps.num_ports * MIN_MSIX_P_PORT) { - /*Working in legacy mode , all EQ's shared*/ - dev->caps.comp_pool = 0; - dev->caps.num_comp_vectors = nreq - 1; - } else { - dev->caps.comp_pool = nreq - MSIX_LEGACY_SZ; - dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1; } - for (i = 0; i < nreq; ++i) - priv->eq_table.eq[i].irq = entries[i].vector; + /* 1 is reserved for events (asyncrounous EQ) */ + dev->caps.num_comp_vectors = nreq - 1; + + priv->eq_table.eq[MLX4_EQ_ASYNC].irq = entries[0].vector; + bitmap_zero(priv->eq_table.eq[MLX4_EQ_ASYNC].actv_ports.ports, + dev->caps.num_ports); + + for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) { + if (i == MLX4_EQ_ASYNC) + continue; + + priv->eq_table.eq[i].irq = + entries[i + 1 - !!(i > MLX4_EQ_ASYNC)].vector; + + if (MLX4_IS_LEGACY_EQ_MODE(dev->caps)) { + bitmap_fill(priv->eq_table.eq[i].actv_ports.ports, + dev->caps.num_ports); + } else { + set_bit(port, + priv->eq_table.eq[i].actv_ports.ports); + } + /* We divide the Eqs evenly between the two ports. + * (dev->caps.num_comp_vectors / dev->caps.num_ports) + * refers to the number of Eqs per port + * (i.e eqs_per_port). Theoretically, we would like to + * write something like (i + 1) % eqs_per_port == 0. + * However, since there's an asynchronous Eq, we have + * to skip over it by comparing this condition to + * !!((i + 1) > MLX4_EQ_ASYNC). + */ + if ((dev->caps.num_comp_vectors > dev->caps.num_ports) && + ((i + 1) % + (dev->caps.num_comp_vectors / dev->caps.num_ports)) == + !!((i + 1) > MLX4_EQ_ASYNC)) + /* If dev->caps.num_comp_vectors < dev->caps.num_ports, + * everything is shared anyway. + */ + port++; + } dev->flags |= MLX4_FLAG_MSI_X; @@ -2526,10 +2556,15 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) no_msi: dev->caps.num_comp_vectors = 1; - dev->caps.comp_pool = 0; - for (i = 0; i < 2; ++i) + BUG_ON(MLX4_EQ_ASYNC >= 2); + for (i = 0; i < 2; ++i) { priv->eq_table.eq[i].irq = dev->persist->pdev->irq; + if (i != MLX4_EQ_ASYNC) { + bitmap_fill(priv->eq_table.eq[i].actv_ports.ports, + dev->caps.num_ports); + } + } } static int mlx4_init_port_info(struct mlx4_dev *dev, int port) @@ -2594,6 +2629,10 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info) device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); device_remove_file(&info->dev->persist->pdev->dev, &info->port_mtu_attr); +#ifdef CONFIG_RFS_ACCEL + free_irq_cpu_rmap(info->rmap); + info->rmap = NULL; +#endif } static int mlx4_init_steering(struct mlx4_dev *dev) @@ -3024,7 +3063,7 @@ slave_start: if (err) goto err_master_mfunc; - priv->msix_ctl.pool_bm = 0; + bitmap_zero(priv->msix_ctl.pool_bm, MAX_MSIX); mutex_init(&priv->msix_ctl.pool_lock); mlx4_enable_msi_x(dev); @@ -3046,7 +3085,6 @@ slave_start: !mlx4_is_mfunc(dev)) { dev->flags &= ~MLX4_FLAG_MSI_X; dev->caps.num_comp_vectors = 1; - dev->caps.comp_pool = 0; pci_disable_msix(pdev); err = mlx4_setup_hca(dev); } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 502d3dd2c888..ff40098eaf4c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -287,6 +287,12 @@ struct mlx4_icm_table { #define MLX4_CQE_SIZE_MASK_STRIDE 0x3 #define MLX4_EQE_SIZE_MASK_STRIDE 0x30 +#define MLX4_EQ_ASYNC 0 +#define MLX4_EQ_TO_CQ_VECTOR(vector) ((vector) - \ + !!((int)(vector) >= MLX4_EQ_ASYNC)) +#define MLX4_CQ_TO_EQ_VECTOR(vector) ((vector) + \ + !!((int)(vector) >= MLX4_EQ_ASYNC)) + /* * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits. */ @@ -391,6 +397,8 @@ struct mlx4_eq { struct mlx4_buf_list *page_list; struct mlx4_mtt mtt; struct mlx4_eq_tasklet tasklet_ctx; + struct mlx4_active_ports actv_ports; + u32 ref_count; }; struct mlx4_slave_eqe { @@ -808,6 +816,7 @@ struct mlx4_port_info { struct mlx4_vlan_table vlan_table; struct mlx4_roce_gid_table gid_table; int base_qpn; + struct cpu_rmap *rmap; }; struct mlx4_sense { @@ -818,7 +827,7 @@ struct mlx4_sense { }; struct mlx4_msix_ctl { - u64 pool_bm; + DECLARE_BITMAP(pool_bm, MAX_MSIX); struct mutex pool_lock; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index d021f079f181..edd8fd69ec9a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -338,7 +338,7 @@ struct mlx4_en_cq { struct napi_struct napi; int size; int buf_size; - unsigned vector; + int vector; enum cq_type is_tx; u16 moder_time; u16 moder_cnt; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 83e80ab94500..ad31e476873f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -46,8 +46,9 @@ #define MAX_MSIX_P_PORT 17 #define MAX_MSIX 64 -#define MSIX_LEGACY_SZ 4 #define MIN_MSIX_P_PORT 5 +#define MLX4_IS_LEGACY_EQ_MODE(dev_cap) ((dev_cap).num_comp_vectors < \ + (dev_cap).num_ports * MIN_MSIX_P_PORT) #define MLX4_MAX_100M_UNITS_VAL 255 /* * work around: can't set values @@ -528,7 +529,6 @@ struct mlx4_caps { int num_eqs; int reserved_eqs; int num_comp_vectors; - int comp_pool; int num_mpts; int max_fmr_maps; int num_mtts; @@ -1332,10 +1332,13 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); int mlx4_test_interrupts(struct mlx4_dev *dev); -int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, - int *vector); +u32 mlx4_get_eqs_per_port(struct mlx4_dev *dev, u8 port); +bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector); +struct cpu_rmap *mlx4_get_cpu_rmap(struct mlx4_dev *dev, int port); +int mlx4_assign_eq(struct mlx4_dev *dev, u8 port, int *vector); void mlx4_release_eq(struct mlx4_dev *dev, int vec); +int mlx4_is_eq_shared(struct mlx4_dev *dev, int vector); int mlx4_eq_get_irq(struct mlx4_dev *dev, int vec); int mlx4_get_phys_port_id(struct mlx4_dev *dev); -- cgit v1.2.3 From abf2e7d6e2e315b32ee00067a69aaad2cf4e1b3f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 28 May 2015 19:26:02 -0700 Subject: bpf: add missing rcu protection when releasing programs from prog_array Normally the program attachment place (like sockets, qdiscs) takes care of rcu protection and calls bpf_prog_put() after a grace period. The programs stored inside prog_array may not be attached anywhere, so prog_array needs to take care of preserving rcu protection. Otherwise bpf_tail_call() will race with bpf_prog_put(). To solve that introduce bpf_prog_put_rcu() helper function and use it in 3 places where unattached program can decrement refcnt: closing program fd, deleting/replacing program in prog_array. Fixes: 04fd61ab36ec ("bpf: allow bpf programs to tail-call other bpf programs") Reported-by: Martin Schwidefsky Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 6 +++++- kernel/bpf/arraymap.c | 4 ++-- kernel/bpf/syscall.c | 19 ++++++++++++++++++- 3 files changed, 25 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8821b9a8689e..5f520f5f087e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -123,7 +123,10 @@ struct bpf_prog_aux { const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; struct bpf_prog *prog; - struct work_struct work; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; struct bpf_array { @@ -153,6 +156,7 @@ void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); void bpf_prog_put(struct bpf_prog *prog); +void bpf_prog_put_rcu(struct bpf_prog *prog); struct bpf_map *bpf_map_get(struct fd f); void bpf_map_put(struct bpf_map *map); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 614bcd4c1d74..cb31229a6fa4 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -202,7 +202,7 @@ static int prog_array_map_update_elem(struct bpf_map *map, void *key, old_prog = xchg(array->prog + index, prog); if (old_prog) - bpf_prog_put(old_prog); + bpf_prog_put_rcu(old_prog); return 0; } @@ -218,7 +218,7 @@ static int prog_array_map_delete_elem(struct bpf_map *map, void *key) old_prog = xchg(array->prog + index, NULL); if (old_prog) { - bpf_prog_put(old_prog); + bpf_prog_put_rcu(old_prog); return 0; } else { return -ENOENT; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 98a69bd83069..a1b14d197a4f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -432,6 +432,23 @@ static void free_used_maps(struct bpf_prog_aux *aux) kfree(aux->used_maps); } +static void __prog_put_rcu(struct rcu_head *rcu) +{ + struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); + + free_used_maps(aux); + bpf_prog_free(aux->prog); +} + +/* version of bpf_prog_put() that is called after a grace period */ +void bpf_prog_put_rcu(struct bpf_prog *prog) +{ + if (atomic_dec_and_test(&prog->aux->refcnt)) { + prog->aux->prog = prog; + call_rcu(&prog->aux->rcu, __prog_put_rcu); + } +} + void bpf_prog_put(struct bpf_prog *prog) { if (atomic_dec_and_test(&prog->aux->refcnt)) { @@ -445,7 +462,7 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) { struct bpf_prog *prog = filp->private_data; - bpf_prog_put(prog); + bpf_prog_put_rcu(prog); return 0; } -- cgit v1.2.3 From 3b369bd212d5cabb46cff0e863298971b382bbd6 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 28 May 2015 15:38:32 +0300 Subject: ieee802154: Fix generation of random EUI-64 addresses. Currently, ieee802154_random_extended_addr() has a 50% chance of generating a group (multicast) address, while this function is used for generating station addresses (which can't be group addresses) for interfaces that don't have a hardware-provided address. Also, in case get_random_bytes() generates the EUI-64 address 00:00:00:00:00:00:00:00 (extremely unlikely), which is an invalid address, ieee802154_random_extended_addr() reacts by changing it to 01:00:00:00:00:00:00:00, which is an invalid station address as well, as it is a group address. This patch changes the address generation procedure to grab eight random bytes, treat that as an EUI-64, and then clear the Group address bit and set the Locally Administered bit, which is in line with how eth_random_addr() generates random EUI-48s. Signed-off-by: Lennert Buytenhek Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 8872ca103d06..552210d0a46f 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -244,9 +244,9 @@ static inline void ieee802154_random_extended_addr(__le64 *addr) { get_random_bytes(addr, IEEE802154_EXTENDED_ADDR_LEN); - /* toggle some bit if we hit an invalid extended addr */ - if (!ieee802154_is_valid_extended_addr(*addr)) - ((u8 *)addr)[IEEE802154_EXTENDED_ADDR_LEN - 1] ^= 0x01; + /* clear the group bit, and set the locally administered bit */ + ((u8 *)addr)[IEEE802154_EXTENDED_ADDR_LEN - 1] &= ~0x01; + ((u8 *)addr)[IEEE802154_EXTENDED_ADDR_LEN - 1] |= 0x02; } #endif /* LINUX_IEEE802154_H */ -- cgit v1.2.3 From daf4e2c89254ed6eb8cf7ef60f614edebfdb9f3a Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 28 May 2015 15:38:43 +0300 Subject: ieee802154: Fix EUI-64 station address validation. Refuse to allow setting an EUI-64 group address as an interface address, as those are not valid station addresses. Signed-off-by: Lennert Buytenhek Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 10 ++++------ net/mac802154/iface.c | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 552210d0a46f..1dc1f4ed4001 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -225,15 +225,13 @@ static inline bool ieee802154_is_valid_psdu_len(const u8 len) * ieee802154_is_valid_psdu_len - check if extended addr is valid * @addr: extended addr to check */ -static inline bool ieee802154_is_valid_extended_addr(const __le64 addr) +static inline bool ieee802154_is_valid_extended_unicast_addr(const __le64 addr) { - /* These EUI-64 addresses are reserved by IEEE. 0xffffffffffffffff - * is used internally as extended to short address broadcast mapping. - * This is currently a workaround because neighbor discovery can't - * deal with short addresses types right now. + /* Bail out if the address is all zero, or if the group + * address bit is set. */ return ((addr != cpu_to_le64(0x0000000000000000ULL)) && - (addr != cpu_to_le64(0xffffffffffffffffULL))); + !(addr & cpu_to_le64(0x0100000000000000ULL))); } /** diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index b544b5dc4bfb..6ac023932ce0 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -126,7 +126,7 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p) return -EBUSY; ieee802154_be64_to_le64(&extended_addr, addr->sa_data); - if (!ieee802154_is_valid_extended_addr(extended_addr)) + if (!ieee802154_is_valid_extended_unicast_addr(extended_addr)) return -EINVAL; memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); @@ -539,7 +539,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, switch (type) { case NL802154_IFTYPE_NODE: ndev->type = ARPHRD_IEEE802154; - if (ieee802154_is_valid_extended_addr(extended_addr)) + if (ieee802154_is_valid_extended_unicast_addr(extended_addr)) ieee802154_le64_to_be64(ndev->dev_addr, &extended_addr); else memcpy(ndev->dev_addr, ndev->perm_addr, -- cgit v1.2.3 From 1a1bc59c5f7657387d1a4b45d63248fed55ab88c Mon Sep 17 00:00:00 2001 From: Varka Bhadram Date: Fri, 29 May 2015 10:56:55 +0530 Subject: cc2520: fix CC2591 handling This patch changes tha way of handling of cc2591-cc2520 combination by moving amplified variable from platform data to private data. This will be useful in other sections like tx power support. Signed-off-by: Varka Bhadram Cc: Brad Campbell Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/cc2520.c | 10 ++++++++-- include/linux/spi/cc2520.h | 1 - 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c index ea280d437ca1..0d9353756598 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ -196,6 +196,7 @@ struct cc2520_private { u8 *buf; /* SPI TX/Rx data buffer */ struct mutex buffer_mutex; /* SPI buffer mutex */ bool is_tx; /* Flag for sync b/w Tx and Rx */ + bool amplified; /* Flag for CC2591 */ int fifo_pin; /* FIFO GPIO pin number */ struct work_struct fifop_irqwork;/* Workqueue for FIFOP */ spinlock_t lock; /* Lock for is_tx*/ @@ -738,7 +739,9 @@ static int cc2520_get_platform_data(struct spi_device *spi, pdata->vreg = of_get_named_gpio(np, "vreg-gpio", 0); pdata->reset = of_get_named_gpio(np, "reset-gpio", 0); - pdata->amplified = of_property_read_bool(np, "amplified"); + /* CC2591 front end for CC2520 */ + if (of_property_read_bool(np, "amplified")) + priv->amplified = true; return 0; } @@ -781,7 +784,7 @@ static int cc2520_hw_init(struct cc2520_private *priv) * amplifier. See section 8 page 17 of TI application note AN065. * http://www.ti.com/lit/an/swra229a/swra229a.pdf */ - if (pdata.amplified) { + if (priv->amplified) { ret = cc2520_write_register(priv, CC2520_AGCCTRL1, 0x16); if (ret) goto err_ret; @@ -896,6 +899,9 @@ static int cc2520_probe(struct spi_device *spi) spin_lock_init(&priv->lock); init_completion(&priv->tx_complete); + /* Assumption that CC2591 is not connected */ + priv->amplified = false; + /* Request all the gpio's */ if (!gpio_is_valid(pdata.fifo)) { dev_err(&spi->dev, "fifo gpio is not valid\n"); diff --git a/include/linux/spi/cc2520.h b/include/linux/spi/cc2520.h index e741e8baad92..85b8ee67e937 100644 --- a/include/linux/spi/cc2520.h +++ b/include/linux/spi/cc2520.h @@ -21,7 +21,6 @@ struct cc2520_platform_data { int sfd; int reset; int vreg; - bool amplified; }; #endif -- cgit v1.2.3 From 17ca8cbf49be3aa94bb1c2b7ee6545fd70094eb4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 29 May 2015 23:23:06 +0200 Subject: ebpf: allow bpf_ktime_get_ns_proto also for networking As this is already exported from tracing side via commit d9847d310ab4 ("tracing: Allow BPF programs to call bpf_ktime_get_ns()"), we might as well want to move it to the core, so also networking users can make use of it, e.g. to measure diffs for certain flows from ingress/egress. Signed-off-by: Daniel Borkmann Cc: Alexei Starovoitov Cc: Ingo Molnar Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + kernel/bpf/core.c | 1 + kernel/bpf/helpers.c | 13 +++++++++++++ kernel/trace/bpf_trace.c | 12 ------------ net/core/filter.c | 2 ++ 5 files changed, 17 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5f520f5f087e..ca854e5bb2f7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -186,5 +186,6 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; +extern const struct bpf_func_proto bpf_ktime_get_ns_proto; #endif /* _LINUX_BPF_H */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d44b25cbe460..4548422d5f6c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -734,6 +734,7 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; +const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call * skb_copy_bits(), so provide a weak definition of it for NET-less config. diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index bd7f5988ed9c..b3aaabdf9a50 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -13,6 +13,7 @@ #include #include #include +#include /* If kernel subsystem is allowing eBPF programs to call this function, * inside its own verifier_ops->get_func_proto() callback it should return @@ -111,3 +112,15 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = { .gpl_only = false, .ret_type = RET_INTEGER, }; + +static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + /* NMI safe access to clock monotonic */ + return ktime_get_mono_fast_ns(); +} + +const struct bpf_func_proto bpf_ktime_get_ns_proto = { + .func = bpf_ktime_get_ns, + .gpl_only = true, + .ret_type = RET_INTEGER, +}; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 646445e41bd4..50c4015a8ad3 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -79,18 +79,6 @@ static const struct bpf_func_proto bpf_probe_read_proto = { .arg3_type = ARG_ANYTHING, }; -static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) -{ - /* NMI safe access to clock monotonic */ - return ktime_get_mono_fast_ns(); -} - -static const struct bpf_func_proto bpf_ktime_get_ns_proto = { - .func = bpf_ktime_get_ns, - .gpl_only = true, - .ret_type = RET_INTEGER, -}; - /* * limited trace_printk() * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed diff --git a/net/core/filter.c b/net/core/filter.c index 2c30d6632d66..b78a010a957f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1423,6 +1423,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) return &bpf_get_smp_processor_id_proto; case BPF_FUNC_tail_call: return &bpf_tail_call_proto; + case BPF_FUNC_ktime_get_ns: + return &bpf_ktime_get_ns_proto; default: return NULL; } -- cgit v1.2.3 From a28c257c9eb0bd76a4adcac97c07e34044ec71fb Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Fri, 29 May 2015 17:28:07 -0400 Subject: net/rds: Declare SO_RDS_TRANSPORT and RDS_TRANS_* constants in uapi/linux/rds.h User space applications that desire to explicitly select the underlying transport for a PF_RDS socket may do so by using the SO_RDS_TRANSPORT socket option at the SOL_RDS level before bind(). The integer argument provided to the socket option would be one of the RDS_TRANS_* values, e.g., RDS_TRANS_TCP. This commit exports the constant values need by such applications via Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- include/uapi/linux/rds.h | 10 ++++++++++ net/rds/rds.h | 5 ----- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index 91950950aa59..0f9265cb2a96 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -38,6 +38,8 @@ #define RDS_IB_ABI_VERSION 0x301 +#define SOL_RDS 276 + /* * setsockopt/getsockopt for SOL_RDS */ @@ -48,6 +50,14 @@ #define RDS_RECVERR 5 #define RDS_CONG_MONITOR 6 #define RDS_GET_MR_FOR_DEST 7 +#define SO_RDS_TRANSPORT 8 + +/* supported values for SO_RDS_TRANSPORT */ +#define RDS_TRANS_IB 0 +#define RDS_TRANS_IWARP 1 +#define RDS_TRANS_TCP 2 +#define RDS_TRANS_COUNT 3 +#define RDS_TRANS_NONE (~0) /* * Control message types for SOL_RDS. diff --git a/net/rds/rds.h b/net/rds/rds.h index 0d41155a2258..76db508f73a1 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -408,11 +408,6 @@ struct rds_notifier { * should try hard not to block. */ -#define RDS_TRANS_IB 0 -#define RDS_TRANS_IWARP 1 -#define RDS_TRANS_TCP 2 -#define RDS_TRANS_COUNT 3 - struct rds_transport { char t_name[TRANSNAMSIZ]; struct list_head t_item; -- cgit v1.2.3 From bdef7de4b8d9be4cf7bf5aea977f827310ab3ff0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 1 Jun 2015 14:56:09 -0700 Subject: net: Add priority to packet_offload objects. When we scan a packet for GRO processing, we want to see the most common packet types in the front of the offload_base list. So add a priority field so we can handle this properly. IPv4/IPv6 get the highest priority with the implicit zero priority field. Next comes ethernet with a priority of 10, and then we have the MPLS types with a priority of 15. Suggested-by: Eric Dumazet Suggested-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 8 ++++++-- net/ethernet/eth.c | 1 + net/mpls/mpls_gso.c | 2 ++ 4 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 51f8d2f5dc3f..6f5f71ff5169 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1997,6 +1997,7 @@ struct offload_callbacks { struct packet_offload { __be16 type; /* This is really htons(ether_type). */ + u16 priority; struct offload_callbacks callbacks; struct list_head list; }; diff --git a/net/core/dev.c b/net/core/dev.c index 594163d0c6eb..0602e917a305 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -469,10 +469,14 @@ EXPORT_SYMBOL(dev_remove_pack); */ void dev_add_offload(struct packet_offload *po) { - struct list_head *head = &offload_base; + struct packet_offload *elem; spin_lock(&offload_lock); - list_add_rcu(&po->list, head); + list_for_each_entry(elem, &offload_base, list) { + if (po->priority < elem->priority) + break; + } + list_add_rcu(&po->list, elem->list.prev); spin_unlock(&offload_lock); } EXPORT_SYMBOL(dev_add_offload); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index c3325bd2f3fb..7d0e239a6755 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -470,6 +470,7 @@ EXPORT_SYMBOL(eth_gro_complete); static struct packet_offload eth_packet_offload __read_mostly = { .type = cpu_to_be16(ETH_P_TEB), + .priority = 10, .callbacks = { .gro_receive = eth_gro_receive, .gro_complete = eth_gro_complete, diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 809df534a720..0183b32da942 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -62,6 +62,7 @@ out: static struct packet_offload mpls_mc_offload __read_mostly = { .type = cpu_to_be16(ETH_P_MPLS_MC), + .priority = 15, .callbacks = { .gso_segment = mpls_gso_segment, }, @@ -69,6 +70,7 @@ static struct packet_offload mpls_mc_offload __read_mostly = { static struct packet_offload mpls_uc_offload __read_mostly = { .type = cpu_to_be16(ETH_P_MPLS_UC), + .priority = 15, .callbacks = { .gso_segment = mpls_gso_segment, }, -- cgit v1.2.3 From ccea74457bbdafe33dce8bffcb5cb183aeb5f2bb Mon Sep 17 00:00:00 2001 From: Neil McKee Date: Tue, 26 May 2015 20:59:43 -0700 Subject: openvswitch: include datapath actions with sampled-packet upcall to userspace If new optional attribute OVS_USERSPACE_ATTR_ACTIONS is added to an OVS_ACTION_ATTR_USERSPACE action, then include the datapath actions in the upcall. This Directly associates the sampled packet with the path it takes through the virtual switch. Path information currently includes mangling, encapsulation and decapsulation actions for tunneling protocols GRE, VXLAN, Geneve, MPLS and QinQ, but this extension requires no further changes to accommodate datapath actions that may be added in the future. Adding path information enhances visibility into complex virtual networks. Signed-off-by: Neil McKee Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 4 ++++ net/openvswitch/actions.c | 23 +++++++++++++++-------- net/openvswitch/datapath.c | 18 ++++++++++++++++-- net/openvswitch/datapath.h | 2 ++ 4 files changed, 37 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index bbd49a0c46c7..1dab77601c21 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -153,6 +153,8 @@ enum ovs_packet_cmd { * flow key against the kernel's. * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet. Used * for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes. + * Also used in upcall when %OVS_ACTION_ATTR_USERSPACE has optional + * %OVS_USERSPACE_ATTR_ACTIONS attribute. * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content @@ -528,6 +530,7 @@ enum ovs_sample_attr { * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA. * @OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: If present, u32 output port to get * tunnel info. + * @OVS_USERSPACE_ATTR_ACTIONS: If present, send actions with upcall. */ enum ovs_userspace_attr { OVS_USERSPACE_ATTR_UNSPEC, @@ -535,6 +538,7 @@ enum ovs_userspace_attr { OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */ OVS_USERSPACE_ATTR_EGRESS_TUN_PORT, /* Optional, u32 output port * to get tunnel info. */ + OVS_USERSPACE_ATTR_ACTIONS, /* Optional flag to get actions. */ __OVS_USERSPACE_ATTR_MAX }; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index b491c1c296fe..8a8c0b8b4f63 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -608,17 +608,16 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port) } static int output_userspace(struct datapath *dp, struct sk_buff *skb, - struct sw_flow_key *key, const struct nlattr *attr) + struct sw_flow_key *key, const struct nlattr *attr, + const struct nlattr *actions, int actions_len) { struct ovs_tunnel_info info; struct dp_upcall_info upcall; const struct nlattr *a; int rem; + memset(&upcall, 0, sizeof(upcall)); upcall.cmd = OVS_PACKET_CMD_ACTION; - upcall.userdata = NULL; - upcall.portid = 0; - upcall.egress_tun_info = NULL; for (a = nla_data(attr), rem = nla_len(attr); rem > 0; a = nla_next(a, &rem)) { @@ -647,6 +646,13 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, break; } + case OVS_USERSPACE_ATTR_ACTIONS: { + /* Include actions. */ + upcall.actions = actions; + upcall.actions_len = actions_len; + break; + } + } /* End of switch. */ } @@ -654,7 +660,8 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, } static int sample(struct datapath *dp, struct sk_buff *skb, - struct sw_flow_key *key, const struct nlattr *attr) + struct sw_flow_key *key, const struct nlattr *attr, + const struct nlattr *actions, int actions_len) { const struct nlattr *acts_list = NULL; const struct nlattr *a; @@ -688,7 +695,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb, */ if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE && nla_is_last(a, rem))) - return output_userspace(dp, skb, key, a); + return output_userspace(dp, skb, key, a, actions, actions_len); skb = skb_clone(skb, GFP_ATOMIC); if (!skb) @@ -872,7 +879,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, break; case OVS_ACTION_ATTR_USERSPACE: - output_userspace(dp, skb, key, a); + output_userspace(dp, skb, key, a, attr, len); break; case OVS_ACTION_ATTR_HASH: @@ -916,7 +923,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, break; case OVS_ACTION_ATTR_SAMPLE: - err = sample(dp, skb, key, a); + err = sample(dp, skb, key, a, attr, len); break; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 3b90461317ec..ff8c4a4c1609 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -272,10 +272,9 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) struct dp_upcall_info upcall; int error; + memset(&upcall, 0, sizeof(upcall)); upcall.cmd = OVS_PACKET_CMD_MISS; - upcall.userdata = NULL; upcall.portid = ovs_vport_find_upcall_portid(p, skb); - upcall.egress_tun_info = NULL; error = ovs_dp_upcall(dp, skb, key, &upcall); if (unlikely(error)) kfree_skb(skb); @@ -397,6 +396,10 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, if (upcall_info->egress_tun_info) size += nla_total_size(ovs_tun_key_attr_size()); + /* OVS_PACKET_ATTR_ACTIONS */ + if (upcall_info->actions_len) + size += nla_total_size(upcall_info->actions_len); + return size; } @@ -478,6 +481,17 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, nla_nest_end(user_skb, nla); } + if (upcall_info->actions_len) { + nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS); + err = ovs_nla_put_actions(upcall_info->actions, + upcall_info->actions_len, + user_skb); + if (!err) + nla_nest_end(user_skb, nla); + else + nla_nest_cancel(user_skb, nla); + } + /* Only reserve room for attribute header, packet data is added * in skb_zerocopy() */ if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 4ec4a480b147..cd691e935e08 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -116,6 +116,8 @@ struct ovs_skb_cb { struct dp_upcall_info { const struct ovs_tunnel_info *egress_tun_info; const struct nlattr *userdata; + const struct nlattr *actions; + int actions_len; u32 portid; u8 cmd; }; -- cgit v1.2.3 From 66e5133f19e901a044fa5eaeeb6ecff4545839e5 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Mon, 1 Jun 2015 21:55:06 +0900 Subject: vlan: Add GRO support for non hardware accelerated vlan Currently packets with non-hardware-accelerated vlan cannot be handled by GRO. This causes low performance for 802.1ad and stacked vlan, as their vlan tags are currently not stripped by hardware. This patch adds GRO support for non-hardware-accelerated vlan and improves receive performance of them. Test Environment: vlan device (.1Q) on vlan device (.1ad) on ixgbe (82599) Result: - Before $ netperf -t TCP_STREAM -H 192.168.20.2 -l 60 Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 60.00 5233.17 Rx side CPU usage: %usr %sys %irq %soft %idle 0.27 58.03 0.00 41.70 0.00 - After $ netperf -t TCP_STREAM -H 192.168.20.2 -l 60 Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 60.00 7586.85 Rx side CPU usage: %usr %sys %irq %soft %idle 0.50 25.83 0.00 59.53 14.14 [ Register VLAN offloads with priority 10 -DaveM ] Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 20 +++++++++++ net/8021q/vlan.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index a40d29846ac2..67ce5bd3b56a 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -628,4 +628,24 @@ static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, return features; } +/** + * compare_vlan_header - Compare two vlan headers + * @h1: Pointer to vlan header + * @h2: Pointer to vlan header + * + * Compare two vlan headers, returns 0 if equal. + * + * Please note that alignment of h1 & h2 are only guaranteed to be 16 bits. + */ +static inline unsigned long compare_vlan_header(const struct vlan_hdr *h1, + const struct vlan_hdr *h2) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + return *(u32 *)h1 ^ *(u32 *)h2; +#else + return ((__force u32)h1->h_vlan_TCI ^ (__force u32)h2->h_vlan_TCI) | + ((__force u32)h1->h_vlan_encapsulated_proto ^ + (__force u32)h2->h_vlan_encapsulated_proto); +#endif +} #endif /* !(_LINUX_IF_VLAN_H_) */ diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 59555f0f8fc8..d2cd9de4b724 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -618,6 +618,92 @@ out: return err; } +static struct sk_buff **vlan_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct sk_buff *p, **pp = NULL; + struct vlan_hdr *vhdr; + unsigned int hlen, off_vlan; + const struct packet_offload *ptype; + __be16 type; + int flush = 1; + + off_vlan = skb_gro_offset(skb); + hlen = off_vlan + sizeof(*vhdr); + vhdr = skb_gro_header_fast(skb, off_vlan); + if (skb_gro_header_hard(skb, hlen)) { + vhdr = skb_gro_header_slow(skb, hlen, off_vlan); + if (unlikely(!vhdr)) + goto out; + } + + type = vhdr->h_vlan_encapsulated_proto; + + rcu_read_lock(); + ptype = gro_find_receive_by_type(type); + if (!ptype) + goto out_unlock; + + flush = 0; + + for (p = *head; p; p = p->next) { + struct vlan_hdr *vhdr2; + + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + vhdr2 = (struct vlan_hdr *)(p->data + off_vlan); + if (compare_vlan_header(vhdr, vhdr2)) + NAPI_GRO_CB(p)->same_flow = 0; + } + + skb_gro_pull(skb, sizeof(*vhdr)); + skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr)); + pp = ptype->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +static int vlan_gro_complete(struct sk_buff *skb, int nhoff) +{ + struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + nhoff); + __be16 type = vhdr->h_vlan_encapsulated_proto; + struct packet_offload *ptype; + int err = -ENOENT; + + rcu_read_lock(); + ptype = gro_find_complete_by_type(type); + if (ptype) + err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*vhdr)); + + rcu_read_unlock(); + return err; +} + +static struct packet_offload vlan_packet_offloads[] __read_mostly = { + { + .type = cpu_to_be16(ETH_P_8021Q), + .priority = 10, + .callbacks = { + .gro_receive = vlan_gro_receive, + .gro_complete = vlan_gro_complete, + }, + }, + { + .type = cpu_to_be16(ETH_P_8021AD), + .priority = 10, + .callbacks = { + .gro_receive = vlan_gro_receive, + .gro_complete = vlan_gro_complete, + }, + }, +}; + static int __net_init vlan_init_net(struct net *net) { struct vlan_net *vn = net_generic(net, vlan_net_id); @@ -645,6 +731,7 @@ static struct pernet_operations vlan_net_ops = { static int __init vlan_proto_init(void) { int err; + unsigned int i; pr_info("%s v%s\n", vlan_fullname, vlan_version); @@ -668,6 +755,9 @@ static int __init vlan_proto_init(void) if (err < 0) goto err5; + for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++) + dev_add_offload(&vlan_packet_offloads[i]); + vlan_ioctl_set(vlan_ioctl_handler); return 0; @@ -685,7 +775,13 @@ err0: static void __exit vlan_cleanup_module(void) { + unsigned int i; + vlan_ioctl_set(NULL); + + for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++) + dev_remove_offload(&vlan_packet_offloads[i]); + vlan_netlink_fini(); unregister_netdevice_notifier(&vlan_notifier_block); -- cgit v1.2.3 From 8760ce58353c2099be35ead62a572ee2d1e83b5b Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 1 Jun 2015 15:51:34 -0400 Subject: geneve: allow user to specify TTL for tunnel frames Signed-off-by: John W. Linville Signed-off-by: David S. Miller --- drivers/net/geneve.c | 18 ++++++++++++++---- include/uapi/linux/if_link.h | 1 + 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index b7eafa4c1a67..1675dfdbfa70 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -44,7 +44,8 @@ struct geneve_dev { struct net *net; /* netns for packet i/o */ struct net_device *dev; /* netdev for geneve tunnel */ struct geneve_sock *sock; /* socket used for geneve tunnel */ - u8 vni[3]; /* virtual network ID for tunnel */ + u8 vni[3]; /* virtual network ID for tunnel */ + u8 ttl; /* TTL override */ struct sockaddr_in remote; /* IPv4 address for link partner */ struct list_head next; /* geneve's per namespace list */ }; @@ -184,7 +185,7 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) struct flowi4 fl4; int err; __be16 sport; - __u8 tos, ttl = 0; + __u8 tos, ttl; iip = ip_hdr(skb); @@ -207,11 +208,12 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) goto rt_tx_error; } - /* TODO: tos and ttl should be configurable */ + /* TODO: tos should be configurable */ tos = ip_tunnel_ecn_encap(0, iip, skb); - if (IN_MULTICAST(ntohl(fl4.daddr))) + ttl = geneve->ttl; + if (!ttl && IN_MULTICAST(ntohl(fl4.daddr))) ttl = 1; ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); @@ -297,6 +299,7 @@ static void geneve_setup(struct net_device *dev) static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { [IFLA_GENEVE_ID] = { .type = NLA_U32 }, [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, + [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, }; static int geneve_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -364,6 +367,9 @@ static int geneve_newlink(struct net *net, struct net_device *dev, if (err) return err; + if (data[IFLA_GENEVE_TTL]) + geneve->ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); + list_add(&geneve->next, &gn->geneve_list); hlist_add_head_rcu(&geneve->hlist, &gn->vni_list[hash]); @@ -386,6 +392,7 @@ static size_t geneve_get_size(const struct net_device *dev) { return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ nla_total_size(sizeof(struct in_addr)) + /* IFLA_GENEVE_REMOTE */ + nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ 0; } @@ -402,6 +409,9 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) geneve->remote.sin_addr.s_addr)) goto nla_put_failure; + if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl)) + goto nla_put_failure; + return 0; nla_put_failure: diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index afccc9393fef..a834f31db915 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -395,6 +395,7 @@ enum { IFLA_GENEVE_UNSPEC, IFLA_GENEVE_ID, IFLA_GENEVE_REMOTE, + IFLA_GENEVE_TTL, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) -- cgit v1.2.3 From d89511251f6519599b109dc6cda87a6ab314ed8c Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 1 Jun 2015 15:51:35 -0400 Subject: geneve: allow user to specify TOS info for tunnel frames Signed-off-by: John W. Linville Signed-off-by: David S. Miller --- drivers/net/geneve.c | 18 ++++++++++++++---- include/uapi/linux/if_link.h | 1 + 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 1675dfdbfa70..78d49d186e05 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -46,6 +46,7 @@ struct geneve_dev { struct geneve_sock *sock; /* socket used for geneve tunnel */ u8 vni[3]; /* virtual network ID for tunnel */ u8 ttl; /* TTL override */ + u8 tos; /* TOS override */ struct sockaddr_in remote; /* IPv4 address for link partner */ struct list_head next; /* geneve's per namespace list */ }; @@ -194,7 +195,12 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) /* TODO: port min/max limits should be configurable */ sport = udp_flow_src_port(dev_net(dev), skb, 0, 0, true); + tos = geneve->tos; + if (tos == 1) + tos = ip_tunnel_get_dsfield(iip, skb); + memset(&fl4, 0, sizeof(fl4)); + fl4.flowi4_tos = RT_TOS(tos); fl4.daddr = geneve->remote.sin_addr.s_addr; rt = ip_route_output_key(geneve->net, &fl4); if (IS_ERR(rt)) { @@ -208,9 +214,7 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) goto rt_tx_error; } - /* TODO: tos should be configurable */ - - tos = ip_tunnel_ecn_encap(0, iip, skb); + tos = ip_tunnel_ecn_encap(tos, iip, skb); ttl = geneve->ttl; if (!ttl && IN_MULTICAST(ntohl(fl4.daddr))) @@ -300,6 +304,7 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { [IFLA_GENEVE_ID] = { .type = NLA_U32 }, [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, + [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, }; static int geneve_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -370,6 +375,9 @@ static int geneve_newlink(struct net *net, struct net_device *dev, if (data[IFLA_GENEVE_TTL]) geneve->ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); + if (data[IFLA_GENEVE_TOS]) + geneve->tos = nla_get_u8(data[IFLA_GENEVE_TOS]); + list_add(&geneve->next, &gn->geneve_list); hlist_add_head_rcu(&geneve->hlist, &gn->vni_list[hash]); @@ -393,6 +401,7 @@ static size_t geneve_get_size(const struct net_device *dev) return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ nla_total_size(sizeof(struct in_addr)) + /* IFLA_GENEVE_REMOTE */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ + nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */ 0; } @@ -409,7 +418,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) geneve->remote.sin_addr.s_addr)) goto nla_put_failure; - if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl)) + if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl) || + nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos)) goto nla_put_failure; return 0; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a834f31db915..1737b7a8272b 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -396,6 +396,7 @@ enum { IFLA_GENEVE_ID, IFLA_GENEVE_REMOTE, IFLA_GENEVE_TTL, + IFLA_GENEVE_TOS, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) -- cgit v1.2.3 From db388a567ff9600debc2433c1fddf79a8fc38b21 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 1 Jun 2015 15:36:51 +0200 Subject: mac80211: move TX PN to public part of key struct For drivers supporting TSO or similar features, but that still have PN assignment in software, there's a need to have some memory to store the current PN value. As mac80211 already stores this and it's somewhat complicated to add a per-driver area to the key struct (due to the dynamic sizing thereof) it makes sense to just move the TX PN to the keyconf, i.e. the public part of the key struct. As TKIP is more complicated and we won't able to offload it in this way right now (fast-xmit is skipped for TKIP unless the HW does it all, and our hardware needs MMIC calculation in software) I've not moved that for now - it's possible but requires exposing a lot of the internal TKIP state. As an bonus side effect, we can remove a lot of code by assuming the keyseq struct has a certain layout - with BUILD_BUG_ON to verify it. This might also improve performance, since now TX and RX no longer share a cacheline. Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 +++ net/mac80211/cfg.c | 59 ++++++---------------------------- net/mac80211/debugfs_key.c | 17 +--------- net/mac80211/key.c | 80 +++++++++++++--------------------------------- net/mac80211/key.h | 4 --- net/mac80211/tx.c | 10 +----- net/mac80211/wpa.c | 10 +++--- 7 files changed, 42 insertions(+), 142 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 887fe95b9805..39e864b35083 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1479,6 +1479,9 @@ enum ieee80211_key_flags { * wants to be given when a frame is transmitted and needs to be * encrypted in hardware. * @cipher: The key's cipher suite selector. + * @tx_pn: PN used for TX on non-TKIP keys, may be used by the driver + * as well if it needs to do software PN assignment by itself + * (e.g. due to TSO) * @flags: key flags, see &enum ieee80211_key_flags. * @keyidx: the key index (0-3) * @keylen: key material length @@ -1491,6 +1494,7 @@ enum ieee80211_key_flags { * @iv_len: The IV length for this key type */ struct ieee80211_key_conf { + atomic64_t tx_pn; u32 cipher; u8 icv_len; u8 iv_len; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index bb9f83640b46..02f48c848ef5 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -361,66 +361,25 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: - if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && - !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { - drv_get_key_seq(sdata->local, key, &kseq); - memcpy(seq, kseq.ccmp.pn, 6); - } else { - pn64 = atomic64_read(&key->u.ccmp.tx_pn); - seq[0] = pn64; - seq[1] = pn64 >> 8; - seq[2] = pn64 >> 16; - seq[3] = pn64 >> 24; - seq[4] = pn64 >> 32; - seq[5] = pn64 >> 40; - } - params.seq = seq; - params.seq_len = 6; - break; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: - if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && - !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { - drv_get_key_seq(sdata->local, key, &kseq); - memcpy(seq, kseq.aes_cmac.pn, 6); - } else { - pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); - seq[0] = pn64; - seq[1] = pn64 >> 8; - seq[2] = pn64 >> 16; - seq[3] = pn64 >> 24; - seq[4] = pn64 >> 32; - seq[5] = pn64 >> 40; - } - params.seq = seq; - params.seq_len = 6; - break; + BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != + offsetof(typeof(kseq), aes_cmac)); case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: - if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && - !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { - drv_get_key_seq(sdata->local, key, &kseq); - memcpy(seq, kseq.aes_gmac.pn, 6); - } else { - pn64 = atomic64_read(&key->u.aes_gmac.tx_pn); - seq[0] = pn64; - seq[1] = pn64 >> 8; - seq[2] = pn64 >> 16; - seq[3] = pn64 >> 24; - seq[4] = pn64 >> 32; - seq[5] = pn64 >> 40; - } - params.seq = seq; - params.seq_len = 6; - break; + BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != + offsetof(typeof(kseq), aes_gmac)); case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: + BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != + offsetof(typeof(kseq), gcmp)); + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { drv_get_key_seq(sdata->local, key, &kseq); - memcpy(seq, kseq.gcmp.pn, 6); + memcpy(seq, kseq.ccmp.pn, 6); } else { - pn64 = atomic64_read(&key->u.gcmp.tx_pn); + pn64 = atomic64_read(&key->conf.tx_pn); seq[0] = pn64; seq[1] = pn64 >> 8; seq[2] = pn64 >> 16; diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 71ac1b5f4da5..e82bf1e9d7a8 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -95,28 +95,13 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: - pn = atomic64_read(&key->u.ccmp.tx_pn); - len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", - (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), - (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); - break; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: - pn = atomic64_read(&key->u.aes_cmac.tx_pn); - len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", - (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), - (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); - break; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: - pn = atomic64_read(&key->u.aes_gmac.tx_pn); - len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", - (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), - (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); - break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: - pn = atomic64_read(&key->u.gcmp.tx_pn); + pn = atomic64_read(&key->conf.tx_pn); len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 2e677376c958..9a4a4bfafdc2 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -832,27 +832,19 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf, break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: - pn64 = atomic64_read(&key->u.ccmp.tx_pn); - seq->ccmp.pn[5] = pn64; - seq->ccmp.pn[4] = pn64 >> 8; - seq->ccmp.pn[3] = pn64 >> 16; - seq->ccmp.pn[2] = pn64 >> 24; - seq->ccmp.pn[1] = pn64 >> 32; - seq->ccmp.pn[0] = pn64 >> 40; - break; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: - pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); - seq->ccmp.pn[5] = pn64; - seq->ccmp.pn[4] = pn64 >> 8; - seq->ccmp.pn[3] = pn64 >> 16; - seq->ccmp.pn[2] = pn64 >> 24; - seq->ccmp.pn[1] = pn64 >> 32; - seq->ccmp.pn[0] = pn64 >> 40; - break; + BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) != + offsetof(typeof(*seq), aes_cmac)); case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: - pn64 = atomic64_read(&key->u.aes_gmac.tx_pn); + BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) != + offsetof(typeof(*seq), aes_gmac)); + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) != + offsetof(typeof(*seq), gcmp)); + pn64 = atomic64_read(&key->conf.tx_pn); seq->ccmp.pn[5] = pn64; seq->ccmp.pn[4] = pn64 >> 8; seq->ccmp.pn[3] = pn64 >> 16; @@ -860,16 +852,6 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf, seq->ccmp.pn[1] = pn64 >> 32; seq->ccmp.pn[0] = pn64 >> 40; break; - case WLAN_CIPHER_SUITE_GCMP: - case WLAN_CIPHER_SUITE_GCMP_256: - pn64 = atomic64_read(&key->u.gcmp.tx_pn); - seq->gcmp.pn[5] = pn64; - seq->gcmp.pn[4] = pn64 >> 8; - seq->gcmp.pn[3] = pn64 >> 16; - seq->gcmp.pn[2] = pn64 >> 24; - seq->gcmp.pn[1] = pn64 >> 32; - seq->gcmp.pn[0] = pn64 >> 40; - break; default: WARN_ON(1); } @@ -944,43 +926,25 @@ void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf, break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: - pn64 = (u64)seq->ccmp.pn[5] | - ((u64)seq->ccmp.pn[4] << 8) | - ((u64)seq->ccmp.pn[3] << 16) | - ((u64)seq->ccmp.pn[2] << 24) | - ((u64)seq->ccmp.pn[1] << 32) | - ((u64)seq->ccmp.pn[0] << 40); - atomic64_set(&key->u.ccmp.tx_pn, pn64); - break; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: - pn64 = (u64)seq->aes_cmac.pn[5] | - ((u64)seq->aes_cmac.pn[4] << 8) | - ((u64)seq->aes_cmac.pn[3] << 16) | - ((u64)seq->aes_cmac.pn[2] << 24) | - ((u64)seq->aes_cmac.pn[1] << 32) | - ((u64)seq->aes_cmac.pn[0] << 40); - atomic64_set(&key->u.aes_cmac.tx_pn, pn64); - break; + BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) != + offsetof(typeof(*seq), aes_cmac)); case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: - pn64 = (u64)seq->aes_gmac.pn[5] | - ((u64)seq->aes_gmac.pn[4] << 8) | - ((u64)seq->aes_gmac.pn[3] << 16) | - ((u64)seq->aes_gmac.pn[2] << 24) | - ((u64)seq->aes_gmac.pn[1] << 32) | - ((u64)seq->aes_gmac.pn[0] << 40); - atomic64_set(&key->u.aes_gmac.tx_pn, pn64); - break; + BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) != + offsetof(typeof(*seq), aes_gmac)); case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: - pn64 = (u64)seq->gcmp.pn[5] | - ((u64)seq->gcmp.pn[4] << 8) | - ((u64)seq->gcmp.pn[3] << 16) | - ((u64)seq->gcmp.pn[2] << 24) | - ((u64)seq->gcmp.pn[1] << 32) | - ((u64)seq->gcmp.pn[0] << 40); - atomic64_set(&key->u.gcmp.tx_pn, pn64); + BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) != + offsetof(typeof(*seq), gcmp)); + pn64 = (u64)seq->ccmp.pn[5] | + ((u64)seq->ccmp.pn[4] << 8) | + ((u64)seq->ccmp.pn[3] << 16) | + ((u64)seq->ccmp.pn[2] << 24) | + ((u64)seq->ccmp.pn[1] << 32) | + ((u64)seq->ccmp.pn[0] << 40); + atomic64_set(&key->conf.tx_pn, pn64); break; default: WARN_ON(1); diff --git a/net/mac80211/key.h b/net/mac80211/key.h index df430a618764..ac747e442139 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -77,7 +77,6 @@ struct ieee80211_key { u32 mic_failures; } tkip; struct { - atomic64_t tx_pn; /* * Last received packet number. The first * IEEE80211_NUM_TIDS counters are used with Data @@ -89,21 +88,18 @@ struct ieee80211_key { u32 replays; /* dot11RSNAStatsCCMPReplays */ } ccmp; struct { - atomic64_t tx_pn; u8 rx_pn[IEEE80211_CMAC_PN_LEN]; struct crypto_cipher *tfm; u32 replays; /* dot11RSNAStatsCMACReplays */ u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ } aes_cmac; struct { - atomic64_t tx_pn; u8 rx_pn[IEEE80211_GMAC_PN_LEN]; struct crypto_aead *tfm; u32 replays; /* dot11RSNAStatsCMACReplays */ u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ } aes_gmac; struct { - atomic64_t tx_pn; /* Last received packet number. The first * IEEE80211_NUM_TIDS counters are used with Data * frames and the last counter is used with Robust diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8df134213adf..dbef1b881b3d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2813,17 +2813,9 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, switch (fast_tx->key->conf.cipher) { case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: - pn = atomic64_inc_return(&fast_tx->key->u.ccmp.tx_pn); - crypto_hdr[0] = pn; - crypto_hdr[1] = pn >> 8; - crypto_hdr[4] = pn >> 16; - crypto_hdr[5] = pn >> 24; - crypto_hdr[6] = pn >> 32; - crypto_hdr[7] = pn >> 40; - break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: - pn = atomic64_inc_return(&fast_tx->key->u.gcmp.tx_pn); + pn = atomic64_inc_return(&fast_tx->key->conf.tx_pn); crypto_hdr[0] = pn; crypto_hdr[1] = pn >> 8; crypto_hdr[4] = pn >> 16; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 9d63d93c836e..943f7606527e 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -444,7 +444,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb, hdr = (struct ieee80211_hdr *) pos; pos += hdrlen; - pn64 = atomic64_inc_return(&key->u.ccmp.tx_pn); + pn64 = atomic64_inc_return(&key->conf.tx_pn); pn[5] = pn64; pn[4] = pn64 >> 8; @@ -670,7 +670,7 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) hdr = (struct ieee80211_hdr *)pos; pos += hdrlen; - pn64 = atomic64_inc_return(&key->u.gcmp.tx_pn); + pn64 = atomic64_inc_return(&key->conf.tx_pn); pn[5] = pn64; pn[4] = pn64 >> 8; @@ -940,7 +940,7 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx) mmie->key_id = cpu_to_le16(key->conf.keyidx); /* PN = PN + 1 */ - pn64 = atomic64_inc_return(&key->u.aes_cmac.tx_pn); + pn64 = atomic64_inc_return(&key->conf.tx_pn); bip_ipn_set64(mmie->sequence_number, pn64); @@ -984,7 +984,7 @@ ieee80211_crypto_aes_cmac_256_encrypt(struct ieee80211_tx_data *tx) mmie->key_id = cpu_to_le16(key->conf.keyidx); /* PN = PN + 1 */ - pn64 = atomic64_inc_return(&key->u.aes_cmac.tx_pn); + pn64 = atomic64_inc_return(&key->conf.tx_pn); bip_ipn_set64(mmie->sequence_number, pn64); @@ -1129,7 +1129,7 @@ ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx) mmie->key_id = cpu_to_le16(key->conf.keyidx); /* PN = PN + 1 */ - pn64 = atomic64_inc_return(&key->u.aes_gmac.tx_pn); + pn64 = atomic64_inc_return(&key->conf.tx_pn); bip_ipn_set64(mmie->sequence_number, pn64); -- cgit v1.2.3 From 3b79af973cf42de059d0e90e20fd145d7ed8c5c1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 1 Jun 2015 23:14:59 +0200 Subject: mac80211: stop using pointers as userspace cookies Even if the pointers are really only accessible to root and used pretty much only by wpa_supplicant, this is still not great; even for debugging it'd be easier to have something that's easier to read and guaranteed to never get reused. With the recent change to make mac80211 create an ack_skb for the mgmt-tx path this becomes possible, only the client probe method needs to also allocate an ack_skb, and we can store the cookie in that skb. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 ++ net/mac80211/cfg.c | 115 +++++++++++++++++++++++++++++++------------------ net/mac80211/status.c | 27 ++++++------ 3 files changed, 90 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 39e864b35083..7466c55bfc0b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -874,6 +874,9 @@ struct ieee80211_tx_info { u32 flags; /* 4 bytes free */ } control; + struct { + u64 cookie; + } ack; struct { struct ieee80211_tx_rate rates[IEEE80211_TX_MAX_RATES]; s32 ack_signal; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 5ba528f13300..1a17d3208d8f 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2546,6 +2546,19 @@ static bool ieee80211_coalesce_started_roc(struct ieee80211_local *local, return true; } +static u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local) +{ + lockdep_assert_held(&local->mtx); + + local->roc_cookie_counter++; + + /* wow, you wrapped 64 bits ... more likely a bug */ + if (WARN_ON(local->roc_cookie_counter == 0)) + local->roc_cookie_counter++; + + return local->roc_cookie_counter; +} + static int ieee80211_start_roc_work(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel, @@ -2583,7 +2596,6 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, roc->req_duration = duration; roc->frame = txskb; roc->type = type; - roc->mgmt_tx_cookie = (unsigned long)txskb; roc->sdata = sdata; INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); INIT_LIST_HEAD(&roc->dependents); @@ -2593,17 +2605,10 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, * or the SKB (for mgmt TX) */ if (!txskb) { - /* local->mtx protects this */ - local->roc_cookie_counter++; - roc->cookie = local->roc_cookie_counter; - /* wow, you wrapped 64 bits ... more likely a bug */ - if (WARN_ON(roc->cookie == 0)) { - roc->cookie = 1; - local->roc_cookie_counter++; - } + roc->cookie = ieee80211_mgmt_tx_cookie(local); *cookie = roc->cookie; } else { - *cookie = (unsigned long)txskb; + roc->mgmt_tx_cookie = *cookie; } /* if there's one pending or we're scanning, queue this one */ @@ -3284,6 +3289,36 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, return err; } +static struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, + struct sk_buff *skb, u64 *cookie, + gfp_t gfp) +{ + unsigned long spin_flags; + struct sk_buff *ack_skb; + int id; + + ack_skb = skb_copy(skb, gfp); + if (!ack_skb) + return ERR_PTR(-ENOMEM); + + spin_lock_irqsave(&local->ack_status_lock, spin_flags); + id = idr_alloc(&local->ack_status_frames, ack_skb, + 1, 0x10000, GFP_ATOMIC); + spin_unlock_irqrestore(&local->ack_status_lock, spin_flags); + + if (id < 0) { + kfree_skb(ack_skb); + return ERR_PTR(-ENOMEM); + } + + IEEE80211_SKB_CB(skb)->ack_frame_id = id; + + *cookie = ieee80211_mgmt_tx_cookie(local); + IEEE80211_SKB_CB(ack_skb)->ack.cookie = *cookie; + + return ack_skb; +} + static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_mgmt_tx_params *params, u64 *cookie) @@ -3429,40 +3464,22 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, skb->dev = sdata->dev; if (!params->dont_wait_for_ack) { - unsigned long spin_flags; - int id; - - /* make a copy to preserve the original cookie (in case the - * driver decides to reallocate the skb) and the frame contents + /* make a copy to preserve the frame contents * in case of encryption. */ - ack_skb = skb_copy(skb, GFP_KERNEL); - if (!ack_skb) { - ret = -ENOMEM; - kfree_skb(skb); - goto out_unlock; - } - - spin_lock_irqsave(&local->ack_status_lock, spin_flags); - id = idr_alloc(&local->ack_status_frames, ack_skb, - 1, 0x10000, GFP_ATOMIC); - spin_unlock_irqrestore(&local->ack_status_lock, spin_flags); - - if (id < 0) { - ret = -ENOMEM; - kfree_skb(ack_skb); + ack_skb = ieee80211_make_ack_skb(local, skb, cookie, + GFP_KERNEL); + if (IS_ERR(ack_skb)) { + ret = PTR_ERR(ack_skb); kfree_skb(skb); goto out_unlock; } - - IEEE80211_SKB_CB(skb)->ack_frame_id = id; } else { /* for cookie below */ ack_skb = skb; } if (!need_offchan) { - *cookie = (unsigned long)ack_skb; ieee80211_tx_skb(sdata, skb); ret = 0; goto out_unlock; @@ -3555,7 +3572,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_qos_hdr *nullfunc; - struct sk_buff *skb; + struct sk_buff *skb, *ack_skb; int size = sizeof(*nullfunc); __le16 fc; bool qos; @@ -3563,20 +3580,24 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; enum ieee80211_band band; + int ret; + + /* the lock is needed to assign the cookie later */ + mutex_lock(&local->mtx); rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (WARN_ON(!chanctx_conf)) { - rcu_read_unlock(); - return -EINVAL; + ret = -EINVAL; + goto unlock; } band = chanctx_conf->def.chan->band; sta = sta_info_get_bss(sdata, peer); if (sta) { qos = sta->sta.wme; } else { - rcu_read_unlock(); - return -ENOLINK; + ret = -ENOLINK; + goto unlock; } if (qos) { @@ -3592,8 +3613,8 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, skb = dev_alloc_skb(local->hw.extra_tx_headroom + size); if (!skb) { - rcu_read_unlock(); - return -ENOMEM; + ret = -ENOMEM; + goto unlock; } skb->dev = dev; @@ -3619,13 +3640,23 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, if (qos) nullfunc->qos_ctrl = cpu_to_le16(7); + ack_skb = ieee80211_make_ack_skb(local, skb, cookie, GFP_ATOMIC); + if (IS_ERR(ack_skb)) { + kfree_skb(skb); + ret = PTR_ERR(ack_skb); + goto unlock; + } + local_bh_disable(); ieee80211_xmit(sdata, sta, skb); local_bh_enable(); + + ret = 0; +unlock: rcu_read_unlock(); + mutex_unlock(&local->mtx); - *cookie = (unsigned long) skb; - return 0; + return ret; } static int ieee80211_cfg_get_channel(struct wiphy *wiphy, diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 56b73e012757..67c428735a51 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -471,15 +471,23 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, } if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { + u64 cookie = IEEE80211_SKB_CB(skb)->ack.cookie; struct ieee80211_sub_if_data *sdata; + struct ieee80211_hdr *hdr = (void *)skb->data; rcu_read_lock(); sdata = ieee80211_sdata_from_skb(local, skb); - if (sdata) - cfg80211_mgmt_tx_status(&sdata->wdev, - (unsigned long)skb, - skb->data, skb->len, - acked, GFP_ATOMIC); + if (sdata) { + if (ieee80211_is_nullfunc(hdr->frame_control) || + ieee80211_is_qos_nullfunc(hdr->frame_control)) + cfg80211_probe_status(sdata->dev, hdr->addr1, + cookie, acked, + GFP_ATOMIC); + else + cfg80211_mgmt_tx_status(&sdata->wdev, cookie, + skb->data, skb->len, + acked, GFP_ATOMIC); + } rcu_read_unlock(); dev_kfree_skb_any(skb); @@ -499,11 +507,8 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, if (dropped) acked = false; - if (info->flags & (IEEE80211_TX_INTFL_NL80211_FRAME_TX | - IEEE80211_TX_INTFL_MLME_CONN_TX) && - !info->ack_frame_id) { + if (info->flags & IEEE80211_TX_INTFL_MLME_CONN_TX) { struct ieee80211_sub_if_data *sdata; - u64 cookie = (unsigned long)skb; rcu_read_lock(); @@ -525,10 +530,6 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, ieee80211_mgd_conn_tx_status(sdata, hdr->frame_control, acked); - } else if (ieee80211_is_nullfunc(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control)) { - cfg80211_probe_status(sdata->dev, hdr->addr1, - cookie, acked, GFP_ATOMIC); } else { /* we assign ack frame ID for the others */ WARN_ON(1); -- cgit v1.2.3 From ea1b2b45f513c6f9ee49b465b1a9281feb783532 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jun 2015 20:15:49 +0200 Subject: mac80211: remove short slot/short preamble incapable flags There are no drivers setting IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE or IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE, so any code using the two flags is dead; it's also exceedingly unlikely that any new driver could ever need to set these flags. The wcn36xx code is almost certainly broken, but this preserves the previous behaviour. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/wcn36xx/smd.c | 4 +--- include/net/mac80211.h | 17 ++--------------- net/mac80211/debugfs.c | 7 ------- net/mac80211/mesh_plink.c | 4 +--- net/mac80211/mlme.c | 6 ++---- net/mac80211/tdls.c | 17 +++++------------ 6 files changed, 11 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c index dbd894428be6..c9263e1c75d4 100644 --- a/drivers/net/wireless/ath/wcn36xx/smd.c +++ b/drivers/net/wireless/ath/wcn36xx/smd.c @@ -216,9 +216,7 @@ static void wcn36xx_smd_set_sta_params(struct wcn36xx *wcn, memcpy(&sta_params->bssid, vif->addr, ETH_ALEN); sta_params->encrypt_type = priv_vif->encrypt_type; - sta_params->short_preamble_supported = - !(WCN36XX_FLAGS(wcn) & - IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE); + sta_params->short_preamble_supported = true; sta_params->rifs_mode = 0; sta_params->rmf = 0; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7466c55bfc0b..68a3cc9796d2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -446,12 +446,8 @@ struct ieee80211_event { * @ibss_creator: indicates if a new IBSS network is being created * @aid: association ID number, valid only when @assoc is true * @use_cts_prot: use CTS protection - * @use_short_preamble: use 802.11b short preamble; - * if the hardware cannot handle this it must set the - * IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE hardware flag - * @use_short_slot: use short slot time (only relevant for ERP); - * if the hardware cannot handle this it must set the - * IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE hardware flag + * @use_short_preamble: use 802.11b short preamble + * @use_short_slot: use short slot time (only relevant for ERP) * @dtim_period: num of beacons before the next DTIM, for beaconing, * valid in station mode only if after the driver was notified * with the %BSS_CHANGED_BEACON_INFO flag, will be non-zero then. @@ -1784,13 +1780,6 @@ struct ieee80211_txq { * multicast frames when there are power saving stations so that * the driver can fetch them with ieee80211_get_buffered_bc(). * - * @IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE: - * Hardware is not capable of short slot operation on the 2.4 GHz band. - * - * @IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE: - * Hardware is not capable of receiving frames with short preamble on - * the 2.4 GHz band. - * * @IEEE80211_HW_SIGNAL_UNSPEC: * Hardware can provide signal values but we don't know its units. We * expect values between 0 and @max_signal. @@ -1903,8 +1892,6 @@ enum ieee80211_hw_flags { IEEE80211_HW_HAS_RATE_CONTROL = 1<<0, IEEE80211_HW_RX_INCLUDES_FCS = 1<<1, IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING = 1<<2, - IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE = 1<<3, - IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE = 1<<4, IEEE80211_HW_SIGNAL_UNSPEC = 1<<5, IEEE80211_HW_SIGNAL_DBM = 1<<6, IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC = 1<<7, diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index b17206db49b4..1b94d2704c27 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -1,4 +1,3 @@ - /* * mac80211 debugfs for wireless PHYs * @@ -112,12 +111,6 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, if (local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING) sf += scnprintf(buf + sf, mxln - sf, "HOST_BCAST_PS_BUFFERING\n"); - if (local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE) - sf += scnprintf(buf + sf, mxln - sf, - "2GHZ_SHORT_SLOT_INCAPABLE\n"); - if (local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE) - sf += scnprintf(buf + sf, mxln - sf, - "2GHZ_SHORT_PREAMBLE_INCAPABLE\n"); if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) sf += scnprintf(buf + sf, mxln - sf, "SIGNAL_UNSPEC\n"); if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index ac843fc88745..72a127e8a1b6 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -106,9 +106,7 @@ static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata) /* (IEEE 802.11-2012 19.4.5) */ short_slot = true; goto out; - } else if (band != IEEE80211_BAND_2GHZ || - (band == IEEE80211_BAND_2GHZ && - local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) + } else if (band != IEEE80211_BAND_2GHZ) goto out; for (i = 0; i < sband->n_bitrates; i++) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 853cfa71b96a..94274ef569fa 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -669,10 +669,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) capab = WLAN_CAPABILITY_ESS; if (sband->band == IEEE80211_BAND_2GHZ) { - if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) - capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME; - if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE)) - capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; + capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME; + capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; } if (assoc_data->capability & WLAN_CAPABILITY_PRIVACY) diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 75e8e3bba538..28298cc50326 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -167,23 +167,16 @@ static void ieee80211_tdls_add_bss_coex_ie(struct sk_buff *skb) static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata, u16 status_code) { - struct ieee80211_local *local = sdata->local; - u16 capab; - /* The capability will be 0 when sending a failure code */ if (status_code != 0) return 0; - capab = 0; - if (ieee80211_get_sdata_band(sdata) != IEEE80211_BAND_2GHZ) - return capab; - - if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) - capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME; - if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE)) - capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; + if (ieee80211_get_sdata_band(sdata) == IEEE80211_BAND_2GHZ) { + return WLAN_CAPABILITY_SHORT_SLOT_TIME | + WLAN_CAPABILITY_SHORT_PREAMBLE; + } - return capab; + return 0; } static void ieee80211_tdls_add_link_ie(struct ieee80211_sub_if_data *sdata, -- cgit v1.2.3 From c526a467671960922b5cb5fc385a1813602526bc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jun 2015 20:32:00 +0200 Subject: mac80211: rename single hw-scan flag to follow naming convention The naming convention is to always have the flags prefixed with IEEE80211_HW_ so they're 'namespaced', make this flag follow it. Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 2 +- include/net/mac80211.h | 4 ++-- net/mac80211/scan.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 84555170b6f7..6b45b96ac238 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -450,7 +450,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) !iwlwifi_mod_params.sw_crypto) hw->flags |= IEEE80211_HW_MFP_CAPABLE; - hw->flags |= IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS; + hw->flags |= IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS; hw->wiphy->features |= NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR | NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 68a3cc9796d2..e09a32cb139f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1885,7 +1885,7 @@ struct ieee80211_txq { * @IEEE80211_HW_SUPPORTS_CLONED_SKBS: The driver will never modify the payload * or tailroom of TX skbs without copying them first. * - * @IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS: The HW supports scanning on all bands + * @IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS: The HW supports scanning on all bands * in one command, mac80211 doesn't have to run separate scans per band. */ enum ieee80211_hw_flags { @@ -1917,7 +1917,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_HT_CCK_RATES = 1<<27, IEEE80211_HW_CHANCTX_STA_CSA = 1<<28, IEEE80211_HW_SUPPORTS_CLONED_SKBS = 1<<29, - IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS = 1<<30, + IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS = 1<<30, }; /** diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 7bb6a9383f58..3eb121d2aa45 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -257,7 +257,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) if (test_bit(SCAN_HW_CANCELLED, &local->scanning)) return false; - if (local->hw.flags & IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS) { + if (local->hw.flags & IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS) { for (i = 0; i < req->n_channels; i++) { local->hw_scan_req->req.channels[i] = req->channels[i]; bands_used |= BIT(req->channels[i]->band); @@ -326,7 +326,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) return; if (hw_scan && !aborted && - !(local->hw.flags & IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS) && + !(local->hw.flags & IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS) && ieee80211_prep_hw_scan(local)) { int rc; @@ -520,7 +520,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, local->hw_scan_ies_bufsize = local->scan_ies_len + req->ie_len; - if (local->hw.flags & IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS) { + if (local->hw.flags & IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS) { int i, n_bands = 0; u8 bands_counted = 0; -- cgit v1.2.3 From 2a10154abcb75ad0d7b6bfea6210ac743ec60897 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Tue, 2 Jun 2015 09:34:37 -0500 Subject: net: phy: dp83867: Add TI dp83867 phy Add support for the TI dp83867 Gigabit ethernet phy device. The DP83867 is a robust, low power, fully featured Physical Layer transceiver with integrated PMD sublayers to support 10BASE-T, 100BASE-TX and 1000BASE-T Ethernet protocols. Signed-off-by: Dan Murphy Signed-off-by: David S. Miller --- .../devicetree/bindings/net/ti,dp83867.txt | 19 ++ drivers/net/phy/Kconfig | 6 +- drivers/net/phy/Makefile | 1 + drivers/net/phy/dp83867.c | 239 +++++++++++++++++++++ include/dt-bindings/net/ti-dp83867.h | 45 ++++ 5 files changed, 309 insertions(+), 1 deletion(-) create mode 100644 Documentation/devicetree/bindings/net/ti,dp83867.txt create mode 100644 drivers/net/phy/dp83867.c create mode 100644 include/dt-bindings/net/ti-dp83867.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.txt b/Documentation/devicetree/bindings/net/ti,dp83867.txt new file mode 100644 index 000000000000..46bb67a222ea --- /dev/null +++ b/Documentation/devicetree/bindings/net/ti,dp83867.txt @@ -0,0 +1,19 @@ +* Texas Instruments - dp83867 Giga bit ethernet phy + +Required properties: + - reg - The ID number for the phy, usually a small integer + - ti,rx_int_delay - RGMII Recieve Clock Delay - see dt-bindings/net/ti-dp83867.h + for applicable values + - ti,tx_int_delay - RGMII Transmit Clock Delay - see dt-bindings/net/ti-dp83867.h + for applicable values + - ti,fifo_depth - Transmitt FIFO depth- see dt-bindings/net/ti-dp83867.h + for applicable values + +Example: + + ethernet-phy@0 { + reg = <0>; + ti,rx_int_delay = ; + ti,tx_int_delay = ; + ti,fifo_depth = ; + }; diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 7c0cb87d1f2f..cf18940f4e84 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -112,6 +112,11 @@ config MICREL_PHY ---help--- Supports the KSZ9021, VSC8201, KS8001 PHYs. +config DP83867_PHY + tristate "Drivers for Texas Instruments DP83867 Gigabit PHY" + ---help--- + Currently supports the DP83867 PHY. + config FIXED_PHY tristate "Driver for MDIO Bus/PHY emulation with fixed speed/link PHYs" depends on PHYLIB @@ -205,7 +210,6 @@ config MDIO_BCM_UNIMAC This hardware can be found in the Broadcom GENET Ethernet MAC controllers as well as some Broadcom Ethernet switches such as the Starfighter 2 switches. - endif # PHYLIB config MICREL_KS8995MA diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index e97e7f921862..fcc25a0c45cd 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_MDIO_BITBANG) += mdio-bitbang.o obj-$(CONFIG_MDIO_GPIO) += mdio-gpio.o obj-$(CONFIG_NATIONAL_PHY) += national.o obj-$(CONFIG_DP83640_PHY) += dp83640.o +obj-$(CONFIG_DP83867_PHY) += dp83867.o obj-$(CONFIG_STE10XP) += ste10Xp.o obj-$(CONFIG_MICREL_PHY) += micrel.o obj-$(CONFIG_MDIO_OCTEON) += mdio-octeon.o diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c new file mode 100644 index 000000000000..ef0b4eb15f8d --- /dev/null +++ b/drivers/net/phy/dp83867.c @@ -0,0 +1,239 @@ +/* + * Driver for the Texas Instruments DP83867 PHY + * + * Copyright (C) 2015 Texas Instruments Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#define DP83867_PHY_ID 0x2000a231 +#define DP83867_DEVADDR 0x1f + +#define MII_DP83867_PHYCTRL 0x10 +#define MII_DP83867_MICR 0x12 +#define MII_DP83867_ISR 0x13 +#define DP83867_CTRL 0x1f + +/* Extended Registers */ +#define DP83867_RGMIICTL 0x0032 +#define DP83867_RGMIIDCTL 0x0086 + +#define DP83867_SW_RESET BIT(15) +#define DP83867_SW_RESTART BIT(14) + +/* MICR Interrupt bits */ +#define MII_DP83867_MICR_AN_ERR_INT_EN BIT(15) +#define MII_DP83867_MICR_SPEED_CHNG_INT_EN BIT(14) +#define MII_DP83867_MICR_DUP_MODE_CHNG_INT_EN BIT(13) +#define MII_DP83867_MICR_PAGE_RXD_INT_EN BIT(12) +#define MII_DP83867_MICR_AUTONEG_COMP_INT_EN BIT(11) +#define MII_DP83867_MICR_LINK_STS_CHNG_INT_EN BIT(10) +#define MII_DP83867_MICR_FALSE_CARRIER_INT_EN BIT(8) +#define MII_DP83867_MICR_SLEEP_MODE_CHNG_INT_EN BIT(4) +#define MII_DP83867_MICR_WOL_INT_EN BIT(3) +#define MII_DP83867_MICR_XGMII_ERR_INT_EN BIT(2) +#define MII_DP83867_MICR_POL_CHNG_INT_EN BIT(1) +#define MII_DP83867_MICR_JABBER_INT_EN BIT(0) + +/* RGMIICTL bits */ +#define DP83867_RGMII_TX_CLK_DELAY_EN BIT(1) +#define DP83867_RGMII_RX_CLK_DELAY_EN BIT(0) + +/* PHY CTRL bits */ +#define DP83867_PHYCR_FIFO_DEPTH_SHIFT 14 + +/* RGMIIDCTL bits */ +#define DP83867_RGMII_TX_CLK_DELAY_SHIFT 4 + +struct dp83867_private { + int rx_id_delay; + int tx_id_delay; + int fifo_depth; +}; + +static int dp83867_ack_interrupt(struct phy_device *phydev) +{ + int err = phy_read(phydev, MII_DP83867_ISR); + + if (err < 0) + return err; + + return 0; +} + +static int dp83867_config_intr(struct phy_device *phydev) +{ + int micr_status; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + micr_status = phy_read(phydev, MII_DP83867_MICR); + if (micr_status < 0) + return micr_status; + + micr_status |= + (MII_DP83867_MICR_AN_ERR_INT_EN | + MII_DP83867_MICR_SPEED_CHNG_INT_EN | + MII_DP83867_MICR_DUP_MODE_CHNG_INT_EN | + MII_DP83867_MICR_SLEEP_MODE_CHNG_INT_EN); + + return phy_write(phydev, MII_DP83867_MICR, micr_status); + } + + micr_status = 0x0; + return phy_write(phydev, MII_DP83867_MICR, micr_status); +} + +#ifdef CONFIG_OF_MDIO +static int dp83867_of_init(struct phy_device *phydev) +{ + struct dp83867_private *dp83867 = phydev->priv; + struct device *dev = &phydev->dev; + struct device_node *of_node = dev->of_node; + int ret; + + if (!of_node && dev->parent->of_node) + of_node = dev->parent->of_node; + + if (!phydev->dev.of_node) + return -ENODEV; + + ret = of_property_read_u32(of_node, "ti,rx_int_delay", + &dp83867->rx_id_delay); + if (ret) + return ret; + + ret = of_property_read_u32(of_node, "ti,tx_int_delay", + &dp83867->tx_id_delay); + if (ret) + return ret; + + ret = of_property_read_u32(of_node, "ti,fifo_depth", + &dp83867->fifo_depth); + if (ret) + return ret; + + return 0; +} +#else +static int dp83867_of_init(struct phy_device *phydev) +{ + return 0; +} +#endif /* CONFIG_OF_MDIO */ + +static int dp83867_config_init(struct phy_device *phydev) +{ + struct dp83867_private *dp83867; + int ret; + u16 val, delay; + + if (!phydev->priv) { + dp83867 = devm_kzalloc(&phydev->dev, sizeof(*dp83867), + GFP_KERNEL); + if (!dp83867) + return -ENOMEM; + + phydev->priv = dp83867; + ret = dp83867_of_init(phydev); + if (ret) + return ret; + } else { + dp83867 = (struct dp83867_private *)phydev->priv; + } + + if (phy_interface_is_rgmii(phydev)) { + ret = phy_write(phydev, MII_DP83867_PHYCTRL, + (dp83867->fifo_depth << DP83867_PHYCR_FIFO_DEPTH_SHIFT)); + if (ret) + return ret; + } + + if ((phydev->interface >= PHY_INTERFACE_MODE_RGMII_ID) || + (phydev->interface <= PHY_INTERFACE_MODE_RGMII_RXID)) { + val = phy_read_mmd_indirect(phydev, DP83867_RGMIICTL, + DP83867_DEVADDR, phydev->addr); + + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) + val |= (DP83867_RGMII_TX_CLK_DELAY_EN | DP83867_RGMII_RX_CLK_DELAY_EN); + + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) + val |= DP83867_RGMII_TX_CLK_DELAY_EN; + + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) + val |= DP83867_RGMII_RX_CLK_DELAY_EN; + + phy_write_mmd_indirect(phydev, DP83867_RGMIICTL, + DP83867_DEVADDR, phydev->addr, val); + + delay = (dp83867->rx_id_delay | + (dp83867->tx_id_delay << DP83867_RGMII_TX_CLK_DELAY_SHIFT)); + + phy_write_mmd_indirect(phydev, DP83867_RGMIIDCTL, + DP83867_DEVADDR, phydev->addr, delay); + } + + return 0; +} + +static int dp83867_phy_reset(struct phy_device *phydev) +{ + int err; + + err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESET); + if (err < 0) + return err; + + return dp83867_config_init(phydev); +} + +static struct phy_driver dp83867_driver[] = { + { + .phy_id = DP83867_PHY_ID, + .phy_id_mask = 0xfffffff0, + .name = "TI DP83867", + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + + .config_init = dp83867_config_init, + .soft_reset = dp83867_phy_reset, + + /* IRQ related */ + .ack_interrupt = dp83867_ack_interrupt, + .config_intr = dp83867_config_intr, + + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .suspend = genphy_suspend, + .resume = genphy_resume, + + .driver = {.owner = THIS_MODULE,} + }, +}; +module_phy_driver(dp83867_driver); + +static struct mdio_device_id __maybe_unused dp83867_tbl[] = { + { DP83867_PHY_ID, 0xfffffff0 }, + { } +}; + +MODULE_DEVICE_TABLE(mdio, dp83867_tbl); + +MODULE_DESCRIPTION("Texas Instruments DP83867 PHY driver"); +MODULE_AUTHOR("Dan Murphy + * + * Copyright: (C) 2015 Texas Instruments, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _DT_BINDINGS_TI_DP83867_H +#define _DT_BINDINGS_TI_DP83867_H + +/* PHY CTRL bits */ +#define DP83867_PHYCR_FIFO_DEPTH_3_B_NIB 0x00 +#define DP83867_PHYCR_FIFO_DEPTH_4_B_NIB 0x01 +#define DP83867_PHYCR_FIFO_DEPTH_6_B_NIB 0x02 +#define DP83867_PHYCR_FIFO_DEPTH_8_B_NIB 0x03 + +/* RGMIIDCTL internal delay for rx and tx */ +#define DP83867_RGMIIDCTL_250_PS 0x0 +#define DP83867_RGMIIDCTL_500_PS 0x1 +#define DP83867_RGMIIDCTL_750_PS 0x2 +#define DP83867_RGMIIDCTL_1_NS 0x3 +#define DP83867_RGMIIDCTL_1_25_NS 0x4 +#define DP83867_RGMIIDCTL_1_50_NS 0x5 +#define DP83867_RGMIIDCTL_1_75_NS 0x6 +#define DP83867_RGMIIDCTL_2_00_NS 0x7 +#define DP83867_RGMIIDCTL_2_25_NS 0x8 +#define DP83867_RGMIIDCTL_2_50_NS 0x9 +#define DP83867_RGMIIDCTL_2_75_NS 0xa +#define DP83867_RGMIIDCTL_3_00_NS 0xb +#define DP83867_RGMIIDCTL_3_25_NS 0xc +#define DP83867_RGMIIDCTL_3_50_NS 0xd +#define DP83867_RGMIIDCTL_3_75_NS 0xe +#define DP83867_RGMIIDCTL_4_00_NS 0xf + +#endif -- cgit v1.2.3 From 3896d655f4d491c67d669a15f275a39f713410f8 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 2 Jun 2015 16:03:14 -0700 Subject: bpf: introduce bpf_clone_redirect() helper Allow eBPF programs attached to classifier/actions to call bpf_clone_redirect(skb, ifindex, flags) helper which will mirror or redirect the packet by dynamic ifindex selection from within the program to a target device either at ingress or at egress. Can be used for various scenarios, for example, to load balance skbs into veths, split parts of the traffic to local taps, etc. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 10 ++++++++++ net/core/filter.c | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 72f3080afa1e..42aa19abab86 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -220,6 +220,16 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_tail_call, + + /** + * bpf_clone_redirect(skb, ifindex, flags) - redirect to another netdev + * @skb: pointer to skb + * @ifindex: ifindex of the net device + * @flags: bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * Return: 0 on success + */ + BPF_FUNC_clone_redirect, __BPF_FUNC_MAX_ID, }; diff --git a/net/core/filter.c b/net/core/filter.c index b78a010a957f..64c121c09655 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -46,6 +46,7 @@ #include #include #include +#include /** * sk_filter - run a packet through a socket filter @@ -1407,6 +1408,43 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = { .arg5_type = ARG_ANYTHING, }; +#define BPF_IS_REDIRECT_INGRESS(flags) ((flags) & 1) + +static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2; + struct net_device *dev; + + dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex); + if (unlikely(!dev)) + return -EINVAL; + + if (unlikely(!(dev->flags & IFF_UP))) + return -EINVAL; + + skb2 = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!skb2)) + return -ENOMEM; + + if (G_TC_AT(skb2->tc_verd) & AT_INGRESS) + skb_push(skb2, skb2->mac_len); + + if (BPF_IS_REDIRECT_INGRESS(flags)) + return dev_forward_skb(dev, skb2); + + skb2->dev = dev; + return dev_queue_xmit(skb2); +} + +const struct bpf_func_proto bpf_clone_redirect_proto = { + .func = bpf_clone_redirect, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * sk_filter_func_proto(enum bpf_func_id func_id) { @@ -1440,6 +1478,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_l3_csum_replace_proto; case BPF_FUNC_l4_csum_replace: return &bpf_l4_csum_replace_proto; + case BPF_FUNC_clone_redirect: + return &bpf_clone_redirect_proto; default: return sk_filter_func_proto(func_id); } -- cgit v1.2.3 From 133be0264f28e59d772c6a259349ba3ee2b183b3 Mon Sep 17 00:00:00 2001 From: Varka Bhadram Date: Thu, 4 Jun 2015 13:07:36 +0530 Subject: nl802154: export supported commands This patch will export the supported commands by the devices to the userspace. This will be useful to check if HardMAC drivers can support a specific command or not. Signed-off-by: Varka Bhadram Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/nl802154.h | 2 ++ net/ieee802154/nl802154.c | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) (limited to 'include') diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 0badebd1de7f..6fc231e60920 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -102,6 +102,8 @@ enum nl802154_attrs { NL802154_ATTR_WPAN_PHY_CAPS, + NL802154_ATTR_SUPPORTED_COMMANDS, + /* add attributes here, update the policy in nl802154.c */ __NL802154_ATTR_AFTER_LAST, diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 7dbb1f4ce7df..68f24016860c 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -228,6 +228,8 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = { [NL802154_ATTR_LBT_MODE] = { .type = NLA_U8, }, [NL802154_ATTR_WPAN_PHY_CAPS] = { .type = NLA_NESTED }, + + [NL802154_ATTR_SUPPORTED_COMMANDS] = { .type = NLA_NESTED }, }; /* message building helper */ @@ -372,7 +374,9 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, struct sk_buff *msg, u32 portid, u32 seq, int flags) { + struct nlattr *nl_cmds; void *hdr; + int i; hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) @@ -431,6 +435,42 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, if (nl802154_put_capabilities(msg, rdev)) goto nla_put_failure; + nl_cmds = nla_nest_start(msg, NL802154_ATTR_SUPPORTED_COMMANDS); + if (!nl_cmds) + goto nla_put_failure; + + i = 0; +#define CMD(op, n) \ + do { \ + if (rdev->ops->op) { \ + i++; \ + if (nla_put_u32(msg, i, NL802154_CMD_ ## n)) \ + goto nla_put_failure; \ + } \ + } while (0) + + CMD(add_virtual_intf, NEW_INTERFACE); + CMD(del_virtual_intf, DEL_INTERFACE); + CMD(set_channel, SET_CHANNEL); + CMD(set_pan_id, SET_PAN_ID); + CMD(set_short_addr, SET_SHORT_ADDR); + CMD(set_backoff_exponent, SET_BACKOFF_EXPONENT); + CMD(set_max_csma_backoffs, SET_MAX_CSMA_BACKOFFS); + CMD(set_max_frame_retries, SET_MAX_FRAME_RETRIES); + CMD(set_lbt_mode, SET_LBT_MODE); + + if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER) + CMD(set_tx_power, SET_TX_POWER); + + if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_ED_LEVEL) + CMD(set_cca_ed_level, SET_CCA_ED_LEVEL); + + if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_MODE) + CMD(set_cca_mode, SET_CCA_MODE); + +#undef CMD + nla_nest_end(msg, nl_cmds); + finish: genlmsg_end(msg, hdr); return 0; -- cgit v1.2.3 From 730fc4371333636a00fed32c587fc1e85c5367e2 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:37 -0700 Subject: mpls: Add definition for IPPROTO_MPLS Add uapi define for MPLS over IP. Acked-by: Jiri Pirko Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 589ced069e8a..641338bef651 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -69,6 +69,8 @@ enum { #define IPPROTO_SCTP IPPROTO_SCTP IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */ #define IPPROTO_UDPLITE IPPROTO_UDPLITE + IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */ +#define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_MAX -- cgit v1.2.3 From 42aecaa9bb2bd57eb8d61b4565cee5d3640863fb Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:39 -0700 Subject: net: Get skb hash over flow_keys structure This patch changes flow hashing to use jhash2 over the flow_keys structure instead just doing jhash_3words over src, dst, and ports. This method will allow us take more input into the hashing function so that we can include full IPv6 addresses, VLAN, flow labels etc. without needing to resort to xor'ing which makes for a poor hash. Acked-by: Jiri Pirko Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- include/net/flow_dissector.h | 21 ++++++++++++++--- include/net/ip.h | 2 ++ include/net/ipv6.h | 2 ++ net/core/flow_dissector.c | 54 +++++++++++++++++++++++++++++++++----------- net/sched/cls_flower.c | 2 ++ 6 files changed, 66 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6b41c15efa27..cc612fc0a894 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1943,7 +1943,7 @@ static inline void skb_probe_transport_header(struct sk_buff *skb, if (skb_transport_header_was_set(skb)) return; else if (skb_flow_dissect_flow_keys(skb, &keys)) - skb_set_transport_header(skb, keys.basic.thoff); + skb_set_transport_header(skb, keys.control.thoff); else skb_set_transport_header(skb, offset_hint); } diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index bac9c1421f58..cba6a10b214a 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -6,6 +6,15 @@ #include #include +/** + * struct flow_dissector_key_control: + * @thoff: Transport header offset + */ +struct flow_dissector_key_control { + u16 thoff; + u16 padding; +}; + /** * struct flow_dissector_key_basic: * @thoff: Transport header offset @@ -13,9 +22,9 @@ * @ip_proto: Transport header protocol (eg. TCP/UDP) */ struct flow_dissector_key_basic { - u16 thoff; __be16 n_proto; u8 ip_proto; + u8 padding; }; /** @@ -70,6 +79,7 @@ struct flow_dissector_key_eth_addrs { }; enum flow_dissector_key_id { + FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_addrs */ FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */ @@ -109,11 +119,16 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb, } struct flow_keys { - struct flow_dissector_key_addrs addrs; - struct flow_dissector_key_ports ports; + struct flow_dissector_key_control control; +#define FLOW_KEYS_HASH_START_FIELD basic struct flow_dissector_key_basic basic; + struct flow_dissector_key_ports ports; + struct flow_dissector_key_addrs addrs; }; +#define FLOW_KEYS_HASH_OFFSET \ + offsetof(struct flow_keys, FLOW_KEYS_HASH_START_FIELD) + extern struct flow_dissector flow_keys_dissector; extern struct flow_dissector flow_keys_buf_dissector; diff --git a/include/net/ip.h b/include/net/ip.h index 9b976cf99122..16cfc87fed6c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -360,6 +360,8 @@ static inline void inet_set_txhash(struct sock *sk) struct inet_sock *inet = inet_sk(sk); struct flow_keys keys; + memset(&keys, 0, sizeof(keys)); + keys.addrs.src = inet->inet_saddr; keys.addrs.dst = inet->inet_daddr; keys.ports.src = inet->inet_sport; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 35d485c78080..474ca466a091 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -699,6 +699,8 @@ static inline void ip6_set_txhash(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); struct flow_keys keys; + memset(&keys, 0, sizeof(keys)); + keys.addrs.src = (__force __be32)ipv6_addr_hash(&np->saddr); keys.addrs.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); keys.ports.src = inet->inet_sport; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 0763795bea8f..55b5f2962afa 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -57,9 +57,11 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, flow_dissector->offset[key->key_id] = key->offset; } - /* Ensure that the dissector always includes basic key. That way - * we are able to avoid handling lack of it in fast path. + /* Ensure that the dissector always includes control and basic key. + * That way we are able to avoid handling lack of these in fast path. */ + BUG_ON(!skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_CONTROL)); BUG_ON(!skb_flow_dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_BASIC)); } @@ -120,6 +122,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, void *target_container, void *data, __be16 proto, int nhoff, int hlen) { + struct flow_dissector_key_control *key_control; struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_ports *key_ports; @@ -132,6 +135,13 @@ bool __skb_flow_dissect(const struct sk_buff *skb, hlen = skb_headlen(skb); } + /* It is ensured by skb_flow_dissector_init() that control key will + * be always present. + */ + key_control = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_CONTROL, + target_container); + /* It is ensured by skb_flow_dissector_init() that basic key will * be always present. */ @@ -219,7 +229,7 @@ flow_label: key_basic->n_proto = proto; key_basic->ip_proto = ip_proto; - key_basic->thoff = (u16)nhoff; + key_control->thoff = (u16)nhoff; if (skb_flow_dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) { @@ -275,7 +285,7 @@ flow_label: if (!hdr) return false; key_basic->n_proto = proto; - key_basic->thoff = (u16)nhoff; + key_control->thoff = (u16)nhoff; if (skb_flow_dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) { @@ -288,7 +298,7 @@ flow_label: return true; } case htons(ETH_P_FCOE): - key_basic->thoff = (u16)(nhoff + FCOE_HEADER_LEN); + key_control->thoff = (u16)(nhoff + FCOE_HEADER_LEN); /* fall through */ default: return false; @@ -345,7 +355,7 @@ flow_label: key_basic->n_proto = proto; key_basic->ip_proto = ip_proto; - key_basic->thoff = (u16) nhoff; + key_control->thoff = (u16)nhoff; if (skb_flow_dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) { @@ -366,9 +376,21 @@ static __always_inline void __flow_hash_secret_init(void) net_get_random_once(&hashrnd, sizeof(hashrnd)); } -static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c, u32 keyval) +static __always_inline u32 __flow_hash_words(u32 *words, u32 length, u32 keyval) +{ + return jhash2(words, length, keyval); +} + +static inline void *flow_keys_hash_start(struct flow_keys *flow) { - return jhash_3words(a, b, c, keyval); + BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32)); + return (void *)flow + FLOW_KEYS_HASH_OFFSET; +} + +static inline size_t flow_keys_hash_length(struct flow_keys *flow) +{ + BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32)); + return (sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) / sizeof(u32); } static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) @@ -383,10 +405,8 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) swap(keys->ports.src, keys->ports.dst); } - hash = __flow_hash_3words((__force u32)keys->addrs.dst, - (__force u32)keys->addrs.src, - (__force u32)keys->ports.ports, - keyval); + hash = __flow_hash_words((u32 *)flow_keys_hash_start(keys), + flow_keys_hash_length(keys), keyval); if (!hash) hash = 1; @@ -473,7 +493,7 @@ EXPORT_SYMBOL(skb_get_hash_perturb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen) { - u32 poff = keys->basic.thoff; + u32 poff = keys->control.thoff; switch (keys->basic.ip_proto) { case IPPROTO_TCP: { @@ -536,6 +556,10 @@ u32 skb_get_poff(const struct sk_buff *skb) } static const struct flow_dissector_key flow_keys_dissector_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_CONTROL, + .offset = offsetof(struct flow_keys, control), + }, { .key_id = FLOW_DISSECTOR_KEY_BASIC, .offset = offsetof(struct flow_keys, basic), @@ -555,6 +579,10 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { }; static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_CONTROL, + .offset = offsetof(struct flow_keys, control), + }, { .key_id = FLOW_DISSECTOR_KEY_BASIC, .offset = offsetof(struct flow_keys, basic), diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 8c8f34ef6980..5a7d66c59684 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -25,6 +25,7 @@ struct fl_flow_key { int indev_ifindex; + struct flow_dissector_key_control control; struct flow_dissector_key_basic basic; struct flow_dissector_key_eth_addrs eth; union { @@ -347,6 +348,7 @@ static void fl_init_dissector(struct cls_fl_head *head, struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX]; size_t cnt = 0; + FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control); FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic); FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, FLOW_DISSECTOR_KEY_ETH_ADDRS, eth); -- cgit v1.2.3 From c3f8324188fa80178f20c8209b492ca6191177e8 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:40 -0700 Subject: net: Add full IPv6 addresses to flow_keys This patch adds full IPv6 addresses into flow_keys and uses them as input to the flow hash function. The implementation supports either IPv4 or IPv6 addresses in a union, and selector is used to determine how may words to input to jhash2. We also add flow_get_u32_dst and flow_get_u32_src functions which are used to get a u32 representation of the source and destination addresses. For IPv6, ipv6_addr_hash is called. These functions retain getting the legacy values of src and dst in flow_keys. With this patch, Ethertype and IP protocol are now included in the flow hash input. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 9 +- drivers/net/ethernet/cisco/enic/enic_clsf.c | 8 +- drivers/net/ethernet/cisco/enic/enic_ethtool.c | 4 +- include/net/flow_dissector.h | 52 +++++++---- include/net/ip.h | 19 +++- include/net/ipv6.h | 21 ++++- net/core/flow_dissector.c | 116 +++++++++++++++++++++---- net/ethernet/eth.c | 2 +- net/sched/cls_flow.c | 14 ++- net/sched/cls_flower.c | 11 +-- 10 files changed, 193 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2268438f3f63..19eb990d398c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3059,8 +3059,7 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph)))) return false; iph = ip_hdr(skb); - fk->addrs.src = iph->saddr; - fk->addrs.dst = iph->daddr; + iph_to_flow_copy_v4addrs(fk, iph); noff += iph->ihl << 2; if (!ip_is_fragment(iph)) proto = iph->protocol; @@ -3068,8 +3067,7 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6)))) return false; iph6 = ipv6_hdr(skb); - fk->addrs.src = (__force __be32)ipv6_addr_hash(&iph6->saddr); - fk->addrs.dst = (__force __be32)ipv6_addr_hash(&iph6->daddr); + iph_to_flow_copy_v6addrs(fk, iph6); noff += sizeof(*iph6); proto = iph6->nexthdr; } else { @@ -3103,7 +3101,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) hash = bond_eth_hash(skb); else hash = (__force u32)flow.ports.ports; - hash ^= (__force u32)flow.addrs.dst ^ (__force u32)flow.addrs.src; + hash ^= (__force u32)flow_get_u32_dst(&flow) ^ + (__force u32)flow_get_u32_src(&flow); hash ^= (hash >> 16); hash ^= (hash >> 8); diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c index a31b57adcb37..d106186f4f4a 100644 --- a/drivers/net/ethernet/cisco/enic/enic_clsf.c +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c @@ -33,8 +33,8 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq) return -EPROTONOSUPPORT; }; data.type = FILTER_IPV4_5TUPLE; - data.u.ipv4.src_addr = ntohl(keys->addrs.src); - data.u.ipv4.dst_addr = ntohl(keys->addrs.dst); + data.u.ipv4.src_addr = ntohl(keys->addrs.v4addrs.src); + data.u.ipv4.dst_addr = ntohl(keys->addrs.v4addrs.dst); data.u.ipv4.src_port = ntohs(keys->ports.src); data.u.ipv4.dst_port = ntohs(keys->ports.dst); data.u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE; @@ -158,8 +158,8 @@ static struct enic_rfs_fltr_node *htbl_key_search(struct hlist_head *h, struct enic_rfs_fltr_node *tpos; hlist_for_each_entry(tpos, h, node) - if (tpos->keys.addrs.src == k->addrs.src && - tpos->keys.addrs.dst == k->addrs.dst && + if (tpos->keys.addrs.v4addrs.src == k->addrs.v4addrs.src && + tpos->keys.addrs.v4addrs.dst == k->addrs.v4addrs.dst && tpos->keys.ports.ports == k->ports.ports && tpos->keys.basic.ip_proto == k->basic.ip_proto && tpos->keys.basic.n_proto == k->basic.n_proto) diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index 117c0968dd0b..73874b2575bf 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -346,10 +346,10 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd) break; } - fsp->h_u.tcp_ip4_spec.ip4src = n->keys.addrs.src; + fsp->h_u.tcp_ip4_spec.ip4src = flow_get_u32_src(&n->keys); fsp->m_u.tcp_ip4_spec.ip4src = (__u32)~0; - fsp->h_u.tcp_ip4_spec.ip4dst = n->keys.addrs.dst; + fsp->h_u.tcp_ip4_spec.ip4dst = flow_get_u32_dst(&n->keys); fsp->m_u.tcp_ip4_spec.ip4dst = (__u32)~0; fsp->h_u.tcp_ip4_spec.psrc = n->keys.ports.src; diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index cba6a10b214a..306d4613abbb 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -12,7 +12,7 @@ */ struct flow_dissector_key_control { u16 thoff; - u16 padding; + u16 addr_type; }; /** @@ -28,18 +28,39 @@ struct flow_dissector_key_basic { }; /** - * struct flow_dissector_key_addrs: - * @src: source ip address in case of IPv4 - * For IPv6 it contains 32bit hash of src address - * @dst: destination ip address in case of IPv4 - * For IPv6 it contains 32bit hash of dst address + * struct flow_dissector_key_ipv4_addrs: + * @src: source ip address + * @dst: destination ip address */ -struct flow_dissector_key_addrs { +struct flow_dissector_key_ipv4_addrs { /* (src,dst) must be grouped, in the same way than in IP header */ __be32 src; __be32 dst; }; +/** + * struct flow_dissector_key_ipv6_addrs: + * @src: source ip address + * @dst: destination ip address + */ +struct flow_dissector_key_ipv6_addrs { + /* (src,dst) must be grouped, in the same way than in IP header */ + struct in6_addr src; + struct in6_addr dst; +}; + +/** + * struct flow_dissector_key_addrs: + * @v4addrs: IPv4 addresses + * @v6addrs: IPv6 addresses + */ +struct flow_dissector_key_addrs { + union { + struct flow_dissector_key_ipv4_addrs v4addrs; + struct flow_dissector_key_ipv6_addrs v6addrs; + }; +}; + /** * flow_dissector_key_tp_ports: * @ports: port numbers of Transport header @@ -56,16 +77,6 @@ struct flow_dissector_key_ports { }; }; -/** - * struct flow_dissector_key_ipv6_addrs: - * @src: source ip address - * @dst: destination ip address - */ -struct flow_dissector_key_ipv6_addrs { - /* (src,dst) must be grouped, in the same way than in IP header */ - struct in6_addr src; - struct in6_addr dst; -}; /** * struct flow_dissector_key_eth_addrs: @@ -81,10 +92,10 @@ struct flow_dissector_key_eth_addrs { enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ - FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_addrs */ + FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ + FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ - FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_MAX, @@ -129,6 +140,9 @@ struct flow_keys { #define FLOW_KEYS_HASH_OFFSET \ offsetof(struct flow_keys, FLOW_KEYS_HASH_START_FIELD) +__be32 flow_get_u32_src(const struct flow_keys *flow); +__be32 flow_get_u32_dst(const struct flow_keys *flow); + extern struct flow_dissector flow_keys_dissector; extern struct flow_dissector flow_keys_buf_dissector; diff --git a/include/net/ip.h b/include/net/ip.h index 16cfc87fed6c..0750a186ea63 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -355,6 +355,20 @@ static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto) skb->len, proto, 0); } +/* copy IPv4 saddr & daddr to flow_keys, possibly using 64bit load/store + * Equivalent to : flow->v4addrs.src = iph->saddr; + * flow->v4addrs.dst = iph->daddr; + */ +static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow, + const struct iphdr *iph) +{ + BUILD_BUG_ON(offsetof(typeof(flow->addrs), v4addrs.dst) != + offsetof(typeof(flow->addrs), v4addrs.src) + + sizeof(flow->addrs.v4addrs.src)); + memcpy(&flow->addrs.v4addrs, &iph->saddr, sizeof(flow->addrs.v4addrs)); + flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; +} + static inline void inet_set_txhash(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); @@ -362,8 +376,9 @@ static inline void inet_set_txhash(struct sock *sk) memset(&keys, 0, sizeof(keys)); - keys.addrs.src = inet->inet_saddr; - keys.addrs.dst = inet->inet_daddr; + keys.addrs.v4addrs.src = inet->inet_saddr; + keys.addrs.v4addrs.dst = inet->inet_daddr; + keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; keys.ports.src = inet->inet_sport; keys.ports.dst = inet->inet_dport; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 474ca466a091..82dbdb092a5d 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -692,6 +692,20 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, return hlimit; } +/* copy IPv6 saddr & daddr to flow_keys, possibly using 64bit load/store + * Equivalent to : flow->v6addrs.src = iph->saddr; + * flow->v6addrs.dst = iph->daddr; + */ +static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow, + const struct ipv6hdr *iph) +{ + BUILD_BUG_ON(offsetof(typeof(flow->addrs), v6addrs.dst) != + offsetof(typeof(flow->addrs), v6addrs.src) + + sizeof(flow->addrs.v6addrs.src)); + memcpy(&flow->addrs.v6addrs, &iph->saddr, sizeof(flow->addrs.v6addrs)); + flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; +} + #if IS_ENABLED(CONFIG_IPV6) static inline void ip6_set_txhash(struct sock *sk) { @@ -701,8 +715,11 @@ static inline void ip6_set_txhash(struct sock *sk) memset(&keys, 0, sizeof(keys)); - keys.addrs.src = (__force __be32)ipv6_addr_hash(&np->saddr); - keys.addrs.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); + memcpy(&keys.addrs.v6addrs.src, &np->saddr, + sizeof(keys.addrs.v6addrs.src)); + memcpy(&keys.addrs.v6addrs.dst, &sk->sk_v6_daddr, + sizeof(keys.addrs.v6addrs.dst)); + keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; keys.ports.src = inet->inet_sport; keys.ports.dst = inet->inet_dport; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 55b5f2962afa..ca9d22488cfb 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -178,10 +178,12 @@ ip: if (!skb_flow_dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) break; + key_addrs = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - target_container); - memcpy(key_addrs, &iph->saddr, sizeof(*key_addrs)); + FLOW_DISSECTOR_KEY_IPV4_ADDRS, target_container); + memcpy(&key_addrs->v4addrs, &iph->saddr, + sizeof(key_addrs->v4addrs)); + key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; break; } case htons(ETH_P_IPV6): { @@ -203,8 +205,11 @@ ipv6: FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, target_container); - key_addrs->src = (__force __be32)ipv6_addr_hash(&iph->saddr); - key_addrs->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); + key_addrs->v4addrs.src = + (__force __be32)ipv6_addr_hash(&iph->saddr); + key_addrs->v4addrs.dst = + (__force __be32)ipv6_addr_hash(&iph->daddr); + key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; goto flow_label; } if (skb_flow_dissector_uses_key(flow_dissector, @@ -216,6 +221,7 @@ ipv6: target_container); memcpy(key_ipv6_addrs, &iph->saddr, sizeof(*key_ipv6_addrs)); + key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; goto flow_label; } break; @@ -292,8 +298,9 @@ flow_label: key_addrs = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, target_container); - key_addrs->src = hdr->srcnode; - key_addrs->dst = 0; + key_addrs->v4addrs.src = hdr->srcnode; + key_addrs->v4addrs.dst = 0; + key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } return true; } @@ -389,21 +396,88 @@ static inline void *flow_keys_hash_start(struct flow_keys *flow) static inline size_t flow_keys_hash_length(struct flow_keys *flow) { + size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs); BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32)); - return (sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) / sizeof(u32); + BUILD_BUG_ON(offsetof(typeof(*flow), addrs) != + sizeof(*flow) - sizeof(flow->addrs)); + + switch (flow->control.addr_type) { + case FLOW_DISSECTOR_KEY_IPV4_ADDRS: + diff -= sizeof(flow->addrs.v4addrs); + break; + case FLOW_DISSECTOR_KEY_IPV6_ADDRS: + diff -= sizeof(flow->addrs.v6addrs); + break; + } + return (sizeof(*flow) - diff) / sizeof(u32); +} + +__be32 flow_get_u32_src(const struct flow_keys *flow) +{ + switch (flow->control.addr_type) { + case FLOW_DISSECTOR_KEY_IPV4_ADDRS: + return flow->addrs.v4addrs.src; + case FLOW_DISSECTOR_KEY_IPV6_ADDRS: + return (__force __be32)ipv6_addr_hash( + &flow->addrs.v6addrs.src); + default: + return 0; + } +} +EXPORT_SYMBOL(flow_get_u32_src); + +__be32 flow_get_u32_dst(const struct flow_keys *flow) +{ + switch (flow->control.addr_type) { + case FLOW_DISSECTOR_KEY_IPV4_ADDRS: + return flow->addrs.v4addrs.dst; + case FLOW_DISSECTOR_KEY_IPV6_ADDRS: + return (__force __be32)ipv6_addr_hash( + &flow->addrs.v6addrs.dst); + default: + return 0; + } +} +EXPORT_SYMBOL(flow_get_u32_dst); + +static inline void __flow_hash_consistentify(struct flow_keys *keys) +{ + int addr_diff, i; + + switch (keys->control.addr_type) { + case FLOW_DISSECTOR_KEY_IPV4_ADDRS: + addr_diff = (__force u32)keys->addrs.v4addrs.dst - + (__force u32)keys->addrs.v4addrs.src; + if ((addr_diff < 0) || + (addr_diff == 0 && + ((__force u16)keys->ports.dst < + (__force u16)keys->ports.src))) { + swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst); + swap(keys->ports.src, keys->ports.dst); + } + break; + case FLOW_DISSECTOR_KEY_IPV6_ADDRS: + addr_diff = memcmp(&keys->addrs.v6addrs.dst, + &keys->addrs.v6addrs.src, + sizeof(keys->addrs.v6addrs.dst)); + if ((addr_diff < 0) || + (addr_diff == 0 && + ((__force u16)keys->ports.dst < + (__force u16)keys->ports.src))) { + for (i = 0; i < 4; i++) + swap(keys->addrs.v6addrs.src.s6_addr32[i], + keys->addrs.v6addrs.dst.s6_addr32[i]); + swap(keys->ports.src, keys->ports.dst); + } + break; + } } static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) { u32 hash; - /* get a consistent hash (same value on both flow directions) */ - if (((__force u32)keys->addrs.dst < (__force u32)keys->addrs.src) || - (((__force u32)keys->addrs.dst == (__force u32)keys->addrs.src) && - ((__force u16)keys->ports.dst < (__force u16)keys->ports.src))) { - swap(keys->addrs.dst, keys->addrs.src); - swap(keys->ports.src, keys->ports.dst); - } + __flow_hash_consistentify(keys); hash = __flow_hash_words((u32 *)flow_keys_hash_start(keys), flow_keys_hash_length(keys), keyval); @@ -451,8 +525,8 @@ void make_flow_keys_digest(struct flow_keys_digest *digest, data->n_proto = flow->basic.n_proto; data->ip_proto = flow->basic.ip_proto; data->ports = flow->ports.ports; - data->src = flow->addrs.src; - data->dst = flow->addrs.dst; + data->src = flow->addrs.v4addrs.src; + data->dst = flow->addrs.v4addrs.dst; } EXPORT_SYMBOL(make_flow_keys_digest); @@ -566,11 +640,15 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { }, { .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, - .offset = offsetof(struct flow_keys, addrs), + .offset = offsetof(struct flow_keys, addrs.v4addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, + .offset = offsetof(struct flow_keys, addrs.v6addrs), }, { .key_id = FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, - .offset = offsetof(struct flow_keys, addrs), + .offset = offsetof(struct flow_keys, addrs.v4addrs), }, { .key_id = FLOW_DISSECTOR_KEY_PORTS, diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 7d0e239a6755..77e0f0e7a88e 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -133,7 +133,7 @@ u32 eth_get_headlen(void *data, unsigned int len) /* parse any remaining L2/L3 headers, check for L4 */ if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto, sizeof(*eth), len)) - return max_t(u32, keys.basic.thoff, sizeof(*eth)); + return max_t(u32, keys.control.thoff, sizeof(*eth)); /* parse for any L4 headers */ return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index b4359924846c..76bc3a20ffdb 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -68,15 +68,21 @@ static inline u32 addr_fold(void *addr) static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow) { - if (flow->addrs.src) - return ntohl(flow->addrs.src); + __be32 src = flow_get_u32_src(flow); + + if (src) + return ntohl(src); + return addr_fold(skb->sk); } static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - if (flow->addrs.dst) - return ntohl(flow->addrs.dst); + __be32 dst = flow_get_u32_dst(flow); + + if (dst) + return ntohl(dst); + return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 5a7d66c59684..b92d3f49c23e 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -28,8 +28,9 @@ struct fl_flow_key { struct flow_dissector_key_control control; struct flow_dissector_key_basic basic; struct flow_dissector_key_eth_addrs eth; + struct flow_dissector_key_addrs ipaddrs; union { - struct flow_dissector_key_addrs ipv4; + struct flow_dissector_key_ipv4_addrs ipv4; struct flow_dissector_key_ipv6_addrs ipv6; }; struct flow_dissector_key_ports tp; @@ -260,14 +261,14 @@ static int fl_set_key(struct net *net, struct nlattr **tb, &mask->basic.ip_proto, TCA_FLOWER_UNSPEC, sizeof(key->basic.ip_proto)); } - if (key->basic.n_proto == htons(ETH_P_IP)) { + if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC, &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK, sizeof(key->ipv4.src)); fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST, &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK, sizeof(key->ipv4.dst)); - } else if (key->basic.n_proto == htons(ETH_P_IPV6)) { + } else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC, &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK, sizeof(key->ipv6.src)); @@ -610,7 +611,7 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, sizeof(key->basic.ip_proto))) goto nla_put_failure; - if (key->basic.n_proto == htons(ETH_P_IP) && + if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC, &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK, sizeof(key->ipv4.src)) || @@ -618,7 +619,7 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK, sizeof(key->ipv4.dst)))) goto nla_put_failure; - else if (key->basic.n_proto == htons(ETH_P_IPV6) && + else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS && (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC, &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK, sizeof(key->ipv6.src)) || -- cgit v1.2.3 From 9f24908901c5f4b1e3b07548106b1790af933476 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:41 -0700 Subject: net: Add keys for TIPC address Add a new flow key for TIPC addresses. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 10 ++++++++++ net/core/flow_dissector.c | 18 +++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 306d4613abbb..3ee606a54775 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -49,6 +49,14 @@ struct flow_dissector_key_ipv6_addrs { struct in6_addr dst; }; +/** + * struct flow_dissector_key_tipc_addrs: + * @srcnode: source node address + */ +struct flow_dissector_key_tipc_addrs { + __be32 srcnode; +}; + /** * struct flow_dissector_key_addrs: * @v4addrs: IPv4 addresses @@ -58,6 +66,7 @@ struct flow_dissector_key_addrs { union { struct flow_dissector_key_ipv4_addrs v4addrs; struct flow_dissector_key_ipv6_addrs v6addrs; + struct flow_dissector_key_tipc_addrs tipcaddrs; }; }; @@ -97,6 +106,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ + FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index ca9d22488cfb..91861c3d45a7 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -294,13 +294,12 @@ flow_label: key_control->thoff = (u16)nhoff; if (skb_flow_dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) { + FLOW_DISSECTOR_KEY_TIPC_ADDRS)) { key_addrs = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, + FLOW_DISSECTOR_KEY_TIPC_ADDRS, target_container); - key_addrs->v4addrs.src = hdr->srcnode; - key_addrs->v4addrs.dst = 0; - key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + key_addrs->tipcaddrs.srcnode = hdr->srcnode; + key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS; } return true; } @@ -408,6 +407,9 @@ static inline size_t flow_keys_hash_length(struct flow_keys *flow) case FLOW_DISSECTOR_KEY_IPV6_ADDRS: diff -= sizeof(flow->addrs.v6addrs); break; + case FLOW_DISSECTOR_KEY_TIPC_ADDRS: + diff -= sizeof(flow->addrs.tipcaddrs); + break; } return (sizeof(*flow) - diff) / sizeof(u32); } @@ -420,6 +422,8 @@ __be32 flow_get_u32_src(const struct flow_keys *flow) case FLOW_DISSECTOR_KEY_IPV6_ADDRS: return (__force __be32)ipv6_addr_hash( &flow->addrs.v6addrs.src); + case FLOW_DISSECTOR_KEY_TIPC_ADDRS: + return flow->addrs.tipcaddrs.srcnode; default: return 0; } @@ -650,6 +654,10 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { .key_id = FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, .offset = offsetof(struct flow_keys, addrs.v4addrs), }, + { + .key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS, + .offset = offsetof(struct flow_keys, addrs.tipcaddrs), + }, { .key_id = FLOW_DISSECTOR_KEY_PORTS, .offset = offsetof(struct flow_keys, ports), -- cgit v1.2.3 From 45b47fd00ca14df869979dbbe14324625ec93552 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:42 -0700 Subject: net: Get rid of IPv6 hash addresses flow keys We don't need to return the IPv6 address hash as part of flow keys. In general, using the IPv6 address hash is risky in a hash value since the underlying use of xor provides no entropy. If someone really needs the hash value they can get it from the full IPv6 addresses in flow keys (e.g. from flow_get_u32_src). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 1 - net/core/flow_dissector.c | 17 ----------------- 2 files changed, 18 deletions(-) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 3ee606a54775..59f00f90fc6b 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -103,7 +103,6 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ - FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 91861c3d45a7..5348a468de78 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -199,19 +199,6 @@ ipv6: ip_proto = iph->nexthdr; nhoff += sizeof(struct ipv6hdr); - if (skb_flow_dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) { - key_addrs = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, - target_container); - - key_addrs->v4addrs.src = - (__force __be32)ipv6_addr_hash(&iph->saddr); - key_addrs->v4addrs.dst = - (__force __be32)ipv6_addr_hash(&iph->daddr); - key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; - goto flow_label; - } if (skb_flow_dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { struct flow_dissector_key_ipv6_addrs *key_ipv6_addrs; @@ -650,10 +637,6 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, .offset = offsetof(struct flow_keys, addrs.v6addrs), }, - { - .key_id = FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, - .offset = offsetof(struct flow_keys, addrs.v4addrs), - }, { .key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS, .offset = offsetof(struct flow_keys, addrs.tipcaddrs), -- cgit v1.2.3 From d34af823ff401c312541aa613c49ea4b872bde9e Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:43 -0700 Subject: net: Add VLAN ID to flow_keys In flow_dissector set vlan_id in flow_keys when VLAN is found. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 6 ++++++ net/core/flow_dissector.c | 14 ++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 59f00f90fc6b..08480fbb9035 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -27,6 +27,10 @@ struct flow_dissector_key_basic { u8 padding; }; +struct flow_dissector_key_tags { + u32 vlan_id:12; +}; + /** * struct flow_dissector_key_ipv4_addrs: * @src: source ip address @@ -106,6 +110,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ + FLOW_DISSECTOR_KEY_VLANID, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_MAX, }; @@ -142,6 +147,7 @@ struct flow_keys { struct flow_dissector_key_control control; #define FLOW_KEYS_HASH_START_FIELD basic struct flow_dissector_key_basic basic; + struct flow_dissector_key_tags tags; struct flow_dissector_key_ports ports; struct flow_dissector_key_addrs addrs; }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 5348a468de78..5c66cb2e66df 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -126,6 +126,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_ports *key_ports; + struct flow_dissector_key_tags *key_tags; u8 ip_proto; if (!data) { @@ -246,6 +247,15 @@ flow_label: if (!vlan) return false; + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_VLANID)) { + key_tags = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_VLANID, + target_container); + + key_tags->vlan_id = skb_vlan_tag_get_id(skb); + } + proto = vlan->h_vlan_encapsulated_proto; nhoff += sizeof(*vlan); goto again; @@ -645,6 +655,10 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { .key_id = FLOW_DISSECTOR_KEY_PORTS, .offset = offsetof(struct flow_keys, ports), }, + { + .key_id = FLOW_DISSECTOR_KEY_VLANID, + .offset = offsetof(struct flow_keys, tags), + }, }; static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = { -- cgit v1.2.3 From 87ee9e52ffeb168803a76cc07734425227cc2268 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:44 -0700 Subject: net: Add IPv6 flow label to flow_keys In flow_dissector set the flow label in flow_keys for IPv6. This also removes the shortcircuiting of flow dissection when a non-zero label is present, the flow label can be considered to provide additional entropy for a hash. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 4 +++- net/core/flow_dissector.c | 29 ++++++++++------------------- 2 files changed, 13 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 08480fbb9035..14d8483d836e 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -28,7 +28,8 @@ struct flow_dissector_key_basic { }; struct flow_dissector_key_tags { - u32 vlan_id:12; + u32 vlan_id:12, + flow_label:20; }; /** @@ -111,6 +112,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ FLOW_DISSECTOR_KEY_VLANID, /* struct flow_dissector_key_flow_tags */ + FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 5c66cb2e66df..70f0567b6be9 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -210,30 +210,17 @@ ipv6: memcpy(key_ipv6_addrs, &iph->saddr, sizeof(*key_ipv6_addrs)); key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; - goto flow_label; } - break; -flow_label: + flow_label = ip6_flowlabel(iph); if (flow_label) { - /* Awesome, IPv6 packet has a flow label so we can - * use that to represent the ports without any - * further dissection. - */ - - key_basic->n_proto = proto; - key_basic->ip_proto = ip_proto; - key_control->thoff = (u16)nhoff; - if (skb_flow_dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_PORTS)) { - key_ports = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_PORTS, - target_container); - key_ports->ports = flow_label; + FLOW_DISSECTOR_KEY_FLOW_LABEL)) { + key_tags = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_FLOW_LABEL, + target_container); + key_tags->flow_label = ntohl(flow_label); } - - return true; } break; @@ -659,6 +646,10 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { .key_id = FLOW_DISSECTOR_KEY_VLANID, .offset = offsetof(struct flow_keys, tags), }, + { + .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL, + .offset = offsetof(struct flow_keys, tags), + }, }; static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = { -- cgit v1.2.3 From 1fdd512c92003cf2d671ba22753d13302bf8cd1d Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:45 -0700 Subject: net: Add GRE keyid in flow_keys In flow dissector if a GRE header contains a keyid this is saved in the new keyid field of flow_keys. The GRE keyid is then represented in the flow hash function input. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 6 ++++++ net/core/flow_dissector.c | 24 +++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 14d8483d836e..6c5e8d262607 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -32,6 +32,10 @@ struct flow_dissector_key_tags { flow_label:20; }; +struct flow_dissector_key_keyid { + __be32 keyid; +}; + /** * struct flow_dissector_key_ipv4_addrs: * @src: source ip address @@ -113,6 +117,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ FLOW_DISSECTOR_KEY_VLANID, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ + FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_MAX, }; @@ -150,6 +155,7 @@ struct flow_keys { #define FLOW_KEYS_HASH_START_FIELD basic struct flow_dissector_key_basic basic; struct flow_dissector_key_tags tags; + struct flow_dissector_key_keyid keyid; struct flow_dissector_key_ports ports; struct flow_dissector_key_addrs addrs; }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 70f0567b6be9..7ddc0a7b3da5 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -127,6 +127,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_ports *key_ports; struct flow_dissector_key_tags *key_tags; + struct flow_dissector_key_keyid *key_keyid; u8 ip_proto; if (!data) { @@ -315,8 +316,25 @@ ipv6: nhoff += 4; if (hdr->flags & GRE_CSUM) nhoff += 4; - if (hdr->flags & GRE_KEY) + if (hdr->flags & GRE_KEY) { + const __be32 *keyid; + __be32 _keyid; + + keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid), + data, hlen, &_keyid); + + if (!keyid) + return false; + + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_GRE_KEYID)) { + key_keyid = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_GRE_KEYID, + target_container); + key_keyid->keyid = *keyid; + } nhoff += 4; + } if (hdr->flags & GRE_SEQ) nhoff += 4; if (proto == htons(ETH_P_TEB)) { @@ -650,6 +668,10 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL, .offset = offsetof(struct flow_keys, tags), }, + { + .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID, + .offset = offsetof(struct flow_keys, keyid), + }, }; static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = { -- cgit v1.2.3 From b3baa0fbd02a1a9d493d8cb92ae4a4491b9e9d13 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:46 -0700 Subject: mpls: Add MPLS entropy label in flow_keys In flow dissector if an MPLS header contains an entropy label this is saved in the new keyid field of flow_keys. The entropy label is then represented in the flow hash function input. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 1 + net/core/flow_dissector.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 6c5e8d262607..1a8c22419936 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -118,6 +118,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_VLANID, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ + FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 7ddc0a7b3da5..77e22e4fc898 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -288,6 +289,37 @@ ipv6: } return true; } + + case htons(ETH_P_MPLS_UC): + case htons(ETH_P_MPLS_MC): { + struct mpls_label *hdr, _hdr[2]; +mpls: + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, + hlen, &_hdr); + if (!hdr) + return false; + + if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) == + MPLS_LABEL_ENTROPY) { + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) { + key_keyid = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_MPLS_ENTROPY, + target_container); + key_keyid->keyid = hdr[1].entry & + htonl(MPLS_LS_LABEL_MASK); + } + + key_basic->n_proto = proto; + key_basic->ip_proto = ip_proto; + key_control->thoff = (u16)nhoff; + + return true; + } + + return true; + } + case htons(ETH_P_FCOE): key_control->thoff = (u16)(nhoff + FCOE_HEADER_LEN); /* fall through */ @@ -357,6 +389,9 @@ ipv6: case IPPROTO_IPV6: proto = htons(ETH_P_IPV6); goto ipv6; + case IPPROTO_MPLS: + proto = htons(ETH_P_MPLS_UC); + goto mpls; default: break; } -- cgit v1.2.3 From 01949d0109ee5fae33752f0db99a36f1619e1873 Mon Sep 17 00:00:00 2001 From: Haggai Abramonvsky Date: Thu, 4 Jun 2015 19:30:38 +0300 Subject: net/mlx5_core: Enable XRCs and SRQs when using ISSI > 0 When working in ISSI > 0 mode, the model exposed by the device for XRCs and SRQs is different. XRCs use XRC SRQs and plain SRQs are based on RPM (Receive Memory Pool). Add helper functions to create, modify, query, and arm XRC SRQs and RMPs. Signed-off-by: Haggai Abramovsky Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx5/srq.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 4 +- drivers/net/ethernet/mellanox/mlx5/core/srq.c | 444 ++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 154 +++++++ drivers/net/ethernet/mellanox/mlx5/core/transobj.h | 11 + include/linux/mlx5/driver.h | 6 +- include/linux/mlx5/mlx5_ifc.h | 16 +- 7 files changed, 564 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index e8e8e942fa4a..e008505e96e9 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -302,7 +302,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn); in->ctx.db_record = cpu_to_be64(srq->db.dma); - err = mlx5_core_create_srq(dev->mdev, &srq->msrq, in, inlen); + err = mlx5_core_create_srq(dev->mdev, &srq->msrq, in, inlen, is_xrc); kvfree(in); if (err) { mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 87e9e606596a..07540f732df1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o vport.o transobj.o \ + mad.o transobj.o +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o vport.o \ en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \ en_txrx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index f9d25dcd03c1..c48f504ccbeb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -37,6 +37,7 @@ #include #include #include "mlx5_core.h" +#include "transobj.h" void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) { @@ -62,6 +63,74 @@ void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) complete(&srq->free); } +static int get_pas_size(void *srqc) +{ + u32 log_page_size = MLX5_GET(srqc, srqc, log_page_size) + 12; + u32 log_srq_size = MLX5_GET(srqc, srqc, log_srq_size); + u32 log_rq_stride = MLX5_GET(srqc, srqc, log_rq_stride); + u32 page_offset = MLX5_GET(srqc, srqc, page_offset); + u32 po_quanta = 1 << (log_page_size - 6); + u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); + u32 page_size = 1 << log_page_size; + u32 rq_sz_po = rq_sz + (page_offset * po_quanta); + u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size; + + return rq_num_pas * sizeof(u64); +} + +static void rmpc_srqc_reformat(void *srqc, void *rmpc, bool srqc_to_rmpc) +{ + void *wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + if (srqc_to_rmpc) { + switch (MLX5_GET(srqc, srqc, state)) { + case MLX5_SRQC_STATE_GOOD: + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + break; + case MLX5_SRQC_STATE_ERROR: + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_ERR); + break; + default: + pr_warn("%s: %d: Unknown srq state = 0x%x\n", __func__, + __LINE__, MLX5_GET(srqc, srqc, state)); + MLX5_SET(rmpc, rmpc, state, MLX5_GET(srqc, srqc, state)); + } + + MLX5_SET(wq, wq, wq_signature, MLX5_GET(srqc, srqc, wq_signature)); + MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(srqc, srqc, log_page_size)); + MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(srqc, srqc, log_rq_stride) + 4); + MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(srqc, srqc, log_srq_size)); + MLX5_SET(wq, wq, page_offset, MLX5_GET(srqc, srqc, page_offset)); + MLX5_SET(wq, wq, lwm, MLX5_GET(srqc, srqc, lwm)); + MLX5_SET(wq, wq, pd, MLX5_GET(srqc, srqc, pd)); + MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(srqc, srqc, dbr_addr)); + } else { + switch (MLX5_GET(rmpc, rmpc, state)) { + case MLX5_RMPC_STATE_RDY: + MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_GOOD); + break; + case MLX5_RMPC_STATE_ERR: + MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_ERROR); + break; + default: + pr_warn("%s: %d: Unknown rmp state = 0x%x\n", + __func__, __LINE__, + MLX5_GET(rmpc, rmpc, state)); + MLX5_SET(srqc, srqc, state, + MLX5_GET(rmpc, rmpc, state)); + } + + MLX5_SET(srqc, srqc, wq_signature, MLX5_GET(wq, wq, wq_signature)); + MLX5_SET(srqc, srqc, log_page_size, MLX5_GET(wq, wq, log_wq_pg_sz)); + MLX5_SET(srqc, srqc, log_rq_stride, MLX5_GET(wq, wq, log_wq_stride) - 4); + MLX5_SET(srqc, srqc, log_srq_size, MLX5_GET(wq, wq, log_wq_sz)); + MLX5_SET(srqc, srqc, page_offset, MLX5_GET(wq, wq, page_offset)); + MLX5_SET(srqc, srqc, lwm, MLX5_GET(wq, wq, lwm)); + MLX5_SET(srqc, srqc, pd, MLX5_GET(wq, wq, pd)); + MLX5_SET64(srqc, srqc, dbr_addr, MLX5_GET64(wq, wq, dbr_addr)); + } +} + struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) { struct mlx5_srq_table *table = &dev->priv.srq_table; @@ -79,26 +148,311 @@ struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) } EXPORT_SYMBOL(mlx5_core_get_srq); -int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, int inlen) +static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, int inlen) { struct mlx5_create_srq_mbox_out out; - struct mlx5_srq_table *table = &dev->priv.srq_table; - struct mlx5_destroy_srq_mbox_in din; - struct mlx5_destroy_srq_mbox_out dout; int err; memset(&out, 0, sizeof(out)); + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_SRQ); - err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); - if (err) - return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); + err = mlx5_cmd_exec_check_status(dev, (u32 *)in, inlen, (u32 *)(&out), + sizeof(out)); srq->srqn = be32_to_cpu(out.srqn) & 0xffffff; + return err; +} + +static int destroy_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + struct mlx5_destroy_srq_mbox_in in; + struct mlx5_destroy_srq_mbox_out out; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ); + in.srqn = cpu_to_be32(srq->srqn); + + return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in), + (u32 *)(&out), sizeof(out)); +} + +static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq) +{ + struct mlx5_arm_srq_mbox_in in; + struct mlx5_arm_srq_mbox_out out; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ARM_RQ); + in.hdr.opmod = cpu_to_be16(!!is_srq); + in.srqn = cpu_to_be32(srq->srqn); + in.lwm = cpu_to_be16(lwm); + + return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), + sizeof(in), (u32 *)(&out), + sizeof(out)); +} + +static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_query_srq_mbox_out *out) +{ + struct mlx5_query_srq_mbox_in in; + + memset(&in, 0, sizeof(in)); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SRQ); + in.srqn = cpu_to_be32(srq->srqn); + + return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in), + (u32 *)out, sizeof(*out)); +} + +static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, + int srq_inlen) +{ + u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; + void *create_in; + void *srqc; + void *xrc_srqc; + void *pas; + int pas_size; + int inlen; + int err; + + srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry); + pas_size = get_pas_size(srqc); + inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; + create_in = mlx5_vzalloc(inlen); + if (!create_in) + return -ENOMEM; + + xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in, + xrc_srq_context_entry); + pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); + + memcpy(xrc_srqc, srqc, MLX5_ST_SZ_BYTES(srqc)); + memcpy(pas, in->pas, pas_size); + /* 0xffffff means we ask to work with cqe version 0 */ + MLX5_SET(xrc_srqc, xrc_srqc, user_index, 0xffffff); + MLX5_SET(create_xrc_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_XRC_SRQ); + + memset(create_out, 0, sizeof(create_out)); + err = mlx5_cmd_exec_check_status(dev, create_in, inlen, create_out, + sizeof(create_out)); + if (err) + goto out; + + srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn); +out: + kvfree(create_in); + return err; +} + +static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]; + u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)]; + + memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); + memset(xrcsrq_out, 0, sizeof(xrcsrq_out)); + + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, + MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + + return mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); +} + +static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, u16 lwm) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]; + u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)]; + + memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); + memset(xrcsrq_out, 0, sizeof(xrcsrq_out)); + + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); + + return mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); +} + +static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_query_srq_mbox_out *out) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; + u32 *xrcsrq_out; + void *srqc; + void *xrc_srqc; + int err; + + xrcsrq_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + if (!xrcsrq_out) + return -ENOMEM; + memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); + + MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode, + MLX5_CMD_OP_QUERY_XRC_SRQ); + MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + err = mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, + MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + if (err) + goto out; + + xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, + xrc_srq_context_entry); + srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); + memcpy(srqc, xrc_srqc, MLX5_ST_SZ_BYTES(srqc)); + +out: + kvfree(xrcsrq_out); + return err; +} + +static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, int srq_inlen) +{ + void *create_in; + void *rmpc; + void *srqc; + int pas_size; + int inlen; + int err; + + srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry); + pas_size = get_pas_size(srqc); + inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; + create_in = mlx5_vzalloc(inlen); + if (!create_in) + return -ENOMEM; + + rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); + + memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); + rmpc_srqc_reformat(srqc, rmpc, true); + + err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); + + kvfree(create_in); + return err; +} + +static int destroy_rmp_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + return mlx5_core_destroy_rmp(dev, srq->srqn); +} + +static int arm_rmp_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + u16 lwm) +{ + void *in; + void *rmpc; + void *wq; + void *bitmask; + int err; + + in = mlx5_vzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in)); + if (!in) + return -ENOMEM; + + rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); + bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); + MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(wq, wq, lwm, lwm); + MLX5_SET(rmp_bitmask, bitmask, lwm, 1); + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + + err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); + + kvfree(in); + return err; +} + +static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_query_srq_mbox_out *out) +{ + u32 *rmp_out; + void *rmpc; + void *srqc; + int err; + + rmp_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_rmp_out)); + if (!rmp_out) + return -ENOMEM; + + err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out); + if (err) + goto out; + + srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); + rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); + rmpc_srqc_reformat(srqc, rmpc, false); + +out: + kvfree(rmp_out); + return err; +} + +static int create_srq_split(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, + int inlen, int is_xrc) +{ + if (!dev->issi) + return create_srq_cmd(dev, srq, in, inlen); + else if (srq->common.res == MLX5_RES_XSRQ) + return create_xrc_srq_cmd(dev, srq, in, inlen); + else + return create_rmp_cmd(dev, srq, in, inlen); +} + +static int destroy_srq_split(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + if (!dev->issi) + return destroy_srq_cmd(dev, srq); + else if (srq->common.res == MLX5_RES_XSRQ) + return destroy_xrc_srq_cmd(dev, srq); + else + return destroy_rmp_cmd(dev, srq); +} + +int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, int inlen, + int is_xrc) +{ + int err; + struct mlx5_srq_table *table = &dev->priv.srq_table; + + srq->common.res = is_xrc ? MLX5_RES_XSRQ : MLX5_RES_SRQ; + + err = create_srq_split(dev, srq, in, inlen, is_xrc); + if (err) + return err; + atomic_set(&srq->refcount, 1); init_completion(&srq->free); @@ -107,25 +461,20 @@ int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, spin_unlock_irq(&table->lock); if (err) { mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn); - goto err_cmd; + goto err_destroy_srq_split; } return 0; -err_cmd: - memset(&din, 0, sizeof(din)); - memset(&dout, 0, sizeof(dout)); - din.srqn = cpu_to_be32(srq->srqn); - din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ); - mlx5_cmd_exec(dev, &din, sizeof(din), &dout, sizeof(dout)); +err_destroy_srq_split: + destroy_srq_split(dev, srq); + return err; } EXPORT_SYMBOL(mlx5_core_create_srq); int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) { - struct mlx5_destroy_srq_mbox_in in; - struct mlx5_destroy_srq_mbox_out out; struct mlx5_srq_table *table = &dev->priv.srq_table; struct mlx5_core_srq *tmp; int err; @@ -142,17 +491,10 @@ int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) return -EINVAL; } - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ); - in.srqn = cpu_to_be32(srq->srqn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + err = destroy_srq_split(dev, srq); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - if (atomic_dec_and_test(&srq->refcount)) complete(&srq->free); wait_for_completion(&srq->free); @@ -164,48 +506,24 @@ EXPORT_SYMBOL(mlx5_core_destroy_srq); int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, struct mlx5_query_srq_mbox_out *out) { - struct mlx5_query_srq_mbox_in in; - int err; - - memset(&in, 0, sizeof(in)); - memset(out, 0, sizeof(*out)); - - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SRQ); - in.srqn = cpu_to_be32(srq->srqn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out)); - if (err) - return err; - - if (out->hdr.status) - return mlx5_cmd_status_to_err(&out->hdr); - - return err; + if (!dev->issi) + return query_srq_cmd(dev, srq, out); + else if (srq->common.res == MLX5_RES_XSRQ) + return query_xrc_srq_cmd(dev, srq, out); + else + return query_rmp_cmd(dev, srq, out); } EXPORT_SYMBOL(mlx5_core_query_srq); int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq) { - struct mlx5_arm_srq_mbox_in in; - struct mlx5_arm_srq_mbox_out out; - int err; - - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ARM_RQ); - in.hdr.opmod = cpu_to_be16(!!is_srq); - in.srqn = cpu_to_be32(srq->srqn); - in.lwm = cpu_to_be16(lwm); - - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - return err; + if (!dev->issi) + return arm_srq_cmd(dev, srq, lwm, is_srq); + else if (srq->common.res == MLX5_RES_XSRQ) + return arm_xrc_srq_cmd(dev, srq, lwm); + else + return arm_rmp_cmd(dev, srq, lwm); } EXPORT_SYMBOL(mlx5_core_arm_srq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index d918816ac4d8..7a120283de2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -169,3 +169,157 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); } + +int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rmpn) +{ + u32 out[MLX5_ST_SZ_DW(create_rmp_out)]; + int err; + + MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *rmpn = MLX5_GET(create_rmp_out, out, rmpn); + + return err; +} + +int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_rmp_out)]; + + MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); +} + +int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, in, rmpn, rmpn); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + +int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_rmp_in)]; + int outlen = MLX5_ST_SZ_BYTES(query_rmp_out); + + memset(in, 0, sizeof(in)); + MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP); + MLX5_SET(query_rmp_in, in, rmpn, rmpn); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen); +} + +int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm) +{ + void *in; + void *rmpc; + void *wq; + void *bitmask; + int err; + + in = mlx5_vzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in)); + if (!in) + return -ENOMEM; + + rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); + bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); + MLX5_SET(modify_rmp_in, in, rmpn, rmpn); + MLX5_SET(wq, wq, lwm, lwm); + MLX5_SET(rmp_bitmask, bitmask, lwm, 1); + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + + err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); + + kvfree(in); + + return err; +} + +int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *xsrqn) +{ + u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; + int err; + + MLX5_SET(create_xrc_srq_in, in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn); + + return err; +} + +int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + +int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; + void *srqc; + void *xrc_srqc; + int err; + + memset(in, 0, sizeof(in)); + MLX5_SET(query_xrc_srq_in, in, opcode, MLX5_CMD_OP_QUERY_XRC_SRQ); + MLX5_SET(query_xrc_srq_in, in, xrc_srqn, xsrqn); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, + MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + if (!err) { + xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, out, + xrc_srq_context_entry); + srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); + memcpy(srqc, xrc_srqc, MLX5_ST_SZ_BYTES(srqc)); + } + + return err; +} + +int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm) +{ + u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]; + u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn); + MLX5_SET(arm_xrc_srq_in, in, lwm, lwm); + MLX5_SET(arm_xrc_srq_in, in, op_mod, + MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h b/drivers/net/ethernet/mellanox/mlx5/core/transobj.h index b71d77f61a1d..90322c11361f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.h @@ -47,5 +47,16 @@ void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn); int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tisn); void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn); +int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rmpn); +int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen); +int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn); +int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); +int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); +int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rmpn); +int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn); +int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); +int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); #endif /* __TRANSOBJ_H__ */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7fa26f03acc1..ba9f212c94bb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -339,6 +339,8 @@ struct mlx5_core_mr { enum mlx5_res_type { MLX5_RES_QP, + MLX5_RES_SRQ, + MLX5_RES_XSRQ, }; struct mlx5_core_rsc_common { @@ -348,6 +350,7 @@ struct mlx5_core_rsc_common { }; struct mlx5_core_srq { + struct mlx5_core_rsc_common common; /* must be first */ u32 srqn; int max; int max_gs; @@ -640,7 +643,8 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, struct mlx5_cmd_mailbox *head); int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, int inlen); + struct mlx5_create_srq_mbox_in *in, int inlen, + int is_xrc); int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq); int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, struct mlx5_query_srq_mbox_out *out); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index b27e9f6e090a..dbe2b32c0539 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2022,12 +2022,9 @@ struct mlx5_ifc_srqc_bits { u8 reserved_9[0x40]; - u8 db_record_addr_h[0x20]; - - u8 db_record_addr_l[0x1e]; - u8 reserved_10[0x2]; + u8 dbr_addr[0x40]; - u8 reserved_11[0x80]; + u8 reserved_10[0x80]; }; enum { @@ -4167,6 +4164,13 @@ struct mlx5_ifc_modify_rmp_out_bits { u8 reserved_1[0x40]; }; +struct mlx5_ifc_rmp_bitmask_bits { + u8 reserved[0x20]; + + u8 reserved1[0x1f]; + u8 lwm[0x1]; +}; + struct mlx5_ifc_modify_rmp_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; @@ -4180,7 +4184,7 @@ struct mlx5_ifc_modify_rmp_in_bits { u8 reserved_3[0x20]; - u8 modify_bitmask[0x40]; + struct mlx5_ifc_rmp_bitmask_bits bitmask; u8 reserved_4[0x40]; -- cgit v1.2.3 From d18a9470f89727f870db944a36223bf1bb15bdc1 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 4 Jun 2015 19:30:40 +0300 Subject: net/mlx5_core: Make the vport helpers available for the IB driver too Move the vport header file to be under include/linux/mlx5, such that the mlx5 IB can use it as well. Also add nic_ prefix to the vport NIC commands to differeniate between HCA vport commands and NIC vport commands. Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 6 ++-- drivers/net/ethernet/mellanox/mlx5/core/vport.h | 41 ----------------------- include/linux/mlx5/vport.h | 41 +++++++++++++++++++++++ 5 files changed, 47 insertions(+), 45 deletions(-) delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/vport.h create mode 100644 include/linux/mlx5/vport.h (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index cbb3c7cb53f7..e9edb7210de1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -35,7 +35,7 @@ #include #include #include -#include "vport.h" +#include #include "wq.h" #include "transobj.h" #include "mlx5_core.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1b592913d4d7..edba09cca600 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1715,7 +1715,7 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5_query_vport_mac_address(priv->mdev, netdev->dev_addr); + mlx5_query_nic_vport_mac_address(priv->mdev, netdev->dev_addr); } static void mlx5e_build_netdev(struct net_device *netdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index ba374b9a6c87..32c4e03f6d3c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -33,7 +33,7 @@ #include #include #include -#include "vport.h" +#include #include "mlx5_core.h" u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod) @@ -55,8 +55,9 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod) return MLX5_GET(query_vport_state_out, out, state); } +EXPORT_SYMBOL(mlx5_query_vport_state); -void mlx5_query_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) +void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) { u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; u32 *out; @@ -82,3 +83,4 @@ void mlx5_query_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) kvfree(out); } +EXPORT_SYMBOL(mlx5_query_nic_vport_mac_address); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.h b/drivers/net/ethernet/mellanox/mlx5/core/vport.h deleted file mode 100644 index c05ca2c3419d..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __MLX5_VPORT_H__ -#define __MLX5_VPORT_H__ - -#include - -u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); -void mlx5_query_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr); - -#endif /* __MLX5_VPORT_H__ */ diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h new file mode 100644 index 000000000000..99d0e9f85432 --- /dev/null +++ b/include/linux/mlx5/vport.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_VPORT_H__ +#define __MLX5_VPORT_H__ + +#include + +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); +void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr); + +#endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3 From 707c4602cda6624940761b66a4119f1909492385 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 4 Jun 2015 19:30:41 +0300 Subject: net/mlx5_core: Add new query HCA vport commands Added the implementation for the following commands: 1. QUERY_HCA_VPORT_GID 2. QUERY_HCA_VPORT_PKEY 3. QUERY_HCA_VPORT_CONTEXT They will be needed when we move to work with ISSI > 0 in the IB driver too. Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 4 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 10 +- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 259 +++++++++++++++++++++++ include/linux/mlx5/device.h | 13 ++ include/linux/mlx5/driver.h | 45 ++++ include/linux/mlx5/mlx5_ifc.h | 23 +- include/linux/mlx5/vport.h | 14 ++ 7 files changed, 349 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 07540f732df1..26a68b8af2c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o vport.o \ + mad.o transobj.o vport.o +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o \ en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \ en_txrx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 11c7216a2517..58354122dfe4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -284,14 +284,6 @@ static u16 to_fw_pkey_sz(u32 size) } } -static u16 to_sw_pkey_sz(int pkey_sz) -{ - if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE) - return 0; - - return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; -} - int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, enum mlx5_cap_mode cap_mode) { @@ -386,7 +378,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) MLX5_ST_SZ_BYTES(cmd_hca_cap)); mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n", - to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)), + mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)), 128); /* we limit the size of the pkey table to 128 entries for now */ MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 32c4e03f6d3c..20150ffa8b16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -84,3 +84,262 @@ void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) kvfree(out); } EXPORT_SYMBOL(mlx5_query_nic_vport_mac_address); + +int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, u16 vf_num, u16 gid_index, + union ib_gid *gid) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_hca_vport_gid_in); + int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_gid_out); + int is_group_manager; + void *out = NULL; + void *in = NULL; + union ib_gid *tmp; + int tbsz; + int nout; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + tbsz = mlx5_get_gid_table_len(MLX5_CAP_GEN(dev, gid_table_size)); + mlx5_core_dbg(dev, "vf_num %d, index %d, gid_table_size %d\n", + vf_num, gid_index, tbsz); + + if (gid_index > tbsz && gid_index != 0xffff) + return -EINVAL; + + if (gid_index == 0xffff) + nout = tbsz; + else + nout = 1; + + out_sz += nout * sizeof(*gid); + + in = kzalloc(in_sz, GFP_KERNEL); + out = kzalloc(out_sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(query_hca_vport_gid_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_GID); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_hca_vport_gid_in, in, vport_number, vf_num); + MLX5_SET(query_hca_vport_gid_in, in, other_vport, 1); + } else { + err = -EPERM; + goto out; + } + } + MLX5_SET(query_hca_vport_gid_in, in, gid_index, gid_index); + + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_hca_vport_gid_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); + if (err) + goto out; + + err = mlx5_cmd_status_to_err_v2(out); + if (err) + goto out; + + tmp = out + MLX5_ST_SZ_BYTES(query_hca_vport_gid_out); + gid->global.subnet_prefix = tmp->global.subnet_prefix; + gid->global.interface_id = tmp->global.interface_id; + +out: + kfree(in); + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_gid); + +int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, u16 vf_num, u16 pkey_index, + u16 *pkey) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_hca_vport_pkey_in); + int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_pkey_out); + int is_group_manager; + void *out = NULL; + void *in = NULL; + void *pkarr; + int nout; + int tbsz; + int err; + int i; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + + tbsz = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)); + if (pkey_index > tbsz && pkey_index != 0xffff) + return -EINVAL; + + if (pkey_index == 0xffff) + nout = tbsz; + else + nout = 1; + + out_sz += nout * MLX5_ST_SZ_BYTES(pkey); + + in = kzalloc(in_sz, GFP_KERNEL); + out = kzalloc(out_sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(query_hca_vport_pkey_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_hca_vport_pkey_in, in, vport_number, vf_num); + MLX5_SET(query_hca_vport_pkey_in, in, other_vport, 1); + } else { + err = -EPERM; + goto out; + } + } + MLX5_SET(query_hca_vport_pkey_in, in, pkey_index, pkey_index); + + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_hca_vport_pkey_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); + if (err) + goto out; + + err = mlx5_cmd_status_to_err_v2(out); + if (err) + goto out; + + pkarr = MLX5_ADDR_OF(query_hca_vport_pkey_out, out, pkey); + for (i = 0; i < nout; i++, pkey++, pkarr += MLX5_ST_SZ_BYTES(pkey)) + *pkey = MLX5_GET_PR(pkey, pkarr, pkey); + +out: + kfree(in); + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_pkey); + +int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, + u8 other_vport, u8 port_num, + u16 vf_num, + struct mlx5_hca_vport_context *rep) +{ + int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_context_out); + int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)]; + int is_group_manager; + void *out; + void *ctx; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_hca_vport_context_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT); + + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_hca_vport_context_in, in, other_vport, 1); + MLX5_SET(query_hca_vport_context_in, in, vport_number, vf_num); + } else { + err = -EPERM; + goto ex; + } + } + + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_hca_vport_context_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); + if (err) + goto ex; + err = mlx5_cmd_status_to_err_v2(out); + if (err) + goto ex; + + ctx = MLX5_ADDR_OF(query_hca_vport_context_out, out, hca_vport_context); + rep->field_select = MLX5_GET_PR(hca_vport_context, ctx, field_select); + rep->sm_virt_aware = MLX5_GET_PR(hca_vport_context, ctx, sm_virt_aware); + rep->has_smi = MLX5_GET_PR(hca_vport_context, ctx, has_smi); + rep->has_raw = MLX5_GET_PR(hca_vport_context, ctx, has_raw); + rep->policy = MLX5_GET_PR(hca_vport_context, ctx, vport_state_policy); + rep->phys_state = MLX5_GET_PR(hca_vport_context, ctx, + port_physical_state); + rep->vport_state = MLX5_GET_PR(hca_vport_context, ctx, vport_state); + rep->port_physical_state = MLX5_GET_PR(hca_vport_context, ctx, + port_physical_state); + rep->port_guid = MLX5_GET64_PR(hca_vport_context, ctx, port_guid); + rep->node_guid = MLX5_GET64_PR(hca_vport_context, ctx, node_guid); + rep->cap_mask1 = MLX5_GET_PR(hca_vport_context, ctx, cap_mask1); + rep->cap_mask1_perm = MLX5_GET_PR(hca_vport_context, ctx, + cap_mask1_field_select); + rep->cap_mask2 = MLX5_GET_PR(hca_vport_context, ctx, cap_mask2); + rep->cap_mask2_perm = MLX5_GET_PR(hca_vport_context, ctx, + cap_mask2_field_select); + rep->lid = MLX5_GET_PR(hca_vport_context, ctx, lid); + rep->init_type_reply = MLX5_GET_PR(hca_vport_context, ctx, + init_type_reply); + rep->lmc = MLX5_GET_PR(hca_vport_context, ctx, lmc); + rep->subnet_timeout = MLX5_GET_PR(hca_vport_context, ctx, + subnet_timeout); + rep->sm_lid = MLX5_GET_PR(hca_vport_context, ctx, sm_lid); + rep->sm_sl = MLX5_GET_PR(hca_vport_context, ctx, sm_sl); + rep->qkey_violation_counter = MLX5_GET_PR(hca_vport_context, ctx, + qkey_violation_counter); + rep->pkey_violation_counter = MLX5_GET_PR(hca_vport_context, ctx, + pkey_violation_counter); + rep->grh_required = MLX5_GET_PR(hca_vport_context, ctx, grh_required); + rep->sys_image_guid = MLX5_GET64_PR(hca_vport_context, ctx, + system_image_guid); + +ex: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_context); + +int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, + __be64 *sys_image_guid) +{ + struct mlx5_hca_vport_context *rep; + int err; + + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + err = mlx5_query_hca_vport_context(dev, 0, 1, 0, rep); + if (!err) + *sys_image_guid = rep->sys_image_guid; + + kfree(rep); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_system_image_guid); + +int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, + u64 *node_guid) +{ + struct mlx5_hca_vport_context *rep; + int err; + + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + err = mlx5_query_hca_vport_context(dev, 0, 1, 0, rep); + if (!err) + *node_guid = rep->node_guid; + + kfree(rep); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index b288c538347a..b2c43508a737 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -99,6 +99,12 @@ __mlx5_mask(typ, fld)) #define MLX5_GET64(typ, p, fld) be64_to_cpu(*((__be64 *)(p) + __mlx5_64_off(typ, fld))) +#define MLX5_GET64_PR(typ, p, fld) ({ \ + u64 ___t = MLX5_GET64(typ, p, fld); \ + pr_debug(#fld " = 0x%llx\n", ___t); \ + ___t; \ +}) + enum { MLX5_MAX_COMMANDS = 32, MLX5_CMD_DATA_BLOCK_SIZE = 512, @@ -1172,4 +1178,11 @@ enum { MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR = 0x40, }; +static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) +{ + if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE) + return 0; + return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; +} + #endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ba9f212c94bb..8ab8b8af5c32 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -553,6 +553,41 @@ struct mlx5_pas { u8 log_sz; }; +enum port_state_policy { + MLX5_AAA_000 +}; + +enum phy_port_state { + MLX5_AAA_111 +}; + +struct mlx5_hca_vport_context { + u32 field_select; + bool sm_virt_aware; + bool has_smi; + bool has_raw; + enum port_state_policy policy; + enum phy_port_state phys_state; + enum ib_port_state vport_state; + u8 port_physical_state; + u64 sys_image_guid; + u64 port_guid; + u64 node_guid; + u32 cap_mask1; + u32 cap_mask1_perm; + u32 cap_mask2; + u32 cap_mask2_perm; + u16 lid; + u8 init_type_reply; /* bitmask: see ib spec 14.2.5.6 InitTypeReply */ + u8 lmc; + u8 subnet_timeout; + u16 sm_lid; + u8 sm_sl; + u16 qkey_violation_counter; + u16 pkey_violation_counter; + bool grh_required; +}; + static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset) { return buf->direct.buf + offset; @@ -792,4 +827,14 @@ struct mlx5_profile { } mr_cache[MAX_MR_CACHE_ENTRIES]; }; +static inline int mlx5_get_gid_table_len(u16 param) +{ + if (param > 4) { + pr_warn("gid table length is zero\n"); + return 0; + } + + return 8 * (1 << param); +} + #endif /* MLX5_DRIVER_H */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index dbe2b32c0539..f06d054ad021 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2221,12 +2221,15 @@ struct mlx5_ifc_hca_vport_context_bits { u8 has_smi[0x1]; u8 has_raw[0x1]; u8 grh_required[0x1]; - u8 reserved_1[0x10]; - u8 port_state_policy[0x4]; - u8 phy_port_state[0x4]; + u8 reserved_1[0xc]; + u8 port_physical_state[0x4]; + u8 vport_state_policy[0x4]; + u8 port_state[0x4]; u8 vport_state[0x4]; - u8 reserved_2[0x60]; + u8 reserved_2[0x20]; + + u8 system_image_guid[0x40]; u8 port_guid[0x40]; @@ -3490,7 +3493,8 @@ struct mlx5_ifc_query_hca_vport_pkey_in_bits { u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_2[0xb]; + u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_3[0x10]; @@ -3519,7 +3523,8 @@ struct mlx5_ifc_query_hca_vport_gid_in_bits { u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_2[0xb]; + u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_3[0x10]; @@ -3545,7 +3550,8 @@ struct mlx5_ifc_query_hca_vport_context_in_bits { u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_2[0xb]; + u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_3[0x20]; @@ -4243,7 +4249,8 @@ struct mlx5_ifc_modify_hca_vport_context_in_bits { u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_2[0xb]; + u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_3[0x20]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 99d0e9f85432..67882a834efb 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -37,5 +37,19 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr); +int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, u16 vf_num, u16 gid_index, + union ib_gid *gid); +int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, u16 vf_num, u16 pkey_index, + u16 *pkey); +int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, + u8 other_vport, u8 port_num, + u16 vf_num, + struct mlx5_hca_vport_context *rep); +int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, + __be64 *sys_image_guid); +int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, + u64 *node_guid); #endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3 From 211e6c80e5a68ef39a81484583e8efbf9774627d Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 4 Jun 2015 19:30:42 +0300 Subject: net/mlx5_core: Get vendor-id using the query adapter command Add two wrapper functions to the query adapter command: 1. mlx5_query_board_id -- replaces the old mlx5_cmd_query_adapter. 2. mlx5_core_query_vendor_id -- retrieves the vendor_id from the query_adapter command. Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 59 ++++++++++++++++------ drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 7 ++- 5 files changed, 53 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 801ccadd709a..ba87442ef776 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -35,34 +35,63 @@ #include #include "mlx5_core.h" -int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev) +int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out, int outlen) { - struct mlx5_cmd_query_adapter_mbox_out *out; - struct mlx5_cmd_query_adapter_mbox_in in; + u32 in[MLX5_ST_SZ_DW(query_adapter_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen); +} + +int mlx5_query_board_id(struct mlx5_core_dev *dev) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_adapter_out); int err; - out = kzalloc(sizeof(*out), GFP_KERNEL); + out = kzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; - memset(&in, 0, sizeof(in)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_ADAPTER); - err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out)); + err = mlx5_cmd_query_adapter(dev, out, outlen); if (err) - goto out_out; + goto out; - if (out->hdr.status) { - err = mlx5_cmd_status_to_err(&out->hdr); - goto out_out; - } + memcpy(dev->board_id, + MLX5_ADDR_OF(query_adapter_out, out, + query_adapter_struct.vsd_contd_psid), + MLX5_FLD_SZ_BYTES(query_adapter_out, + query_adapter_struct.vsd_contd_psid)); - memcpy(dev->board_id, out->vsd_psid, sizeof(out->vsd_psid)); - -out_out: +out: kfree(out); + return err; +} + +int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_adapter_out); + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + err = mlx5_cmd_query_adapter(mdev, out, outlen); + if (err) + goto out; + + *vendor_id = MLX5_GET(query_adapter_out, out, + query_adapter_struct.ieee_vendor_id); +out: + kfree(out); return err; } +EXPORT_SYMBOL(mlx5_core_query_vendor_id); int mlx5_query_hca_caps(struct mlx5_core_dev *dev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 58354122dfe4..afad529838de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -768,9 +768,9 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) goto err_stop_poll; } - err = mlx5_cmd_query_adapter(dev); + err = mlx5_query_board_id(dev); if (err) { - dev_err(&pdev->dev, "query adapter failed\n"); + dev_err(&pdev->dev, "query board id failed\n"); goto err_stop_poll; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 6983c1047255..fc88ecaecb4b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -78,7 +78,7 @@ static inline int mlx5_cmd_exec_check_status(struct mlx5_core_dev *dev, u32 *in, } int mlx5_query_hca_caps(struct mlx5_core_dev *dev); -int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev); +int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8ab8b8af5c32..b90fb9336d21 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -817,6 +817,7 @@ struct mlx5_interface { void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol); int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); +int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); struct mlx5_profile { u64 mask; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f06d054ad021..6d2f6fee041c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2470,9 +2470,12 @@ union mlx5_ifc_cong_control_roce_ecn_auto_bits { }; struct mlx5_ifc_query_adapter_param_block_bits { - u8 reserved_0[0xe0]; + u8 reserved_0[0xc0]; - u8 reserved_1[0x10]; + u8 reserved_1[0x8]; + u8 ieee_vendor_id[0x18]; + + u8 reserved_2[0x10]; u8 vsd_vendor_id[0x10]; u8 vsd[208][0x8]; -- cgit v1.2.3 From e760152d08da78aa160e68ac90bf8f3f10aff462 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 4 Jun 2015 19:30:43 +0300 Subject: net/mlx5_core: Use port number in the query port mtu helpers Extend the function prototypes for max and operational mtu to take the local port number. In the Ethernet driver is this hard coded to one, since ConnectX4 Ethernet devices are always function-per-port. The IB driver also serves older devices (ConnectIB) which isn't such, and hence the part can vary. Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/port.c | 15 +++++++++------ include/linux/mlx5/driver.h | 6 ++++-- 3 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index edba09cca600..7348c5173aa9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1386,7 +1386,7 @@ int mlx5e_open_locked(struct net_device *netdev) return err; } - err = mlx5_query_port_oper_mtu(mdev, &actual_mtu); + err = mlx5_query_port_oper_mtu(mdev, &actual_mtu, 1); if (err) { netdev_err(netdev, "%s: mlx5_query_port_oper_mtu failed %d\n", __func__, err); @@ -1614,7 +1614,7 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) int max_mtu; int err = 0; - err = mlx5_query_port_max_mtu(mdev, &max_mtu); + err = mlx5_query_port_max_mtu(mdev, &max_mtu, 1); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 7d3d0f9f328d..d9498aae5ab5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -213,7 +213,8 @@ int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status) } static int mlx5_query_port_mtu(struct mlx5_core_dev *dev, - int *admin_mtu, int *max_mtu, int *oper_mtu) + int *admin_mtu, int *max_mtu, int *oper_mtu, + u8 local_port) { u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; @@ -221,7 +222,7 @@ static int mlx5_query_port_mtu(struct mlx5_core_dev *dev, memset(in, 0, sizeof(in)); - MLX5_SET(pmtu_reg, in, local_port, 1); + MLX5_SET(pmtu_reg, in, local_port, local_port); err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PMTU, 0, 0); @@ -253,14 +254,16 @@ int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu) } EXPORT_SYMBOL_GPL(mlx5_set_port_mtu); -int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu) +int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, + u8 local_port) { - return mlx5_query_port_mtu(dev, NULL, max_mtu, NULL); + return mlx5_query_port_mtu(dev, NULL, max_mtu, NULL, local_port); } EXPORT_SYMBOL_GPL(mlx5_query_port_max_mtu); -int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu) +int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, + u8 local_port) { - return mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu); + return mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, local_port); } EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b90fb9336d21..cd09784b6999 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -751,8 +751,10 @@ int mlx5_set_port_status(struct mlx5_core_dev *dev, int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status); int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu); -int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu); -int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu); +int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, + u8 local_port); +int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, + u8 local_port); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -- cgit v1.2.3 From a05bdefa4081d43f9c86c3bb693d0492a21590da Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 4 Jun 2015 19:30:44 +0300 Subject: net/mlx5_core: Use port number when querying port ptys Until now, mlx5_query_port_ptys always queried port number one. Added new argument in the function's prototype so we can also query the second port. This will be needed when thr helper will be invoked from the IB driver on non FPP (Function-Per-Port) devices. Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/port.c | 8 ++++---- include/linux/mlx5/driver.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index de7aec8abca1..388938482ff9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -543,7 +543,7 @@ static int mlx5e_get_settings(struct net_device *netdev, u32 eth_proto_oper; int err; - err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN); + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); if (err) { netdev_err(netdev, "%s: query port ptys failed: %d\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index d9498aae5ab5..cbbce40d853c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -104,13 +104,13 @@ int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps) EXPORT_SYMBOL_GPL(mlx5_set_port_caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, - int ptys_size, int proto_mask) + int ptys_size, int proto_mask, u8 local_port) { u32 in[MLX5_ST_SZ_DW(ptys_reg)]; int err; memset(in, 0, sizeof(in)); - MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, local_port, local_port); MLX5_SET(ptys_reg, in, proto_mask, proto_mask); err = mlx5_core_access_reg(dev, in, sizeof(in), ptys, @@ -126,7 +126,7 @@ int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, u32 out[MLX5_ST_SZ_DW(ptys_reg)]; int err; - err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask); + err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); if (err) return err; @@ -145,7 +145,7 @@ int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, u32 out[MLX5_ST_SZ_DW(ptys_reg)]; int err; - err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask); + err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); if (err) return err; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index cd09784b6999..e4b814f64014 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -739,7 +739,7 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, - int ptys_size, int proto_mask); + int ptys_size, int proto_mask, u8 local_port); int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, u32 *proto_cap, int proto_mask); int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, -- cgit v1.2.3 From a124d13ef59e09941fc0924fd7c29ae6d7cd77a3 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 4 Jun 2015 19:30:45 +0300 Subject: net/mlx5_core: Add more query port helpers Add the following helpers: 1. mlx5_query_port_proto_oper -- queries the port speed port mask 2. mlx5_query_port_link_width_oper - queries the port link with bitmask 3. mlx5_query_port_vl_hw_cap - queries the Virtual Lanes supported on this port These helpers will be used from the IB driver when working in ISSI > 0 mode. Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 67 ++++++++++++++++++++++++++ include/linux/mlx5/driver.h | 8 +++ 2 files changed, 75 insertions(+) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index cbbce40d853c..619d3baf19ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -158,6 +158,42 @@ int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_proto_admin); +int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, + u8 *link_width_oper, u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB, local_port); + if (err) + return err; + + *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper); + +int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, + u8 *proto_oper, int proto_mask, + u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, local_port); + if (err) + return err; + + if (proto_mask == MLX5_PTYS_EN) + *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + else + *proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_proto_oper); + int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, int proto_mask) { @@ -267,3 +303,34 @@ int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, return mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, local_port); } EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); + +static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc, + int pvlc_size, u8 local_port) +{ + u32 in[MLX5_ST_SZ_DW(pvlc_reg)]; + int err; + + memset(in, 0, sizeof(in)); + MLX5_SET(ptys_reg, in, local_port, local_port); + + err = mlx5_core_access_reg(dev, in, sizeof(in), pvlc, + pvlc_size, MLX5_REG_PVLC, 0, 0); + + return err; +} + +int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, + u8 *vl_hw_cap, u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(pvlc_reg)]; + int err; + + err = mlx5_query_port_pvlc(dev, out, sizeof(out), local_port); + if (err) + return err; + + *vl_hw_cap = MLX5_GET(pvlc_reg, out, vl_hw_cap); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e4b814f64014..6093bde16b94 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -107,6 +107,7 @@ enum { MLX5_REG_PUDE = 0x5009, MLX5_REG_PMPE = 0x5010, MLX5_REG_PELC = 0x500e, + MLX5_REG_PVLC = 0x500f, MLX5_REG_PMLP = 0, /* TBD */ MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, @@ -744,6 +745,11 @@ int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, u32 *proto_cap, int proto_mask); int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, u32 *proto_admin, int proto_mask); +int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, + u8 *link_width_oper, u8 local_port); +int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, + u8 *proto_oper, int proto_mask, + u8 local_port); int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, int proto_mask); int mlx5_set_port_status(struct mlx5_core_dev *dev, @@ -755,6 +761,8 @@ int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 local_port); int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, u8 local_port); +int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, + u8 *vl_hw_cap, u8 local_port); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -- cgit v1.2.3 From 90c337da1524863838658078ec34241f45d8394d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 6 Jun 2015 21:17:57 -0700 Subject: inet: add IP_BIND_ADDRESS_NO_PORT to overcome bind(0) limitations When an application needs to force a source IP on an active TCP socket it has to use bind(IP, port=x). As most applications do not want to deal with already used ports, x is often set to 0, meaning the kernel is in charge to find an available port. But kernel does not know yet if this socket is going to be a listener or be connected. It has very limited choices (no full knowledge of final 4-tuple for a connect()) With limited ephemeral port range (about 32K ports), it is very easy to fill the space. This patch adds a new SOL_IP socket option, asking kernel to ignore the 0 port provided by application in bind(IP, port=0) and only remember the given IP address. The port will be automatically chosen at connect() time, in a way that allows sharing a source port as long as the 4-tuples are unique. This new feature is available for both IPv4 and IPv6 (Thanks Neal) Tested: Wrote a test program and checked its behavior on IPv4 and IPv6. strace(1) shows sequences of bind(IP=127.0.0.2, port=0) followed by connect(). Also getsockname() show that the port is still 0 right after bind() but properly allocated after connect(). socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 5 setsockopt(5, SOL_IP, IP_BIND_ADDRESS_NO_PORT, [1], 4) = 0 bind(5, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("127.0.0.2")}, 16) = 0 getsockname(5, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("127.0.0.2")}, [16]) = 0 connect(5, {sa_family=AF_INET, sin_port=htons(53174), sin_addr=inet_addr("127.0.0.3")}, 16) = 0 getsockname(5, {sa_family=AF_INET, sin_port=htons(38050), sin_addr=inet_addr("127.0.0.2")}, [16]) = 0 IPv6 test : socket(PF_INET6, SOCK_STREAM, IPPROTO_IP) = 7 setsockopt(7, SOL_IP, IP_BIND_ADDRESS_NO_PORT, [1], 4) = 0 bind(7, {sa_family=AF_INET6, sin6_port=htons(0), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, 28) = 0 getsockname(7, {sa_family=AF_INET6, sin6_port=htons(0), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, [28]) = 0 connect(7, {sa_family=AF_INET6, sin6_port=htons(57300), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, 28) = 0 getsockname(7, {sa_family=AF_INET6, sin6_port=htons(60964), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, [28]) = 0 I was able to bind()/connect() a million concurrent IPv4 sockets, instead of ~32000 before patch. lpaa23:~# ulimit -n 1000010 lpaa23:~# ./bind --connect --num-flows=1000000 & 1000000 sockets lpaa23:~# grep TCP /proc/net/sockstat TCP: inuse 2000063 orphan 0 tw 47 alloc 2000157 mem 66 Check that a given source port is indeed used by many different connections : lpaa23:~# ss -t src :40000 | head -10 State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 0 127.0.0.2:40000 127.0.202.33:44983 ESTAB 0 0 127.0.0.2:40000 127.2.27.240:44983 ESTAB 0 0 127.0.0.2:40000 127.2.98.5:44983 ESTAB 0 0 127.0.0.2:40000 127.0.124.196:44983 ESTAB 0 0 127.0.0.2:40000 127.2.139.38:44983 ESTAB 0 0 127.0.0.2:40000 127.1.59.80:44983 ESTAB 0 0 127.0.0.2:40000 127.3.6.228:44983 ESTAB 0 0 127.0.0.2:40000 127.0.38.53:44983 ESTAB 0 0 127.0.0.2:40000 127.1.197.10:44983 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 1 + include/uapi/linux/in.h | 1 + net/ipv4/af_inet.c | 3 ++- net/ipv4/ip_sockglue.c | 7 +++++++ net/ipv6/af_inet6.c | 3 ++- 5 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index b6c3737da4e9..47eb67b08abd 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -187,6 +187,7 @@ struct inet_sock { transparent:1, mc_all:1, nodefrag:1; + __u8 bind_address_no_port:1; __u8 rcv_tos; __u8 convert_csum; int uc_index; diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 641338bef651..83d6236a2f08 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -112,6 +112,7 @@ struct in_addr { #define IP_MINTTL 21 #define IP_NODEFRAG 22 #define IP_CHECKSUM 23 +#define IP_BIND_ADDRESS_NO_PORT 24 /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6ad0f7a711c9..cc858ef44451 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -488,7 +488,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) inet->inet_saddr = 0; /* Use device */ /* Make sure we are allowed to bind here. */ - if (sk->sk_prot->get_port(sk, snum)) { + if ((snum || !inet->bind_address_no_port) && + sk->sk_prot->get_port(sk, snum)) { inet->inet_saddr = inet->inet_rcv_saddr = 0; err = -EADDRINUSE; goto out_release_sock; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 7cfb0893f263..04ae2992a5cd 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -582,6 +582,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_TRANSPARENT: case IP_MINTTL: case IP_NODEFRAG: + case IP_BIND_ADDRESS_NO_PORT: case IP_UNICAST_IF: case IP_MULTICAST_TTL: case IP_MULTICAST_ALL: @@ -732,6 +733,9 @@ static int do_ip_setsockopt(struct sock *sk, int level, } inet->nodefrag = val ? 1 : 0; break; + case IP_BIND_ADDRESS_NO_PORT: + inet->bind_address_no_port = val ? 1 : 0; + break; case IP_MTU_DISCOVER: if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) goto e_inval; @@ -1324,6 +1328,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_NODEFRAG: val = inet->nodefrag; break; + case IP_BIND_ADDRESS_NO_PORT: + val = inet->bind_address_no_port; + break; case IP_MTU_DISCOVER: val = inet->pmtudisc; break; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index f3866c0b6cfe..7de52b65173f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -362,7 +362,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) np->saddr = addr->sin6_addr; /* Make sure we are allowed to bind here. */ - if (sk->sk_prot->get_port(sk, snum)) { + if ((snum || !inet->bind_address_no_port) && + sk->sk_prot->get_port(sk, snum)) { inet_reset_saddr(sk); err = -EADDRINUSE; goto out; -- cgit v1.2.3 From ed65963ba0a2bdc330b1d7183f930d1c6a0a6685 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 6 Jun 2015 17:30:46 +0200 Subject: mac802154: remove unneeded vif struct This patch removes the virtual interface structure from sub if data struct, because it isn't used anywhere. This structure could be useful for give per interface information at softmac driver layer. Nevertheless there exist no use case currently and it contains the interface type information currently. This information is also stored inside wpan dev which is now used to check on the wpan dev interface type. Signed-off-by: Alexander Aring Reviewed-by: Varka Bhadram Acked-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/at86rf230.c | 1 - drivers/net/ieee802154/cc2520.c | 1 - include/net/mac802154.h | 8 -------- net/mac802154/ieee802154_i.h | 2 -- net/mac802154/iface.c | 9 ++++----- net/mac802154/rx.c | 4 ++-- 6 files changed, 6 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 2f25a5ed8247..6e20e9c94b3c 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -1602,7 +1602,6 @@ static int at86rf230_probe(struct spi_device *spi) lp->spi = spi; lp->slp_tr = slp_tr; hw->parent = &spi->dev; - hw->vif_data_size = sizeof(*lp); ieee802154_random_extended_addr(&hw->phy->perm_extended_addr); lp->regmap = devm_regmap_init_spi(spi, &at86rf230_regmap_spi_config); diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c index 8141353221a1..a8bafd6bd5e4 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ -742,7 +742,6 @@ static int cc2520_register(struct cc2520_private *priv) priv->hw->priv = priv; priv->hw->parent = &priv->spi->dev; priv->hw->extra_tx_headroom = 0; - priv->hw->vif_data_size = sizeof(*priv); ieee802154_random_extended_addr(&priv->hw->phy->perm_extended_addr); /* We do support only 2.4 Ghz */ diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 9605c7f7453f..80a9e60ceb0c 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -56,13 +56,6 @@ struct ieee802154_hw_addr_filt { u8 pan_coord; }; -struct ieee802154_vif { - int type; - - /* must be last */ - u8 drv_priv[0] __aligned(sizeof(void *)); -}; - struct ieee802154_hw { /* filled by the driver */ int extra_tx_headroom; @@ -73,7 +66,6 @@ struct ieee802154_hw { struct ieee802154_hw_addr_filt hw_filt; void *priv; struct wpan_phy *phy; - size_t vif_data_size; }; /* Checksum is in hardware and is omitted from a packet diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h index eec668f3637f..34755d5751a4 100644 --- a/net/mac802154/ieee802154_i.h +++ b/net/mac802154/ieee802154_i.h @@ -92,8 +92,6 @@ struct ieee802154_sub_if_data { struct mutex sec_mtx; struct mac802154_llsec sec; - /* must be last, dynamically sized area in this! */ - struct ieee802154_vif vif; }; #define MAC802154_CHAN_NONE 0xff /* No channel is assigned */ diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 6ac023932ce0..3a67d35d4672 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -219,8 +219,8 @@ ieee802154_check_concurrent_iface(struct ieee802154_sub_if_data *sdata, * exist really an use case if we need to support * multiple node types at the same time. */ - if (sdata->vif.type == NL802154_IFTYPE_NODE && - nsdata->vif.type == NL802154_IFTYPE_NODE) + if (wpan_dev->iftype == NL802154_IFTYPE_NODE && + nsdata->wpan_dev.iftype == NL802154_IFTYPE_NODE) return -EBUSY; /* check all phy mac sublayer settings are the same. @@ -243,7 +243,7 @@ static int mac802154_wpan_open(struct net_device *dev) struct ieee802154_local *local = sdata->local; struct wpan_dev *wpan_dev = &sdata->wpan_dev; - rc = ieee802154_check_concurrent_iface(sdata, sdata->vif.type); + rc = ieee802154_check_concurrent_iface(sdata, wpan_dev->iftype); if (rc < 0) return rc; @@ -467,7 +467,6 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata, u8 tmp; /* set some type-dependent values */ - sdata->vif.type = type; sdata->wpan_dev.iftype = type; get_random_bytes(&tmp, sizeof(tmp)); @@ -523,7 +522,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, ASSERT_RTNL(); - ndev = alloc_netdev(sizeof(*sdata) + local->hw.vif_data_size, name, + ndev = alloc_netdev(sizeof(*sdata), name, name_assign_type, ieee802154_if_setup); if (!ndev) return ERR_PTR(-ENOMEM); diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index e0f10063cac3..1bdf98068608 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -202,7 +202,7 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local, } list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (sdata->vif.type != NL802154_IFTYPE_NODE || + if (sdata->wpan_dev.iftype != NL802154_IFTYPE_NODE || !netif_running(sdata->dev)) continue; @@ -227,7 +227,7 @@ ieee802154_monitors_rx(struct ieee802154_local *local, struct sk_buff *skb) skb->protocol = htons(ETH_P_IEEE802154); list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (sdata->vif.type != NL802154_IFTYPE_MONITOR) + if (sdata->wpan_dev.iftype != NL802154_IFTYPE_MONITOR) continue; if (!ieee802154_sdata_running(sdata)) -- cgit v1.2.3 From 6b70a43c7e0202cf285c864bc9f20f607c42e432 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 6 Jun 2015 17:30:47 +0200 Subject: mac802154: cleanup address filtering flags This patch changes the address filtering flags to enums and setting the flag values with the BIT macro. Additional this patch changes the commenting style for matching usual kernel style. Signed-off-by: Alexander Aring Reviewed-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 80a9e60ceb0c..845e4f88a33e 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -31,18 +31,29 @@ */ #define MAC802154_FRAME_HARD_HEADER_LEN (2 + 1 + 20 + 14) -/* The following flags are used to indicate changed address settings from +/** + * enum ieee802154_hw_addr_filt_flags - hardware address filtering flags + * + * The following flags are used to indicate changed address settings from * the stack to the hardware. + * + * @IEEE802154_AFILT_SADDR_CHANGED: Indicates that the short address will be + * change. + * + * @IEEE802154_AFILT_IEEEADDR_CHANGED: Indicates that the extended address + * will be change. + * + * @IEEE802154_AFILT_PANID_CHANGED: Indicates that the pan id will be change. + * + * @IEEE802154_AFILT_PANC_CHANGED: Indicates that the address filter will + * do frame address filtering as a pan coordinator. */ - -/* indicates that the Short Address changed */ -#define IEEE802154_AFILT_SADDR_CHANGED 0x00000001 -/* indicates that the IEEE Address changed */ -#define IEEE802154_AFILT_IEEEADDR_CHANGED 0x00000002 -/* indicates that the PAN ID changed */ -#define IEEE802154_AFILT_PANID_CHANGED 0x00000004 -/* indicates that PAN Coordinator status changed */ -#define IEEE802154_AFILT_PANC_CHANGED 0x00000008 +enum ieee802154_hw_addr_filt_flags { + IEEE802154_AFILT_SADDR_CHANGED = BIT(1), + IEEE802154_AFILT_IEEEADDR_CHANGED = BIT(2), + IEEE802154_AFILT_PANID_CHANGED = BIT(3), + IEEE802154_AFILT_PANC_CHANGED = BIT(4), +}; struct ieee802154_hw_addr_filt { __le16 pan_id; /* Each independent PAN selects a unique -- cgit v1.2.3 From f265be3d124a5b62e5a339685b6cfaa0292f1250 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 6 Jun 2015 17:30:48 +0200 Subject: mac802154: remove aack hw flag This patch removes the hardware auto acknowdledge flag which indicates that the transceiver supports this handling. This flag is never evaluated inside mac802154 and all transceivers should support this handling by default per hardware. Suggested-by: Lennert Buytenhek Cc: Alan Ott Signed-off-by: Alexander Aring Reviewed-by: Varka Bhadram Acked-by: Stefan Schmidt Acked-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/at86rf230.c | 2 +- drivers/net/ieee802154/atusb.c | 2 +- drivers/net/ieee802154/cc2520.c | 3 +-- drivers/net/ieee802154/mrf24j40.c | 3 +-- include/net/mac802154.h | 2 -- 5 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 6e20e9c94b3c..1bf86d2df23f 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -1443,7 +1443,7 @@ at86rf230_detect_device(struct at86rf230_local *lp) return -EINVAL; } - lp->hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_AACK | + lp->hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_CSMA_PARAMS | IEEE802154_HW_FRAME_RETRIES | IEEE802154_HW_AFILT | IEEE802154_HW_PROMISCUOUS; diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index 3d25678dfd95..80dfc725b8dc 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -628,7 +628,7 @@ static int atusb_probe(struct usb_interface *interface, hw->parent = &usb_dev->dev; hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_AFILT | - IEEE802154_HW_AACK | IEEE802154_HW_PROMISCUOUS; + IEEE802154_HW_PROMISCUOUS; hw->phy->flags = WPAN_PHY_FLAG_TXPOWER; diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c index a8bafd6bd5e4..15f263cfece5 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ -746,8 +746,7 @@ static int cc2520_register(struct cc2520_private *priv) /* We do support only 2.4 Ghz */ priv->hw->phy->supported.channels[0] = 0x7FFF800; - priv->hw->flags = IEEE802154_HW_OMIT_CKSUM | IEEE802154_HW_AACK | - IEEE802154_HW_AFILT; + priv->hw->flags = IEEE802154_HW_OMIT_CKSUM | IEEE802154_HW_AFILT; priv->hw->phy->flags = WPAN_PHY_FLAG_TXPOWER; diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c index f2a1bd122a74..99c767695c7b 100644 --- a/drivers/net/ieee802154/mrf24j40.c +++ b/drivers/net/ieee802154/mrf24j40.c @@ -751,8 +751,7 @@ static int mrf24j40_probe(struct spi_device *spi) devrec->hw->priv = devrec; devrec->hw->parent = &devrec->spi->dev; devrec->hw->phy->supported.channels[0] = CHANNEL_MASK; - devrec->hw->flags = IEEE802154_HW_OMIT_CKSUM | IEEE802154_HW_AACK | - IEEE802154_HW_AFILT; + devrec->hw->flags = IEEE802154_HW_OMIT_CKSUM | IEEE802154_HW_AFILT; dev_dbg(printdev(devrec), "registered mrf24j40\n"); ret = ieee802154_register_hw(devrec->hw); diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 845e4f88a33e..a20ba28ad9c5 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -90,8 +90,6 @@ struct ieee802154_hw { /* Indicates that xmitter will add FCS on it's own. */ #define IEEE802154_HW_TX_OMIT_CKSUM 0x00000001 -/* Indicates that receiver will autorespond with ACK frames. */ -#define IEEE802154_HW_AACK 0x00000002 /* Indicates that transceiver will support listen before transmit. */ #define IEEE802154_HW_LBT 0x00000004 /* Indicates that transceiver will support csma (max_be, min_be, csma retries) -- cgit v1.2.3 From bcbfd2078d9b11277d9c9ce0c30ba73c750503c9 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 6 Jun 2015 17:30:49 +0200 Subject: mac802154: cleanup ieee802154 hardware flags This patch changes the ieee802154 hardware flags to enums and setting the flag values with the BIT macro. Additional this patch changes the commenting style for matching usual kernel style. Signed-off-by: Alexander Aring Reviewed-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 56 +++++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index a20ba28ad9c5..c21a700293b8 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -79,32 +79,48 @@ struct ieee802154_hw { struct wpan_phy *phy; }; -/* Checksum is in hardware and is omitted from a packet +/** + * enum ieee802154_hw_flags - hardware flags * - * These following flags are used to indicate hardware capabilities to + * These flags are used to indicate hardware capabilities to * the stack. Generally, flags here should have their meaning * done in a way that the simplest hardware doesn't need setting * any particular flags. There are some exceptions to this rule, * however, so you are advised to review these flags carefully. + * + * @IEEE802154_HW_TX_OMIT_CKSUM: Indicates that xmitter will add FCS on it's + * own. + * + * @IEEE802154_HW_LBT: Indicates that transceiver will support listen before + * transmit. + * + * @IEEE802154_HW_CSMA_PARAMS: Indicates that transceiver will support csma + * parameters (max_be, min_be, backoff exponents). + * + * @IEEE802154_HW_FRAME_RETRIES: Indicates that transceiver will support ARET + * frame retries setting. + * + * @IEEE802154_HW_AFILT: Indicates that transceiver will support hardware + * address filter setting. + * + * @IEEE802154_HW_PROMISCUOUS: Indicates that transceiver will support + * promiscuous mode setting. + * + * @IEEE802154_HW_RX_OMIT_CKSUM: Indicates that receiver omits FCS. + * + * @IEEE802154_HW_RX_DROP_BAD_CKSUM: Indicates that receiver will not filter + * frames with bad checksum. */ - -/* Indicates that xmitter will add FCS on it's own. */ -#define IEEE802154_HW_TX_OMIT_CKSUM 0x00000001 -/* Indicates that transceiver will support listen before transmit. */ -#define IEEE802154_HW_LBT 0x00000004 -/* Indicates that transceiver will support csma (max_be, min_be, csma retries) - * settings. */ -#define IEEE802154_HW_CSMA_PARAMS 0x00000008 -/* Indicates that transceiver will support ARET frame retries setting. */ -#define IEEE802154_HW_FRAME_RETRIES 0x00000010 -/* Indicates that transceiver will support hardware address filter setting. */ -#define IEEE802154_HW_AFILT 0x00000020 -/* Indicates that transceiver will support promiscuous mode setting. */ -#define IEEE802154_HW_PROMISCUOUS 0x00000040 -/* Indicates that receiver omits FCS. */ -#define IEEE802154_HW_RX_OMIT_CKSUM 0x00000080 -/* Indicates that receiver will not filter frames with bad checksum. */ -#define IEEE802154_HW_RX_DROP_BAD_CKSUM 0x00000100 +enum ieee802154_hw_flags { + IEEE802154_HW_TX_OMIT_CKSUM = BIT(1), + IEEE802154_HW_LBT = BIT(2), + IEEE802154_HW_CSMA_PARAMS = BIT(3), + IEEE802154_HW_FRAME_RETRIES = BIT(4), + IEEE802154_HW_AFILT = BIT(5), + IEEE802154_HW_PROMISCUOUS = BIT(6), + IEEE802154_HW_RX_OMIT_CKSUM = BIT(7), + IEEE802154_HW_RX_DROP_BAD_CKSUM = BIT(8), +}; /* Indicates that receiver omits FCS and xmitter will add FCS on it's own. */ #define IEEE802154_HW_OMIT_CKSUM (IEEE802154_HW_TX_OMIT_CKSUM | \ -- cgit v1.2.3 From 5661d431c6e65588e813c947117e6d18eb03422f Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 6 Jun 2015 17:30:50 +0200 Subject: mac802154: remove unused hw_filt attribute This patch removed an attribute from ieee802154_hw structure which is never used inside kernel. Address information are stored inside wpan_dev nowadays. Signed-off-by: Alexander Aring Reviewed-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index c21a700293b8..61d4b80bf766 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -74,7 +74,6 @@ struct ieee802154_hw { struct device *parent; /* filled by mac802154 core */ - struct ieee802154_hw_addr_filt hw_filt; void *priv; struct wpan_phy *phy; }; -- cgit v1.2.3 From af69a34548cb01aefef76aeb3565cebc7a9fb0d6 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 6 Jun 2015 17:30:51 +0200 Subject: mac802154: rearrange attribute in ieee802154_hw This patch removes the priv attribute in ieee802154_hw to the right section which is commented by attributes which needs to be filled by driver layer. Signed-off-by: Alexander Aring Reviewed-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 61d4b80bf766..d8e9e6f0e096 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -72,9 +72,9 @@ struct ieee802154_hw { int extra_tx_headroom; u32 flags; struct device *parent; + void *priv; /* filled by mac802154 core */ - void *priv; struct wpan_phy *phy; }; -- cgit v1.2.3 From a0825b03aed91dd25d31aaff1e6e85c322caf8b2 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 6 Jun 2015 17:30:52 +0200 Subject: mac802154: add missing structure comments This patch add missing comments to internal mac802154 structures. Signed-off-by: Alexander Aring Reviewed-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index d8e9e6f0e096..095c9d069968 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -55,18 +55,40 @@ enum ieee802154_hw_addr_filt_flags { IEEE802154_AFILT_PANC_CHANGED = BIT(4), }; +/** + * struct ieee802154_hw_addr_filt - hardware address filtering settings + * + * @pan_id: pan_id which should be set to the hardware address filter. + * + * @short_addr: short_addr which should be set to the hardware address filter. + * + * @ieee_addr: extended address which should be set to the hardware address + * filter. + * + * @pan_coord: boolean if hardware filtering should be operate as coordinator. + */ struct ieee802154_hw_addr_filt { - __le16 pan_id; /* Each independent PAN selects a unique - * identifier. This PAN id allows communication - * between devices within a network using short - * addresses and enables transmissions between - * devices across independent networks. - */ + __le16 pan_id; __le16 short_addr; __le64 ieee_addr; u8 pan_coord; }; +/** + * struct ieee802154_hw - ieee802154 hardware + * + * @extra_tx_headroom: headroom to reserve in each transmit skb for use by the + * driver (e.g. for transmit headers.) + * + * @flags: hardware flags, see &enum ieee802154_hw_flags + * + * @parent: parent device of the hardware. + * + * @priv: pointer to private area that was allocated for driver use along with + * this structure. + * + * @phy: This points to the &struct wpan_phy allocated for this 802.15.4 PHY. + */ struct ieee802154_hw { /* filled by the driver */ int extra_tx_headroom; -- cgit v1.2.3 From 623c1234a2da2235d6f0bcd09061b6f7776eee93 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 6 Jun 2015 17:30:53 +0200 Subject: mac802154: change pan_coord type to bool To indicate if it's a coordinator or not a bool is enough. There should no more values available which represent some other state. Signed-off-by: Alexander Aring Reviewed-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 095c9d069968..de1cdde92edc 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -71,7 +71,7 @@ struct ieee802154_hw_addr_filt { __le16 pan_id; __le16 short_addr; __le64 ieee_addr; - u8 pan_coord; + bool pan_coord; }; /** -- cgit v1.2.3 From d691f9e8d4405c334aa10d556e73c8bf44cb0e01 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 4 Jun 2015 10:11:54 -0700 Subject: bpf: allow programs to write to certain skb fields allow programs read/write skb->mark, tc_index fields and ((struct qdisc_skb_cb *)cb)->data. mark and tc_index are generically useful in TC. cb[0]-cb[4] are primarily used to pass arguments from one program to another called via bpf_tail_call() which can be seen in sockex3_kern.c example. All fields of 'struct __sk_buff' are readable to socket and tc_cls_act progs. mark, tc_index are writeable from tc_cls_act only. cb[0]-cb[4] are writeable by both sockets and tc_cls_act. Add verifier tests and improve sample code. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 +- include/uapi/linux/bpf.h | 2 + kernel/bpf/verifier.c | 37 +++++++++++++----- net/core/filter.c | 94 +++++++++++++++++++++++++++++++++++++++------ samples/bpf/sockex3_kern.c | 35 ++++++----------- samples/bpf/test_verifier.c | 84 +++++++++++++++++++++++++++++++++++++++- 6 files changed, 207 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ca854e5bb2f7..2235aee8096a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -105,7 +105,8 @@ struct bpf_verifier_ops { */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type); - u32 (*convert_ctx_access)(int dst_reg, int src_reg, int ctx_off, + u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, + int src_reg, int ctx_off, struct bpf_insn *insn); }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 42aa19abab86..602f05b7a275 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -248,6 +248,8 @@ struct __sk_buff { __u32 priority; __u32 ingress_ifindex; __u32 ifindex; + __u32 tc_index; + __u32 cb[5]; }; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index cfd9a40b9a5a..039d866fd36a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1692,6 +1692,8 @@ static int do_check(struct verifier_env *env) } } else if (class == BPF_STX) { + enum bpf_reg_type dst_reg_type; + if (BPF_MODE(insn->code) == BPF_XADD) { err = check_xadd(env, insn); if (err) @@ -1700,11 +1702,6 @@ static int do_check(struct verifier_env *env) continue; } - if (BPF_MODE(insn->code) != BPF_MEM || - insn->imm != 0) { - verbose("BPF_STX uses reserved fields\n"); - return -EINVAL; - } /* check src1 operand */ err = check_reg_arg(regs, insn->src_reg, SRC_OP); if (err) @@ -1714,6 +1711,8 @@ static int do_check(struct verifier_env *env) if (err) return err; + dst_reg_type = regs[insn->dst_reg].type; + /* check that memory (dst_reg + off) is writeable */ err = check_mem_access(env, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, @@ -1721,6 +1720,15 @@ static int do_check(struct verifier_env *env) if (err) return err; + if (insn->imm == 0) { + insn->imm = dst_reg_type; + } else if (dst_reg_type != insn->imm && + (dst_reg_type == PTR_TO_CTX || + insn->imm == PTR_TO_CTX)) { + verbose("same insn cannot be used with different pointers\n"); + return -EINVAL; + } + } else if (class == BPF_ST) { if (BPF_MODE(insn->code) != BPF_MEM || insn->src_reg != BPF_REG_0) { @@ -1839,12 +1847,18 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { if (BPF_CLASS(insn->code) == BPF_LDX && - (BPF_MODE(insn->code) != BPF_MEM || - insn->imm != 0)) { + (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) { verbose("BPF_LDX uses reserved fields\n"); return -EINVAL; } + if (BPF_CLASS(insn->code) == BPF_STX && + ((BPF_MODE(insn->code) != BPF_MEM && + BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) { + verbose("BPF_STX uses reserved fields\n"); + return -EINVAL; + } + if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { struct bpf_map *map; struct fd f; @@ -1967,12 +1981,17 @@ static int convert_ctx_accesses(struct verifier_env *env) struct bpf_prog *new_prog; u32 cnt; int i; + enum bpf_access_type type; if (!env->prog->aux->ops->convert_ctx_access) return 0; for (i = 0; i < insn_cnt; i++, insn++) { - if (insn->code != (BPF_LDX | BPF_MEM | BPF_W)) + if (insn->code == (BPF_LDX | BPF_MEM | BPF_W)) + type = BPF_READ; + else if (insn->code == (BPF_STX | BPF_MEM | BPF_W)) + type = BPF_WRITE; + else continue; if (insn->imm != PTR_TO_CTX) { @@ -1982,7 +2001,7 @@ static int convert_ctx_accesses(struct verifier_env *env) } cnt = env->prog->aux->ops-> - convert_ctx_access(insn->dst_reg, insn->src_reg, + convert_ctx_access(type, insn->dst_reg, insn->src_reg, insn->off, insn_buf); if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { verbose("bpf verifier is misconfigured\n"); diff --git a/net/core/filter.c b/net/core/filter.c index 36a69e33d76b..d271c06bf01f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -46,6 +46,7 @@ #include #include #include +#include /** * sk_filter - run a packet through a socket filter @@ -1463,13 +1464,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) } } -static bool sk_filter_is_valid_access(int off, int size, - enum bpf_access_type type) +static bool __is_valid_access(int off, int size, enum bpf_access_type type) { - /* only read is allowed */ - if (type != BPF_READ) - return false; - /* check bounds */ if (off < 0 || off >= sizeof(struct __sk_buff)) return false; @@ -1485,8 +1481,42 @@ static bool sk_filter_is_valid_access(int off, int size, return true; } -static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, - struct bpf_insn *insn_buf) +static bool sk_filter_is_valid_access(int off, int size, + enum bpf_access_type type) +{ + if (type == BPF_WRITE) { + switch (off) { + case offsetof(struct __sk_buff, cb[0]) ... + offsetof(struct __sk_buff, cb[4]): + break; + default: + return false; + } + } + + return __is_valid_access(off, size, type); +} + +static bool tc_cls_act_is_valid_access(int off, int size, + enum bpf_access_type type) +{ + if (type == BPF_WRITE) { + switch (off) { + case offsetof(struct __sk_buff, mark): + case offsetof(struct __sk_buff, tc_index): + case offsetof(struct __sk_buff, cb[0]) ... + offsetof(struct __sk_buff, cb[4]): + break; + default: + return false; + } + } + return __is_valid_access(off, size, type); +} + +static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, + int src_reg, int ctx_off, + struct bpf_insn *insn_buf) { struct bpf_insn *insn = insn_buf; @@ -1538,7 +1568,15 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, break; case offsetof(struct __sk_buff, mark): - return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn); + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, mark)); + else + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, mark)); + break; case offsetof(struct __sk_buff, pkt_type): return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn); @@ -1553,6 +1591,38 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, case offsetof(struct __sk_buff, vlan_tci): return convert_skb_access(SKF_AD_VLAN_TAG, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, cb[0]) ... + offsetof(struct __sk_buff, cb[4]): + BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); + + ctx_off -= offsetof(struct __sk_buff, cb[0]); + ctx_off += offsetof(struct sk_buff, cb); + ctx_off += offsetof(struct qdisc_skb_cb, data); + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, ctx_off); + else + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off); + break; + + case offsetof(struct __sk_buff, tc_index): +#ifdef CONFIG_NET_SCHED + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, tc_index)); + else + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, tc_index)); + break; +#else + if (type == BPF_WRITE) + *insn++ = BPF_MOV64_REG(dst_reg, dst_reg); + else + *insn++ = BPF_MOV64_IMM(dst_reg, 0); + break; +#endif } return insn - insn_buf; @@ -1561,13 +1631,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, static const struct bpf_verifier_ops sk_filter_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, - .convert_ctx_access = sk_filter_convert_ctx_access, + .convert_ctx_access = bpf_net_convert_ctx_access, }; static const struct bpf_verifier_ops tc_cls_act_ops = { .get_func_proto = tc_cls_act_func_proto, - .is_valid_access = sk_filter_is_valid_access, - .convert_ctx_access = sk_filter_convert_ctx_access, + .is_valid_access = tc_cls_act_is_valid_access, + .convert_ctx_access = bpf_net_convert_ctx_access, }; static struct bpf_prog_type_list sk_filter_type __read_mostly = { diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c index 2625b987944f..41ae2fd21b13 100644 --- a/samples/bpf/sockex3_kern.c +++ b/samples/bpf/sockex3_kern.c @@ -89,7 +89,6 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) struct globals { struct flow_keys flow; - __u32 nhoff; }; struct bpf_map_def SEC("maps") percpu_map = { @@ -139,7 +138,7 @@ static void update_stats(struct __sk_buff *skb, struct globals *g) static __always_inline void parse_ip_proto(struct __sk_buff *skb, struct globals *g, __u32 ip_proto) { - __u32 nhoff = g->nhoff; + __u32 nhoff = skb->cb[0]; int poff; switch (ip_proto) { @@ -165,7 +164,7 @@ static __always_inline void parse_ip_proto(struct __sk_buff *skb, if (gre_flags & GRE_SEQ) nhoff += 4; - g->nhoff = nhoff; + skb->cb[0] = nhoff; parse_eth_proto(skb, gre_proto); break; } @@ -195,7 +194,7 @@ PROG(PARSE_IP)(struct __sk_buff *skb) if (!g) return 0; - nhoff = g->nhoff; + nhoff = skb->cb[0]; if (unlikely(ip_is_fragment(skb, nhoff))) return 0; @@ -210,7 +209,7 @@ PROG(PARSE_IP)(struct __sk_buff *skb) verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/); nhoff += (verlen & 0xF) << 2; - g->nhoff = nhoff; + skb->cb[0] = nhoff; parse_ip_proto(skb, g, ip_proto); return 0; } @@ -223,7 +222,7 @@ PROG(PARSE_IPV6)(struct __sk_buff *skb) if (!g) return 0; - nhoff = g->nhoff; + nhoff = skb->cb[0]; ip_proto = load_byte(skb, nhoff + offsetof(struct ipv6hdr, nexthdr)); @@ -233,25 +232,21 @@ PROG(PARSE_IPV6)(struct __sk_buff *skb) nhoff + offsetof(struct ipv6hdr, daddr)); nhoff += sizeof(struct ipv6hdr); - g->nhoff = nhoff; + skb->cb[0] = nhoff; parse_ip_proto(skb, g, ip_proto); return 0; } PROG(PARSE_VLAN)(struct __sk_buff *skb) { - struct globals *g = this_cpu_globals(); __u32 nhoff, proto; - if (!g) - return 0; - - nhoff = g->nhoff; + nhoff = skb->cb[0]; proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, h_vlan_encapsulated_proto)); nhoff += sizeof(struct vlan_hdr); - g->nhoff = nhoff; + skb->cb[0] = nhoff; parse_eth_proto(skb, proto); @@ -260,17 +255,13 @@ PROG(PARSE_VLAN)(struct __sk_buff *skb) PROG(PARSE_MPLS)(struct __sk_buff *skb) { - struct globals *g = this_cpu_globals(); __u32 nhoff, label; - if (!g) - return 0; - - nhoff = g->nhoff; + nhoff = skb->cb[0]; label = load_word(skb, nhoff); nhoff += sizeof(struct mpls_label); - g->nhoff = nhoff; + skb->cb[0] = nhoff; if (label & MPLS_LS_S_MASK) { __u8 verlen = load_byte(skb, nhoff); @@ -288,14 +279,10 @@ PROG(PARSE_MPLS)(struct __sk_buff *skb) SEC("socket/0") int main_prog(struct __sk_buff *skb) { - struct globals *g = this_cpu_globals(); __u32 nhoff = ETH_HLEN; __u32 proto = load_half(skb, 12); - if (!g) - return 0; - - g->nhoff = nhoff; + skb->cb[0] = nhoff; parse_eth_proto(skb, proto); return 0; } diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index 12f3780af73f..693605997abc 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -29,6 +29,7 @@ struct bpf_test { ACCEPT, REJECT } result; + enum bpf_prog_type prog_type; }; static struct bpf_test tests[] = { @@ -743,6 +744,84 @@ static struct bpf_test tests[] = { .errstr = "different pointers", .result = REJECT, }, + { + "check skb->mark is not writeable by sockets", + .insns = { + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check skb->tc_index is not writeable by sockets", + .insns = { + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check non-u32 access to cb", + .insns = { + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check out of range skb->cb access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[60])), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_ACT, + }, + { + "write skb fields from socket prog", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4])), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, cb[2])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "write skb fields from tc_cls_act prog", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tc_index)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, }; static int probe_filter_length(struct bpf_insn *fp) @@ -775,6 +854,7 @@ static int test(void) for (i = 0; i < ARRAY_SIZE(tests); i++) { struct bpf_insn *prog = tests[i].insns; + int prog_type = tests[i].prog_type; int prog_len = probe_filter_length(prog); int *fixup = tests[i].fixup; int map_fd = -1; @@ -789,8 +869,8 @@ static int test(void) } printf("#%d %s ", i, tests[i].descr); - prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, - prog_len * sizeof(struct bpf_insn), + prog_fd = bpf_prog_load(prog_type ?: BPF_PROG_TYPE_SOCKET_FILTER, + prog, prog_len * sizeof(struct bpf_insn), "GPL", 0); if (tests[i].result == ACCEPT) { -- cgit v1.2.3 From b80c0e78582d4a3a004dc1ade4eb06babc6a4eea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 4 Jun 2015 18:30:43 -0700 Subject: tcp: get_cookie_sock() consolidation IPv4 and IPv6 share same implementation of get_cookie_sock(), and there is no point inlining it. We add tcp_ prefix to the common helper name and export it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 3 +++ net/ipv4/syncookies.c | 10 +++++----- net/ipv6/syncookies.c | 19 +------------------ 3 files changed, 9 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 2bb2bad21d5c..978cebedd3fc 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -469,6 +469,9 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size); void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ +struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst); int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, u32 cookie); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index df849e5a10f1..d70b1f603692 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -219,9 +219,9 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, } EXPORT_SYMBOL_GPL(__cookie_v4_check); -static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst) +struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst) { struct inet_connection_sock *icsk = inet_csk(sk); struct sock *child; @@ -235,7 +235,7 @@ static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, } return child; } - +EXPORT_SYMBOL(tcp_get_cookie_sock); /* * when syncookies are in effect and tcp timestamps are enabled we stored @@ -391,7 +391,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst); - ret = get_cookie_sock(sk, skb, req, &rt->dst); + ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst); /* ip_queue_xmit() depends on our flow being setup * Normal sockets get it right from inet_csk_route_child_sock() */ diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 21bc2eb53c57..0909f4e0d53c 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -41,23 +41,6 @@ static __u16 const msstab[] = { 9000 - 60, }; -static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct sock *child; - - child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); - if (child) { - atomic_set(&req->rsk_refcnt, 1); - inet_csk_reqsk_queue_add(sk, req, child); - } else { - reqsk_free(req); - } - return child; -} - static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv6_cookie_scratch); @@ -264,7 +247,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst); - ret = get_cookie_sock(sk, skb, req, dst); + ret = tcp_get_cookie_sock(sk, skb, req, dst); out: return ret; out_free: -- cgit v1.2.3 From 7cf7fa529d0b6b514949cc67b39e3ce406c37006 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Sun, 7 Jun 2015 15:44:23 +0300 Subject: net/mlx5_core: Fix static checker warnings around system guid query flow Fix static checker warnings in the flow of system guid query. Fixes: 707c4602cda6 ('net/mlx5_core: Add new query HCA vport commands') Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 2 +- include/linux/mlx5/vport.h | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index ba87442ef776..9335e5ae18cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -35,7 +35,8 @@ #include #include "mlx5_core.h" -int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out, int outlen) +static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out, + int outlen) { u32 in[MLX5_ST_SZ_DW(query_adapter_in)]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 20150ffa8b16..b94177ebcf3a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -307,7 +307,7 @@ ex: EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_context); int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, - __be64 *sys_image_guid) + u64 *sys_image_guid) { struct mlx5_hca_vport_context *rep; int err; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 67882a834efb..967e0fd06e89 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -48,7 +48,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, u16 vf_num, struct mlx5_hca_vport_context *rep); int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, - __be64 *sys_image_guid); + u64 *sys_image_guid); int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, u64 *node_guid); -- cgit v1.2.3 From a8077d6573530a91d5674a28cdedbed39c391ff0 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Sun, 7 Jun 2015 13:15:30 +0200 Subject: bcma: make calls to PCI hostmode functions config-safe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma_driver_pci.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 5ba6918ca20b..9657f11d48a7 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -246,7 +246,18 @@ static inline void bcma_core_pci_power_save(struct bcma_bus *bus, bool up) } #endif +#ifdef CONFIG_BCMA_DRIVER_PCI_HOSTMODE extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev); extern int bcma_core_pci_plat_dev_init(struct pci_dev *dev); +#else +static inline int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev) +{ + return -ENOTSUPP; +} +static inline int bcma_core_pci_plat_dev_init(struct pci_dev *dev) +{ + return -ENOTSUPP; +} +#endif #endif /* LINUX_BCMA_DRIVER_PCI_H_ */ -- cgit v1.2.3 From b6355e972aaab0173ce11a1650e7dba67f820918 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Sat, 6 Jun 2015 13:16:37 +0200 Subject: NFC: nci: Handle proprietary response and notifications Allow for drivers to explicitly define handlers for each proprietary notifications and responses they expect to support. Reviewed-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci_core.h | 14 +++++++++++++ net/nfc/nci/core.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++ net/nfc/nci/ntf.c | 10 +++++++++ net/nfc/nci/rsp.c | 10 +++++++++ 4 files changed, 86 insertions(+) (limited to 'include') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index d4dcc7199fd7..c49688c09853 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -66,6 +66,12 @@ enum nci_state { struct nci_dev; +struct nci_prop_ops { + __u16 opcode; + int (*rsp)(struct nci_dev *dev, struct sk_buff *skb); + int (*ntf)(struct nci_dev *dev, struct sk_buff *skb); +}; + struct nci_ops { int (*open)(struct nci_dev *ndev); int (*close)(struct nci_dev *ndev); @@ -84,12 +90,16 @@ struct nci_ops { struct sk_buff *skb); void (*hci_cmd_received)(struct nci_dev *ndev, u8 pipe, u8 cmd, struct sk_buff *skb); + + struct nci_prop_ops *prop_ops; + size_t n_prop_ops; }; #define NCI_MAX_SUPPORTED_RF_INTERFACES 4 #define NCI_MAX_DISCOVERED_TARGETS 10 #define NCI_MAX_NUM_NFCEE 255 #define NCI_MAX_CONN_ID 7 +#define NCI_MAX_PROPRIETARY_CMD 64 struct nci_conn_info { struct list_head list; @@ -320,6 +330,10 @@ static inline void *nci_get_drvdata(struct nci_dev *ndev) void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb); void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb); +int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 opcode, + struct sk_buff *skb); +int nci_prop_ntf_packet(struct nci_dev *ndev, __u16 opcode, + struct sk_buff *skb); void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb); int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload); int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 49ff32106080..56d57c93ea1a 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -28,6 +28,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ #include +#include #include #include #include @@ -961,6 +962,14 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, return NULL; ndev->ops = ops; + + if (ops->n_prop_ops > NCI_MAX_PROPRIETARY_CMD) { + pr_err("Too many proprietary commands: %zd\n", + ops->n_prop_ops); + ops->prop_ops = NULL; + ops->n_prop_ops = 0; + } + ndev->tx_headroom = tx_headroom; ndev->tx_tailroom = tx_tailroom; init_completion(&ndev->req_completion); @@ -1165,6 +1174,49 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) return 0; } +/* Proprietary commands API */ +static struct nci_prop_ops *prop_cmd_lookup(struct nci_dev *ndev, + __u16 opcode) +{ + size_t i; + struct nci_prop_ops *prop_op; + + if (!ndev->ops->prop_ops || !ndev->ops->n_prop_ops) + return NULL; + + for (i = 0; i < ndev->ops->n_prop_ops; i++) { + prop_op = &ndev->ops->prop_ops[i]; + if (prop_op->opcode == opcode) + return prop_op; + } + + return NULL; +} + +int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode, + struct sk_buff *skb) +{ + struct nci_prop_ops *prop_op; + + prop_op = prop_cmd_lookup(ndev, rsp_opcode); + if (!prop_op || !prop_op->rsp) + return -ENOTSUPP; + + return prop_op->rsp(ndev, skb); +} + +int nci_prop_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode, + struct sk_buff *skb) +{ + struct nci_prop_ops *prop_op; + + prop_op = prop_cmd_lookup(ndev, ntf_opcode); + if (!prop_op || !prop_op->ntf) + return -ENOTSUPP; + + return prop_op->ntf(ndev, skb); +} + /* ---- NCI TX Data worker thread ---- */ static void nci_tx_work(struct work_struct *work) diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 3218071072ac..5d1c2e391c56 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -758,6 +758,15 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) /* strip the nci control header */ skb_pull(skb, NCI_CTRL_HDR_SIZE); + if (nci_opcode_gid(ntf_opcode) == NCI_GID_PROPRIETARY) { + if (nci_prop_ntf_packet(ndev, ntf_opcode, skb)) { + pr_err("unsupported ntf opcode 0x%x\n", + ntf_opcode); + } + + goto end; + } + switch (ntf_opcode) { case NCI_OP_CORE_CONN_CREDITS_NTF: nci_core_conn_credits_ntf_packet(ndev, skb); @@ -796,5 +805,6 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) break; } +end: kfree_skb(skb); } diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index 02486bc2ceea..408bd8f857ab 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -296,6 +296,15 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) /* strip the nci control header */ skb_pull(skb, NCI_CTRL_HDR_SIZE); + if (nci_opcode_gid(rsp_opcode) == NCI_GID_PROPRIETARY) { + if (nci_prop_rsp_packet(ndev, rsp_opcode, skb) == -ENOTSUPP) { + pr_err("unsupported rsp opcode 0x%x\n", + rsp_opcode); + } + + goto end; + } + switch (rsp_opcode) { case NCI_OP_CORE_RESET_RSP: nci_core_reset_rsp_packet(ndev, skb); @@ -346,6 +355,7 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) break; } +end: kfree_skb(skb); /* trigger the next cmd */ -- cgit v1.2.3 From c39daeee50eb0b95d3b91bda21b77955a459ee5f Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sat, 6 Jun 2015 13:16:40 +0200 Subject: NFC: nci: Add nci init ops for early device initialization Some device may need to execute some proprietary commands in order to "wake-up"; Before the nci state initialization. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci_core.h | 1 + net/nfc/nci/core.c | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index c49688c09853..886854a4ea91 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -73,6 +73,7 @@ struct nci_prop_ops { }; struct nci_ops { + int (*init)(struct nci_dev *ndev); int (*open)(struct nci_dev *ndev); int (*close)(struct nci_dev *ndev); int (*send)(struct nci_dev *ndev, struct sk_buff *skb); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index b900e6a2a284..458e58bb9cb1 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -344,8 +344,13 @@ static int nci_open_device(struct nci_dev *ndev) set_bit(NCI_INIT, &ndev->flags); - rc = __nci_request(ndev, nci_reset_req, 0, - msecs_to_jiffies(NCI_RESET_TIMEOUT)); + if (ndev->ops->init) + rc = ndev->ops->init(ndev); + + if (!rc) { + rc = __nci_request(ndev, nci_reset_req, 0, + msecs_to_jiffies(NCI_RESET_TIMEOUT)); + } if (!rc && ndev->ops->setup) { rc = ndev->ops->setup(ndev); -- cgit v1.2.3 From 759afb8d288ffbe9a1cdb20af037b5c072dc38b2 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sat, 6 Jun 2015 13:16:41 +0200 Subject: NFC: nci: Add nci_prop_cmd allowing to send proprietary nci cmd Handle allowing to send proprietary nci commands anywhere in the nci state machine. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci_core.h | 2 ++ net/nfc/nci/core.c | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 886854a4ea91..98f18a20dc77 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -275,6 +275,8 @@ int nci_request(struct nci_dev *ndev, void (*req)(struct nci_dev *ndev, unsigned long opt), unsigned long opt, __u32 timeout); +int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload); + int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb); int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 458e58bb9cb1..b5dc15044466 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -324,6 +324,32 @@ static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt) sizeof(struct nci_rf_deactivate_cmd), &cmd); } +struct nci_prop_cmd_param { + __u16 opcode; + size_t len; + __u8 *payload; +}; + +static void nci_prop_cmd_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_prop_cmd_param *param = (struct nci_prop_cmd_param *)opt; + + nci_send_cmd(ndev, param->opcode, param->len, param->payload); +} + +int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload) +{ + struct nci_prop_cmd_param param; + + param.opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, oid); + param.len = len; + param.payload = payload; + + return __nci_request(ndev, nci_prop_cmd_req, (unsigned long)¶m, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); +} +EXPORT_SYMBOL(nci_prop_cmd); + static int nci_open_device(struct nci_dev *ndev) { int rc = 0; -- cgit v1.2.3 From 8115dd5905318afcde713726064ec052b7d488cf Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 14 Oct 2014 01:42:23 +0200 Subject: NFC: Introduce vendor commands structures Together with inline routines to associate a vendor commands array with an NFC device. Vendor commands allow vendors to implement their very specific operations from driver code instead of adding new stack ops for non NFC generic commands. Vendors need to select their own unique IDs and use that as a namespace for defining sub commands. Signed-off-by: Samuel Ortiz --- include/net/nfc/hci.h | 7 +++++++ include/net/nfc/nci_core.h | 7 +++++++ include/net/nfc/nfc.h | 22 ++++++++++++++++++++++ 3 files changed, 36 insertions(+) (limited to 'include') diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 020a814bc8ed..316694dafa5b 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -179,6 +179,13 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev); void nfc_hci_set_clientdata(struct nfc_hci_dev *hdev, void *clientdata); void *nfc_hci_get_clientdata(struct nfc_hci_dev *hdev); +static inline int nfc_hci_set_vendor_cmds(struct nfc_hci_dev *hdev, + struct nfc_vendor_cmd *cmds, + int n_cmds) +{ + return nfc_set_vendor_cmds(hdev->ndev, cmds, n_cmds); +} + void nfc_hci_driver_failure(struct nfc_hci_dev *hdev, int err); int nfc_hci_result_to_errno(u8 result); diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 98f18a20dc77..9d77ed556b78 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -331,6 +331,13 @@ static inline void *nci_get_drvdata(struct nci_dev *ndev) return ndev->driver_data; } +static inline int nci_set_vendor_cmds(struct nci_dev *ndev, + struct nfc_vendor_cmd *cmds, + int n_cmds) +{ + return nfc_set_vendor_cmds(ndev->nfc_dev, cmds, n_cmds); +} + void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb); void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb); int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 opcode, diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 7ac029c07546..f9e58ae45f9c 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -165,6 +165,12 @@ struct nfc_genl_data { struct mutex genl_data_mutex; }; +struct nfc_vendor_cmd { + __u32 vendor_id; + __u32 subcmd; + int (*doit)(struct nfc_dev *dev, void *data, size_t data_len); +}; + struct nfc_dev { int idx; u32 target_next_idx; @@ -193,6 +199,9 @@ struct nfc_dev { struct rfkill *rfkill; + struct nfc_vendor_cmd *vendor_cmds; + int n_vendor_cmds; + struct nfc_ops *ops; }; #define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev) @@ -296,4 +305,17 @@ struct nfc_se *nfc_find_se(struct nfc_dev *dev, u32 se_idx); void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb, u8 payload_type, u8 direction); +static inline int nfc_set_vendor_cmds(struct nfc_dev *dev, + struct nfc_vendor_cmd *cmds, + int n_cmds) +{ + if (dev->vendor_cmds || dev->n_vendor_cmds) + return -EINVAL; + + dev->vendor_cmds = cmds; + dev->n_vendor_cmds = n_cmds; + + return 0; +} + #endif /* __NET_NFC_H */ -- cgit v1.2.3 From 9e58095f9660f88d6a2febe87d5073a6b2e9c399 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 14 Oct 2014 02:19:46 +0200 Subject: NFC: netlink: Implement vendor command support Vendor commands are passed from userspace through the NFC_CMD_VENDOR netlink command, allowing driver and hardware specific operations implementations like for example RF tuning or production line calibration. Drivers will associate a set of vendor commands to a vendor id, which could typically be an OUI. The netlink kernel implementation will try to match the received vendor id and sub command attributes with the registered ones. When such match is found, the driver defined sub command routine is called. Signed-off-by: Samuel Ortiz --- include/uapi/linux/nfc.h | 10 +++++++++ net/nfc/netlink.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index c1e2e63cf9b5..dd3f75389076 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -86,6 +86,8 @@ * for this event is the application ID (AID). * @NFC_CMD_GET_SE: Dump all discovered secure elements from an NFC controller. * @NFC_CMD_SE_IO: Send/Receive APDUs to/from the selected secure element. + * @NFC_CMD_VENDOR: Vendor specific command, to be implemented directly + * from the driver in order to support hardware specific operations. */ enum nfc_commands { NFC_CMD_UNSPEC, @@ -117,6 +119,7 @@ enum nfc_commands { NFC_CMD_GET_SE, NFC_CMD_SE_IO, NFC_CMD_ACTIVATE_TARGET, + NFC_CMD_VENDOR, /* private: internal use only */ __NFC_CMD_AFTER_LAST }; @@ -153,6 +156,10 @@ enum nfc_commands { * @NFC_ATTR_APDU: Secure element APDU * @NFC_ATTR_TARGET_ISO15693_DSFID: ISO 15693 Data Storage Format Identifier * @NFC_ATTR_TARGET_ISO15693_UID: ISO 15693 Unique Identifier + * @NFC_ATTR_VENDOR_ID: NFC manufacturer unique ID, typically an OUI + * @NFC_ATTR_VENDOR_SUBCMD: Vendor specific sub command + * @NFC_ATTR_VENDOR_DATA: Vendor specific data, to be optionally passed + * to a vendor specific command implementation */ enum nfc_attrs { NFC_ATTR_UNSPEC, @@ -184,6 +191,9 @@ enum nfc_attrs { NFC_ATTR_TARGET_ISO15693_DSFID, NFC_ATTR_TARGET_ISO15693_UID, NFC_ATTR_SE_PARAMS, + NFC_ATTR_VENDOR_ID, + NFC_ATTR_VENDOR_SUBCMD, + NFC_ATTR_VENDOR_DATA, /* private: internal use only */ __NFC_ATTR_AFTER_LAST }; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 3763036710ae..f85f37ed19b2 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -5,6 +5,12 @@ * Lauro Ramos Venancio * Aloisio Almeida Jr * + * Vendor commands implementation based on net/wireless/nl80211.c + * which is: + * + * Copyright 2006-2010 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -1489,6 +1495,50 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info) return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx); } +static int nfc_genl_vendor_cmd(struct sk_buff *skb, + struct genl_info *info) +{ + struct nfc_dev *dev; + struct nfc_vendor_cmd *cmd; + u32 dev_idx, vid, subcmd; + u8 *data; + size_t data_len; + int i; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_VENDOR_ID] || + !info->attrs[NFC_ATTR_VENDOR_SUBCMD]) + return -EINVAL; + + dev_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + vid = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_ID]); + subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]); + + dev = nfc_get_device(dev_idx); + if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds) + return -ENODEV; + + data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]); + if (data) { + data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]); + if (data_len == 0) + return -EINVAL; + } else { + data_len = 0; + } + + for (i = 0; i < dev->n_vendor_cmds; i++) { + cmd = &dev->vendor_cmds[i]; + + if (cmd->vendor_id != vid || cmd->subcmd != subcmd) + continue; + + return cmd->doit(dev, data, data_len); + } + + return -EOPNOTSUPP; +} + static const struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, @@ -1579,6 +1629,11 @@ static const struct genl_ops nfc_genl_ops[] = { .doit = nfc_genl_activate_target, .policy = nfc_genl_policy, }, + { + .cmd = NFC_CMD_VENDOR, + .doit = nfc_genl_vendor_cmd, + .policy = nfc_genl_policy, + }, }; -- cgit v1.2.3 From 8b76ce34c43a569f981623485c1b6c700594678e Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 8 Jun 2015 18:14:39 +0300 Subject: Bluetooth: Fix encryption key size handling for LTKs The encryption key size for LTKs is supposed to be applied only at the moment of encryption. When generating a Link Key (using LE SC) from the LTK the full non-shortened value should be used. This patch modifies the code to always keep the full value around and only apply the key size when passing the value to HCI. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_conn.c | 4 ++-- net/bluetooth/hci_event.c | 3 ++- net/bluetooth/smp.c | 15 +++------------ 4 files changed, 8 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a056c2bfeb81..24c0e4577a93 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1408,7 +1408,7 @@ void mgmt_smp_complete(struct hci_conn *conn, bool complete); u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, - __u8 ltk[16]); + __u8 ltk[16], __u8 key_size); void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *bdaddr_type); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index ee5e59839b02..2c48bf0b5afb 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -276,7 +276,7 @@ u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, } void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, - __u8 ltk[16]) + __u8 ltk[16], __u8 key_size) { struct hci_dev *hdev = conn->hdev; struct hci_cp_le_start_enc cp; @@ -288,7 +288,7 @@ void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, cp.handle = cpu_to_le16(conn->handle); cp.rand = rand; cp.ediv = ediv; - memcpy(cp.ltk, ltk, sizeof(cp.ltk)); + memcpy(cp.ltk, ltk, key_size); hci_send_cmd(hdev, HCI_OP_LE_START_ENC, sizeof(cp), &cp); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 7b61be73650f..fcbfa4138eb1 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4955,7 +4955,8 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb) goto not_found; } - memcpy(cp.ltk, ltk->val, sizeof(ltk->val)); + memcpy(cp.ltk, ltk->val, ltk->enc_size); + memset(cp.ltk + ltk->enc_size, 0, sizeof(cp.ltk) - ltk->enc_size); cp.handle = cpu_to_le16(conn->handle); conn->pending_sec_level = smp_ltk_sec_level(ltk); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 659371af39e4..3921cba056d3 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -997,13 +997,10 @@ static u8 smp_random(struct smp_chan *smp) smp_s1(smp->tfm_aes, smp->tk, smp->rrnd, smp->prnd, stk); - memset(stk + smp->enc_key_size, 0, - SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size); - if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags)) return SMP_UNSPECIFIED; - hci_le_start_enc(hcon, ediv, rand, stk); + hci_le_start_enc(hcon, ediv, rand, stk, smp->enc_key_size); hcon->enc_key_size = smp->enc_key_size; set_bit(HCI_CONN_STK_ENCRYPT, &hcon->flags); } else { @@ -1016,9 +1013,6 @@ static u8 smp_random(struct smp_chan *smp) smp_s1(smp->tfm_aes, smp->tk, smp->prnd, smp->rrnd, stk); - memset(stk + smp->enc_key_size, 0, - SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size); - if (hcon->pending_sec_level == BT_SECURITY_HIGH) auth = 1; else @@ -1156,9 +1150,6 @@ static void sc_add_ltk(struct smp_chan *smp) else auth = 0; - memset(smp->tk + smp->enc_key_size, 0, - SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size); - smp->ltk = hci_add_ltk(hcon->hdev, &hcon->dst, hcon->dst_type, key_type, auth, smp->tk, smp->enc_key_size, 0, 0); @@ -2202,7 +2193,7 @@ static bool smp_ltk_encrypt(struct l2cap_conn *conn, u8 sec_level) if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags)) return true; - hci_le_start_enc(hcon, key->ediv, key->rand, key->val); + hci_le_start_enc(hcon, key->ediv, key->rand, key->val, key->enc_size); hcon->enc_key_size = key->enc_size; /* We never store STKs for master role, so clear this flag */ @@ -2750,7 +2741,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) sc_add_ltk(smp); if (hcon->out) { - hci_le_start_enc(hcon, 0, 0, smp->tk); + hci_le_start_enc(hcon, 0, 0, smp->tk, smp->enc_key_size); hcon->enc_key_size = smp->enc_key_size; } -- cgit v1.2.3 From dd895d7f21b244e7fd4c7477697e274de7e44ecb Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 9 Jun 2015 08:05:10 +0200 Subject: can: cangw: introduce optional uid to reference created routing jobs Similar to referencing iptables rules by their line number this UID allows to reference created routing jobs, e.g. to alter configured data modifications. The UID is an optional non-zero value which can be provided at routing job creation time. When the UID is set the UID replaces the data modification configuration as job identification attribute e.g. at job removal time. Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/gw.h | 5 ++++ net/can/gw.c | 68 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 61 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h index 3e6184cf2f6d..5079b9d57e31 100644 --- a/include/uapi/linux/can/gw.h +++ b/include/uapi/linux/can/gw.h @@ -78,6 +78,7 @@ enum { CGW_FILTER, /* specify struct can_filter on source CAN device */ CGW_DELETED, /* number of deleted CAN frames (see max_hops param) */ CGW_LIM_HOPS, /* limit the number of hops of this specific rule */ + CGW_MOD_UID, /* user defined identifier for modification updates */ __CGW_MAX }; @@ -162,6 +163,10 @@ enum { * load time of the can-gw module). This value is used to reduce the number of * possible hops for this gateway rule to a value smaller then max_hops. * + * CGW_MOD_UID (length 4 bytes): + * Optional non-zero user defined routing job identifier to alter existing + * modification settings at runtime. + * * CGW_CS_XOR (length 4 bytes): * Set a simple XOR checksum starting with an initial value into * data[result-idx] using data[start-idx] .. data[end-idx] diff --git a/net/can/gw.c b/net/can/gw.c index a6f448e18ea8..455168718c2e 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -110,6 +110,7 @@ struct cf_mod { void (*xor)(struct can_frame *cf, struct cgw_csum_xor *xor); void (*crc8)(struct can_frame *cf, struct cgw_csum_crc8 *crc8); } csumfunc; + u32 uid; }; @@ -548,6 +549,11 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } + if (gwj->mod.uid) { + if (nla_put_u32(skb, CGW_MOD_UID, gwj->mod.uid) < 0) + goto cancel; + } + if (gwj->mod.csumfunc.crc8) { if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN, &gwj->mod.csum.crc8) < 0) @@ -619,6 +625,7 @@ static const struct nla_policy cgw_policy[CGW_MAX+1] = { [CGW_DST_IF] = { .type = NLA_U32 }, [CGW_FILTER] = { .len = sizeof(struct can_filter) }, [CGW_LIM_HOPS] = { .type = NLA_U8 }, + [CGW_MOD_UID] = { .type = NLA_U32 }, }; /* check for common and gwtype specific attributes */ @@ -761,6 +768,10 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, else mod->csumfunc.xor = cgw_csum_xor_neg; } + + if (tb[CGW_MOD_UID]) { + nla_memcpy(&mod->uid, tb[CGW_MOD_UID], sizeof(u32)); + } } if (gwtype == CGW_TYPE_CAN_CAN) { @@ -802,6 +813,8 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) { struct rtcanmsg *r; struct cgw_job *gwj; + struct cf_mod mod; + struct can_can_gw ccgw; u8 limhops = 0; int err = 0; @@ -819,6 +832,36 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) if (r->gwtype != CGW_TYPE_CAN_CAN) return -EINVAL; + err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); + if (err < 0) + return err; + + if (mod.uid) { + + ASSERT_RTNL(); + + /* check for updating an existing job with identical uid */ + hlist_for_each_entry(gwj, &cgw_list, list) { + + if (gwj->mod.uid != mod.uid) + continue; + + /* interfaces & filters must be identical */ + if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) + return -EINVAL; + + /* update modifications with disabled softirq & quit */ + local_bh_disable(); + memcpy(&gwj->mod, &mod, sizeof(mod)); + local_bh_enable(); + return 0; + } + } + + /* ifindex == 0 is not allowed for job creation */ + if (!ccgw.src_idx || !ccgw.dst_idx) + return -ENODEV; + gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL); if (!gwj) return -ENOMEM; @@ -828,18 +871,14 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) gwj->deleted_frames = 0; gwj->flags = r->flags; gwj->gwtype = r->gwtype; + gwj->limit_hops = limhops; - err = cgw_parse_attr(nlh, &gwj->mod, CGW_TYPE_CAN_CAN, &gwj->ccgw, - &limhops); - if (err < 0) - goto out; + /* insert already parsed information */ + memcpy(&gwj->mod, &mod, sizeof(mod)); + memcpy(&gwj->ccgw, &ccgw, sizeof(ccgw)); err = -ENODEV; - /* ifindex == 0 is not allowed for job creation */ - if (!gwj->ccgw.src_idx || !gwj->ccgw.dst_idx) - goto out; - gwj->src.dev = __dev_get_by_index(&init_net, gwj->ccgw.src_idx); if (!gwj->src.dev) @@ -856,8 +895,6 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) if (gwj->dst.dev->type != ARPHRD_CAN) goto out; - gwj->limit_hops = limhops; - ASSERT_RTNL(); err = cgw_register_filter(gwj); @@ -931,8 +968,15 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh) if (gwj->limit_hops != limhops) continue; - if (memcmp(&gwj->mod, &mod, sizeof(mod))) - continue; + /* we have a match when uid is enabled and identical */ + if (gwj->mod.uid || mod.uid) { + if (gwj->mod.uid != mod.uid) + continue; + } else { + /* no uid => check for identical modifications */ + if (memcmp(&gwj->mod, &mod, sizeof(mod))) + continue; + } /* if (r->gwtype == CGW_TYPE_CAN_CAN) - is made sure here */ if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) -- cgit v1.2.3 From 9b4c33364eb653a824c58e637c73caa6feb9879c Mon Sep 17 00:00:00 2001 From: Arron Wang Date: Tue, 9 Jun 2015 17:47:22 +0800 Subject: Bluetooth: Make l2cap_recv_acldata() and sco_recv_scodata() return void The return value of l2cap_recv_acldata() and sco_recv_scodata() are not used, then change it to return void Signed-off-by: Arron Wang Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 4 ++-- net/bluetooth/l2cap_core.c | 5 ++--- net/bluetooth/sco.c | 5 ++--- 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 24c0e4577a93..f175a51f9740 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -530,10 +530,10 @@ extern struct mutex hci_cb_list_lock; /* ----- HCI interface to upper protocols ----- */ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr); int l2cap_disconn_ind(struct hci_conn *hcon); -int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags); +void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags); int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags); -int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb); +void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb); /* ----- Inquiry cache ----- */ #define INQUIRY_CACHE_AGE_MAX (HZ*30) /* 30 seconds */ diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index dad419782a12..07bd316d02ba 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7442,7 +7442,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) mutex_unlock(&conn->chan_lock); } -int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) +void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) { struct l2cap_conn *conn = hcon->l2cap_data; struct l2cap_hdr *hdr; @@ -7485,7 +7485,7 @@ int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) if (len == skb->len) { /* Complete frame received */ l2cap_recv_frame(conn, skb); - return 0; + return; } BT_DBG("Start: total len %d, frag len %d", len, skb->len); @@ -7544,7 +7544,6 @@ int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) drop: kfree_skb(skb); - return 0; } static struct hci_cb l2cap_cb = { diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 6b6e59dc54cf..688a040c5626 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1110,7 +1110,7 @@ static void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) sco_conn_del(hcon, bt_to_errno(reason)); } -int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) +void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) { struct sco_conn *conn = hcon->sco_data; @@ -1121,12 +1121,11 @@ int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) if (skb->len) { sco_recv_frame(conn, skb); - return 0; + return; } drop: kfree_skb(skb); - return 0; } static struct hci_cb sco_cb = { -- cgit v1.2.3 From ff50e8afc537e66bb3daf5d1cd6628d6b76e7f06 Mon Sep 17 00:00:00 2001 From: Arron Wang Date: Tue, 9 Jun 2015 17:47:23 +0800 Subject: Bluetooth: Move SCO support under BT_BREDR config option SCO/eSCO link is supported by BR/EDR controller, it is suitable to move them under BT_BREDR config option Signed-off-by: Arron Wang Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 11 +++++++++++ include/net/bluetooth/hci_core.h | 12 ++++++++++++ net/bluetooth/Makefile | 3 ++- 3 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 7dba80546f16..38d8a34d3589 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -365,8 +365,19 @@ extern struct dentry *bt_debugfs; int l2cap_init(void); void l2cap_exit(void); +#if IS_ENABLED(CONFIG_BT_BREDR) int sco_init(void); void sco_exit(void); +#else +static inline int sco_init(void) +{ + return 0; +} + +static inline void sco_exit(void) +{ +} +#endif int mgmt_init(void); void mgmt_exit(void); diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f175a51f9740..3fbb793e634d 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -532,8 +532,20 @@ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr); int l2cap_disconn_ind(struct hci_conn *hcon); void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags); +#if IS_ENABLED(CONFIG_BT_BREDR) int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags); void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb); +#else +static inline int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, + __u8 *flags) +{ + return 0; +} + +static inline void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) +{ +} +#endif /* ----- Inquiry cache ----- */ #define INQUIRY_CACHE_AGE_MAX (HZ*30) /* 30 seconds */ diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 9a8ea232d28f..29c12ae72a66 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -12,9 +12,10 @@ obj-$(CONFIG_BT_6LOWPAN) += bluetooth_6lowpan.o bluetooth_6lowpan-y := 6lowpan.o bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ - hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \ + hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o lib.o \ a2mp.o amp.o ecc.o hci_request.o mgmt_util.o +bluetooth-$(CONFIG_BT_BREDR) += sco.o bluetooth-$(CONFIG_BT_DEBUGFS) += hci_debugfs.o bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o -- cgit v1.2.3 From c2d3955ba322471181ba0e6636ea9bdd9f521239 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 2 Jun 2015 21:10:13 +0200 Subject: mac80211: remove obsolete sentence from documentation FIF_PROMISC_IN_BSS was removed in commit df1404650ccb ("mac80211: remove support for IFF_PROMISC"). Signed-off-by: Jakub Kicinski Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e09a32cb139f..faadb736ac4d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2584,8 +2584,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * * @FIF_OTHER_BSS: pass frames destined to other BSSes * - * @FIF_PSPOLL: pass PS Poll frames, if PROMISC_IN_BSS is not set then only - * those addressed to this station. + * @FIF_PSPOLL: pass PS Poll frames * * @FIF_PROBE_REQ: pass probe request frames */ -- cgit v1.2.3 From d446278c408bdebd4103090740ce908c5d6b5ab0 Mon Sep 17 00:00:00 2001 From: Christoffer Holmstedt Date: Wed, 10 Jun 2015 11:03:59 +0200 Subject: nl802154: fix misspelled enum Signed-off-by: Christoffer Holmstedt Signed-off-by: Marcel Holtmann --- include/net/nl802154.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 6fc231e60920..b0ab530d28cd 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -187,7 +187,7 @@ enum nl802154_wpan_phy_capability_attr { * @NL802154_CCA_ENERGY_CARRIER: Carrier sense with energy above threshold * @NL802154_CCA_ALOHA: CCA shall always report an idle medium * @NL802154_CCA_UWB_SHR: UWB preamble sense based on the SHR of a frame - * @NL802154_CCA_UWB_MULTIPEXED: UWB preamble sense based on the packet with + * @NL802154_CCA_UWB_MULTIPLEXED: UWB preamble sense based on the packet with * the multiplexed preamble * @__NL802154_CCA_ATTR_AFTER_LAST: Internal * @NL802154_CCA_ATTR_MAX: Maximum CCA attribute number @@ -199,7 +199,7 @@ enum nl802154_cca_modes { NL802154_CCA_ENERGY_CARRIER, NL802154_CCA_ALOHA, NL802154_CCA_UWB_SHR, - NL802154_CCA_UWB_MULTIPEXED, + NL802154_CCA_UWB_MULTIPLEXED, /* keep last */ __NL802154_CCA_ATTR_AFTER_LAST, -- cgit v1.2.3 From ed06aeefdac348cfb91a3db5fe1067e3202afd70 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 9 Jun 2015 22:26:05 +0200 Subject: nfc: st-nci: Rename st21nfcb to st-nci STMicroelectronics NFC NCI chips family is extending with the new ST21NFCC using the AMS AS39230 RF booster. The st21nfcb driver is relevant for this solution and might be with future products. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- .../devicetree/bindings/net/nfc/st-nci.txt | 33 + .../devicetree/bindings/net/nfc/st21nfcb.txt | 33 - drivers/nfc/Kconfig | 2 +- drivers/nfc/Makefile | 2 +- drivers/nfc/st-nci/Kconfig | 23 + drivers/nfc/st-nci/Makefile | 9 + drivers/nfc/st-nci/core.c | 179 ++++++ drivers/nfc/st-nci/i2c.c | 385 +++++++++++ drivers/nfc/st-nci/ndlc.c | 313 +++++++++ drivers/nfc/st-nci/ndlc.h | 60 ++ drivers/nfc/st-nci/st-nci.h | 50 ++ drivers/nfc/st-nci/st-nci_se.c | 714 +++++++++++++++++++++ drivers/nfc/st-nci/st-nci_se.h | 61 ++ drivers/nfc/st21nfcb/Kconfig | 22 - drivers/nfc/st21nfcb/Makefile | 9 - drivers/nfc/st21nfcb/i2c.c | 384 ----------- drivers/nfc/st21nfcb/ndlc.c | 313 --------- drivers/nfc/st21nfcb/ndlc.h | 60 -- drivers/nfc/st21nfcb/st21nfcb.c | 179 ------ drivers/nfc/st21nfcb/st21nfcb.h | 50 -- drivers/nfc/st21nfcb/st21nfcb_se.c | 713 -------------------- drivers/nfc/st21nfcb/st21nfcb_se.h | 61 -- include/linux/platform_data/st-nci.h | 29 + include/linux/platform_data/st21nfcb.h | 29 - include/linux/platform_data/st_nci.h | 29 + 25 files changed, 1887 insertions(+), 1855 deletions(-) create mode 100644 Documentation/devicetree/bindings/net/nfc/st-nci.txt delete mode 100644 Documentation/devicetree/bindings/net/nfc/st21nfcb.txt create mode 100644 drivers/nfc/st-nci/Kconfig create mode 100644 drivers/nfc/st-nci/Makefile create mode 100644 drivers/nfc/st-nci/core.c create mode 100644 drivers/nfc/st-nci/i2c.c create mode 100644 drivers/nfc/st-nci/ndlc.c create mode 100644 drivers/nfc/st-nci/ndlc.h create mode 100644 drivers/nfc/st-nci/st-nci.h create mode 100644 drivers/nfc/st-nci/st-nci_se.c create mode 100644 drivers/nfc/st-nci/st-nci_se.h delete mode 100644 drivers/nfc/st21nfcb/Kconfig delete mode 100644 drivers/nfc/st21nfcb/Makefile delete mode 100644 drivers/nfc/st21nfcb/i2c.c delete mode 100644 drivers/nfc/st21nfcb/ndlc.c delete mode 100644 drivers/nfc/st21nfcb/ndlc.h delete mode 100644 drivers/nfc/st21nfcb/st21nfcb.c delete mode 100644 drivers/nfc/st21nfcb/st21nfcb.h delete mode 100644 drivers/nfc/st21nfcb/st21nfcb_se.c delete mode 100644 drivers/nfc/st21nfcb/st21nfcb_se.h create mode 100644 include/linux/platform_data/st-nci.h delete mode 100644 include/linux/platform_data/st21nfcb.h create mode 100644 include/linux/platform_data/st_nci.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/net/nfc/st-nci.txt b/Documentation/devicetree/bindings/net/nfc/st-nci.txt new file mode 100644 index 000000000000..d707588ed734 --- /dev/null +++ b/Documentation/devicetree/bindings/net/nfc/st-nci.txt @@ -0,0 +1,33 @@ +* STMicroelectronics SAS. ST NCI NFC Controller + +Required properties: +- compatible: Should be "st,st21nfcb-i2c" or "st,st21nfcc-i2c". +- clock-frequency: I²C work frequency. +- reg: address on the bus +- interrupt-parent: phandle for the interrupt gpio controller +- interrupts: GPIO interrupt to which the chip is connected +- reset-gpios: Output GPIO pin used to reset the ST21NFCB + +Optional SoC Specific Properties: +- pinctrl-names: Contains only one value - "default". +- pintctrl-0: Specifies the pin control groups used for this controller. + +Example (for ARM-based BeagleBoard xM with ST21NFCB on I2C2): + +&i2c2 { + + status = "okay"; + + st21nfcb: st21nfcb@8 { + + compatible = "st,st21nfcb-i2c"; + + reg = <0x08>; + clock-frequency = <400000>; + + interrupt-parent = <&gpio5>; + interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; + + reset-gpios = <&gpio5 29 GPIO_ACTIVE_HIGH>; + }; +}; diff --git a/Documentation/devicetree/bindings/net/nfc/st21nfcb.txt b/Documentation/devicetree/bindings/net/nfc/st21nfcb.txt deleted file mode 100644 index bb237072dbe9..000000000000 --- a/Documentation/devicetree/bindings/net/nfc/st21nfcb.txt +++ /dev/null @@ -1,33 +0,0 @@ -* STMicroelectronics SAS. ST21NFCB NFC Controller - -Required properties: -- compatible: Should be "st,st21nfcb-i2c". -- clock-frequency: I²C work frequency. -- reg: address on the bus -- interrupt-parent: phandle for the interrupt gpio controller -- interrupts: GPIO interrupt to which the chip is connected -- reset-gpios: Output GPIO pin used to reset the ST21NFCB - -Optional SoC Specific Properties: -- pinctrl-names: Contains only one value - "default". -- pintctrl-0: Specifies the pin control groups used for this controller. - -Example (for ARM-based BeagleBoard xM with ST21NFCB on I2C2): - -&i2c2 { - - status = "okay"; - - st21nfcb: st21nfcb@8 { - - compatible = "st,st21nfcb-i2c"; - - reg = <0x08>; - clock-frequency = <400000>; - - interrupt-parent = <&gpio5>; - interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; - - reset-gpios = <&gpio5 29 GPIO_ACTIVE_HIGH>; - }; -}; diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig index 107714e4405f..722673cb785b 100644 --- a/drivers/nfc/Kconfig +++ b/drivers/nfc/Kconfig @@ -72,6 +72,6 @@ source "drivers/nfc/pn544/Kconfig" source "drivers/nfc/microread/Kconfig" source "drivers/nfc/nfcmrvl/Kconfig" source "drivers/nfc/st21nfca/Kconfig" -source "drivers/nfc/st21nfcb/Kconfig" +source "drivers/nfc/st-nci/Kconfig" source "drivers/nfc/nxp-nci/Kconfig" endmenu diff --git a/drivers/nfc/Makefile b/drivers/nfc/Makefile index 13b648baf175..368b6dfe71b3 100644 --- a/drivers/nfc/Makefile +++ b/drivers/nfc/Makefile @@ -12,5 +12,5 @@ obj-$(CONFIG_NFC_PORT100) += port100.o obj-$(CONFIG_NFC_MRVL) += nfcmrvl/ obj-$(CONFIG_NFC_TRF7970A) += trf7970a.o obj-$(CONFIG_NFC_ST21NFCA) += st21nfca/ -obj-$(CONFIG_NFC_ST21NFCB) += st21nfcb/ +obj-$(CONFIG_NFC_ST_NCI) += st-nci/ obj-$(CONFIG_NFC_NXP_NCI) += nxp-nci/ diff --git a/drivers/nfc/st-nci/Kconfig b/drivers/nfc/st-nci/Kconfig new file mode 100644 index 000000000000..fc3904c946ee --- /dev/null +++ b/drivers/nfc/st-nci/Kconfig @@ -0,0 +1,23 @@ +config NFC_ST_NCI + tristate "STMicroelectronics ST NCI NFC driver" + depends on NFC_NCI + default n + ---help--- + STMicroelectronics NFC NCI chips core driver. It implements the chipset + NCI logic and hooks into the NFC kernel APIs. Physical layers will + register against it. + + To compile this driver as a module, choose m here. The module will + be called st-nci. + Say N if unsure. + +config NFC_ST_NCI_I2C + tristate "NFC ST NCI i2c support" + depends on NFC_ST_NCI && I2C + ---help--- + This module adds support for an I2C interface to the + STMicroelectronics NFC NCI chips familly. + Select this if your platform is using the i2c bus. + + If you choose to build a module, it'll be called st-nci_i2c. + Say N if unsure. diff --git a/drivers/nfc/st-nci/Makefile b/drivers/nfc/st-nci/Makefile new file mode 100644 index 000000000000..0df157df3a94 --- /dev/null +++ b/drivers/nfc/st-nci/Makefile @@ -0,0 +1,9 @@ +# +# Makefile for ST21NFCB NCI based NFC driver +# + +st-nci-objs = ndlc.o core.o st-nci_se.o +obj-$(CONFIG_NFC_ST_NCI) += st-nci.o + +st-nci_i2c-objs = i2c.o +obj-$(CONFIG_NFC_ST_NCI_I2C) += st-nci_i2c.o diff --git a/drivers/nfc/st-nci/core.c b/drivers/nfc/st-nci/core.c new file mode 100644 index 000000000000..c419d3943973 --- /dev/null +++ b/drivers/nfc/st-nci/core.c @@ -0,0 +1,179 @@ +/* + * NCI based Driver for STMicroelectronics NFC Chip + * + * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include "st-nci.h" +#include "st-nci_se.h" + +#define DRIVER_DESC "NCI NFC driver for ST_NCI" + +#define ST_NCI1_X_PROPRIETARY_ISO15693 0x83 + +static int st_nci_init(struct nci_dev *ndev) +{ + struct nci_mode_set_cmd cmd; + + cmd.cmd_type = ST_NCI_SET_NFC_MODE; + cmd.mode = 1; + + return nci_prop_cmd(ndev, ST_NCI_CORE_PROP, + sizeof(struct nci_mode_set_cmd), (__u8 *)&cmd); +} + +static int st_nci_open(struct nci_dev *ndev) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + int r; + + if (test_and_set_bit(ST_NCI_RUNNING, &info->flags)) + return 0; + + r = ndlc_open(info->ndlc); + if (r) + clear_bit(ST_NCI_RUNNING, &info->flags); + + return r; +} + +static int st_nci_close(struct nci_dev *ndev) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + + if (!test_bit(ST_NCI_RUNNING, &info->flags)) + return 0; + + ndlc_close(info->ndlc); + + clear_bit(ST_NCI_RUNNING, &info->flags); + + return 0; +} + +static int st_nci_send(struct nci_dev *ndev, struct sk_buff *skb) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + + skb->dev = (void *)ndev; + + if (!test_bit(ST_NCI_RUNNING, &info->flags)) + return -EBUSY; + + return ndlc_send(info->ndlc, skb); +} + +static __u32 st_nci_get_rfprotocol(struct nci_dev *ndev, + __u8 rf_protocol) +{ + return rf_protocol == ST_NCI1_X_PROPRIETARY_ISO15693 ? + NFC_PROTO_ISO15693_MASK : 0; +} + +static int st_nci_prop_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + __u8 status = skb->data[0]; + + nci_req_complete(ndev, status); + return 0; +} + +static struct nci_prop_ops st_nci_prop_ops[] = { + { + .opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, + ST_NCI_CORE_PROP), + .rsp = st_nci_prop_rsp_packet, + }, +}; + +static struct nci_ops st_nci_ops = { + .init = st_nci_init, + .open = st_nci_open, + .close = st_nci_close, + .send = st_nci_send, + .get_rfprotocol = st_nci_get_rfprotocol, + .discover_se = st_nci_discover_se, + .enable_se = st_nci_enable_se, + .disable_se = st_nci_disable_se, + .se_io = st_nci_se_io, + .hci_load_session = st_nci_hci_load_session, + .hci_event_received = st_nci_hci_event_received, + .hci_cmd_received = st_nci_hci_cmd_received, + .prop_ops = st_nci_prop_ops, + .n_prop_ops = ARRAY_SIZE(st_nci_prop_ops), +}; + +int st_nci_probe(struct llt_ndlc *ndlc, int phy_headroom, + int phy_tailroom) +{ + struct st_nci_info *info; + int r; + u32 protocols; + + info = devm_kzalloc(ndlc->dev, + sizeof(struct st_nci_info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + protocols = NFC_PROTO_JEWEL_MASK + | NFC_PROTO_MIFARE_MASK + | NFC_PROTO_FELICA_MASK + | NFC_PROTO_ISO14443_MASK + | NFC_PROTO_ISO14443_B_MASK + | NFC_PROTO_ISO15693_MASK + | NFC_PROTO_NFC_DEP_MASK; + + ndlc->ndev = nci_allocate_device(&st_nci_ops, protocols, + phy_headroom, phy_tailroom); + if (!ndlc->ndev) { + pr_err("Cannot allocate nfc ndev\n"); + return -ENOMEM; + } + info->ndlc = ndlc; + + nci_set_drvdata(ndlc->ndev, info); + + r = nci_register_device(ndlc->ndev); + if (r) { + pr_err("Cannot register nfc device to nci core\n"); + nci_free_device(ndlc->ndev); + return r; + } + + return st_nci_se_init(ndlc->ndev); +} +EXPORT_SYMBOL_GPL(st_nci_probe); + +void st_nci_remove(struct nci_dev *ndev) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + + ndlc_close(info->ndlc); + + nci_unregister_device(ndev); + nci_free_device(ndev); +} +EXPORT_SYMBOL_GPL(st_nci_remove); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/nfc/st-nci/i2c.c b/drivers/nfc/st-nci/i2c.c new file mode 100644 index 000000000000..06175ce769bb --- /dev/null +++ b/drivers/nfc/st-nci/i2c.c @@ -0,0 +1,385 @@ +/* + * I2C Link Layer for ST NCI NFC controller familly based Driver + * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ndlc.h" + +#define DRIVER_DESC "NCI NFC driver for ST21NFCB" + +/* ndlc header */ +#define ST21NFCB_FRAME_HEADROOM 1 +#define ST21NFCB_FRAME_TAILROOM 0 + +#define ST_NCI_I2C_MIN_SIZE 4 /* PCB(1) + NCI Packet header(3) */ +#define ST_NCI_I2C_MAX_SIZE 250 /* req 4.2.1 */ + +#define ST_NCI_I2C_DRIVER_NAME "st_nci_i2c" + +static struct i2c_device_id st_nci_i2c_id_table[] = { + {ST_NCI_DRIVER_NAME, 0}, + {} +}; +MODULE_DEVICE_TABLE(i2c, st_nci_i2c_id_table); + +struct st_nci_i2c_phy { + struct i2c_client *i2c_dev; + struct llt_ndlc *ndlc; + + unsigned int gpio_reset; + unsigned int irq_polarity; +}; + +#define I2C_DUMP_SKB(info, skb) \ +do { \ + pr_debug("%s:\n", info); \ + print_hex_dump(KERN_DEBUG, "i2c: ", DUMP_PREFIX_OFFSET, \ + 16, 1, (skb)->data, (skb)->len, 0); \ +} while (0) + +static int st_nci_i2c_enable(void *phy_id) +{ + struct st_nci_i2c_phy *phy = phy_id; + + gpio_set_value(phy->gpio_reset, 0); + usleep_range(10000, 15000); + gpio_set_value(phy->gpio_reset, 1); + usleep_range(80000, 85000); + + if (phy->ndlc->powered == 0) + enable_irq(phy->i2c_dev->irq); + + return 0; +} + +static void st_nci_i2c_disable(void *phy_id) +{ + struct st_nci_i2c_phy *phy = phy_id; + + disable_irq_nosync(phy->i2c_dev->irq); +} + +/* + * Writing a frame must not return the number of written bytes. + * It must return either zero for success, or <0 for error. + * In addition, it must not alter the skb + */ +static int st_nci_i2c_write(void *phy_id, struct sk_buff *skb) +{ + int r = -1; + struct st_nci_i2c_phy *phy = phy_id; + struct i2c_client *client = phy->i2c_dev; + + I2C_DUMP_SKB("st_nci_i2c_write", skb); + + if (phy->ndlc->hard_fault != 0) + return phy->ndlc->hard_fault; + + r = i2c_master_send(client, skb->data, skb->len); + if (r < 0) { /* Retry, chip was in standby */ + usleep_range(1000, 4000); + r = i2c_master_send(client, skb->data, skb->len); + } + + if (r >= 0) { + if (r != skb->len) + r = -EREMOTEIO; + else + r = 0; + } + + return r; +} + +/* + * Reads an ndlc frame and returns it in a newly allocated sk_buff. + * returns: + * frame size : if received frame is complete (find ST21NFCB_SOF_EOF at + * end of read) + * -EAGAIN : if received frame is incomplete (not find ST21NFCB_SOF_EOF + * at end of read) + * -EREMOTEIO : i2c read error (fatal) + * -EBADMSG : frame was incorrect and discarded + * (value returned from st_nci_i2c_repack) + * -EIO : if no ST21NFCB_SOF_EOF is found after reaching + * the read length end sequence + */ +static int st_nci_i2c_read(struct st_nci_i2c_phy *phy, + struct sk_buff **skb) +{ + int r; + u8 len; + u8 buf[ST_NCI_I2C_MAX_SIZE]; + struct i2c_client *client = phy->i2c_dev; + + r = i2c_master_recv(client, buf, ST_NCI_I2C_MIN_SIZE); + if (r < 0) { /* Retry, chip was in standby */ + usleep_range(1000, 4000); + r = i2c_master_recv(client, buf, ST_NCI_I2C_MIN_SIZE); + } + + if (r != ST_NCI_I2C_MIN_SIZE) + return -EREMOTEIO; + + len = be16_to_cpu(*(__be16 *) (buf + 2)); + if (len > ST_NCI_I2C_MAX_SIZE) { + nfc_err(&client->dev, "invalid frame len\n"); + return -EBADMSG; + } + + *skb = alloc_skb(ST_NCI_I2C_MIN_SIZE + len, GFP_KERNEL); + if (*skb == NULL) + return -ENOMEM; + + skb_reserve(*skb, ST_NCI_I2C_MIN_SIZE); + skb_put(*skb, ST_NCI_I2C_MIN_SIZE); + memcpy((*skb)->data, buf, ST_NCI_I2C_MIN_SIZE); + + if (!len) + return 0; + + r = i2c_master_recv(client, buf, len); + if (r != len) { + kfree_skb(*skb); + return -EREMOTEIO; + } + + skb_put(*skb, len); + memcpy((*skb)->data + ST_NCI_I2C_MIN_SIZE, buf, len); + + I2C_DUMP_SKB("i2c frame read", *skb); + + return 0; +} + +/* + * Reads an ndlc frame from the chip. + * + * On ST21NFCB, IRQ goes in idle state when read starts. + */ +static irqreturn_t st_nci_irq_thread_fn(int irq, void *phy_id) +{ + struct st_nci_i2c_phy *phy = phy_id; + struct i2c_client *client; + struct sk_buff *skb = NULL; + int r; + + if (!phy || !phy->ndlc || irq != phy->i2c_dev->irq) { + WARN_ON_ONCE(1); + return IRQ_NONE; + } + + client = phy->i2c_dev; + dev_dbg(&client->dev, "IRQ\n"); + + if (phy->ndlc->hard_fault) + return IRQ_HANDLED; + + if (!phy->ndlc->powered) { + st_nci_i2c_disable(phy); + return IRQ_HANDLED; + } + + r = st_nci_i2c_read(phy, &skb); + if (r == -EREMOTEIO || r == -ENOMEM || r == -EBADMSG) + return IRQ_HANDLED; + + ndlc_recv(phy->ndlc, skb); + + return IRQ_HANDLED; +} + +static struct nfc_phy_ops i2c_phy_ops = { + .write = st_nci_i2c_write, + .enable = st_nci_i2c_enable, + .disable = st_nci_i2c_disable, +}; + +#ifdef CONFIG_OF +static int st_nci_i2c_of_request_resources(struct i2c_client *client) +{ + struct st_nci_i2c_phy *phy = i2c_get_clientdata(client); + struct device_node *pp; + int gpio; + int r; + + pp = client->dev.of_node; + if (!pp) + return -ENODEV; + + /* Get GPIO from device tree */ + gpio = of_get_named_gpio(pp, "reset-gpios", 0); + if (gpio < 0) { + nfc_err(&client->dev, + "Failed to retrieve reset-gpios from device tree\n"); + return gpio; + } + + /* GPIO request and configuration */ + r = devm_gpio_request_one(&client->dev, gpio, + GPIOF_OUT_INIT_HIGH, "clf_reset"); + if (r) { + nfc_err(&client->dev, "Failed to request reset pin\n"); + return r; + } + phy->gpio_reset = gpio; + + phy->irq_polarity = irq_get_trigger_type(client->irq); + + return 0; +} +#else +static int st_nci_i2c_of_request_resources(struct i2c_client *client) +{ + return -ENODEV; +} +#endif + +static int st_nci_i2c_request_resources(struct i2c_client *client) +{ + struct st_nci_nfc_platform_data *pdata; + struct st_nci_i2c_phy *phy = i2c_get_clientdata(client); + int r; + + pdata = client->dev.platform_data; + if (pdata == NULL) { + nfc_err(&client->dev, "No platform data\n"); + return -EINVAL; + } + + /* store for later use */ + phy->gpio_reset = pdata->gpio_reset; + phy->irq_polarity = pdata->irq_polarity; + + r = devm_gpio_request_one(&client->dev, + phy->gpio_reset, GPIOF_OUT_INIT_HIGH, "clf_reset"); + if (r) { + pr_err("%s : reset gpio_request failed\n", __FILE__); + return r; + } + + return 0; +} + +static int st_nci_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct st_nci_i2c_phy *phy; + struct st_nci_nfc_platform_data *pdata; + int r; + + dev_dbg(&client->dev, "%s\n", __func__); + dev_dbg(&client->dev, "IRQ: %d\n", client->irq); + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + nfc_err(&client->dev, "Need I2C_FUNC_I2C\n"); + return -ENODEV; + } + + phy = devm_kzalloc(&client->dev, sizeof(struct st_nci_i2c_phy), + GFP_KERNEL); + if (!phy) + return -ENOMEM; + + phy->i2c_dev = client; + + i2c_set_clientdata(client, phy); + + pdata = client->dev.platform_data; + if (!pdata && client->dev.of_node) { + r = st_nci_i2c_of_request_resources(client); + if (r) { + nfc_err(&client->dev, "No platform data\n"); + return r; + } + } else if (pdata) { + r = st_nci_i2c_request_resources(client); + if (r) { + nfc_err(&client->dev, + "Cannot get platform resources\n"); + return r; + } + } else { + nfc_err(&client->dev, + "st21nfcb platform resources not available\n"); + return -ENODEV; + } + + r = ndlc_probe(phy, &i2c_phy_ops, &client->dev, + ST21NFCB_FRAME_HEADROOM, ST21NFCB_FRAME_TAILROOM, + &phy->ndlc); + if (r < 0) { + nfc_err(&client->dev, "Unable to register ndlc layer\n"); + return r; + } + + r = devm_request_threaded_irq(&client->dev, client->irq, NULL, + st_nci_irq_thread_fn, + phy->irq_polarity | IRQF_ONESHOT, + ST_NCI_DRIVER_NAME, phy); + if (r < 0) + nfc_err(&client->dev, "Unable to register IRQ handler\n"); + + return r; +} + +static int st_nci_i2c_remove(struct i2c_client *client) +{ + struct st_nci_i2c_phy *phy = i2c_get_clientdata(client); + + dev_dbg(&client->dev, "%s\n", __func__); + + ndlc_remove(phy->ndlc); + + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id of_st_nci_i2c_match[] = { + { .compatible = "st,st21nfcb-i2c", }, + { .compatible = "st,st21nfcb_i2c", }, + { .compatible = "st,st21nfcc-i2c", }, + {} +}; +MODULE_DEVICE_TABLE(of, of_st_nci_i2c_match); +#endif + +static struct i2c_driver st_nci_i2c_driver = { + .driver = { + .owner = THIS_MODULE, + .name = ST_NCI_I2C_DRIVER_NAME, + .of_match_table = of_match_ptr(of_st_nci_i2c_match), + }, + .probe = st_nci_i2c_probe, + .id_table = st_nci_i2c_id_table, + .remove = st_nci_i2c_remove, +}; + +module_i2c_driver(st_nci_i2c_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/nfc/st-nci/ndlc.c b/drivers/nfc/st-nci/ndlc.c new file mode 100644 index 000000000000..56c6a4cb4c96 --- /dev/null +++ b/drivers/nfc/st-nci/ndlc.c @@ -0,0 +1,313 @@ +/* + * Low Level Transport (NDLC) Driver for STMicroelectronics NFC Chip + * + * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include +#include + +#include "ndlc.h" +#include "st-nci.h" + +#define NDLC_TIMER_T1 100 +#define NDLC_TIMER_T1_WAIT 400 +#define NDLC_TIMER_T2 1200 + +#define PCB_TYPE_DATAFRAME 0x80 +#define PCB_TYPE_SUPERVISOR 0xc0 +#define PCB_TYPE_MASK PCB_TYPE_SUPERVISOR + +#define PCB_SYNC_ACK 0x20 +#define PCB_SYNC_NACK 0x10 +#define PCB_SYNC_WAIT 0x30 +#define PCB_SYNC_NOINFO 0x00 +#define PCB_SYNC_MASK PCB_SYNC_WAIT + +#define PCB_DATAFRAME_RETRANSMIT_YES 0x00 +#define PCB_DATAFRAME_RETRANSMIT_NO 0x04 +#define PCB_DATAFRAME_RETRANSMIT_MASK PCB_DATAFRAME_RETRANSMIT_NO + +#define PCB_SUPERVISOR_RETRANSMIT_YES 0x00 +#define PCB_SUPERVISOR_RETRANSMIT_NO 0x02 +#define PCB_SUPERVISOR_RETRANSMIT_MASK PCB_SUPERVISOR_RETRANSMIT_NO + +#define PCB_FRAME_CRC_INFO_PRESENT 0x08 +#define PCB_FRAME_CRC_INFO_NOTPRESENT 0x00 +#define PCB_FRAME_CRC_INFO_MASK PCB_FRAME_CRC_INFO_PRESENT + +#define NDLC_DUMP_SKB(info, skb) \ +do { \ + pr_debug("%s:\n", info); \ + print_hex_dump(KERN_DEBUG, "ndlc: ", DUMP_PREFIX_OFFSET, \ + 16, 1, skb->data, skb->len, 0); \ +} while (0) + +int ndlc_open(struct llt_ndlc *ndlc) +{ + /* toggle reset pin */ + ndlc->ops->enable(ndlc->phy_id); + ndlc->powered = 1; + return 0; +} +EXPORT_SYMBOL(ndlc_open); + +void ndlc_close(struct llt_ndlc *ndlc) +{ + struct nci_mode_set_cmd cmd; + + cmd.cmd_type = ST_NCI_SET_NFC_MODE; + cmd.mode = 0; + + /* toggle reset pin */ + ndlc->ops->enable(ndlc->phy_id); + + nci_prop_cmd(ndlc->ndev, ST_NCI_CORE_PROP, + sizeof(struct nci_mode_set_cmd), (__u8 *)&cmd); + + ndlc->powered = 0; + ndlc->ops->disable(ndlc->phy_id); +} +EXPORT_SYMBOL(ndlc_close); + +int ndlc_send(struct llt_ndlc *ndlc, struct sk_buff *skb) +{ + /* add ndlc header */ + u8 pcb = PCB_TYPE_DATAFRAME | PCB_DATAFRAME_RETRANSMIT_NO | + PCB_FRAME_CRC_INFO_NOTPRESENT; + + *skb_push(skb, 1) = pcb; + skb_queue_tail(&ndlc->send_q, skb); + + schedule_work(&ndlc->sm_work); + + return 0; +} +EXPORT_SYMBOL(ndlc_send); + +static void llt_ndlc_send_queue(struct llt_ndlc *ndlc) +{ + struct sk_buff *skb; + int r; + unsigned long time_sent; + + if (ndlc->send_q.qlen) + pr_debug("sendQlen=%d unackQlen=%d\n", + ndlc->send_q.qlen, ndlc->ack_pending_q.qlen); + + while (ndlc->send_q.qlen) { + skb = skb_dequeue(&ndlc->send_q); + NDLC_DUMP_SKB("ndlc frame written", skb); + r = ndlc->ops->write(ndlc->phy_id, skb); + if (r < 0) { + ndlc->hard_fault = r; + break; + } + time_sent = jiffies; + *(unsigned long *)skb->cb = time_sent; + + skb_queue_tail(&ndlc->ack_pending_q, skb); + + /* start timer t1 for ndlc aknowledge */ + ndlc->t1_active = true; + mod_timer(&ndlc->t1_timer, time_sent + + msecs_to_jiffies(NDLC_TIMER_T1)); + /* start timer t2 for chip availability */ + ndlc->t2_active = true; + mod_timer(&ndlc->t2_timer, time_sent + + msecs_to_jiffies(NDLC_TIMER_T2)); + } +} + +static void llt_ndlc_requeue_data_pending(struct llt_ndlc *ndlc) +{ + struct sk_buff *skb; + u8 pcb; + + while ((skb = skb_dequeue_tail(&ndlc->ack_pending_q))) { + pcb = skb->data[0]; + switch (pcb & PCB_TYPE_MASK) { + case PCB_TYPE_SUPERVISOR: + skb->data[0] = (pcb & ~PCB_SUPERVISOR_RETRANSMIT_MASK) | + PCB_SUPERVISOR_RETRANSMIT_YES; + break; + case PCB_TYPE_DATAFRAME: + skb->data[0] = (pcb & ~PCB_DATAFRAME_RETRANSMIT_MASK) | + PCB_DATAFRAME_RETRANSMIT_YES; + break; + default: + pr_err("UNKNOWN Packet Control Byte=%d\n", pcb); + kfree_skb(skb); + continue; + } + skb_queue_head(&ndlc->send_q, skb); + } +} + +static void llt_ndlc_rcv_queue(struct llt_ndlc *ndlc) +{ + struct sk_buff *skb; + u8 pcb; + unsigned long time_sent; + + if (ndlc->rcv_q.qlen) + pr_debug("rcvQlen=%d\n", ndlc->rcv_q.qlen); + + while ((skb = skb_dequeue(&ndlc->rcv_q)) != NULL) { + pcb = skb->data[0]; + skb_pull(skb, 1); + if ((pcb & PCB_TYPE_MASK) == PCB_TYPE_SUPERVISOR) { + switch (pcb & PCB_SYNC_MASK) { + case PCB_SYNC_ACK: + del_timer_sync(&ndlc->t1_timer); + del_timer_sync(&ndlc->t2_timer); + ndlc->t2_active = false; + ndlc->t1_active = false; + break; + case PCB_SYNC_NACK: + llt_ndlc_requeue_data_pending(ndlc); + llt_ndlc_send_queue(ndlc); + /* start timer t1 for ndlc aknowledge */ + time_sent = jiffies; + ndlc->t1_active = true; + mod_timer(&ndlc->t1_timer, time_sent + + msecs_to_jiffies(NDLC_TIMER_T1)); + break; + case PCB_SYNC_WAIT: + time_sent = jiffies; + ndlc->t1_active = true; + mod_timer(&ndlc->t1_timer, time_sent + + msecs_to_jiffies(NDLC_TIMER_T1_WAIT)); + break; + default: + pr_err("UNKNOWN Packet Control Byte=%d\n", pcb); + kfree_skb(skb); + break; + } + } else { + nci_recv_frame(ndlc->ndev, skb); + } + } +} + +static void llt_ndlc_sm_work(struct work_struct *work) +{ + struct llt_ndlc *ndlc = container_of(work, struct llt_ndlc, sm_work); + + llt_ndlc_send_queue(ndlc); + llt_ndlc_rcv_queue(ndlc); + + if (ndlc->t1_active && timer_pending(&ndlc->t1_timer) == 0) { + pr_debug + ("Handle T1(recv SUPERVISOR) elapsed (T1 now inactive)\n"); + ndlc->t1_active = false; + + llt_ndlc_requeue_data_pending(ndlc); + llt_ndlc_send_queue(ndlc); + } + + if (ndlc->t2_active && timer_pending(&ndlc->t2_timer) == 0) { + pr_debug("Handle T2(recv DATA) elapsed (T2 now inactive)\n"); + ndlc->t2_active = false; + ndlc->t1_active = false; + del_timer_sync(&ndlc->t1_timer); + del_timer_sync(&ndlc->t2_timer); + ndlc_close(ndlc); + ndlc->hard_fault = -EREMOTEIO; + } +} + +void ndlc_recv(struct llt_ndlc *ndlc, struct sk_buff *skb) +{ + if (skb == NULL) { + pr_err("NULL Frame -> link is dead\n"); + ndlc->hard_fault = -EREMOTEIO; + ndlc_close(ndlc); + } else { + NDLC_DUMP_SKB("incoming frame", skb); + skb_queue_tail(&ndlc->rcv_q, skb); + } + + schedule_work(&ndlc->sm_work); +} +EXPORT_SYMBOL(ndlc_recv); + +static void ndlc_t1_timeout(unsigned long data) +{ + struct llt_ndlc *ndlc = (struct llt_ndlc *)data; + + pr_debug("\n"); + + schedule_work(&ndlc->sm_work); +} + +static void ndlc_t2_timeout(unsigned long data) +{ + struct llt_ndlc *ndlc = (struct llt_ndlc *)data; + + pr_debug("\n"); + + schedule_work(&ndlc->sm_work); +} + +int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev, + int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id) +{ + struct llt_ndlc *ndlc; + + ndlc = devm_kzalloc(dev, sizeof(struct llt_ndlc), GFP_KERNEL); + if (!ndlc) + return -ENOMEM; + + ndlc->ops = phy_ops; + ndlc->phy_id = phy_id; + ndlc->dev = dev; + ndlc->powered = 0; + + *ndlc_id = ndlc; + + /* initialize timers */ + init_timer(&ndlc->t1_timer); + ndlc->t1_timer.data = (unsigned long)ndlc; + ndlc->t1_timer.function = ndlc_t1_timeout; + + init_timer(&ndlc->t2_timer); + ndlc->t2_timer.data = (unsigned long)ndlc; + ndlc->t2_timer.function = ndlc_t2_timeout; + + skb_queue_head_init(&ndlc->rcv_q); + skb_queue_head_init(&ndlc->send_q); + skb_queue_head_init(&ndlc->ack_pending_q); + + INIT_WORK(&ndlc->sm_work, llt_ndlc_sm_work); + + return st_nci_probe(ndlc, phy_headroom, phy_tailroom); +} +EXPORT_SYMBOL(ndlc_probe); + +void ndlc_remove(struct llt_ndlc *ndlc) +{ + st_nci_remove(ndlc->ndev); + + /* cancel timers */ + del_timer_sync(&ndlc->t1_timer); + del_timer_sync(&ndlc->t2_timer); + ndlc->t2_active = false; + ndlc->t1_active = false; + + skb_queue_purge(&ndlc->rcv_q); + skb_queue_purge(&ndlc->send_q); +} +EXPORT_SYMBOL(ndlc_remove); diff --git a/drivers/nfc/st-nci/ndlc.h b/drivers/nfc/st-nci/ndlc.h new file mode 100644 index 000000000000..6361005ef003 --- /dev/null +++ b/drivers/nfc/st-nci/ndlc.h @@ -0,0 +1,60 @@ +/* + * NCI based Driver for STMicroelectronics NFC Chip + * + * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef __LOCAL_NDLC_H_ +#define __LOCAL_NDLC_H_ + +#include +#include + +/* Low Level Transport description */ +struct llt_ndlc { + struct nci_dev *ndev; + struct nfc_phy_ops *ops; + void *phy_id; + + struct timer_list t1_timer; + bool t1_active; + + struct timer_list t2_timer; + bool t2_active; + + struct sk_buff_head rcv_q; + struct sk_buff_head send_q; + struct sk_buff_head ack_pending_q; + + struct work_struct sm_work; + + struct device *dev; + + /* + * < 0 if hardware error occurred + * and prevents normal operation. + */ + int hard_fault; + int powered; +}; + +int ndlc_open(struct llt_ndlc *ndlc); +void ndlc_close(struct llt_ndlc *ndlc); +int ndlc_send(struct llt_ndlc *ndlc, struct sk_buff *skb); +void ndlc_recv(struct llt_ndlc *ndlc, struct sk_buff *skb); +int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev, + int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id); +void ndlc_remove(struct llt_ndlc *ndlc); +#endif /* __LOCAL_NDLC_H__ */ diff --git a/drivers/nfc/st-nci/st-nci.h b/drivers/nfc/st-nci/st-nci.h new file mode 100644 index 000000000000..850a2395deb7 --- /dev/null +++ b/drivers/nfc/st-nci/st-nci.h @@ -0,0 +1,50 @@ +/* + * NCI based Driver for STMicroelectronics NFC Chip + * + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef __LOCAL_ST_NCI_H_ +#define __LOCAL_ST_NCI_H_ + +#include "st-nci_se.h" +#include "ndlc.h" + +/* Define private flags: */ +#define ST_NCI_RUNNING 1 + +#define ST_NCI_CORE_PROP 0x01 +#define ST_NCI_SET_NFC_MODE 0x02 + +struct nci_mode_set_cmd { + u8 cmd_type; + u8 mode; +} __packed; + +struct nci_mode_set_rsp { + u8 status; +} __packed; + +struct st_nci_info { + struct llt_ndlc *ndlc; + unsigned long flags; + struct st_nci_se_info se_info; +}; + +void st_nci_remove(struct nci_dev *ndev); +int st_nci_probe(struct llt_ndlc *ndlc, int phy_headroom, + int phy_tailroom); + +#endif /* __LOCAL_ST_NCI_H_ */ diff --git a/drivers/nfc/st-nci/st-nci_se.c b/drivers/nfc/st-nci/st-nci_se.c new file mode 100644 index 000000000000..97addfa96c6f --- /dev/null +++ b/drivers/nfc/st-nci/st-nci_se.c @@ -0,0 +1,714 @@ +/* + * Secure Element driver for STMicroelectronics NFC NCI chip + * + * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "st-nci.h" +#include "st-nci_se.h" + +struct st_nci_pipe_info { + u8 pipe_state; + u8 src_host_id; + u8 src_gate_id; + u8 dst_host_id; + u8 dst_gate_id; +} __packed; + +/* Hosts */ +#define ST_NCI_HOST_CONTROLLER_ID 0x00 +#define ST_NCI_TERMINAL_HOST_ID 0x01 +#define ST_NCI_UICC_HOST_ID 0x02 +#define ST_NCI_ESE_HOST_ID 0xc0 + +/* Gates */ +#define ST_NCI_DEVICE_MGNT_GATE 0x01 +#define ST_NCI_APDU_READER_GATE 0xf0 +#define ST_NCI_CONNECTIVITY_GATE 0x41 + +/* Pipes */ +#define ST_NCI_DEVICE_MGNT_PIPE 0x02 + +/* Connectivity pipe only */ +#define ST_NCI_SE_COUNT_PIPE_UICC 0x01 +/* Connectivity + APDU Reader pipe */ +#define ST_NCI_SE_COUNT_PIPE_EMBEDDED 0x02 + +#define ST_NCI_SE_TO_HOT_PLUG 1000 /* msecs */ +#define ST_NCI_SE_TO_PIPES 2000 + +#define ST_NCI_EVT_HOT_PLUG_IS_INHIBITED(x) (x->data[0] & 0x80) + +#define NCI_HCI_APDU_PARAM_ATR 0x01 +#define NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY 0x01 +#define NCI_HCI_ADMIN_PARAM_WHITELIST 0x03 +#define NCI_HCI_ADMIN_PARAM_HOST_LIST 0x04 + +#define ST_NCI_EVT_SE_HARD_RESET 0x20 +#define ST_NCI_EVT_TRANSMIT_DATA 0x10 +#define ST_NCI_EVT_WTX_REQUEST 0x11 +#define ST_NCI_EVT_SE_SOFT_RESET 0x11 +#define ST_NCI_EVT_SE_END_OF_APDU_TRANSFER 0x21 +#define ST_NCI_EVT_HOT_PLUG 0x03 + +#define ST_NCI_SE_MODE_OFF 0x00 +#define ST_NCI_SE_MODE_ON 0x01 + +#define ST_NCI_EVT_CONNECTIVITY 0x10 +#define ST_NCI_EVT_TRANSACTION 0x12 + +#define ST_NCI_DM_GETINFO 0x13 +#define ST_NCI_DM_GETINFO_PIPE_LIST 0x02 +#define ST_NCI_DM_GETINFO_PIPE_INFO 0x01 +#define ST_NCI_DM_PIPE_CREATED 0x02 +#define ST_NCI_DM_PIPE_OPEN 0x04 +#define ST_NCI_DM_RF_ACTIVE 0x80 +#define ST_NCI_DM_DISCONNECT 0x30 + +#define ST_NCI_DM_IS_PIPE_OPEN(p) \ + ((p & 0x0f) == (ST_NCI_DM_PIPE_CREATED | ST_NCI_DM_PIPE_OPEN)) + +#define ST_NCI_ATR_DEFAULT_BWI 0x04 + +/* + * WT = 2^BWI/10[s], convert into msecs and add a secure + * room by increasing by 2 this timeout + */ +#define ST_NCI_BWI_TO_TIMEOUT(x) ((1 << x) * 200) +#define ST_NCI_ATR_GET_Y_FROM_TD(x) (x >> 4) + +/* If TA is present bit 0 is set */ +#define ST_NCI_ATR_TA_PRESENT(x) (x & 0x01) +/* If TB is present bit 1 is set */ +#define ST_NCI_ATR_TB_PRESENT(x) (x & 0x02) + +#define ST_NCI_NUM_DEVICES 256 + +static DECLARE_BITMAP(dev_mask, ST_NCI_NUM_DEVICES); + +/* Here are the mandatory pipe for st_nci */ +static struct nci_hci_gate st_nci_gates[] = { + {NCI_HCI_ADMIN_GATE, NCI_HCI_ADMIN_PIPE, + ST_NCI_HOST_CONTROLLER_ID}, + {NCI_HCI_LINK_MGMT_GATE, NCI_HCI_LINK_MGMT_PIPE, + ST_NCI_HOST_CONTROLLER_ID}, + {ST_NCI_DEVICE_MGNT_GATE, ST_NCI_DEVICE_MGNT_PIPE, + ST_NCI_HOST_CONTROLLER_ID}, + + /* Secure element pipes are created by secure element host */ + {ST_NCI_CONNECTIVITY_GATE, NCI_HCI_DO_NOT_OPEN_PIPE, + ST_NCI_HOST_CONTROLLER_ID}, + {ST_NCI_APDU_READER_GATE, NCI_HCI_DO_NOT_OPEN_PIPE, + ST_NCI_HOST_CONTROLLER_ID}, +}; + +static u8 st_nci_se_get_bwi(struct nci_dev *ndev) +{ + int i; + u8 td; + struct st_nci_info *info = nci_get_drvdata(ndev); + + /* Bits 8 to 5 of the first TB for T=1 encode BWI from zero to nine */ + for (i = 1; i < ST_NCI_ESE_MAX_LENGTH; i++) { + td = ST_NCI_ATR_GET_Y_FROM_TD(info->se_info.atr[i]); + if (ST_NCI_ATR_TA_PRESENT(td)) + i++; + if (ST_NCI_ATR_TB_PRESENT(td)) { + i++; + return info->se_info.atr[i] >> 4; + } + } + return ST_NCI_ATR_DEFAULT_BWI; +} + +static void st_nci_se_get_atr(struct nci_dev *ndev) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + int r; + struct sk_buff *skb; + + r = nci_hci_get_param(ndev, ST_NCI_APDU_READER_GATE, + NCI_HCI_APDU_PARAM_ATR, &skb); + if (r < 0) + return; + + if (skb->len <= ST_NCI_ESE_MAX_LENGTH) { + memcpy(info->se_info.atr, skb->data, skb->len); + + info->se_info.wt_timeout = + ST_NCI_BWI_TO_TIMEOUT(st_nci_se_get_bwi(ndev)); + } + kfree_skb(skb); +} + +int st_nci_hci_load_session(struct nci_dev *ndev) +{ + int i, j, r; + struct sk_buff *skb_pipe_list, *skb_pipe_info; + struct st_nci_pipe_info *dm_pipe_info; + u8 pipe_list[] = { ST_NCI_DM_GETINFO_PIPE_LIST, + ST_NCI_TERMINAL_HOST_ID}; + u8 pipe_info[] = { ST_NCI_DM_GETINFO_PIPE_INFO, + ST_NCI_TERMINAL_HOST_ID, 0}; + + /* On ST_NCI device pipes number are dynamics + * If pipes are already created, hci_dev_up will fail. + * Doing a clear all pipe is a bad idea because: + * - It does useless EEPROM cycling + * - It might cause issue for secure elements support + * (such as removing connectivity or APDU reader pipe) + * A better approach on ST_NCI is to: + * - get a pipe list for each host. + * (eg: ST_NCI_HOST_CONTROLLER_ID for now). + * (TODO Later on UICC HOST and eSE HOST) + * - get pipe information + * - match retrieved pipe list in st_nci_gates + * ST_NCI_DEVICE_MGNT_GATE is a proprietary gate + * with ST_NCI_DEVICE_MGNT_PIPE. + * Pipe can be closed and need to be open. + */ + r = nci_hci_connect_gate(ndev, ST_NCI_HOST_CONTROLLER_ID, + ST_NCI_DEVICE_MGNT_GATE, + ST_NCI_DEVICE_MGNT_PIPE); + if (r < 0) + goto free_info; + + /* Get pipe list */ + r = nci_hci_send_cmd(ndev, ST_NCI_DEVICE_MGNT_GATE, + ST_NCI_DM_GETINFO, pipe_list, sizeof(pipe_list), + &skb_pipe_list); + if (r < 0) + goto free_info; + + /* Complete the existing gate_pipe table */ + for (i = 0; i < skb_pipe_list->len; i++) { + pipe_info[2] = skb_pipe_list->data[i]; + r = nci_hci_send_cmd(ndev, ST_NCI_DEVICE_MGNT_GATE, + ST_NCI_DM_GETINFO, pipe_info, + sizeof(pipe_info), &skb_pipe_info); + + if (r) + continue; + + /* + * Match pipe ID and gate ID + * Output format from ST21NFC_DM_GETINFO is: + * - pipe state (1byte) + * - source hid (1byte) + * - source gid (1byte) + * - destination hid (1byte) + * - destination gid (1byte) + */ + dm_pipe_info = (struct st_nci_pipe_info *)skb_pipe_info->data; + if (dm_pipe_info->dst_gate_id == ST_NCI_APDU_READER_GATE && + dm_pipe_info->src_host_id != ST_NCI_ESE_HOST_ID) { + pr_err("Unexpected apdu_reader pipe on host %x\n", + dm_pipe_info->src_host_id); + continue; + } + + for (j = 0; (j < ARRAY_SIZE(st_nci_gates)) && + (st_nci_gates[j].gate != dm_pipe_info->dst_gate_id); j++) + ; + + if (j < ARRAY_SIZE(st_nci_gates) && + st_nci_gates[j].gate == dm_pipe_info->dst_gate_id && + ST_NCI_DM_IS_PIPE_OPEN(dm_pipe_info->pipe_state)) { + st_nci_gates[j].pipe = pipe_info[2]; + + ndev->hci_dev->gate2pipe[st_nci_gates[j].gate] = + st_nci_gates[j].pipe; + ndev->hci_dev->pipes[st_nci_gates[j].pipe].gate = + st_nci_gates[j].gate; + ndev->hci_dev->pipes[st_nci_gates[j].pipe].host = + dm_pipe_info->src_host_id; + } + } + + memcpy(ndev->hci_dev->init_data.gates, st_nci_gates, + sizeof(st_nci_gates)); + +free_info: + kfree_skb(skb_pipe_info); + kfree_skb(skb_pipe_list); + return r; +} +EXPORT_SYMBOL_GPL(st_nci_hci_load_session); + +static void st_nci_hci_admin_event_received(struct nci_dev *ndev, + u8 event, struct sk_buff *skb) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + + switch (event) { + case ST_NCI_EVT_HOT_PLUG: + if (info->se_info.se_active) { + if (!ST_NCI_EVT_HOT_PLUG_IS_INHIBITED(skb)) { + del_timer_sync(&info->se_info.se_active_timer); + info->se_info.se_active = false; + complete(&info->se_info.req_completion); + } else { + mod_timer(&info->se_info.se_active_timer, + jiffies + + msecs_to_jiffies(ST_NCI_SE_TO_PIPES)); + } + } + break; + } +} + +static int st_nci_hci_apdu_reader_event_received(struct nci_dev *ndev, + u8 event, + struct sk_buff *skb) +{ + int r = 0; + struct st_nci_info *info = nci_get_drvdata(ndev); + + pr_debug("apdu reader gate event: %x\n", event); + + switch (event) { + case ST_NCI_EVT_TRANSMIT_DATA: + del_timer_sync(&info->se_info.bwi_timer); + info->se_info.bwi_active = false; + info->se_info.cb(info->se_info.cb_context, + skb->data, skb->len, 0); + break; + case ST_NCI_EVT_WTX_REQUEST: + mod_timer(&info->se_info.bwi_timer, jiffies + + msecs_to_jiffies(info->se_info.wt_timeout)); + break; + } + + kfree_skb(skb); + return r; +} + +/* + * Returns: + * <= 0: driver handled the event, skb consumed + * 1: driver does not handle the event, please do standard processing + */ +static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev, + u8 host, u8 event, + struct sk_buff *skb) +{ + int r = 0; + struct device *dev = &ndev->nfc_dev->dev; + struct nfc_evt_transaction *transaction; + + pr_debug("connectivity gate event: %x\n", event); + + switch (event) { + case ST_NCI_EVT_CONNECTIVITY: + + break; + case ST_NCI_EVT_TRANSACTION: + /* According to specification etsi 102 622 + * 11.2.2.4 EVT_TRANSACTION Table 52 + * Description Tag Length + * AID 81 5 to 16 + * PARAMETERS 82 0 to 255 + */ + if (skb->len < NFC_MIN_AID_LENGTH + 2 && + skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) + return -EPROTO; + + transaction = (struct nfc_evt_transaction *)devm_kzalloc(dev, + skb->len - 2, GFP_KERNEL); + + transaction->aid_len = skb->data[1]; + memcpy(transaction->aid, &skb->data[2], transaction->aid_len); + + /* Check next byte is PARAMETERS tag (82) */ + if (skb->data[transaction->aid_len + 2] != + NFC_EVT_TRANSACTION_PARAMS_TAG) + return -EPROTO; + + transaction->params_len = skb->data[transaction->aid_len + 3]; + memcpy(transaction->params, skb->data + + transaction->aid_len + 4, transaction->params_len); + + r = nfc_se_transaction(ndev->nfc_dev, host, transaction); + break; + default: + return 1; + } + kfree_skb(skb); + return r; +} + +void st_nci_hci_event_received(struct nci_dev *ndev, u8 pipe, + u8 event, struct sk_buff *skb) +{ + u8 gate = ndev->hci_dev->pipes[pipe].gate; + u8 host = ndev->hci_dev->pipes[pipe].host; + + switch (gate) { + case NCI_HCI_ADMIN_GATE: + st_nci_hci_admin_event_received(ndev, event, skb); + break; + case ST_NCI_APDU_READER_GATE: + st_nci_hci_apdu_reader_event_received(ndev, event, skb); + break; + case ST_NCI_CONNECTIVITY_GATE: + st_nci_hci_connectivity_event_received(ndev, host, event, + skb); + break; + } +} +EXPORT_SYMBOL_GPL(st_nci_hci_event_received); + + +void st_nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, u8 cmd, + struct sk_buff *skb) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + u8 gate = ndev->hci_dev->pipes[pipe].gate; + + pr_debug("cmd: %x\n", cmd); + + switch (cmd) { + case NCI_HCI_ANY_OPEN_PIPE: + if (gate != ST_NCI_APDU_READER_GATE && + ndev->hci_dev->pipes[pipe].host != ST_NCI_UICC_HOST_ID) + ndev->hci_dev->count_pipes++; + + if (ndev->hci_dev->count_pipes == + ndev->hci_dev->expected_pipes) { + del_timer_sync(&info->se_info.se_active_timer); + info->se_info.se_active = false; + ndev->hci_dev->count_pipes = 0; + complete(&info->se_info.req_completion); + } + break; + } +} +EXPORT_SYMBOL_GPL(st_nci_hci_cmd_received); + +/* + * Remarks: On some early st_nci firmware, nci_nfcee_mode_set(0) + * is rejected + */ +static int st_nci_control_se(struct nci_dev *ndev, u8 se_idx, + u8 state) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + int r; + struct sk_buff *sk_host_list; + u8 host_id; + + switch (se_idx) { + case ST_NCI_UICC_HOST_ID: + ndev->hci_dev->count_pipes = 0; + ndev->hci_dev->expected_pipes = ST_NCI_SE_COUNT_PIPE_UICC; + break; + case ST_NCI_ESE_HOST_ID: + ndev->hci_dev->count_pipes = 0; + ndev->hci_dev->expected_pipes = ST_NCI_SE_COUNT_PIPE_EMBEDDED; + break; + default: + return -EINVAL; + } + + /* + * Wait for an EVT_HOT_PLUG in order to + * retrieve a relevant host list. + */ + reinit_completion(&info->se_info.req_completion); + r = nci_nfcee_mode_set(ndev, se_idx, NCI_NFCEE_ENABLE); + if (r != NCI_STATUS_OK) + return r; + + mod_timer(&info->se_info.se_active_timer, jiffies + + msecs_to_jiffies(ST_NCI_SE_TO_HOT_PLUG)); + info->se_info.se_active = true; + + /* Ignore return value and check in any case the host_list */ + wait_for_completion_interruptible(&info->se_info.req_completion); + + /* There might be some "collision" after receiving a HOT_PLUG event + * This may cause the CLF to not answer to the next hci command. + * There is no possible synchronization to prevent this. + * Adding a small delay is the only way to solve the issue. + */ + usleep_range(3000, 5000); + + r = nci_hci_get_param(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADMIN_PARAM_HOST_LIST, &sk_host_list); + if (r != NCI_HCI_ANY_OK) + return r; + + host_id = sk_host_list->data[sk_host_list->len - 1]; + kfree_skb(sk_host_list); + if (state == ST_NCI_SE_MODE_ON && host_id == se_idx) + return se_idx; + else if (state == ST_NCI_SE_MODE_OFF && host_id != se_idx) + return se_idx; + + return -1; +} + +int st_nci_disable_se(struct nci_dev *ndev, u32 se_idx) +{ + int r; + + pr_debug("st_nci_disable_se\n"); + + if (se_idx == NFC_SE_EMBEDDED) { + r = nci_hci_send_event(ndev, ST_NCI_APDU_READER_GATE, + ST_NCI_EVT_SE_END_OF_APDU_TRANSFER, NULL, 0); + if (r < 0) + return r; + } + + return 0; +} +EXPORT_SYMBOL_GPL(st_nci_disable_se); + +int st_nci_enable_se(struct nci_dev *ndev, u32 se_idx) +{ + int r; + + pr_debug("st_nci_enable_se\n"); + + if (se_idx == ST_NCI_HCI_HOST_ID_ESE) { + r = nci_hci_send_event(ndev, ST_NCI_APDU_READER_GATE, + ST_NCI_EVT_SE_SOFT_RESET, NULL, 0); + if (r < 0) + return r; + } + + return 0; +} +EXPORT_SYMBOL_GPL(st_nci_enable_se); + +static int st_nci_hci_network_init(struct nci_dev *ndev) +{ + struct core_conn_create_dest_spec_params *dest_params; + struct dest_spec_params spec_params; + struct nci_conn_info *conn_info; + int r, dev_num; + + dest_params = + kzalloc(sizeof(struct core_conn_create_dest_spec_params) + + sizeof(struct dest_spec_params), GFP_KERNEL); + if (dest_params == NULL) { + r = -ENOMEM; + goto exit; + } + + dest_params->type = NCI_DESTINATION_SPECIFIC_PARAM_NFCEE_TYPE; + dest_params->length = sizeof(struct dest_spec_params); + spec_params.id = ndev->hci_dev->nfcee_id; + spec_params.protocol = NCI_NFCEE_INTERFACE_HCI_ACCESS; + memcpy(dest_params->value, &spec_params, + sizeof(struct dest_spec_params)); + r = nci_core_conn_create(ndev, NCI_DESTINATION_NFCEE, 1, + sizeof(struct core_conn_create_dest_spec_params) + + sizeof(struct dest_spec_params), + dest_params); + if (r != NCI_STATUS_OK) + goto free_dest_params; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + goto free_dest_params; + + memcpy(ndev->hci_dev->init_data.gates, st_nci_gates, + sizeof(st_nci_gates)); + + /* + * Session id must include the driver name + i2c bus addr + * persistent info to discriminate 2 identical chips + */ + dev_num = find_first_zero_bit(dev_mask, ST_NCI_NUM_DEVICES); + if (dev_num >= ST_NCI_NUM_DEVICES) { + r = -ENODEV; + goto free_dest_params; + } + + scnprintf(ndev->hci_dev->init_data.session_id, + sizeof(ndev->hci_dev->init_data.session_id), + "%s%2x", "ST21BH", dev_num); + + r = nci_hci_dev_session_init(ndev); + if (r != NCI_HCI_ANY_OK) + goto free_dest_params; + + r = nci_nfcee_mode_set(ndev, ndev->hci_dev->conn_info->id, + NCI_NFCEE_ENABLE); + if (r != NCI_STATUS_OK) + goto free_dest_params; + +free_dest_params: + kfree(dest_params); + +exit: + return r; +} + +int st_nci_discover_se(struct nci_dev *ndev) +{ + u8 param[2]; + int r; + int se_count = 0; + + pr_debug("st_nci_discover_se\n"); + + r = st_nci_hci_network_init(ndev); + if (r != 0) + return r; + + param[0] = ST_NCI_UICC_HOST_ID; + param[1] = ST_NCI_HCI_HOST_ID_ESE; + r = nci_hci_set_param(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADMIN_PARAM_WHITELIST, + param, sizeof(param)); + if (r != NCI_HCI_ANY_OK) + return r; + + r = st_nci_control_se(ndev, ST_NCI_UICC_HOST_ID, + ST_NCI_SE_MODE_ON); + if (r == ST_NCI_UICC_HOST_ID) { + nfc_add_se(ndev->nfc_dev, ST_NCI_UICC_HOST_ID, NFC_SE_UICC); + se_count++; + } + + /* Try to enable eSE in order to check availability */ + r = st_nci_control_se(ndev, ST_NCI_HCI_HOST_ID_ESE, + ST_NCI_SE_MODE_ON); + if (r == ST_NCI_HCI_HOST_ID_ESE) { + nfc_add_se(ndev->nfc_dev, ST_NCI_HCI_HOST_ID_ESE, + NFC_SE_EMBEDDED); + se_count++; + st_nci_se_get_atr(ndev); + } + + return !se_count; +} +EXPORT_SYMBOL_GPL(st_nci_discover_se); + +int st_nci_se_io(struct nci_dev *ndev, u32 se_idx, + u8 *apdu, size_t apdu_length, + se_io_cb_t cb, void *cb_context) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + + pr_debug("\n"); + + switch (se_idx) { + case ST_NCI_HCI_HOST_ID_ESE: + info->se_info.cb = cb; + info->se_info.cb_context = cb_context; + mod_timer(&info->se_info.bwi_timer, jiffies + + msecs_to_jiffies(info->se_info.wt_timeout)); + info->se_info.bwi_active = true; + return nci_hci_send_event(ndev, ST_NCI_APDU_READER_GATE, + ST_NCI_EVT_TRANSMIT_DATA, apdu, + apdu_length); + default: + return -ENODEV; + } +} +EXPORT_SYMBOL(st_nci_se_io); + +static void st_nci_se_wt_timeout(unsigned long data) +{ + /* + * No answer from the secure element + * within the defined timeout. + * Let's send a reset request as recovery procedure. + * According to the situation, we first try to send a software reset + * to the secure element. If the next command is still not + * answering in time, we send to the CLF a secure element hardware + * reset request. + */ + /* hardware reset managed through VCC_UICC_OUT power supply */ + u8 param = 0x01; + struct st_nci_info *info = (struct st_nci_info *) data; + + pr_debug("\n"); + + info->se_info.bwi_active = false; + + if (!info->se_info.xch_error) { + info->se_info.xch_error = true; + nci_hci_send_event(info->ndlc->ndev, ST_NCI_APDU_READER_GATE, + ST_NCI_EVT_SE_SOFT_RESET, NULL, 0); + } else { + info->se_info.xch_error = false; + nci_hci_send_event(info->ndlc->ndev, ST_NCI_DEVICE_MGNT_GATE, + ST_NCI_EVT_SE_HARD_RESET, ¶m, 1); + } + info->se_info.cb(info->se_info.cb_context, NULL, 0, -ETIME); +} + +static void st_nci_se_activation_timeout(unsigned long data) +{ + struct st_nci_info *info = (struct st_nci_info *) data; + + pr_debug("\n"); + + info->se_info.se_active = false; + + complete(&info->se_info.req_completion); +} + +int st_nci_se_init(struct nci_dev *ndev) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + + init_completion(&info->se_info.req_completion); + /* initialize timers */ + init_timer(&info->se_info.bwi_timer); + info->se_info.bwi_timer.data = (unsigned long)info; + info->se_info.bwi_timer.function = st_nci_se_wt_timeout; + info->se_info.bwi_active = false; + + init_timer(&info->se_info.se_active_timer); + info->se_info.se_active_timer.data = (unsigned long)info; + info->se_info.se_active_timer.function = + st_nci_se_activation_timeout; + info->se_info.se_active = false; + + info->se_info.xch_error = false; + + info->se_info.wt_timeout = + ST_NCI_BWI_TO_TIMEOUT(ST_NCI_ATR_DEFAULT_BWI); + + return 0; +} +EXPORT_SYMBOL(st_nci_se_init); + +void st_nci_se_deinit(struct nci_dev *ndev) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + + if (info->se_info.bwi_active) + del_timer_sync(&info->se_info.bwi_timer); + if (info->se_info.se_active) + del_timer_sync(&info->se_info.se_active_timer); + + info->se_info.se_active = false; + info->se_info.bwi_active = false; +} +EXPORT_SYMBOL(st_nci_se_deinit); + diff --git a/drivers/nfc/st-nci/st-nci_se.h b/drivers/nfc/st-nci/st-nci_se.h new file mode 100644 index 000000000000..ea66e879d67f --- /dev/null +++ b/drivers/nfc/st-nci/st-nci_se.h @@ -0,0 +1,61 @@ +/* + * Secure Element Driver for STMicroelectronics NFC NCI Chip + * + * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ +#ifndef __LOCAL_ST_NCI_SE_H_ +#define __LOCAL_ST_NCI_SE_H_ + +/* + * ref ISO7816-3 chap 8.1. the initial character TS is followed by a + * sequence of at most 32 characters. + */ +#define ST_NCI_ESE_MAX_LENGTH 33 +#define ST_NCI_HCI_HOST_ID_ESE 0xc0 + +struct st_nci_se_info { + u8 atr[ST_NCI_ESE_MAX_LENGTH]; + struct completion req_completion; + + struct timer_list bwi_timer; + int wt_timeout; /* in msecs */ + bool bwi_active; + + struct timer_list se_active_timer; + bool se_active; + + bool xch_error; + + se_io_cb_t cb; + void *cb_context; +}; + +int st_nci_se_init(struct nci_dev *ndev); +void st_nci_se_deinit(struct nci_dev *ndev); + +int st_nci_discover_se(struct nci_dev *ndev); +int st_nci_enable_se(struct nci_dev *ndev, u32 se_idx); +int st_nci_disable_se(struct nci_dev *ndev, u32 se_idx); +int st_nci_se_io(struct nci_dev *ndev, u32 se_idx, + u8 *apdu, size_t apdu_length, + se_io_cb_t cb, void *cb_context); +int st_nci_hci_load_session(struct nci_dev *ndev); +void st_nci_hci_event_received(struct nci_dev *ndev, u8 pipe, + u8 event, struct sk_buff *skb); +void st_nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, u8 cmd, + struct sk_buff *skb); + + +#endif /* __LOCAL_ST_NCI_SE_H_ */ diff --git a/drivers/nfc/st21nfcb/Kconfig b/drivers/nfc/st21nfcb/Kconfig deleted file mode 100644 index e0322dd03a70..000000000000 --- a/drivers/nfc/st21nfcb/Kconfig +++ /dev/null @@ -1,22 +0,0 @@ -config NFC_ST21NFCB - tristate "STMicroelectronics ST21NFCB NFC driver" - depends on NFC_NCI - default n - ---help--- - STMicroelectronics ST21NFCB core driver. It implements the chipset - NCI logic and hooks into the NFC kernel APIs. Physical layers will - register against it. - - To compile this driver as a module, choose m here. The module will - be called st21nfcb. - Say N if unsure. - -config NFC_ST21NFCB_I2C - tristate "NFC ST21NFCB i2c support" - depends on NFC_ST21NFCB && I2C - ---help--- - This module adds support for the STMicroelectronics st21nfcb i2c interface. - Select this if your platform is using the i2c bus. - - If you choose to build a module, it'll be called st21nfcb_i2c. - Say N if unsure. diff --git a/drivers/nfc/st21nfcb/Makefile b/drivers/nfc/st21nfcb/Makefile deleted file mode 100644 index ce659a9e5a1a..000000000000 --- a/drivers/nfc/st21nfcb/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -# -# Makefile for ST21NFCB NCI based NFC driver -# - -st21nfcb_nci-objs = ndlc.o st21nfcb.o st21nfcb_se.o -obj-$(CONFIG_NFC_ST21NFCB) += st21nfcb_nci.o - -st21nfcb_i2c-objs = i2c.o -obj-$(CONFIG_NFC_ST21NFCB_I2C) += st21nfcb_i2c.o diff --git a/drivers/nfc/st21nfcb/i2c.c b/drivers/nfc/st21nfcb/i2c.c deleted file mode 100644 index dbc0dfd8ae85..000000000000 --- a/drivers/nfc/st21nfcb/i2c.c +++ /dev/null @@ -1,384 +0,0 @@ -/* - * I2C Link Layer for ST21NFCB NCI based Driver - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ndlc.h" - -#define DRIVER_DESC "NCI NFC driver for ST21NFCB" - -/* ndlc header */ -#define ST21NFCB_FRAME_HEADROOM 1 -#define ST21NFCB_FRAME_TAILROOM 0 - -#define ST21NFCB_NCI_I2C_MIN_SIZE 4 /* PCB(1) + NCI Packet header(3) */ -#define ST21NFCB_NCI_I2C_MAX_SIZE 250 /* req 4.2.1 */ - -#define ST21NFCB_NCI_I2C_DRIVER_NAME "st21nfcb_nci_i2c" - -static struct i2c_device_id st21nfcb_nci_i2c_id_table[] = { - {ST21NFCB_NCI_DRIVER_NAME, 0}, - {} -}; -MODULE_DEVICE_TABLE(i2c, st21nfcb_nci_i2c_id_table); - -struct st21nfcb_i2c_phy { - struct i2c_client *i2c_dev; - struct llt_ndlc *ndlc; - - unsigned int gpio_reset; - unsigned int irq_polarity; -}; - -#define I2C_DUMP_SKB(info, skb) \ -do { \ - pr_debug("%s:\n", info); \ - print_hex_dump(KERN_DEBUG, "i2c: ", DUMP_PREFIX_OFFSET, \ - 16, 1, (skb)->data, (skb)->len, 0); \ -} while (0) - -static int st21nfcb_nci_i2c_enable(void *phy_id) -{ - struct st21nfcb_i2c_phy *phy = phy_id; - - gpio_set_value(phy->gpio_reset, 0); - usleep_range(10000, 15000); - gpio_set_value(phy->gpio_reset, 1); - usleep_range(80000, 85000); - - if (phy->ndlc->powered == 0) - enable_irq(phy->i2c_dev->irq); - - return 0; -} - -static void st21nfcb_nci_i2c_disable(void *phy_id) -{ - struct st21nfcb_i2c_phy *phy = phy_id; - - disable_irq_nosync(phy->i2c_dev->irq); -} - -/* - * Writing a frame must not return the number of written bytes. - * It must return either zero for success, or <0 for error. - * In addition, it must not alter the skb - */ -static int st21nfcb_nci_i2c_write(void *phy_id, struct sk_buff *skb) -{ - int r = -1; - struct st21nfcb_i2c_phy *phy = phy_id; - struct i2c_client *client = phy->i2c_dev; - - I2C_DUMP_SKB("st21nfcb_nci_i2c_write", skb); - - if (phy->ndlc->hard_fault != 0) - return phy->ndlc->hard_fault; - - r = i2c_master_send(client, skb->data, skb->len); - if (r < 0) { /* Retry, chip was in standby */ - usleep_range(1000, 4000); - r = i2c_master_send(client, skb->data, skb->len); - } - - if (r >= 0) { - if (r != skb->len) - r = -EREMOTEIO; - else - r = 0; - } - - return r; -} - -/* - * Reads an ndlc frame and returns it in a newly allocated sk_buff. - * returns: - * frame size : if received frame is complete (find ST21NFCB_SOF_EOF at - * end of read) - * -EAGAIN : if received frame is incomplete (not find ST21NFCB_SOF_EOF - * at end of read) - * -EREMOTEIO : i2c read error (fatal) - * -EBADMSG : frame was incorrect and discarded - * (value returned from st21nfcb_nci_i2c_repack) - * -EIO : if no ST21NFCB_SOF_EOF is found after reaching - * the read length end sequence - */ -static int st21nfcb_nci_i2c_read(struct st21nfcb_i2c_phy *phy, - struct sk_buff **skb) -{ - int r; - u8 len; - u8 buf[ST21NFCB_NCI_I2C_MAX_SIZE]; - struct i2c_client *client = phy->i2c_dev; - - r = i2c_master_recv(client, buf, ST21NFCB_NCI_I2C_MIN_SIZE); - if (r < 0) { /* Retry, chip was in standby */ - usleep_range(1000, 4000); - r = i2c_master_recv(client, buf, ST21NFCB_NCI_I2C_MIN_SIZE); - } - - if (r != ST21NFCB_NCI_I2C_MIN_SIZE) - return -EREMOTEIO; - - len = be16_to_cpu(*(__be16 *) (buf + 2)); - if (len > ST21NFCB_NCI_I2C_MAX_SIZE) { - nfc_err(&client->dev, "invalid frame len\n"); - return -EBADMSG; - } - - *skb = alloc_skb(ST21NFCB_NCI_I2C_MIN_SIZE + len, GFP_KERNEL); - if (*skb == NULL) - return -ENOMEM; - - skb_reserve(*skb, ST21NFCB_NCI_I2C_MIN_SIZE); - skb_put(*skb, ST21NFCB_NCI_I2C_MIN_SIZE); - memcpy((*skb)->data, buf, ST21NFCB_NCI_I2C_MIN_SIZE); - - if (!len) - return 0; - - r = i2c_master_recv(client, buf, len); - if (r != len) { - kfree_skb(*skb); - return -EREMOTEIO; - } - - skb_put(*skb, len); - memcpy((*skb)->data + ST21NFCB_NCI_I2C_MIN_SIZE, buf, len); - - I2C_DUMP_SKB("i2c frame read", *skb); - - return 0; -} - -/* - * Reads an ndlc frame from the chip. - * - * On ST21NFCB, IRQ goes in idle state when read starts. - */ -static irqreturn_t st21nfcb_nci_irq_thread_fn(int irq, void *phy_id) -{ - struct st21nfcb_i2c_phy *phy = phy_id; - struct i2c_client *client; - struct sk_buff *skb = NULL; - int r; - - if (!phy || !phy->ndlc || irq != phy->i2c_dev->irq) { - WARN_ON_ONCE(1); - return IRQ_NONE; - } - - client = phy->i2c_dev; - dev_dbg(&client->dev, "IRQ\n"); - - if (phy->ndlc->hard_fault) - return IRQ_HANDLED; - - if (!phy->ndlc->powered) { - st21nfcb_nci_i2c_disable(phy); - return IRQ_HANDLED; - } - - r = st21nfcb_nci_i2c_read(phy, &skb); - if (r == -EREMOTEIO || r == -ENOMEM || r == -EBADMSG) - return IRQ_HANDLED; - - ndlc_recv(phy->ndlc, skb); - - return IRQ_HANDLED; -} - -static struct nfc_phy_ops i2c_phy_ops = { - .write = st21nfcb_nci_i2c_write, - .enable = st21nfcb_nci_i2c_enable, - .disable = st21nfcb_nci_i2c_disable, -}; - -#ifdef CONFIG_OF -static int st21nfcb_nci_i2c_of_request_resources(struct i2c_client *client) -{ - struct st21nfcb_i2c_phy *phy = i2c_get_clientdata(client); - struct device_node *pp; - int gpio; - int r; - - pp = client->dev.of_node; - if (!pp) - return -ENODEV; - - /* Get GPIO from device tree */ - gpio = of_get_named_gpio(pp, "reset-gpios", 0); - if (gpio < 0) { - nfc_err(&client->dev, - "Failed to retrieve reset-gpios from device tree\n"); - return gpio; - } - - /* GPIO request and configuration */ - r = devm_gpio_request_one(&client->dev, gpio, - GPIOF_OUT_INIT_HIGH, "clf_reset"); - if (r) { - nfc_err(&client->dev, "Failed to request reset pin\n"); - return r; - } - phy->gpio_reset = gpio; - - phy->irq_polarity = irq_get_trigger_type(client->irq); - - return 0; -} -#else -static int st21nfcb_nci_i2c_of_request_resources(struct i2c_client *client) -{ - return -ENODEV; -} -#endif - -static int st21nfcb_nci_i2c_request_resources(struct i2c_client *client) -{ - struct st21nfcb_nfc_platform_data *pdata; - struct st21nfcb_i2c_phy *phy = i2c_get_clientdata(client); - int r; - - pdata = client->dev.platform_data; - if (pdata == NULL) { - nfc_err(&client->dev, "No platform data\n"); - return -EINVAL; - } - - /* store for later use */ - phy->gpio_reset = pdata->gpio_reset; - phy->irq_polarity = pdata->irq_polarity; - - r = devm_gpio_request_one(&client->dev, - phy->gpio_reset, GPIOF_OUT_INIT_HIGH, "clf_reset"); - if (r) { - pr_err("%s : reset gpio_request failed\n", __FILE__); - return r; - } - - return 0; -} - -static int st21nfcb_nci_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) -{ - struct st21nfcb_i2c_phy *phy; - struct st21nfcb_nfc_platform_data *pdata; - int r; - - dev_dbg(&client->dev, "%s\n", __func__); - dev_dbg(&client->dev, "IRQ: %d\n", client->irq); - - if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { - nfc_err(&client->dev, "Need I2C_FUNC_I2C\n"); - return -ENODEV; - } - - phy = devm_kzalloc(&client->dev, sizeof(struct st21nfcb_i2c_phy), - GFP_KERNEL); - if (!phy) - return -ENOMEM; - - phy->i2c_dev = client; - - i2c_set_clientdata(client, phy); - - pdata = client->dev.platform_data; - if (!pdata && client->dev.of_node) { - r = st21nfcb_nci_i2c_of_request_resources(client); - if (r) { - nfc_err(&client->dev, "No platform data\n"); - return r; - } - } else if (pdata) { - r = st21nfcb_nci_i2c_request_resources(client); - if (r) { - nfc_err(&client->dev, - "Cannot get platform resources\n"); - return r; - } - } else { - nfc_err(&client->dev, - "st21nfcb platform resources not available\n"); - return -ENODEV; - } - - r = ndlc_probe(phy, &i2c_phy_ops, &client->dev, - ST21NFCB_FRAME_HEADROOM, ST21NFCB_FRAME_TAILROOM, - &phy->ndlc); - if (r < 0) { - nfc_err(&client->dev, "Unable to register ndlc layer\n"); - return r; - } - - r = devm_request_threaded_irq(&client->dev, client->irq, NULL, - st21nfcb_nci_irq_thread_fn, - phy->irq_polarity | IRQF_ONESHOT, - ST21NFCB_NCI_DRIVER_NAME, phy); - if (r < 0) - nfc_err(&client->dev, "Unable to register IRQ handler\n"); - - return r; -} - -static int st21nfcb_nci_i2c_remove(struct i2c_client *client) -{ - struct st21nfcb_i2c_phy *phy = i2c_get_clientdata(client); - - dev_dbg(&client->dev, "%s\n", __func__); - - ndlc_remove(phy->ndlc); - - return 0; -} - -#ifdef CONFIG_OF -static const struct of_device_id of_st21nfcb_i2c_match[] = { - { .compatible = "st,st21nfcb-i2c", }, - { .compatible = "st,st21nfcb_i2c", }, - {} -}; -MODULE_DEVICE_TABLE(of, of_st21nfcb_i2c_match); -#endif - -static struct i2c_driver st21nfcb_nci_i2c_driver = { - .driver = { - .owner = THIS_MODULE, - .name = ST21NFCB_NCI_I2C_DRIVER_NAME, - .of_match_table = of_match_ptr(of_st21nfcb_i2c_match), - }, - .probe = st21nfcb_nci_i2c_probe, - .id_table = st21nfcb_nci_i2c_id_table, - .remove = st21nfcb_nci_i2c_remove, -}; - -module_i2c_driver(st21nfcb_nci_i2c_driver); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/nfc/st21nfcb/ndlc.c b/drivers/nfc/st21nfcb/ndlc.c deleted file mode 100644 index 91e81f37b3a6..000000000000 --- a/drivers/nfc/st21nfcb/ndlc.c +++ /dev/null @@ -1,313 +0,0 @@ -/* - * Low Level Transport (NDLC) Driver for STMicroelectronics NFC Chip - * - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#include -#include - -#include "ndlc.h" -#include "st21nfcb.h" - -#define NDLC_TIMER_T1 100 -#define NDLC_TIMER_T1_WAIT 400 -#define NDLC_TIMER_T2 1200 - -#define PCB_TYPE_DATAFRAME 0x80 -#define PCB_TYPE_SUPERVISOR 0xc0 -#define PCB_TYPE_MASK PCB_TYPE_SUPERVISOR - -#define PCB_SYNC_ACK 0x20 -#define PCB_SYNC_NACK 0x10 -#define PCB_SYNC_WAIT 0x30 -#define PCB_SYNC_NOINFO 0x00 -#define PCB_SYNC_MASK PCB_SYNC_WAIT - -#define PCB_DATAFRAME_RETRANSMIT_YES 0x00 -#define PCB_DATAFRAME_RETRANSMIT_NO 0x04 -#define PCB_DATAFRAME_RETRANSMIT_MASK PCB_DATAFRAME_RETRANSMIT_NO - -#define PCB_SUPERVISOR_RETRANSMIT_YES 0x00 -#define PCB_SUPERVISOR_RETRANSMIT_NO 0x02 -#define PCB_SUPERVISOR_RETRANSMIT_MASK PCB_SUPERVISOR_RETRANSMIT_NO - -#define PCB_FRAME_CRC_INFO_PRESENT 0x08 -#define PCB_FRAME_CRC_INFO_NOTPRESENT 0x00 -#define PCB_FRAME_CRC_INFO_MASK PCB_FRAME_CRC_INFO_PRESENT - -#define NDLC_DUMP_SKB(info, skb) \ -do { \ - pr_debug("%s:\n", info); \ - print_hex_dump(KERN_DEBUG, "ndlc: ", DUMP_PREFIX_OFFSET, \ - 16, 1, skb->data, skb->len, 0); \ -} while (0) - -int ndlc_open(struct llt_ndlc *ndlc) -{ - /* toggle reset pin */ - ndlc->ops->enable(ndlc->phy_id); - ndlc->powered = 1; - return 0; -} -EXPORT_SYMBOL(ndlc_open); - -void ndlc_close(struct llt_ndlc *ndlc) -{ - struct nci_mode_set_cmd cmd; - - cmd.cmd_type = ST21NFCB_NCI_SET_NFC_MODE; - cmd.mode = 0; - - /* toggle reset pin */ - ndlc->ops->enable(ndlc->phy_id); - - nci_prop_cmd(ndlc->ndev, ST21NFCB_NCI_CORE_PROP, - sizeof(struct nci_mode_set_cmd), (__u8 *)&cmd); - - ndlc->powered = 0; - ndlc->ops->disable(ndlc->phy_id); -} -EXPORT_SYMBOL(ndlc_close); - -int ndlc_send(struct llt_ndlc *ndlc, struct sk_buff *skb) -{ - /* add ndlc header */ - u8 pcb = PCB_TYPE_DATAFRAME | PCB_DATAFRAME_RETRANSMIT_NO | - PCB_FRAME_CRC_INFO_NOTPRESENT; - - *skb_push(skb, 1) = pcb; - skb_queue_tail(&ndlc->send_q, skb); - - schedule_work(&ndlc->sm_work); - - return 0; -} -EXPORT_SYMBOL(ndlc_send); - -static void llt_ndlc_send_queue(struct llt_ndlc *ndlc) -{ - struct sk_buff *skb; - int r; - unsigned long time_sent; - - if (ndlc->send_q.qlen) - pr_debug("sendQlen=%d unackQlen=%d\n", - ndlc->send_q.qlen, ndlc->ack_pending_q.qlen); - - while (ndlc->send_q.qlen) { - skb = skb_dequeue(&ndlc->send_q); - NDLC_DUMP_SKB("ndlc frame written", skb); - r = ndlc->ops->write(ndlc->phy_id, skb); - if (r < 0) { - ndlc->hard_fault = r; - break; - } - time_sent = jiffies; - *(unsigned long *)skb->cb = time_sent; - - skb_queue_tail(&ndlc->ack_pending_q, skb); - - /* start timer t1 for ndlc aknowledge */ - ndlc->t1_active = true; - mod_timer(&ndlc->t1_timer, time_sent + - msecs_to_jiffies(NDLC_TIMER_T1)); - /* start timer t2 for chip availability */ - ndlc->t2_active = true; - mod_timer(&ndlc->t2_timer, time_sent + - msecs_to_jiffies(NDLC_TIMER_T2)); - } -} - -static void llt_ndlc_requeue_data_pending(struct llt_ndlc *ndlc) -{ - struct sk_buff *skb; - u8 pcb; - - while ((skb = skb_dequeue_tail(&ndlc->ack_pending_q))) { - pcb = skb->data[0]; - switch (pcb & PCB_TYPE_MASK) { - case PCB_TYPE_SUPERVISOR: - skb->data[0] = (pcb & ~PCB_SUPERVISOR_RETRANSMIT_MASK) | - PCB_SUPERVISOR_RETRANSMIT_YES; - break; - case PCB_TYPE_DATAFRAME: - skb->data[0] = (pcb & ~PCB_DATAFRAME_RETRANSMIT_MASK) | - PCB_DATAFRAME_RETRANSMIT_YES; - break; - default: - pr_err("UNKNOWN Packet Control Byte=%d\n", pcb); - kfree_skb(skb); - continue; - } - skb_queue_head(&ndlc->send_q, skb); - } -} - -static void llt_ndlc_rcv_queue(struct llt_ndlc *ndlc) -{ - struct sk_buff *skb; - u8 pcb; - unsigned long time_sent; - - if (ndlc->rcv_q.qlen) - pr_debug("rcvQlen=%d\n", ndlc->rcv_q.qlen); - - while ((skb = skb_dequeue(&ndlc->rcv_q)) != NULL) { - pcb = skb->data[0]; - skb_pull(skb, 1); - if ((pcb & PCB_TYPE_MASK) == PCB_TYPE_SUPERVISOR) { - switch (pcb & PCB_SYNC_MASK) { - case PCB_SYNC_ACK: - del_timer_sync(&ndlc->t1_timer); - del_timer_sync(&ndlc->t2_timer); - ndlc->t2_active = false; - ndlc->t1_active = false; - break; - case PCB_SYNC_NACK: - llt_ndlc_requeue_data_pending(ndlc); - llt_ndlc_send_queue(ndlc); - /* start timer t1 for ndlc aknowledge */ - time_sent = jiffies; - ndlc->t1_active = true; - mod_timer(&ndlc->t1_timer, time_sent + - msecs_to_jiffies(NDLC_TIMER_T1)); - break; - case PCB_SYNC_WAIT: - time_sent = jiffies; - ndlc->t1_active = true; - mod_timer(&ndlc->t1_timer, time_sent + - msecs_to_jiffies(NDLC_TIMER_T1_WAIT)); - break; - default: - pr_err("UNKNOWN Packet Control Byte=%d\n", pcb); - kfree_skb(skb); - break; - } - } else { - nci_recv_frame(ndlc->ndev, skb); - } - } -} - -static void llt_ndlc_sm_work(struct work_struct *work) -{ - struct llt_ndlc *ndlc = container_of(work, struct llt_ndlc, sm_work); - - llt_ndlc_send_queue(ndlc); - llt_ndlc_rcv_queue(ndlc); - - if (ndlc->t1_active && timer_pending(&ndlc->t1_timer) == 0) { - pr_debug - ("Handle T1(recv SUPERVISOR) elapsed (T1 now inactive)\n"); - ndlc->t1_active = false; - - llt_ndlc_requeue_data_pending(ndlc); - llt_ndlc_send_queue(ndlc); - } - - if (ndlc->t2_active && timer_pending(&ndlc->t2_timer) == 0) { - pr_debug("Handle T2(recv DATA) elapsed (T2 now inactive)\n"); - ndlc->t2_active = false; - ndlc->t1_active = false; - del_timer_sync(&ndlc->t1_timer); - del_timer_sync(&ndlc->t2_timer); - ndlc_close(ndlc); - ndlc->hard_fault = -EREMOTEIO; - } -} - -void ndlc_recv(struct llt_ndlc *ndlc, struct sk_buff *skb) -{ - if (skb == NULL) { - pr_err("NULL Frame -> link is dead\n"); - ndlc->hard_fault = -EREMOTEIO; - ndlc_close(ndlc); - } else { - NDLC_DUMP_SKB("incoming frame", skb); - skb_queue_tail(&ndlc->rcv_q, skb); - } - - schedule_work(&ndlc->sm_work); -} -EXPORT_SYMBOL(ndlc_recv); - -static void ndlc_t1_timeout(unsigned long data) -{ - struct llt_ndlc *ndlc = (struct llt_ndlc *)data; - - pr_debug("\n"); - - schedule_work(&ndlc->sm_work); -} - -static void ndlc_t2_timeout(unsigned long data) -{ - struct llt_ndlc *ndlc = (struct llt_ndlc *)data; - - pr_debug("\n"); - - schedule_work(&ndlc->sm_work); -} - -int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev, - int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id) -{ - struct llt_ndlc *ndlc; - - ndlc = devm_kzalloc(dev, sizeof(struct llt_ndlc), GFP_KERNEL); - if (!ndlc) - return -ENOMEM; - - ndlc->ops = phy_ops; - ndlc->phy_id = phy_id; - ndlc->dev = dev; - ndlc->powered = 0; - - *ndlc_id = ndlc; - - /* initialize timers */ - init_timer(&ndlc->t1_timer); - ndlc->t1_timer.data = (unsigned long)ndlc; - ndlc->t1_timer.function = ndlc_t1_timeout; - - init_timer(&ndlc->t2_timer); - ndlc->t2_timer.data = (unsigned long)ndlc; - ndlc->t2_timer.function = ndlc_t2_timeout; - - skb_queue_head_init(&ndlc->rcv_q); - skb_queue_head_init(&ndlc->send_q); - skb_queue_head_init(&ndlc->ack_pending_q); - - INIT_WORK(&ndlc->sm_work, llt_ndlc_sm_work); - - return st21nfcb_nci_probe(ndlc, phy_headroom, phy_tailroom); -} -EXPORT_SYMBOL(ndlc_probe); - -void ndlc_remove(struct llt_ndlc *ndlc) -{ - st21nfcb_nci_remove(ndlc->ndev); - - /* cancel timers */ - del_timer_sync(&ndlc->t1_timer); - del_timer_sync(&ndlc->t2_timer); - ndlc->t2_active = false; - ndlc->t1_active = false; - - skb_queue_purge(&ndlc->rcv_q); - skb_queue_purge(&ndlc->send_q); -} -EXPORT_SYMBOL(ndlc_remove); diff --git a/drivers/nfc/st21nfcb/ndlc.h b/drivers/nfc/st21nfcb/ndlc.h deleted file mode 100644 index cf6a9d9f2983..000000000000 --- a/drivers/nfc/st21nfcb/ndlc.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * NCI based Driver for STMicroelectronics NFC Chip - * - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#ifndef __LOCAL_NDLC_H_ -#define __LOCAL_NDLC_H_ - -#include -#include - -/* Low Level Transport description */ -struct llt_ndlc { - struct nci_dev *ndev; - struct nfc_phy_ops *ops; - void *phy_id; - - struct timer_list t1_timer; - bool t1_active; - - struct timer_list t2_timer; - bool t2_active; - - struct sk_buff_head rcv_q; - struct sk_buff_head send_q; - struct sk_buff_head ack_pending_q; - - struct work_struct sm_work; - - struct device *dev; - - /* - * < 0 if hardware error occured - * and prevents normal operation. - */ - int hard_fault; - int powered; -}; - -int ndlc_open(struct llt_ndlc *ndlc); -void ndlc_close(struct llt_ndlc *ndlc); -int ndlc_send(struct llt_ndlc *ndlc, struct sk_buff *skb); -void ndlc_recv(struct llt_ndlc *ndlc, struct sk_buff *skb); -int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev, - int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id); -void ndlc_remove(struct llt_ndlc *ndlc); -#endif /* __LOCAL_NDLC_H__ */ diff --git a/drivers/nfc/st21nfcb/st21nfcb.c b/drivers/nfc/st21nfcb/st21nfcb.c deleted file mode 100644 index a16c3a3d3fff..000000000000 --- a/drivers/nfc/st21nfcb/st21nfcb.c +++ /dev/null @@ -1,179 +0,0 @@ -/* - * NCI based Driver for STMicroelectronics NFC Chip - * - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#include -#include -#include -#include -#include -#include - -#include "st21nfcb.h" -#include "st21nfcb_se.h" - -#define DRIVER_DESC "NCI NFC driver for ST21NFCB" - -#define ST21NFCB_NCI1_X_PROPRIETARY_ISO15693 0x83 - -static int st21nfcb_nci_init(struct nci_dev *ndev) -{ - struct nci_mode_set_cmd cmd; - - cmd.cmd_type = ST21NFCB_NCI_SET_NFC_MODE; - cmd.mode = 1; - - return nci_prop_cmd(ndev, ST21NFCB_NCI_CORE_PROP, - sizeof(struct nci_mode_set_cmd), (__u8 *)&cmd); -} - -static int st21nfcb_nci_open(struct nci_dev *ndev) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - int r; - - if (test_and_set_bit(ST21NFCB_NCI_RUNNING, &info->flags)) - return 0; - - r = ndlc_open(info->ndlc); - if (r) - clear_bit(ST21NFCB_NCI_RUNNING, &info->flags); - - return r; -} - -static int st21nfcb_nci_close(struct nci_dev *ndev) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - - if (!test_bit(ST21NFCB_NCI_RUNNING, &info->flags)) - return 0; - - ndlc_close(info->ndlc); - - clear_bit(ST21NFCB_NCI_RUNNING, &info->flags); - - return 0; -} - -static int st21nfcb_nci_send(struct nci_dev *ndev, struct sk_buff *skb) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - - skb->dev = (void *)ndev; - - if (!test_bit(ST21NFCB_NCI_RUNNING, &info->flags)) - return -EBUSY; - - return ndlc_send(info->ndlc, skb); -} - -static __u32 st21nfcb_nci_get_rfprotocol(struct nci_dev *ndev, - __u8 rf_protocol) -{ - return rf_protocol == ST21NFCB_NCI1_X_PROPRIETARY_ISO15693 ? - NFC_PROTO_ISO15693_MASK : 0; -} - -static int st21nfcb_nci_prop_rsp_packet(struct nci_dev *ndev, - struct sk_buff *skb) -{ - __u8 status = skb->data[0]; - - nci_req_complete(ndev, status); - return 0; -} - -static struct nci_prop_ops st21nfcb_nci_prop_ops[] = { - { - .opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, - ST21NFCB_NCI_CORE_PROP), - .rsp = st21nfcb_nci_prop_rsp_packet, - }, -}; - -static struct nci_ops st21nfcb_nci_ops = { - .init = st21nfcb_nci_init, - .open = st21nfcb_nci_open, - .close = st21nfcb_nci_close, - .send = st21nfcb_nci_send, - .get_rfprotocol = st21nfcb_nci_get_rfprotocol, - .discover_se = st21nfcb_nci_discover_se, - .enable_se = st21nfcb_nci_enable_se, - .disable_se = st21nfcb_nci_disable_se, - .se_io = st21nfcb_nci_se_io, - .hci_load_session = st21nfcb_hci_load_session, - .hci_event_received = st21nfcb_hci_event_received, - .hci_cmd_received = st21nfcb_hci_cmd_received, - .prop_ops = st21nfcb_nci_prop_ops, - .n_prop_ops = ARRAY_SIZE(st21nfcb_nci_prop_ops), -}; - -int st21nfcb_nci_probe(struct llt_ndlc *ndlc, int phy_headroom, - int phy_tailroom) -{ - struct st21nfcb_nci_info *info; - int r; - u32 protocols; - - info = devm_kzalloc(ndlc->dev, - sizeof(struct st21nfcb_nci_info), GFP_KERNEL); - if (!info) - return -ENOMEM; - - protocols = NFC_PROTO_JEWEL_MASK - | NFC_PROTO_MIFARE_MASK - | NFC_PROTO_FELICA_MASK - | NFC_PROTO_ISO14443_MASK - | NFC_PROTO_ISO14443_B_MASK - | NFC_PROTO_ISO15693_MASK - | NFC_PROTO_NFC_DEP_MASK; - - ndlc->ndev = nci_allocate_device(&st21nfcb_nci_ops, protocols, - phy_headroom, phy_tailroom); - if (!ndlc->ndev) { - pr_err("Cannot allocate nfc ndev\n"); - return -ENOMEM; - } - info->ndlc = ndlc; - - nci_set_drvdata(ndlc->ndev, info); - - r = nci_register_device(ndlc->ndev); - if (r) { - pr_err("Cannot register nfc device to nci core\n"); - nci_free_device(ndlc->ndev); - return r; - } - - return st21nfcb_se_init(ndlc->ndev); -} -EXPORT_SYMBOL_GPL(st21nfcb_nci_probe); - -void st21nfcb_nci_remove(struct nci_dev *ndev) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - - ndlc_close(info->ndlc); - - nci_unregister_device(ndev); - nci_free_device(ndev); -} -EXPORT_SYMBOL_GPL(st21nfcb_nci_remove); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/nfc/st21nfcb/st21nfcb.h b/drivers/nfc/st21nfcb/st21nfcb.h deleted file mode 100644 index 710636325c1f..000000000000 --- a/drivers/nfc/st21nfcb/st21nfcb.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * NCI based Driver for STMicroelectronics NFC Chip - * - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#ifndef __LOCAL_ST21NFCB_H_ -#define __LOCAL_ST21NFCB_H_ - -#include "st21nfcb_se.h" -#include "ndlc.h" - -/* Define private flags: */ -#define ST21NFCB_NCI_RUNNING 1 - -#define ST21NFCB_NCI_CORE_PROP 0x01 -#define ST21NFCB_NCI_SET_NFC_MODE 0x02 - -struct nci_mode_set_cmd { - u8 cmd_type; - u8 mode; -} __packed; - -struct nci_mode_set_rsp { - u8 status; -} __packed; - -struct st21nfcb_nci_info { - struct llt_ndlc *ndlc; - unsigned long flags; - struct st21nfcb_se_info se_info; -}; - -void st21nfcb_nci_remove(struct nci_dev *ndev); -int st21nfcb_nci_probe(struct llt_ndlc *ndlc, int phy_headroom, - int phy_tailroom); - -#endif /* __LOCAL_ST21NFCB_H_ */ diff --git a/drivers/nfc/st21nfcb/st21nfcb_se.c b/drivers/nfc/st21nfcb/st21nfcb_se.c deleted file mode 100644 index 24862a525fb5..000000000000 --- a/drivers/nfc/st21nfcb/st21nfcb_se.c +++ /dev/null @@ -1,713 +0,0 @@ -/* - * NCI based Driver for STMicroelectronics NFC Chip - * - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#include -#include -#include -#include -#include - -#include "st21nfcb.h" -#include "st21nfcb_se.h" - -struct st21nfcb_pipe_info { - u8 pipe_state; - u8 src_host_id; - u8 src_gate_id; - u8 dst_host_id; - u8 dst_gate_id; -} __packed; - -/* Hosts */ -#define ST21NFCB_HOST_CONTROLLER_ID 0x00 -#define ST21NFCB_TERMINAL_HOST_ID 0x01 -#define ST21NFCB_UICC_HOST_ID 0x02 -#define ST21NFCB_ESE_HOST_ID 0xc0 - -/* Gates */ -#define ST21NFCB_DEVICE_MGNT_GATE 0x01 -#define ST21NFCB_APDU_READER_GATE 0xf0 -#define ST21NFCB_CONNECTIVITY_GATE 0x41 - -/* Pipes */ -#define ST21NFCB_DEVICE_MGNT_PIPE 0x02 - -/* Connectivity pipe only */ -#define ST21NFCB_SE_COUNT_PIPE_UICC 0x01 -/* Connectivity + APDU Reader pipe */ -#define ST21NFCB_SE_COUNT_PIPE_EMBEDDED 0x02 - -#define ST21NFCB_SE_TO_HOT_PLUG 1000 /* msecs */ -#define ST21NFCB_SE_TO_PIPES 2000 - -#define ST21NFCB_EVT_HOT_PLUG_IS_INHIBITED(x) (x->data[0] & 0x80) - -#define NCI_HCI_APDU_PARAM_ATR 0x01 -#define NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY 0x01 -#define NCI_HCI_ADMIN_PARAM_WHITELIST 0x03 -#define NCI_HCI_ADMIN_PARAM_HOST_LIST 0x04 - -#define ST21NFCB_EVT_SE_HARD_RESET 0x20 -#define ST21NFCB_EVT_TRANSMIT_DATA 0x10 -#define ST21NFCB_EVT_WTX_REQUEST 0x11 -#define ST21NFCB_EVT_SE_SOFT_RESET 0x11 -#define ST21NFCB_EVT_SE_END_OF_APDU_TRANSFER 0x21 -#define ST21NFCB_EVT_HOT_PLUG 0x03 - -#define ST21NFCB_SE_MODE_OFF 0x00 -#define ST21NFCB_SE_MODE_ON 0x01 - -#define ST21NFCB_EVT_CONNECTIVITY 0x10 -#define ST21NFCB_EVT_TRANSACTION 0x12 - -#define ST21NFCB_DM_GETINFO 0x13 -#define ST21NFCB_DM_GETINFO_PIPE_LIST 0x02 -#define ST21NFCB_DM_GETINFO_PIPE_INFO 0x01 -#define ST21NFCB_DM_PIPE_CREATED 0x02 -#define ST21NFCB_DM_PIPE_OPEN 0x04 -#define ST21NFCB_DM_RF_ACTIVE 0x80 -#define ST21NFCB_DM_DISCONNECT 0x30 - -#define ST21NFCB_DM_IS_PIPE_OPEN(p) \ - ((p & 0x0f) == (ST21NFCB_DM_PIPE_CREATED | ST21NFCB_DM_PIPE_OPEN)) - -#define ST21NFCB_ATR_DEFAULT_BWI 0x04 - -/* - * WT = 2^BWI/10[s], convert into msecs and add a secure - * room by increasing by 2 this timeout - */ -#define ST21NFCB_BWI_TO_TIMEOUT(x) ((1 << x) * 200) -#define ST21NFCB_ATR_GET_Y_FROM_TD(x) (x >> 4) - -/* If TA is present bit 0 is set */ -#define ST21NFCB_ATR_TA_PRESENT(x) (x & 0x01) -/* If TB is present bit 1 is set */ -#define ST21NFCB_ATR_TB_PRESENT(x) (x & 0x02) - -#define ST21NFCB_NUM_DEVICES 256 - -static DECLARE_BITMAP(dev_mask, ST21NFCB_NUM_DEVICES); - -/* Here are the mandatory pipe for st21nfcb */ -static struct nci_hci_gate st21nfcb_gates[] = { - {NCI_HCI_ADMIN_GATE, NCI_HCI_ADMIN_PIPE, - ST21NFCB_HOST_CONTROLLER_ID}, - {NCI_HCI_LINK_MGMT_GATE, NCI_HCI_LINK_MGMT_PIPE, - ST21NFCB_HOST_CONTROLLER_ID}, - {ST21NFCB_DEVICE_MGNT_GATE, ST21NFCB_DEVICE_MGNT_PIPE, - ST21NFCB_HOST_CONTROLLER_ID}, - - /* Secure element pipes are created by secure element host */ - {ST21NFCB_CONNECTIVITY_GATE, NCI_HCI_DO_NOT_OPEN_PIPE, - ST21NFCB_HOST_CONTROLLER_ID}, - {ST21NFCB_APDU_READER_GATE, NCI_HCI_DO_NOT_OPEN_PIPE, - ST21NFCB_HOST_CONTROLLER_ID}, -}; - -static u8 st21nfcb_se_get_bwi(struct nci_dev *ndev) -{ - int i; - u8 td; - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - - /* Bits 8 to 5 of the first TB for T=1 encode BWI from zero to nine */ - for (i = 1; i < ST21NFCB_ESE_MAX_LENGTH; i++) { - td = ST21NFCB_ATR_GET_Y_FROM_TD(info->se_info.atr[i]); - if (ST21NFCB_ATR_TA_PRESENT(td)) - i++; - if (ST21NFCB_ATR_TB_PRESENT(td)) { - i++; - return info->se_info.atr[i] >> 4; - } - } - return ST21NFCB_ATR_DEFAULT_BWI; -} - -static void st21nfcb_se_get_atr(struct nci_dev *ndev) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - int r; - struct sk_buff *skb; - - r = nci_hci_get_param(ndev, ST21NFCB_APDU_READER_GATE, - NCI_HCI_APDU_PARAM_ATR, &skb); - if (r < 0) - return; - - if (skb->len <= ST21NFCB_ESE_MAX_LENGTH) { - memcpy(info->se_info.atr, skb->data, skb->len); - - info->se_info.wt_timeout = - ST21NFCB_BWI_TO_TIMEOUT(st21nfcb_se_get_bwi(ndev)); - } - kfree_skb(skb); -} - -int st21nfcb_hci_load_session(struct nci_dev *ndev) -{ - int i, j, r; - struct sk_buff *skb_pipe_list, *skb_pipe_info; - struct st21nfcb_pipe_info *dm_pipe_info; - u8 pipe_list[] = { ST21NFCB_DM_GETINFO_PIPE_LIST, - ST21NFCB_TERMINAL_HOST_ID}; - u8 pipe_info[] = { ST21NFCB_DM_GETINFO_PIPE_INFO, - ST21NFCB_TERMINAL_HOST_ID, 0}; - - /* On ST21NFCB device pipes number are dynamics - * If pipes are already created, hci_dev_up will fail. - * Doing a clear all pipe is a bad idea because: - * - It does useless EEPROM cycling - * - It might cause issue for secure elements support - * (such as removing connectivity or APDU reader pipe) - * A better approach on ST21NFCB is to: - * - get a pipe list for each host. - * (eg: ST21NFCB_HOST_CONTROLLER_ID for now). - * (TODO Later on UICC HOST and eSE HOST) - * - get pipe information - * - match retrieved pipe list in st21nfcb_gates - * ST21NFCB_DEVICE_MGNT_GATE is a proprietary gate - * with ST21NFCB_DEVICE_MGNT_PIPE. - * Pipe can be closed and need to be open. - */ - r = nci_hci_connect_gate(ndev, ST21NFCB_HOST_CONTROLLER_ID, - ST21NFCB_DEVICE_MGNT_GATE, - ST21NFCB_DEVICE_MGNT_PIPE); - if (r < 0) - goto free_info; - - /* Get pipe list */ - r = nci_hci_send_cmd(ndev, ST21NFCB_DEVICE_MGNT_GATE, - ST21NFCB_DM_GETINFO, pipe_list, sizeof(pipe_list), - &skb_pipe_list); - if (r < 0) - goto free_info; - - /* Complete the existing gate_pipe table */ - for (i = 0; i < skb_pipe_list->len; i++) { - pipe_info[2] = skb_pipe_list->data[i]; - r = nci_hci_send_cmd(ndev, ST21NFCB_DEVICE_MGNT_GATE, - ST21NFCB_DM_GETINFO, pipe_info, - sizeof(pipe_info), &skb_pipe_info); - - if (r) - continue; - - /* - * Match pipe ID and gate ID - * Output format from ST21NFC_DM_GETINFO is: - * - pipe state (1byte) - * - source hid (1byte) - * - source gid (1byte) - * - destination hid (1byte) - * - destination gid (1byte) - */ - dm_pipe_info = (struct st21nfcb_pipe_info *)skb_pipe_info->data; - if (dm_pipe_info->dst_gate_id == ST21NFCB_APDU_READER_GATE && - dm_pipe_info->src_host_id != ST21NFCB_ESE_HOST_ID) { - pr_err("Unexpected apdu_reader pipe on host %x\n", - dm_pipe_info->src_host_id); - continue; - } - - for (j = 0; (j < ARRAY_SIZE(st21nfcb_gates)) && - (st21nfcb_gates[j].gate != dm_pipe_info->dst_gate_id); j++) - ; - - if (j < ARRAY_SIZE(st21nfcb_gates) && - st21nfcb_gates[j].gate == dm_pipe_info->dst_gate_id && - ST21NFCB_DM_IS_PIPE_OPEN(dm_pipe_info->pipe_state)) { - st21nfcb_gates[j].pipe = pipe_info[2]; - - ndev->hci_dev->gate2pipe[st21nfcb_gates[j].gate] = - st21nfcb_gates[j].pipe; - ndev->hci_dev->pipes[st21nfcb_gates[j].pipe].gate = - st21nfcb_gates[j].gate; - ndev->hci_dev->pipes[st21nfcb_gates[j].pipe].host = - dm_pipe_info->src_host_id; - } - } - - memcpy(ndev->hci_dev->init_data.gates, st21nfcb_gates, - sizeof(st21nfcb_gates)); - -free_info: - kfree_skb(skb_pipe_info); - kfree_skb(skb_pipe_list); - return r; -} -EXPORT_SYMBOL_GPL(st21nfcb_hci_load_session); - -static void st21nfcb_hci_admin_event_received(struct nci_dev *ndev, - u8 event, struct sk_buff *skb) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - - switch (event) { - case ST21NFCB_EVT_HOT_PLUG: - if (info->se_info.se_active) { - if (!ST21NFCB_EVT_HOT_PLUG_IS_INHIBITED(skb)) { - del_timer_sync(&info->se_info.se_active_timer); - info->se_info.se_active = false; - complete(&info->se_info.req_completion); - } else { - mod_timer(&info->se_info.se_active_timer, - jiffies + - msecs_to_jiffies(ST21NFCB_SE_TO_PIPES)); - } - } - break; - } -} - -static int st21nfcb_hci_apdu_reader_event_received(struct nci_dev *ndev, - u8 event, - struct sk_buff *skb) -{ - int r = 0; - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - - pr_debug("apdu reader gate event: %x\n", event); - - switch (event) { - case ST21NFCB_EVT_TRANSMIT_DATA: - del_timer_sync(&info->se_info.bwi_timer); - info->se_info.bwi_active = false; - info->se_info.cb(info->se_info.cb_context, - skb->data, skb->len, 0); - break; - case ST21NFCB_EVT_WTX_REQUEST: - mod_timer(&info->se_info.bwi_timer, jiffies + - msecs_to_jiffies(info->se_info.wt_timeout)); - break; - } - - kfree_skb(skb); - return r; -} - -/* - * Returns: - * <= 0: driver handled the event, skb consumed - * 1: driver does not handle the event, please do standard processing - */ -static int st21nfcb_hci_connectivity_event_received(struct nci_dev *ndev, - u8 host, u8 event, - struct sk_buff *skb) -{ - int r = 0; - struct device *dev = &ndev->nfc_dev->dev; - struct nfc_evt_transaction *transaction; - - pr_debug("connectivity gate event: %x\n", event); - - switch (event) { - case ST21NFCB_EVT_CONNECTIVITY: - - break; - case ST21NFCB_EVT_TRANSACTION: - /* According to specification etsi 102 622 - * 11.2.2.4 EVT_TRANSACTION Table 52 - * Description Tag Length - * AID 81 5 to 16 - * PARAMETERS 82 0 to 255 - */ - if (skb->len < NFC_MIN_AID_LENGTH + 2 && - skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) - return -EPROTO; - - transaction = (struct nfc_evt_transaction *)devm_kzalloc(dev, - skb->len - 2, GFP_KERNEL); - - transaction->aid_len = skb->data[1]; - memcpy(transaction->aid, &skb->data[2], transaction->aid_len); - - /* Check next byte is PARAMETERS tag (82) */ - if (skb->data[transaction->aid_len + 2] != - NFC_EVT_TRANSACTION_PARAMS_TAG) - return -EPROTO; - - transaction->params_len = skb->data[transaction->aid_len + 3]; - memcpy(transaction->params, skb->data + - transaction->aid_len + 4, transaction->params_len); - - r = nfc_se_transaction(ndev->nfc_dev, host, transaction); - break; - default: - return 1; - } - kfree_skb(skb); - return r; -} - -void st21nfcb_hci_event_received(struct nci_dev *ndev, u8 pipe, - u8 event, struct sk_buff *skb) -{ - u8 gate = ndev->hci_dev->pipes[pipe].gate; - u8 host = ndev->hci_dev->pipes[pipe].host; - - switch (gate) { - case NCI_HCI_ADMIN_GATE: - st21nfcb_hci_admin_event_received(ndev, event, skb); - break; - case ST21NFCB_APDU_READER_GATE: - st21nfcb_hci_apdu_reader_event_received(ndev, event, skb); - break; - case ST21NFCB_CONNECTIVITY_GATE: - st21nfcb_hci_connectivity_event_received(ndev, host, event, - skb); - break; - } -} -EXPORT_SYMBOL_GPL(st21nfcb_hci_event_received); - - -void st21nfcb_hci_cmd_received(struct nci_dev *ndev, u8 pipe, u8 cmd, - struct sk_buff *skb) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - u8 gate = ndev->hci_dev->pipes[pipe].gate; - - pr_debug("cmd: %x\n", cmd); - - switch (cmd) { - case NCI_HCI_ANY_OPEN_PIPE: - if (gate != ST21NFCB_APDU_READER_GATE && - ndev->hci_dev->pipes[pipe].host != ST21NFCB_UICC_HOST_ID) - ndev->hci_dev->count_pipes++; - - if (ndev->hci_dev->count_pipes == - ndev->hci_dev->expected_pipes) { - del_timer_sync(&info->se_info.se_active_timer); - info->se_info.se_active = false; - ndev->hci_dev->count_pipes = 0; - complete(&info->se_info.req_completion); - } - break; - } -} -EXPORT_SYMBOL_GPL(st21nfcb_hci_cmd_received); - -/* - * Remarks: On some early st21nfcb firmware, nci_nfcee_mode_set(0) - * is rejected - */ -static int st21nfcb_nci_control_se(struct nci_dev *ndev, u8 se_idx, - u8 state) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - int r; - struct sk_buff *sk_host_list; - u8 host_id; - - switch (se_idx) { - case ST21NFCB_UICC_HOST_ID: - ndev->hci_dev->count_pipes = 0; - ndev->hci_dev->expected_pipes = ST21NFCB_SE_COUNT_PIPE_UICC; - break; - case ST21NFCB_ESE_HOST_ID: - ndev->hci_dev->count_pipes = 0; - ndev->hci_dev->expected_pipes = ST21NFCB_SE_COUNT_PIPE_EMBEDDED; - break; - default: - return -EINVAL; - } - - /* - * Wait for an EVT_HOT_PLUG in order to - * retrieve a relevant host list. - */ - reinit_completion(&info->se_info.req_completion); - r = nci_nfcee_mode_set(ndev, se_idx, NCI_NFCEE_ENABLE); - if (r != NCI_STATUS_OK) - return r; - - mod_timer(&info->se_info.se_active_timer, jiffies + - msecs_to_jiffies(ST21NFCB_SE_TO_HOT_PLUG)); - info->se_info.se_active = true; - - /* Ignore return value and check in any case the host_list */ - wait_for_completion_interruptible(&info->se_info.req_completion); - - /* There might be some "collision" after receiving a HOT_PLUG event - * This may cause the CLF to not answer to the next hci command. - * There is no possible synchronization to prevent this. - * Adding a small delay is the only way to solve the issue. - */ - usleep_range(3000, 5000); - - r = nci_hci_get_param(ndev, NCI_HCI_ADMIN_GATE, - NCI_HCI_ADMIN_PARAM_HOST_LIST, &sk_host_list); - if (r != NCI_HCI_ANY_OK) - return r; - - host_id = sk_host_list->data[sk_host_list->len - 1]; - kfree_skb(sk_host_list); - if (state == ST21NFCB_SE_MODE_ON && host_id == se_idx) - return se_idx; - else if (state == ST21NFCB_SE_MODE_OFF && host_id != se_idx) - return se_idx; - - return -1; -} - -int st21nfcb_nci_disable_se(struct nci_dev *ndev, u32 se_idx) -{ - int r; - - pr_debug("st21nfcb_nci_disable_se\n"); - - if (se_idx == NFC_SE_EMBEDDED) { - r = nci_hci_send_event(ndev, ST21NFCB_APDU_READER_GATE, - ST21NFCB_EVT_SE_END_OF_APDU_TRANSFER, NULL, 0); - if (r < 0) - return r; - } - - return 0; -} -EXPORT_SYMBOL_GPL(st21nfcb_nci_disable_se); - -int st21nfcb_nci_enable_se(struct nci_dev *ndev, u32 se_idx) -{ - int r; - - pr_debug("st21nfcb_nci_enable_se\n"); - - if (se_idx == ST21NFCB_HCI_HOST_ID_ESE) { - r = nci_hci_send_event(ndev, ST21NFCB_APDU_READER_GATE, - ST21NFCB_EVT_SE_SOFT_RESET, NULL, 0); - if (r < 0) - return r; - } - - return 0; -} -EXPORT_SYMBOL_GPL(st21nfcb_nci_enable_se); - -static int st21nfcb_hci_network_init(struct nci_dev *ndev) -{ - struct core_conn_create_dest_spec_params *dest_params; - struct dest_spec_params spec_params; - struct nci_conn_info *conn_info; - int r, dev_num; - - dest_params = - kzalloc(sizeof(struct core_conn_create_dest_spec_params) + - sizeof(struct dest_spec_params), GFP_KERNEL); - if (dest_params == NULL) { - r = -ENOMEM; - goto exit; - } - - dest_params->type = NCI_DESTINATION_SPECIFIC_PARAM_NFCEE_TYPE; - dest_params->length = sizeof(struct dest_spec_params); - spec_params.id = ndev->hci_dev->nfcee_id; - spec_params.protocol = NCI_NFCEE_INTERFACE_HCI_ACCESS; - memcpy(dest_params->value, &spec_params, sizeof(struct dest_spec_params)); - r = nci_core_conn_create(ndev, NCI_DESTINATION_NFCEE, 1, - sizeof(struct core_conn_create_dest_spec_params) + - sizeof(struct dest_spec_params), - dest_params); - if (r != NCI_STATUS_OK) - goto free_dest_params; - - conn_info = ndev->hci_dev->conn_info; - if (!conn_info) - goto free_dest_params; - - memcpy(ndev->hci_dev->init_data.gates, st21nfcb_gates, - sizeof(st21nfcb_gates)); - - /* - * Session id must include the driver name + i2c bus addr - * persistent info to discriminate 2 identical chips - */ - dev_num = find_first_zero_bit(dev_mask, ST21NFCB_NUM_DEVICES); - if (dev_num >= ST21NFCB_NUM_DEVICES) { - r = -ENODEV; - goto free_dest_params; - } - - scnprintf(ndev->hci_dev->init_data.session_id, - sizeof(ndev->hci_dev->init_data.session_id), - "%s%2x", "ST21BH", dev_num); - - r = nci_hci_dev_session_init(ndev); - if (r != NCI_HCI_ANY_OK) - goto free_dest_params; - - r = nci_nfcee_mode_set(ndev, ndev->hci_dev->conn_info->id, - NCI_NFCEE_ENABLE); - if (r != NCI_STATUS_OK) - goto free_dest_params; - -free_dest_params: - kfree(dest_params); - -exit: - return r; -} - -int st21nfcb_nci_discover_se(struct nci_dev *ndev) -{ - u8 param[2]; - int r; - int se_count = 0; - - pr_debug("st21nfcb_nci_discover_se\n"); - - r = st21nfcb_hci_network_init(ndev); - if (r != 0) - return r; - - param[0] = ST21NFCB_UICC_HOST_ID; - param[1] = ST21NFCB_HCI_HOST_ID_ESE; - r = nci_hci_set_param(ndev, NCI_HCI_ADMIN_GATE, - NCI_HCI_ADMIN_PARAM_WHITELIST, - param, sizeof(param)); - if (r != NCI_HCI_ANY_OK) - return r; - - r = st21nfcb_nci_control_se(ndev, ST21NFCB_UICC_HOST_ID, - ST21NFCB_SE_MODE_ON); - if (r == ST21NFCB_UICC_HOST_ID) { - nfc_add_se(ndev->nfc_dev, ST21NFCB_UICC_HOST_ID, NFC_SE_UICC); - se_count++; - } - - /* Try to enable eSE in order to check availability */ - r = st21nfcb_nci_control_se(ndev, ST21NFCB_HCI_HOST_ID_ESE, - ST21NFCB_SE_MODE_ON); - if (r == ST21NFCB_HCI_HOST_ID_ESE) { - nfc_add_se(ndev->nfc_dev, ST21NFCB_HCI_HOST_ID_ESE, - NFC_SE_EMBEDDED); - se_count++; - st21nfcb_se_get_atr(ndev); - } - - return !se_count; -} -EXPORT_SYMBOL_GPL(st21nfcb_nci_discover_se); - -int st21nfcb_nci_se_io(struct nci_dev *ndev, u32 se_idx, - u8 *apdu, size_t apdu_length, - se_io_cb_t cb, void *cb_context) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - - pr_debug("\n"); - - switch (se_idx) { - case ST21NFCB_HCI_HOST_ID_ESE: - info->se_info.cb = cb; - info->se_info.cb_context = cb_context; - mod_timer(&info->se_info.bwi_timer, jiffies + - msecs_to_jiffies(info->se_info.wt_timeout)); - info->se_info.bwi_active = true; - return nci_hci_send_event(ndev, ST21NFCB_APDU_READER_GATE, - ST21NFCB_EVT_TRANSMIT_DATA, apdu, - apdu_length); - default: - return -ENODEV; - } -} -EXPORT_SYMBOL(st21nfcb_nci_se_io); - -static void st21nfcb_se_wt_timeout(unsigned long data) -{ - /* - * No answer from the secure element - * within the defined timeout. - * Let's send a reset request as recovery procedure. - * According to the situation, we first try to send a software reset - * to the secure element. If the next command is still not - * answering in time, we send to the CLF a secure element hardware - * reset request. - */ - /* hardware reset managed through VCC_UICC_OUT power supply */ - u8 param = 0x01; - struct st21nfcb_nci_info *info = (struct st21nfcb_nci_info *) data; - - pr_debug("\n"); - - info->se_info.bwi_active = false; - - if (!info->se_info.xch_error) { - info->se_info.xch_error = true; - nci_hci_send_event(info->ndlc->ndev, ST21NFCB_APDU_READER_GATE, - ST21NFCB_EVT_SE_SOFT_RESET, NULL, 0); - } else { - info->se_info.xch_error = false; - nci_hci_send_event(info->ndlc->ndev, ST21NFCB_DEVICE_MGNT_GATE, - ST21NFCB_EVT_SE_HARD_RESET, ¶m, 1); - } - info->se_info.cb(info->se_info.cb_context, NULL, 0, -ETIME); -} - -static void st21nfcb_se_activation_timeout(unsigned long data) -{ - struct st21nfcb_nci_info *info = (struct st21nfcb_nci_info *) data; - - pr_debug("\n"); - - info->se_info.se_active = false; - - complete(&info->se_info.req_completion); -} - -int st21nfcb_se_init(struct nci_dev *ndev) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - - init_completion(&info->se_info.req_completion); - /* initialize timers */ - init_timer(&info->se_info.bwi_timer); - info->se_info.bwi_timer.data = (unsigned long)info; - info->se_info.bwi_timer.function = st21nfcb_se_wt_timeout; - info->se_info.bwi_active = false; - - init_timer(&info->se_info.se_active_timer); - info->se_info.se_active_timer.data = (unsigned long)info; - info->se_info.se_active_timer.function = - st21nfcb_se_activation_timeout; - info->se_info.se_active = false; - - info->se_info.xch_error = false; - - info->se_info.wt_timeout = - ST21NFCB_BWI_TO_TIMEOUT(ST21NFCB_ATR_DEFAULT_BWI); - - return 0; -} -EXPORT_SYMBOL(st21nfcb_se_init); - -void st21nfcb_se_deinit(struct nci_dev *ndev) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - - if (info->se_info.bwi_active) - del_timer_sync(&info->se_info.bwi_timer); - if (info->se_info.se_active) - del_timer_sync(&info->se_info.se_active_timer); - - info->se_info.se_active = false; - info->se_info.bwi_active = false; -} -EXPORT_SYMBOL(st21nfcb_se_deinit); - diff --git a/drivers/nfc/st21nfcb/st21nfcb_se.h b/drivers/nfc/st21nfcb/st21nfcb_se.h deleted file mode 100644 index 52a323872bea..000000000000 --- a/drivers/nfc/st21nfcb/st21nfcb_se.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * NCI based Driver for STMicroelectronics NFC Chip - * - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ -#ifndef __LOCAL_ST21NFCB_SE_H_ -#define __LOCAL_ST21NFCB_SE_H_ - -/* - * ref ISO7816-3 chap 8.1. the initial character TS is followed by a - * sequence of at most 32 characters. - */ -#define ST21NFCB_ESE_MAX_LENGTH 33 -#define ST21NFCB_HCI_HOST_ID_ESE 0xc0 - -struct st21nfcb_se_info { - u8 atr[ST21NFCB_ESE_MAX_LENGTH]; - struct completion req_completion; - - struct timer_list bwi_timer; - int wt_timeout; /* in msecs */ - bool bwi_active; - - struct timer_list se_active_timer; - bool se_active; - - bool xch_error; - - se_io_cb_t cb; - void *cb_context; -}; - -int st21nfcb_se_init(struct nci_dev *ndev); -void st21nfcb_se_deinit(struct nci_dev *ndev); - -int st21nfcb_nci_discover_se(struct nci_dev *ndev); -int st21nfcb_nci_enable_se(struct nci_dev *ndev, u32 se_idx); -int st21nfcb_nci_disable_se(struct nci_dev *ndev, u32 se_idx); -int st21nfcb_nci_se_io(struct nci_dev *ndev, u32 se_idx, - u8 *apdu, size_t apdu_length, - se_io_cb_t cb, void *cb_context); -int st21nfcb_hci_load_session(struct nci_dev *ndev); -void st21nfcb_hci_event_received(struct nci_dev *ndev, u8 pipe, - u8 event, struct sk_buff *skb); -void st21nfcb_hci_cmd_received(struct nci_dev *ndev, u8 pipe, u8 cmd, - struct sk_buff *skb); - - -#endif /* __LOCAL_ST21NFCB_NCI_H_ */ diff --git a/include/linux/platform_data/st-nci.h b/include/linux/platform_data/st-nci.h new file mode 100644 index 000000000000..d9d400a297bd --- /dev/null +++ b/include/linux/platform_data/st-nci.h @@ -0,0 +1,29 @@ +/* + * Driver include for ST NCI NFC chip family. + * + * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _ST_NCI_H_ +#define _ST_NCI_H_ + +#define ST_NCI_DRIVER_NAME "st_nci" + +struct st_nci_nfc_platform_data { + unsigned int gpio_reset; + unsigned int irq_polarity; +}; + +#endif /* _ST_NCI_H_ */ diff --git a/include/linux/platform_data/st21nfcb.h b/include/linux/platform_data/st21nfcb.h deleted file mode 100644 index b023373d9874..000000000000 --- a/include/linux/platform_data/st21nfcb.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Driver include for the ST21NFCB NFC chip. - * - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#ifndef _ST21NFCB_NCI_H_ -#define _ST21NFCB_NCI_H_ - -#define ST21NFCB_NCI_DRIVER_NAME "st21nfcb_nci" - -struct st21nfcb_nfc_platform_data { - unsigned int gpio_reset; - unsigned int irq_polarity; -}; - -#endif /* _ST21NFCB_NCI_H_ */ diff --git a/include/linux/platform_data/st_nci.h b/include/linux/platform_data/st_nci.h new file mode 100644 index 000000000000..d9d400a297bd --- /dev/null +++ b/include/linux/platform_data/st_nci.h @@ -0,0 +1,29 @@ +/* + * Driver include for ST NCI NFC chip family. + * + * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _ST_NCI_H_ +#define _ST_NCI_H_ + +#define ST_NCI_DRIVER_NAME "st_nci" + +struct st_nci_nfc_platform_data { + unsigned int gpio_reset; + unsigned int irq_polarity; +}; + +#endif /* _ST_NCI_H_ */ -- cgit v1.2.3 From 30686bf7f5b3c30831761e188a6e3cb33580fa48 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jun 2015 21:39:54 +0200 Subject: mac80211: convert HW flags to unsigned long bitmap As we're running out of hardware capability flags pretty quickly, convert them to use the regular test_bit() style unsigned long bitmaps. This introduces a number of helper functions/macros to set and to test the bits, along with new debugfs code. The occurrences of an explicit __clear_bit() are intentional, the drivers were never supposed to change their supported bits on the fly. We should investigate changing this to be a per-frame flag. Signed-off-by: Johannes Berg --- drivers/net/wireless/adm8211.c | 8 +- drivers/net/wireless/at76c50x-usb.c | 4 +- drivers/net/wireless/ath/ar5523/ar5523.c | 6 +- drivers/net/wireless/ath/ath10k/mac.c | 34 ++++---- drivers/net/wireless/ath/ath5k/base.c | 12 +-- drivers/net/wireless/ath/ath9k/htc_drv_init.c | 20 ++--- drivers/net/wireless/ath/ath9k/init.c | 24 +++--- drivers/net/wireless/ath/carl9170/fw.c | 2 +- drivers/net/wireless/ath/carl9170/main.c | 20 ++--- drivers/net/wireless/ath/wcn36xx/main.c | 12 +-- drivers/net/wireless/b43/main.c | 4 +- drivers/net/wireless/b43legacy/main.c | 5 +- .../net/wireless/brcm80211/brcmsmac/mac80211_if.c | 7 +- drivers/net/wireless/cw1200/main.c | 16 ++-- drivers/net/wireless/iwlegacy/3945-mac.c | 6 +- drivers/net/wireless/iwlegacy/4965-mac.c | 12 +-- drivers/net/wireless/iwlwifi/dvm/mac80211.c | 26 +++--- drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c | 4 +- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 32 ++++---- drivers/net/wireless/libertas_tf/main.c | 2 +- drivers/net/wireless/mac80211_hwsim.c | 26 +++--- drivers/net/wireless/mediatek/mt7601u/init.c | 10 +-- drivers/net/wireless/mwl8k.c | 9 +- drivers/net/wireless/p54/main.c | 12 +-- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 7 +- drivers/net/wireless/rt2x00/rt2400pci.c | 8 +- drivers/net/wireless/rt2x00/rt2500pci.c | 8 +- drivers/net/wireless/rt2x00/rt2500usb.c | 9 +- drivers/net/wireless/rt2x00/rt2800lib.c | 16 ++-- drivers/net/wireless/rt2x00/rt61pci.c | 9 +- drivers/net/wireless/rt2x00/rt73usb.c | 9 +- drivers/net/wireless/rtl818x/rtl8180/dev.c | 9 +- drivers/net/wireless/rtl818x/rtl8187/dev.c | 6 +- drivers/net/wireless/rtlwifi/base.c | 22 +++-- drivers/net/wireless/ti/wl1251/main.c | 3 +- drivers/net/wireless/ti/wlcore/main.c | 26 +++--- drivers/net/wireless/zd1211rw/zd_mac.c | 8 +- drivers/staging/vt6655/device_main.c | 8 +- drivers/staging/vt6656/main_usb.c | 8 +- include/net/mac80211.h | 79 +++++++++++------- net/mac80211/agg-tx.c | 4 +- net/mac80211/cfg.c | 10 +-- net/mac80211/debugfs.c | 96 ++++++++++++---------- net/mac80211/driver-ops.h | 2 +- net/mac80211/iface.c | 10 +-- net/mac80211/key.c | 4 +- net/mac80211/main.c | 14 ++-- net/mac80211/mlme.c | 63 +++++++------- net/mac80211/offchannel.c | 2 +- net/mac80211/pm.c | 4 +- net/mac80211/rate.c | 6 +- net/mac80211/rc80211_minstrel_ht.c | 2 +- net/mac80211/rx.c | 26 +++--- net/mac80211/scan.c | 10 +-- net/mac80211/sta_info.c | 14 ++-- net/mac80211/status.c | 13 ++- net/mac80211/tdls.c | 2 +- net/mac80211/tx.c | 34 ++++---- net/mac80211/util.c | 6 +- 59 files changed, 447 insertions(+), 423 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c index 8c283fcd843d..15f057ed41ad 100644 --- a/drivers/net/wireless/adm8211.c +++ b/drivers/net/wireless/adm8211.c @@ -1373,9 +1373,9 @@ static void adm8211_configure_filter(struct ieee80211_hw *dev, ADM8211_CSR_READ(NAR); if (priv->nar & ADM8211_NAR_PR) - dev->flags |= IEEE80211_HW_RX_INCLUDES_FCS; + ieee80211_hw_set(dev, RX_INCLUDES_FCS); else - dev->flags &= ~IEEE80211_HW_RX_INCLUDES_FCS; + __clear_bit(IEEE80211_HW_RX_INCLUDES_FCS, dev->flags); if (*total_flags & FIF_BCN_PRBRESP_PROMISC) adm8211_set_bssid(dev, bcast); @@ -1861,8 +1861,8 @@ static int adm8211_probe(struct pci_dev *pdev, SET_IEEE80211_PERM_ADDR(dev, perm_addr); dev->extra_tx_headroom = sizeof(struct adm8211_tx_hdr); - /* dev->flags = IEEE80211_HW_RX_INCLUDES_FCS in promisc mode */ - dev->flags = IEEE80211_HW_SIGNAL_UNSPEC; + /* dev->flags = RX_INCLUDES_FCS in promisc mode */ + ieee80211_hw_set(dev, SIGNAL_UNSPEC); dev->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION); dev->max_signal = 100; /* FIXME: find better value */ diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c index 49219c508963..dab25136214a 100644 --- a/drivers/net/wireless/at76c50x-usb.c +++ b/drivers/net/wireless/at76c50x-usb.c @@ -2360,8 +2360,8 @@ static int at76_init_new_device(struct at76_priv *priv, priv->hw->wiphy->max_scan_ie_len = 0; priv->hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION); priv->hw->wiphy->bands[IEEE80211_BAND_2GHZ] = &at76_supported_band; - priv->hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | - IEEE80211_HW_SIGNAL_UNSPEC; + ieee80211_hw_set(priv->hw, RX_INCLUDES_FCS); + ieee80211_hw_set(priv->hw, SIGNAL_UNSPEC); priv->hw->max_signal = 100; SET_IEEE80211_DEV(priv->hw, &interface->dev); diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index 14937cbeca56..3b343c63aa52 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -1682,9 +1682,9 @@ static int ar5523_probe(struct usb_interface *intf, (id->driver_info & AR5523_FLAG_ABG) ? '5' : '2'); ar->vif = NULL; - hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | - IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_HAS_RATE_CONTROL; + ieee80211_hw_set(hw, HAS_RATE_CONTROL); + ieee80211_hw_set(hw, RX_INCLUDES_FCS); + ieee80211_hw_set(hw, SIGNAL_DBM); hw->extra_tx_headroom = sizeof(struct ar5523_tx_desc) + sizeof(struct ar5523_chunk); hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION); diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 0ed422ae46a4..609ca8619ebd 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -6882,21 +6882,21 @@ int ath10k_mac_register(struct ath10k *ar) BIT(NL80211_IFTYPE_P2P_CLIENT) | BIT(NL80211_IFTYPE_P2P_GO); - ar->hw->flags = IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_SUPPORTS_DYNAMIC_PS | - IEEE80211_HW_MFP_CAPABLE | - IEEE80211_HW_REPORTS_TX_ACK_STATUS | - IEEE80211_HW_HAS_RATE_CONTROL | - IEEE80211_HW_AP_LINK_PS | - IEEE80211_HW_SPECTRUM_MGMT | - IEEE80211_HW_SW_CRYPTO_CONTROL | - IEEE80211_HW_SUPPORT_FAST_XMIT | - IEEE80211_HW_CONNECTION_MONITOR | - IEEE80211_HW_SUPPORTS_PER_STA_GTK | - IEEE80211_HW_WANT_MONITOR_VIF | - IEEE80211_HW_CHANCTX_STA_CSA | - IEEE80211_HW_QUEUE_CONTROL; + ieee80211_hw_set(ar->hw, SIGNAL_DBM); + ieee80211_hw_set(ar->hw, SUPPORTS_PS); + ieee80211_hw_set(ar->hw, SUPPORTS_DYNAMIC_PS); + ieee80211_hw_set(ar->hw, MFP_CAPABLE); + ieee80211_hw_set(ar->hw, REPORTS_TX_ACK_STATUS); + ieee80211_hw_set(ar->hw, HAS_RATE_CONTROL); + ieee80211_hw_set(ar->hw, AP_LINK_PS); + ieee80211_hw_set(ar->hw, SPECTRUM_MGMT); + ieee80211_hw_set(ar->hw, SW_CRYPTO_CONTROL); + ieee80211_hw_set(ar->hw, SUPPORT_FAST_XMIT); + ieee80211_hw_set(ar->hw, CONNECTION_MONITOR); + ieee80211_hw_set(ar->hw, SUPPORTS_PER_STA_GTK); + ieee80211_hw_set(ar->hw, WANT_MONITOR_VIF); + ieee80211_hw_set(ar->hw, CHANCTX_STA_CSA); + ieee80211_hw_set(ar->hw, QUEUE_CONTROL); ar->hw->wiphy->features |= NL80211_FEATURE_STATIC_SMPS; ar->hw->wiphy->flags |= WIPHY_FLAG_IBSS_RSN; @@ -6905,8 +6905,8 @@ int ath10k_mac_register(struct ath10k *ar) ar->hw->wiphy->features |= NL80211_FEATURE_DYNAMIC_SMPS; if (ar->ht_cap_info & WMI_HT_CAP_ENABLED) { - ar->hw->flags |= IEEE80211_HW_AMPDU_AGGREGATION; - ar->hw->flags |= IEEE80211_HW_TX_AMPDU_SETUP_IN_HW; + ieee80211_hw_set(ar->hw, AMPDU_AGGREGATION); + ieee80211_hw_set(ar->hw, TX_AMPDU_SETUP_IN_HW); } ar->hw->wiphy->max_scan_ssids = WLAN_SCAN_PARAMS_MAX_SSID; diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index a6131825c9f6..23552f43d125 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -2537,12 +2537,12 @@ ath5k_init_ah(struct ath5k_hw *ah, const struct ath_bus_ops *bus_ops) /* Initialize driver private data */ SET_IEEE80211_DEV(hw, ah->dev); - hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | - IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | - IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_MFP_CAPABLE | - IEEE80211_HW_REPORTS_TX_ACK_STATUS | - IEEE80211_HW_SUPPORTS_RC_TABLE; + ieee80211_hw_set(hw, SUPPORTS_RC_TABLE); + ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS); + ieee80211_hw_set(hw, MFP_CAPABLE); + ieee80211_hw_set(hw, SIGNAL_DBM); + ieee80211_hw_set(hw, RX_INCLUDES_FCS); + ieee80211_hw_set(hw, HOST_BROADCAST_PS_BUFFERING); hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_AP) | diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c index 746856243bff..36218ee2bdbe 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c @@ -717,18 +717,18 @@ static void ath9k_set_hw_capab(struct ath9k_htc_priv *priv, struct ath_common *common = ath9k_hw_common(priv->ah); struct base_eep_header *pBase; - hw->flags = IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_AMPDU_AGGREGATION | - IEEE80211_HW_SPECTRUM_MGMT | - IEEE80211_HW_HAS_RATE_CONTROL | - IEEE80211_HW_RX_INCLUDES_FCS | - IEEE80211_HW_PS_NULLFUNC_STACK | - IEEE80211_HW_REPORTS_TX_ACK_STATUS | - IEEE80211_HW_MFP_CAPABLE | - IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING; + ieee80211_hw_set(hw, HOST_BROADCAST_PS_BUFFERING); + ieee80211_hw_set(hw, MFP_CAPABLE); + ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS); + ieee80211_hw_set(hw, PS_NULLFUNC_STACK); + ieee80211_hw_set(hw, RX_INCLUDES_FCS); + ieee80211_hw_set(hw, HAS_RATE_CONTROL); + ieee80211_hw_set(hw, SPECTRUM_MGMT); + ieee80211_hw_set(hw, SIGNAL_DBM); + ieee80211_hw_set(hw, AMPDU_AGGREGATION); if (ath9k_ps_enable) - hw->flags |= IEEE80211_HW_SUPPORTS_PS; + ieee80211_hw_set(hw, SUPPORTS_PS); hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index f8d11efa7b0f..eff0e5325e6a 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -796,7 +796,7 @@ static void ath9k_set_mcc_capab(struct ath_softc *sc, struct ieee80211_hw *hw) if (!ath9k_is_chanctx_enabled()) return; - hw->flags |= IEEE80211_HW_QUEUE_CONTROL; + ieee80211_hw_set(hw, QUEUE_CONTROL); hw->queues = ATH9K_NUM_TX_QUEUES; hw->offchannel_tx_hw_queue = hw->queues - 1; hw->wiphy->interface_modes &= ~ BIT(NL80211_IFTYPE_WDS); @@ -818,20 +818,20 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw) struct ath_hw *ah = sc->sc_ah; struct ath_common *common = ath9k_hw_common(ah); - hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | - IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | - IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_PS_NULLFUNC_STACK | - IEEE80211_HW_SPECTRUM_MGMT | - IEEE80211_HW_REPORTS_TX_ACK_STATUS | - IEEE80211_HW_SUPPORTS_RC_TABLE | - IEEE80211_HW_SUPPORTS_HT_CCK_RATES; + ieee80211_hw_set(hw, SUPPORTS_HT_CCK_RATES); + ieee80211_hw_set(hw, SUPPORTS_RC_TABLE); + ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS); + ieee80211_hw_set(hw, SPECTRUM_MGMT); + ieee80211_hw_set(hw, PS_NULLFUNC_STACK); + ieee80211_hw_set(hw, SIGNAL_DBM); + ieee80211_hw_set(hw, RX_INCLUDES_FCS); + ieee80211_hw_set(hw, HOST_BROADCAST_PS_BUFFERING); if (ath9k_ps_enable) - hw->flags |= IEEE80211_HW_SUPPORTS_PS; + ieee80211_hw_set(hw, SUPPORTS_PS); if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_HT) { - hw->flags |= IEEE80211_HW_AMPDU_AGGREGATION; + ieee80211_hw_set(hw, AMPDU_AGGREGATION); if (AR_SREV_9280_20_OR_LATER(ah)) hw->radiotap_mcs_details |= @@ -839,7 +839,7 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw) } if (AR_SREV_9160_10_OR_LATER(sc->sc_ah) || ath9k_modparam_nohwcrypt) - hw->flags |= IEEE80211_HW_MFP_CAPABLE; + ieee80211_hw_set(hw, MFP_CAPABLE); hw->wiphy->features |= NL80211_FEATURE_ACTIVE_MONITOR | NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE | diff --git a/drivers/net/wireless/ath/carl9170/fw.c b/drivers/net/wireless/ath/carl9170/fw.c index 020cd46471f5..88045f93a76c 100644 --- a/drivers/net/wireless/ath/carl9170/fw.c +++ b/drivers/net/wireless/ath/carl9170/fw.c @@ -286,7 +286,7 @@ static int carl9170_fw(struct ar9170 *ar, const __u8 *data, size_t len) } if (SUPP(CARL9170FW_PSM) && SUPP(CARL9170FW_FIXED_5GHZ_PSM)) - ar->hw->flags |= IEEE80211_HW_SUPPORTS_PS; + ieee80211_hw_set(ar->hw, SUPPORTS_PS); if (!SUPP(CARL9170FW_USB_INIT_FIRMWARE)) { dev_err(&ar->udev->dev, "firmware does not provide " diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 59db6732d4e3..170c209f99b8 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1844,22 +1844,22 @@ void *carl9170_alloc(size_t priv_size) /* firmware decides which modes we support */ hw->wiphy->interface_modes = 0; - hw->flags |= IEEE80211_HW_RX_INCLUDES_FCS | - IEEE80211_HW_MFP_CAPABLE | - IEEE80211_HW_REPORTS_TX_ACK_STATUS | - IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_PS_NULLFUNC_STACK | - IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC | - IEEE80211_HW_SUPPORTS_RC_TABLE | - IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_SUPPORTS_HT_CCK_RATES; + ieee80211_hw_set(hw, RX_INCLUDES_FCS); + ieee80211_hw_set(hw, MFP_CAPABLE); + ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS); + ieee80211_hw_set(hw, SUPPORTS_PS); + ieee80211_hw_set(hw, PS_NULLFUNC_STACK); + ieee80211_hw_set(hw, NEED_DTIM_BEFORE_ASSOC); + ieee80211_hw_set(hw, SUPPORTS_RC_TABLE); + ieee80211_hw_set(hw, SIGNAL_DBM); + ieee80211_hw_set(hw, SUPPORTS_HT_CCK_RATES); if (!modparam_noht) { /* * see the comment above, why we allow the user * to disable HT by a module parameter. */ - hw->flags |= IEEE80211_HW_AMPDU_AGGREGATION; + ieee80211_hw_set(hw, AMPDU_AGGREGATION); } hw->extra_tx_headroom = sizeof(struct _carl9170_tx_superframe); diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c index 0783d2ed8238..900e72a089d8 100644 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@ -944,12 +944,12 @@ static int wcn36xx_init_ieee80211(struct wcn36xx *wcn) WLAN_CIPHER_SUITE_CCMP, }; - wcn->hw->flags = IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_HAS_RATE_CONTROL | - IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_CONNECTION_MONITOR | - IEEE80211_HW_AMPDU_AGGREGATION | - IEEE80211_HW_TIMING_BEACON_ONLY; + ieee80211_hw_set(wcn->hw, TIMING_BEACON_ONLY); + ieee80211_hw_set(wcn->hw, AMPDU_AGGREGATION); + ieee80211_hw_set(wcn->hw, CONNECTION_MONITOR); + ieee80211_hw_set(wcn->hw, SUPPORTS_PS); + ieee80211_hw_set(wcn->hw, SIGNAL_DBM); + ieee80211_hw_set(wcn->hw, HAS_RATE_CONTROL); wcn->hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_AP) | diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index f40992969b4a..575b9f4b5589 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -5605,8 +5605,8 @@ static struct b43_wl *b43_wireless_init(struct b43_bus_dev *dev) wl = hw_to_b43_wl(hw); /* fill hw info */ - hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | - IEEE80211_HW_SIGNAL_DBM; + ieee80211_hw_set(hw, RX_INCLUDES_FCS); + ieee80211_hw_set(hw, SIGNAL_DBM); hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_AP) | diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index 39d49d6cd07f..afc1fb3e38df 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -3832,8 +3832,9 @@ static int b43legacy_wireless_init(struct ssb_device *dev) } /* fill hw info */ - hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | - IEEE80211_HW_SIGNAL_DBM; + ieee80211_hw_set(hw, RX_INCLUDES_FCS); + ieee80211_hw_set(hw, SIGNAL_DBM); + hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_STATION) | diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c index b46cab250615..ab775a5d5b33 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c @@ -1060,10 +1060,9 @@ static int ieee_hw_rate_init(struct ieee80211_hw *hw) */ static int ieee_hw_init(struct ieee80211_hw *hw) { - hw->flags = IEEE80211_HW_SIGNAL_DBM - /* | IEEE80211_HW_CONNECTION_MONITOR What is this? */ - | IEEE80211_HW_REPORTS_TX_ACK_STATUS - | IEEE80211_HW_AMPDU_AGGREGATION; + ieee80211_hw_set(hw, AMPDU_AGGREGATION); + ieee80211_hw_set(hw, SIGNAL_DBM); + ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS); hw->extra_tx_headroom = brcms_c_get_header_len(); hw->queues = N_TX_QUEUES; diff --git a/drivers/net/wireless/cw1200/main.c b/drivers/net/wireless/cw1200/main.c index 3689dbbd10bd..0e51e27d2e3f 100644 --- a/drivers/net/wireless/cw1200/main.c +++ b/drivers/net/wireless/cw1200/main.c @@ -278,14 +278,14 @@ static struct ieee80211_hw *cw1200_init_common(const u8 *macaddr, else priv->ba_tx_tid_mask = 0xff; /* Enable TX BLKACK for all TIDs */ - hw->flags = IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_SUPPORTS_DYNAMIC_PS | - IEEE80211_HW_REPORTS_TX_ACK_STATUS | - IEEE80211_HW_CONNECTION_MONITOR | - IEEE80211_HW_AMPDU_AGGREGATION | - IEEE80211_HW_TX_AMPDU_SETUP_IN_HW | - IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC; + ieee80211_hw_set(hw, NEED_DTIM_BEFORE_ASSOC); + ieee80211_hw_set(hw, TX_AMPDU_SETUP_IN_HW); + ieee80211_hw_set(hw, AMPDU_AGGREGATION); + ieee80211_hw_set(hw, CONNECTION_MONITOR); + ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS); + ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS); + ieee80211_hw_set(hw, SIGNAL_DBM); + ieee80211_hw_set(hw, SUPPORTS_PS); hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_ADHOC) | diff --git a/drivers/net/wireless/iwlegacy/3945-mac.c b/drivers/net/wireless/iwlegacy/3945-mac.c index 189cdf58084b..7f4cb692cc57 100644 --- a/drivers/net/wireless/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/iwlegacy/3945-mac.c @@ -3561,8 +3561,10 @@ il3945_setup_mac(struct il_priv *il) hw->vif_data_size = sizeof(struct il_vif_priv); /* Tell mac80211 our characteristics */ - hw->flags = IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_SPECTRUM_MGMT | - IEEE80211_HW_SUPPORTS_PS | IEEE80211_HW_SUPPORTS_DYNAMIC_PS; + ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS); + ieee80211_hw_set(hw, SUPPORTS_PS); + ieee80211_hw_set(hw, SIGNAL_DBM); + ieee80211_hw_set(hw, SPECTRUM_MGMT); hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_ADHOC); diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c index e4b175cbeefd..44fa422f255e 100644 --- a/drivers/net/wireless/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/iwlegacy/4965-mac.c @@ -5751,11 +5751,13 @@ il4965_mac_setup_register(struct il_priv *il, u32 max_probe_length) hw->rate_control_algorithm = "iwl-4965-rs"; /* Tell mac80211 our characteristics */ - hw->flags = - IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_AMPDU_AGGREGATION | - IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC | IEEE80211_HW_SPECTRUM_MGMT | - IEEE80211_HW_REPORTS_TX_ACK_STATUS | IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_SUPPORTS_DYNAMIC_PS; + ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS); + ieee80211_hw_set(hw, SUPPORTS_PS); + ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS); + ieee80211_hw_set(hw, SPECTRUM_MGMT); + ieee80211_hw_set(hw, NEED_DTIM_BEFORE_ASSOC); + ieee80211_hw_set(hw, SIGNAL_DBM); + ieee80211_hw_set(hw, AMPDU_AGGREGATION); if (il->cfg->sku & IL_SKU_N) hw->wiphy->features |= NL80211_FEATURE_DYNAMIC_SMPS | NL80211_FEATURE_STATIC_SMPS; diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index 852461ffe98f..7acaa266b704 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -104,16 +104,16 @@ int iwlagn_mac_setup_register(struct iwl_priv *priv, hw->rate_control_algorithm = "iwl-agn-rs"; /* Tell mac80211 our characteristics */ - hw->flags = IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_AMPDU_AGGREGATION | - IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC | - IEEE80211_HW_SPECTRUM_MGMT | - IEEE80211_HW_REPORTS_TX_ACK_STATUS | - IEEE80211_HW_QUEUE_CONTROL | - IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_SUPPORTS_DYNAMIC_PS | - IEEE80211_HW_SUPPORT_FAST_XMIT | - IEEE80211_HW_WANT_MONITOR_VIF; + ieee80211_hw_set(hw, SIGNAL_DBM); + ieee80211_hw_set(hw, AMPDU_AGGREGATION); + ieee80211_hw_set(hw, NEED_DTIM_BEFORE_ASSOC); + ieee80211_hw_set(hw, SPECTRUM_MGMT); + ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS); + ieee80211_hw_set(hw, QUEUE_CONTROL); + ieee80211_hw_set(hw, SUPPORTS_PS); + ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS); + ieee80211_hw_set(hw, SUPPORT_FAST_XMIT); + ieee80211_hw_set(hw, WANT_MONITOR_VIF); hw->offchannel_tx_hw_queue = IWL_AUX_QUEUE; hw->radiotap_mcs_details |= IEEE80211_RADIOTAP_MCS_HAVE_FMT; @@ -136,7 +136,7 @@ int iwlagn_mac_setup_register(struct iwl_priv *priv, */ if (priv->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_MFP && !iwlwifi_mod_params.sw_crypto) - hw->flags |= IEEE80211_HW_MFP_CAPABLE; + ieee80211_hw_set(hw, MFP_CAPABLE); hw->sta_data_size = sizeof(struct iwl_station_priv); hw->vif_data_size = sizeof(struct iwl_vif_priv); @@ -1342,9 +1342,9 @@ static int iwlagn_mac_add_interface(struct ieee80211_hw *hw, * other interfaces are added, this is safe. */ if (vif->type == NL80211_IFTYPE_MONITOR) - priv->hw->flags |= IEEE80211_HW_RX_INCLUDES_FCS; + ieee80211_hw_set(priv->hw, RX_INCLUDES_FCS); else - priv->hw->flags &= ~IEEE80211_HW_RX_INCLUDES_FCS; + __clear_bit(IEEE80211_HW_RX_INCLUDES_FCS, priv->hw->flags); err = iwl_setup_interface(priv, ctx); if (!err || reset) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c index 8088c7137f7c..1812dd018af2 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c @@ -852,7 +852,7 @@ static int iwl_mvm_mac_ctxt_cmd_listener(struct iwl_mvm *mvm, MAC_FILTER_IN_BEACON | MAC_FILTER_IN_PROBE_REQUEST | MAC_FILTER_IN_CRC32); - mvm->hw->flags |= IEEE80211_HW_RX_INCLUDES_FCS; + ieee80211_hw_set(mvm->hw, RX_INCLUDES_FCS); return iwl_mvm_mac_ctxt_send_cmd(mvm, &cmd); } @@ -1270,7 +1270,7 @@ int iwl_mvm_mac_ctxt_remove(struct iwl_mvm *mvm, struct ieee80211_vif *vif) mvmvif->uploaded = false; if (vif->type == NL80211_IFTYPE_MONITOR) - mvm->hw->flags &= ~IEEE80211_HW_RX_INCLUDES_FCS; + __clear_bit(IEEE80211_HW_RX_INCLUDES_FCS, mvm->hw->flags); return 0; } diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 81daf0027859..dfdab38e2d4a 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -423,19 +423,19 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) }; /* Tell mac80211 our characteristics */ - hw->flags = IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_SPECTRUM_MGMT | - IEEE80211_HW_REPORTS_TX_ACK_STATUS | - IEEE80211_HW_QUEUE_CONTROL | - IEEE80211_HW_WANT_MONITOR_VIF | - IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_SUPPORTS_DYNAMIC_PS | - IEEE80211_HW_AMPDU_AGGREGATION | - IEEE80211_HW_TIMING_BEACON_ONLY | - IEEE80211_HW_CONNECTION_MONITOR | - IEEE80211_HW_CHANCTX_STA_CSA | - IEEE80211_HW_SUPPORT_FAST_XMIT | - IEEE80211_HW_SUPPORTS_CLONED_SKBS; + ieee80211_hw_set(hw, SIGNAL_DBM); + ieee80211_hw_set(hw, SPECTRUM_MGMT); + ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS); + ieee80211_hw_set(hw, QUEUE_CONTROL); + ieee80211_hw_set(hw, WANT_MONITOR_VIF); + ieee80211_hw_set(hw, SUPPORTS_PS); + ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS); + ieee80211_hw_set(hw, AMPDU_AGGREGATION); + ieee80211_hw_set(hw, TIMING_BEACON_ONLY); + ieee80211_hw_set(hw, CONNECTION_MONITOR); + ieee80211_hw_set(hw, CHANCTX_STA_CSA); + ieee80211_hw_set(hw, SUPPORT_FAST_XMIT); + ieee80211_hw_set(hw, SUPPORTS_CLONED_SKBS); hw->queues = mvm->first_agg_queue; hw->offchannel_tx_hw_queue = IWL_MVM_OFFCHANNEL_QUEUE; @@ -459,7 +459,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) */ if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_MFP && !iwlwifi_mod_params.sw_crypto) { - hw->flags |= IEEE80211_HW_MFP_CAPABLE; + ieee80211_hw_set(hw, MFP_CAPABLE); mvm->ciphers[hw->wiphy->n_cipher_suites] = WLAN_CIPHER_SUITE_AES_CMAC; hw->wiphy->n_cipher_suites++; @@ -474,7 +474,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->wiphy->n_cipher_suites++; } - hw->flags |= IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS; + ieee80211_hw_set(hw, SINGLE_SCAN_ON_ALL_BANDS); hw->wiphy->features |= NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR | NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR | @@ -2885,7 +2885,7 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw, key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; break; case WLAN_CIPHER_SUITE_AES_CMAC: - WARN_ON_ONCE(!(hw->flags & IEEE80211_HW_MFP_CAPABLE)); + WARN_ON_ONCE(!ieee80211_hw_check(hw, MFP_CAPABLE)); break; case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c index 1bdf18674fb8..a47f0acc099a 100644 --- a/drivers/net/wireless/libertas_tf/main.c +++ b/drivers/net/wireless/libertas_tf/main.c @@ -634,7 +634,7 @@ struct lbtf_private *lbtf_add_card(void *card, struct device *dmdev) priv->tx_skb = NULL; hw->queues = 1; - hw->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING; + ieee80211_hw_set(hw, HOST_BROADCAST_PS_BUFFERING); hw->extra_tx_headroom = sizeof(struct txpd); memcpy(priv->channels, lbtf_channels, sizeof(lbtf_channels)); memcpy(priv->rates, lbtf_rates, sizeof(lbtf_rates)); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 8d2f6bbf9598..99e873dc8684 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1286,7 +1286,7 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, if (control->sta) hwsim_check_sta_magic(control->sta); - if (hw->flags & IEEE80211_HW_SUPPORTS_RC_TABLE) + if (ieee80211_hw_check(hw, SUPPORTS_RC_TABLE)) ieee80211_get_tx_rates(txi->control.vif, control->sta, skb, txi->control.rates, ARRAY_SIZE(txi->control.rates)); @@ -1395,7 +1395,7 @@ static void mac80211_hwsim_tx_frame(struct ieee80211_hw *hw, { u32 _pid = ACCESS_ONCE(wmediumd_portid); - if (hw->flags & IEEE80211_HW_SUPPORTS_RC_TABLE) { + if (ieee80211_hw_check(hw, SUPPORTS_RC_TABLE)) { struct ieee80211_tx_info *txi = IEEE80211_SKB_CB(skb); ieee80211_get_tx_rates(txi->control.vif, NULL, skb, txi->control.rates, @@ -1432,7 +1432,7 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac, if (skb == NULL) return; info = IEEE80211_SKB_CB(skb); - if (hw->flags & IEEE80211_HW_SUPPORTS_RC_TABLE) + if (ieee80211_hw_check(hw, SUPPORTS_RC_TABLE)) ieee80211_get_tx_rates(vif, NULL, skb, info->control.rates, ARRAY_SIZE(info->control.rates)); @@ -2391,16 +2391,16 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, if (param->p2p_device) hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_P2P_DEVICE); - hw->flags = IEEE80211_HW_MFP_CAPABLE | - IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_AMPDU_AGGREGATION | - IEEE80211_HW_WANT_MONITOR_VIF | - IEEE80211_HW_QUEUE_CONTROL | - IEEE80211_HW_SUPPORTS_HT_CCK_RATES | - IEEE80211_HW_CHANCTX_STA_CSA | - IEEE80211_HW_SUPPORT_FAST_XMIT; + ieee80211_hw_set(hw, SUPPORT_FAST_XMIT); + ieee80211_hw_set(hw, CHANCTX_STA_CSA); + ieee80211_hw_set(hw, SUPPORTS_HT_CCK_RATES); + ieee80211_hw_set(hw, QUEUE_CONTROL); + ieee80211_hw_set(hw, WANT_MONITOR_VIF); + ieee80211_hw_set(hw, AMPDU_AGGREGATION); + ieee80211_hw_set(hw, MFP_CAPABLE); + ieee80211_hw_set(hw, SIGNAL_DBM); if (rctbl) - hw->flags |= IEEE80211_HW_SUPPORTS_RC_TABLE; + ieee80211_hw_set(hw, SUPPORTS_RC_TABLE); hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_TDLS | WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL | @@ -2509,7 +2509,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, } if (param->no_vif) - hw->flags |= IEEE80211_HW_NO_AUTO_VIF; + ieee80211_hw_set(hw, NO_AUTO_VIF); err = ieee80211_register_hw(hw); if (err < 0) { diff --git a/drivers/net/wireless/mediatek/mt7601u/init.c b/drivers/net/wireless/mediatek/mt7601u/init.c index 1fc86e865c8c..ca1ea2e55ff4 100644 --- a/drivers/net/wireless/mediatek/mt7601u/init.c +++ b/drivers/net/wireless/mediatek/mt7601u/init.c @@ -591,11 +591,11 @@ int mt7601u_register_device(struct mt7601u_dev *dev) SET_IEEE80211_DEV(hw, dev->dev); hw->queues = 4; - hw->flags = IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_PS_NULLFUNC_STACK | - IEEE80211_HW_SUPPORTS_HT_CCK_RATES | - IEEE80211_HW_AMPDU_AGGREGATION | - IEEE80211_HW_SUPPORTS_RC_TABLE; + ieee80211_hw_set(hw, SIGNAL_DBM); + ieee80211_hw_set(hw, PS_NULLFUNC_STACK); + ieee80211_hw_set(hw, SUPPORTS_HT_CCK_RATES); + ieee80211_hw_set(hw, AMPDU_AGGREGATION); + ieee80211_hw_set(hw, SUPPORTS_RC_TABLE); hw->max_rates = 1; hw->max_report_rates = 7; hw->max_rate_tries = 1; diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index b71fc74d14ab..77361af68b18 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -2380,7 +2380,7 @@ mwl8k_set_ht_caps(struct ieee80211_hw *hw, if (cap & MWL8K_CAP_GREENFIELD) band->ht_cap.cap |= IEEE80211_HT_CAP_GRN_FLD; if (cap & MWL8K_CAP_AMPDU) { - hw->flags |= IEEE80211_HW_AMPDU_AGGREGATION; + ieee80211_hw_set(hw, AMPDU_AGGREGATION); band->ht_cap.ampdu_factor = IEEE80211_HT_MAX_AMPDU_64K; band->ht_cap.ampdu_density = IEEE80211_HT_MPDU_DENSITY_NONE; } @@ -5431,7 +5431,7 @@ mwl8k_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u8 *addr = sta->addr, idx; struct mwl8k_sta *sta_info = MWL8K_STA(sta); - if (!(hw->flags & IEEE80211_HW_AMPDU_AGGREGATION)) + if (!ieee80211_hw_check(hw, AMPDU_AGGREGATION)) return -ENOTSUPP; spin_lock(&priv->stream_lock); @@ -6076,14 +6076,15 @@ static int mwl8k_firmware_load_success(struct mwl8k_priv *priv) hw->queues = MWL8K_TX_WMM_QUEUES; /* Set rssi values to dBm */ - hw->flags |= IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_HAS_RATE_CONTROL; + ieee80211_hw_set(hw, SIGNAL_DBM); + ieee80211_hw_set(hw, HAS_RATE_CONTROL); /* * Ask mac80211 to not to trigger PS mode * based on PM bit of incoming frames. */ if (priv->ap_fw) - hw->flags |= IEEE80211_HW_AP_LINK_PS; + ieee80211_hw_set(hw, AP_LINK_PS); hw->vif_data_size = sizeof(struct mwl8k_vif); hw->sta_data_size = sizeof(struct mwl8k_sta); diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index 2947ad21053c..7805864e76f9 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -746,12 +746,12 @@ struct ieee80211_hw *p54_init_common(size_t priv_data_len) spin_lock_init(&priv->tx_stats_lock); skb_queue_head_init(&priv->tx_queue); skb_queue_head_init(&priv->tx_pending); - dev->flags = IEEE80211_HW_RX_INCLUDES_FCS | - IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_PS_NULLFUNC_STACK | - IEEE80211_HW_MFP_CAPABLE | - IEEE80211_HW_REPORTS_TX_ACK_STATUS; + ieee80211_hw_set(dev, REPORTS_TX_ACK_STATUS); + ieee80211_hw_set(dev, MFP_CAPABLE); + ieee80211_hw_set(dev, PS_NULLFUNC_STACK); + ieee80211_hw_set(dev, SUPPORTS_PS); + ieee80211_hw_set(dev, RX_INCLUDES_FCS); + ieee80211_hw_set(dev, SIGNAL_DBM); dev->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_ADHOC) | diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index aeaf87bb5518..7e804324bfa7 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -1062,10 +1062,9 @@ int rsi_mac80211_attach(struct rsi_common *common) hw->priv = adapter; adapter->hw = hw; - hw->flags = IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_HAS_RATE_CONTROL | - IEEE80211_HW_AMPDU_AGGREGATION | - 0; + ieee80211_hw_set(hw, SIGNAL_DBM); + ieee80211_hw_set(hw, HAS_RATE_CONTROL); + ieee80211_hw_set(hw, AMPDU_AGGREGATION); hw->queues = MAX_HW_QUEUES; hw->extra_tx_headroom = RSI_NEEDED_HEADROOM; diff --git a/drivers/net/wireless/rt2x00/rt2400pci.c b/drivers/net/wireless/rt2x00/rt2400pci.c index 7da138892026..9a3966cd6fbe 100644 --- a/drivers/net/wireless/rt2x00/rt2400pci.c +++ b/drivers/net/wireless/rt2x00/rt2400pci.c @@ -1574,10 +1574,10 @@ static int rt2400pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev) /* * Initialize all hw fields. */ - rt2x00dev->hw->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | - IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_PS_NULLFUNC_STACK; + ieee80211_hw_set(rt2x00dev->hw, PS_NULLFUNC_STACK); + ieee80211_hw_set(rt2x00dev->hw, SUPPORTS_PS); + ieee80211_hw_set(rt2x00dev->hw, HOST_BROADCAST_PS_BUFFERING); + ieee80211_hw_set(rt2x00dev->hw, SIGNAL_DBM); SET_IEEE80211_DEV(rt2x00dev->hw, rt2x00dev->dev); SET_IEEE80211_PERM_ADDR(rt2x00dev->hw, diff --git a/drivers/net/wireless/rt2x00/rt2500pci.c b/drivers/net/wireless/rt2x00/rt2500pci.c index 4ea53aa9ede3..1a6740b4d396 100644 --- a/drivers/net/wireless/rt2x00/rt2500pci.c +++ b/drivers/net/wireless/rt2x00/rt2500pci.c @@ -1869,10 +1869,10 @@ static int rt2500pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev) /* * Initialize all hw fields. */ - rt2x00dev->hw->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | - IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_PS_NULLFUNC_STACK; + ieee80211_hw_set(rt2x00dev->hw, PS_NULLFUNC_STACK); + ieee80211_hw_set(rt2x00dev->hw, SUPPORTS_PS); + ieee80211_hw_set(rt2x00dev->hw, HOST_BROADCAST_PS_BUFFERING); + ieee80211_hw_set(rt2x00dev->hw, SIGNAL_DBM); SET_IEEE80211_DEV(rt2x00dev->hw, rt2x00dev->dev); SET_IEEE80211_PERM_ADDR(rt2x00dev->hw, diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c index 237bbb54c7a8..b50d873145d5 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/rt2x00/rt2500usb.c @@ -1696,11 +1696,10 @@ static int rt2500usb_probe_hw_mode(struct rt2x00_dev *rt2x00dev) * multicast and broadcast traffic immediately instead of buffering it * infinitly and thus dropping it after some time. */ - rt2x00dev->hw->flags = - IEEE80211_HW_RX_INCLUDES_FCS | - IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_PS_NULLFUNC_STACK; + ieee80211_hw_set(rt2x00dev->hw, PS_NULLFUNC_STACK); + ieee80211_hw_set(rt2x00dev->hw, SUPPORTS_PS); + ieee80211_hw_set(rt2x00dev->hw, RX_INCLUDES_FCS); + ieee80211_hw_set(rt2x00dev->hw, SIGNAL_DBM); /* * Disable powersaving as default. diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index dfeca8355b22..0bc5ac56f283 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -7497,13 +7497,12 @@ static int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev) /* * Initialize all hw fields. */ - rt2x00dev->hw->flags = - IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_PS_NULLFUNC_STACK | - IEEE80211_HW_AMPDU_AGGREGATION | - IEEE80211_HW_REPORTS_TX_ACK_STATUS | - IEEE80211_HW_SUPPORTS_HT_CCK_RATES; + ieee80211_hw_set(rt2x00dev->hw, SUPPORTS_HT_CCK_RATES); + ieee80211_hw_set(rt2x00dev->hw, REPORTS_TX_ACK_STATUS); + ieee80211_hw_set(rt2x00dev->hw, AMPDU_AGGREGATION); + ieee80211_hw_set(rt2x00dev->hw, PS_NULLFUNC_STACK); + ieee80211_hw_set(rt2x00dev->hw, SIGNAL_DBM); + ieee80211_hw_set(rt2x00dev->hw, SUPPORTS_PS); /* * Don't set IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING for USB devices @@ -7513,8 +7512,7 @@ static int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev) * infinitly and thus dropping it after some time. */ if (!rt2x00_is_usb(rt2x00dev)) - rt2x00dev->hw->flags |= - IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING; + ieee80211_hw_set(rt2x00dev->hw, HOST_BROADCAST_PS_BUFFERING); SET_IEEE80211_DEV(rt2x00dev->hw, rt2x00dev->dev); SET_IEEE80211_PERM_ADDR(rt2x00dev->hw, diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c index c8a967247a9a..c0e730ea1b69 100644 --- a/drivers/net/wireless/rt2x00/rt61pci.c +++ b/drivers/net/wireless/rt2x00/rt61pci.c @@ -2758,11 +2758,10 @@ static int rt61pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev) /* * Initialize all hw fields. */ - rt2x00dev->hw->flags = - IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | - IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_PS_NULLFUNC_STACK; + ieee80211_hw_set(rt2x00dev->hw, PS_NULLFUNC_STACK); + ieee80211_hw_set(rt2x00dev->hw, SUPPORTS_PS); + ieee80211_hw_set(rt2x00dev->hw, HOST_BROADCAST_PS_BUFFERING); + ieee80211_hw_set(rt2x00dev->hw, SIGNAL_DBM); SET_IEEE80211_DEV(rt2x00dev->hw, rt2x00dev->dev); SET_IEEE80211_PERM_ADDR(rt2x00dev->hw, diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c index 65ce3afb888a..7081e13b4fd6 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.c +++ b/drivers/net/wireless/rt2x00/rt73usb.c @@ -2105,16 +2105,15 @@ static int rt73usb_probe_hw_mode(struct rt2x00_dev *rt2x00dev) /* * Initialize all hw fields. * - * Don't set IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING unless we are + * Don't set IEEE80211_HOST_BROADCAST_PS_BUFFERING unless we are * capable of sending the buffered frames out after the DTIM * transmission using rt2x00lib_beacondone. This will send out * multicast and broadcast traffic immediately instead of buffering it * infinitly and thus dropping it after some time. */ - rt2x00dev->hw->flags = - IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_PS_NULLFUNC_STACK; + ieee80211_hw_set(rt2x00dev->hw, PS_NULLFUNC_STACK); + ieee80211_hw_set(rt2x00dev->hw, SIGNAL_DBM); + ieee80211_hw_set(rt2x00dev->hw, SUPPORTS_PS); SET_IEEE80211_DEV(rt2x00dev->hw, rt2x00dev->dev); SET_IEEE80211_PERM_ADDR(rt2x00dev->hw, diff --git a/drivers/net/wireless/rtl818x/rtl8180/dev.c b/drivers/net/wireless/rtl818x/rtl8180/dev.c index 706b844bce00..a43a16fde59d 100644 --- a/drivers/net/wireless/rtl818x/rtl8180/dev.c +++ b/drivers/net/wireless/rtl818x/rtl8180/dev.c @@ -1802,8 +1802,9 @@ static int rtl8180_probe(struct pci_dev *pdev, priv->band.n_bitrates = 4; dev->wiphy->bands[IEEE80211_BAND_2GHZ] = &priv->band; - dev->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | - IEEE80211_HW_RX_INCLUDES_FCS; + ieee80211_hw_set(dev, HOST_BROADCAST_PS_BUFFERING); + ieee80211_hw_set(dev, RX_INCLUDES_FCS); + dev->vif_data_size = sizeof(struct rtl8180_vif); dev->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_ADHOC); @@ -1868,9 +1869,9 @@ static int rtl8180_probe(struct pci_dev *pdev, } if (priv->chip_family != RTL818X_CHIP_FAMILY_RTL8180) - dev->flags |= IEEE80211_HW_SIGNAL_DBM; + ieee80211_hw_set(dev, SIGNAL_DBM); else - dev->flags |= IEEE80211_HW_SIGNAL_UNSPEC; + ieee80211_hw_set(dev, SIGNAL_UNSPEC); rtl8180_eeprom_read(priv); diff --git a/drivers/net/wireless/rtl818x/rtl8187/dev.c b/drivers/net/wireless/rtl818x/rtl8187/dev.c index 629ad8cfa17b..b7f72f9c7988 100644 --- a/drivers/net/wireless/rtl818x/rtl8187/dev.c +++ b/drivers/net/wireless/rtl818x/rtl8187/dev.c @@ -1478,9 +1478,9 @@ static int rtl8187_probe(struct usb_interface *intf, dev->wiphy->bands[IEEE80211_BAND_2GHZ] = &priv->band; - dev->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | - IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_RX_INCLUDES_FCS; + ieee80211_hw_set(dev, RX_INCLUDES_FCS); + ieee80211_hw_set(dev, HOST_BROADCAST_PS_BUFFERING); + ieee80211_hw_set(dev, SIGNAL_DBM); /* Initialize rate-control variables */ dev->max_rates = 1; dev->max_rate_tries = RETRY_COUNT; diff --git a/drivers/net/wireless/rtlwifi/base.c b/drivers/net/wireless/rtlwifi/base.c index 01f56c7df8b5..0517a4f2d3f2 100644 --- a/drivers/net/wireless/rtlwifi/base.c +++ b/drivers/net/wireless/rtlwifi/base.c @@ -394,20 +394,18 @@ static void _rtl_init_mac80211(struct ieee80211_hw *hw) } } /* <5> set hw caps */ - hw->flags = IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_RX_INCLUDES_FCS | - IEEE80211_HW_AMPDU_AGGREGATION | - IEEE80211_HW_CONNECTION_MONITOR | - /* IEEE80211_HW_SUPPORTS_CQM_RSSI | */ - IEEE80211_HW_MFP_CAPABLE | - IEEE80211_HW_REPORTS_TX_ACK_STATUS | 0; + ieee80211_hw_set(hw, SIGNAL_DBM); + ieee80211_hw_set(hw, RX_INCLUDES_FCS); + ieee80211_hw_set(hw, AMPDU_AGGREGATION); + ieee80211_hw_set(hw, CONNECTION_MONITOR); + ieee80211_hw_set(hw, MFP_CAPABLE); + ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS); /* swlps or hwlps has been set in diff chip in init_sw_vars */ - if (rtlpriv->psc.swctrl_lps) - hw->flags |= IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_PS_NULLFUNC_STACK | - /* IEEE80211_HW_SUPPORTS_DYNAMIC_PS | */ - 0; + if (rtlpriv->psc.swctrl_lps) { + ieee80211_hw_set(hw, SUPPORTS_PS); + ieee80211_hw_set(hw, PS_NULLFUNC_STACK); + } hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_STATION) | diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c index f238ee54226c..cd4777954f87 100644 --- a/drivers/net/wireless/ti/wl1251/main.c +++ b/drivers/net/wireless/ti/wl1251/main.c @@ -1476,7 +1476,8 @@ int wl1251_init_ieee80211(struct wl1251 *wl) /* unit us */ /* FIXME: find a proper value */ - wl->hw->flags = IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_SUPPORTS_PS; + ieee80211_hw_set(wl->hw, SIGNAL_DBM); + ieee80211_hw_set(wl->hw, SUPPORTS_PS); wl->hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_ADHOC); diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index ef3fe0fff588..337223b9f6f8 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -6060,19 +6060,19 @@ static int wl1271_init_ieee80211(struct wl1271 *wl) /* FIXME: find a proper value */ wl->hw->max_listen_interval = wl->conf.conn.max_listen_interval; - wl->hw->flags = IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_SUPPORTS_DYNAMIC_PS | - IEEE80211_HW_HAS_RATE_CONTROL | - IEEE80211_HW_CONNECTION_MONITOR | - IEEE80211_HW_REPORTS_TX_ACK_STATUS | - IEEE80211_HW_SPECTRUM_MGMT | - IEEE80211_HW_AP_LINK_PS | - IEEE80211_HW_AMPDU_AGGREGATION | - IEEE80211_HW_TX_AMPDU_SETUP_IN_HW | - IEEE80211_HW_QUEUE_CONTROL | - IEEE80211_HW_CHANCTX_STA_CSA | - IEEE80211_HW_SUPPORT_FAST_XMIT; + ieee80211_hw_set(wl->hw, SUPPORT_FAST_XMIT); + ieee80211_hw_set(wl->hw, CHANCTX_STA_CSA); + ieee80211_hw_set(wl->hw, QUEUE_CONTROL); + ieee80211_hw_set(wl->hw, TX_AMPDU_SETUP_IN_HW); + ieee80211_hw_set(wl->hw, AMPDU_AGGREGATION); + ieee80211_hw_set(wl->hw, AP_LINK_PS); + ieee80211_hw_set(wl->hw, SPECTRUM_MGMT); + ieee80211_hw_set(wl->hw, REPORTS_TX_ACK_STATUS); + ieee80211_hw_set(wl->hw, CONNECTION_MONITOR); + ieee80211_hw_set(wl->hw, HAS_RATE_CONTROL); + ieee80211_hw_set(wl->hw, SUPPORTS_DYNAMIC_PS); + ieee80211_hw_set(wl->hw, SIGNAL_DBM); + ieee80211_hw_set(wl->hw, SUPPORTS_PS); wl->hw->wiphy->cipher_suites = cipher_suites; wl->hw->wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 89b6f69f09c8..e539d9b1b562 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -1397,10 +1397,10 @@ struct ieee80211_hw *zd_mac_alloc_hw(struct usb_interface *intf) hw->wiphy->bands[IEEE80211_BAND_2GHZ] = &mac->band; - hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | - IEEE80211_HW_SIGNAL_UNSPEC | - IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | - IEEE80211_HW_MFP_CAPABLE; + ieee80211_hw_set(hw, MFP_CAPABLE); + ieee80211_hw_set(hw, HOST_BROADCAST_PS_BUFFERING); + ieee80211_hw_set(hw, RX_INCLUDES_FCS); + ieee80211_hw_set(hw, SIGNAL_UNSPEC); hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_MESH_POINT) | diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index ecd7c0f82481..458bc340aece 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -1793,10 +1793,10 @@ vt6655_probe(struct pci_dev *pcid, const struct pci_device_id *ent) wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_AP); - priv->hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | - IEEE80211_HW_REPORTS_TX_ACK_STATUS | - IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_TIMING_BEACON_ONLY; + ieee80211_hw_set(priv->hw, TIMING_BEACON_ONLY); + ieee80211_hw_set(priv->hw, SIGNAL_DBM); + ieee80211_hw_set(priv->hw, RX_INCLUDES_FCS); + ieee80211_hw_set(priv->hw, REPORTS_TX_ACK_STATUS); priv->hw->max_signal = 100; diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index 0d97b6457ead..2ccfbff37c42 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -978,10 +978,10 @@ vt6656_probe(struct usb_interface *intf, const struct usb_device_id *id) wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_AP); - priv->hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | - IEEE80211_HW_REPORTS_TX_ACK_STATUS | - IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_TIMING_BEACON_ONLY; + ieee80211_hw_set(priv->hw, TIMING_BEACON_ONLY); + ieee80211_hw_set(priv->hw, SIGNAL_DBM); + ieee80211_hw_set(priv->hw, RX_INCLUDES_FCS); + ieee80211_hw_set(priv->hw, REPORTS_TX_ACK_STATUS); priv->hw->max_signal = 100; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index faadb736ac4d..6b1077c2a63f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1887,37 +1887,42 @@ struct ieee80211_txq { * * @IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS: The HW supports scanning on all bands * in one command, mac80211 doesn't have to run separate scans per band. + * + * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { - IEEE80211_HW_HAS_RATE_CONTROL = 1<<0, - IEEE80211_HW_RX_INCLUDES_FCS = 1<<1, - IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING = 1<<2, - IEEE80211_HW_SIGNAL_UNSPEC = 1<<5, - IEEE80211_HW_SIGNAL_DBM = 1<<6, - IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC = 1<<7, - IEEE80211_HW_SPECTRUM_MGMT = 1<<8, - IEEE80211_HW_AMPDU_AGGREGATION = 1<<9, - IEEE80211_HW_SUPPORTS_PS = 1<<10, - IEEE80211_HW_PS_NULLFUNC_STACK = 1<<11, - IEEE80211_HW_SUPPORTS_DYNAMIC_PS = 1<<12, - IEEE80211_HW_MFP_CAPABLE = 1<<13, - IEEE80211_HW_WANT_MONITOR_VIF = 1<<14, - IEEE80211_HW_NO_AUTO_VIF = 1<<15, - IEEE80211_HW_SW_CRYPTO_CONTROL = 1<<16, - IEEE80211_HW_SUPPORT_FAST_XMIT = 1<<17, - IEEE80211_HW_REPORTS_TX_ACK_STATUS = 1<<18, - IEEE80211_HW_CONNECTION_MONITOR = 1<<19, - IEEE80211_HW_QUEUE_CONTROL = 1<<20, - IEEE80211_HW_SUPPORTS_PER_STA_GTK = 1<<21, - IEEE80211_HW_AP_LINK_PS = 1<<22, - IEEE80211_HW_TX_AMPDU_SETUP_IN_HW = 1<<23, - IEEE80211_HW_SUPPORTS_RC_TABLE = 1<<24, - IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF = 1<<25, - IEEE80211_HW_TIMING_BEACON_ONLY = 1<<26, - IEEE80211_HW_SUPPORTS_HT_CCK_RATES = 1<<27, - IEEE80211_HW_CHANCTX_STA_CSA = 1<<28, - IEEE80211_HW_SUPPORTS_CLONED_SKBS = 1<<29, - IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS = 1<<30, + IEEE80211_HW_HAS_RATE_CONTROL, + IEEE80211_HW_RX_INCLUDES_FCS, + IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING, + IEEE80211_HW_SIGNAL_UNSPEC, + IEEE80211_HW_SIGNAL_DBM, + IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC, + IEEE80211_HW_SPECTRUM_MGMT, + IEEE80211_HW_AMPDU_AGGREGATION, + IEEE80211_HW_SUPPORTS_PS, + IEEE80211_HW_PS_NULLFUNC_STACK, + IEEE80211_HW_SUPPORTS_DYNAMIC_PS, + IEEE80211_HW_MFP_CAPABLE, + IEEE80211_HW_WANT_MONITOR_VIF, + IEEE80211_HW_NO_AUTO_VIF, + IEEE80211_HW_SW_CRYPTO_CONTROL, + IEEE80211_HW_SUPPORT_FAST_XMIT, + IEEE80211_HW_REPORTS_TX_ACK_STATUS, + IEEE80211_HW_CONNECTION_MONITOR, + IEEE80211_HW_QUEUE_CONTROL, + IEEE80211_HW_SUPPORTS_PER_STA_GTK, + IEEE80211_HW_AP_LINK_PS, + IEEE80211_HW_TX_AMPDU_SETUP_IN_HW, + IEEE80211_HW_SUPPORTS_RC_TABLE, + IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF, + IEEE80211_HW_TIMING_BEACON_ONLY, + IEEE80211_HW_SUPPORTS_HT_CCK_RATES, + IEEE80211_HW_CHANCTX_STA_CSA, + IEEE80211_HW_SUPPORTS_CLONED_SKBS, + IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS, + + /* keep last, obviously */ + NUM_IEEE80211_HW_FLAGS }; /** @@ -2024,7 +2029,7 @@ struct ieee80211_hw { struct wiphy *wiphy; const char *rate_control_algorithm; void *priv; - u32 flags; + unsigned long flags[BITS_TO_LONGS(NUM_IEEE80211_HW_FLAGS)]; unsigned int extra_tx_headroom; unsigned int extra_beacon_tailroom; int vif_data_size; @@ -2050,6 +2055,20 @@ struct ieee80211_hw { int txq_ac_max_pending; }; +static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw, + enum ieee80211_hw_flags flg) +{ + return test_bit(flg, hw->flags); +} +#define ieee80211_hw_check(hw, flg) _ieee80211_hw_check(hw, IEEE80211_HW_##flg) + +static inline void _ieee80211_hw_set(struct ieee80211_hw *hw, + enum ieee80211_hw_flags flg) +{ + return __set_bit(flg, hw->flags); +} +#define ieee80211_hw_set(hw, flg) _ieee80211_hw_set(hw, IEEE80211_HW_##flg) + /** * struct ieee80211_scan_request - hw scan request * diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index cce9d425c718..c8ba2e77737c 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -564,8 +564,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, return -EINVAL; if ((tid >= IEEE80211_NUM_TIDS) || - !(local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) || - (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW)) + !ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION) || + ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW)) return -EINVAL; ht_dbg(sdata, "Open BA session requested for %pM tid %u\n", diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1d01190535b0..bf7023f6c327 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1763,7 +1763,7 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, /* our RSSI threshold implementation is supported only for * devices that report signal in dBm. */ - if (!(sdata->local->hw.flags & IEEE80211_HW_SIGNAL_DBM)) + if (!ieee80211_hw_check(&sdata->local->hw, SIGNAL_DBM)) return -ENOTSUPP; conf->rssi_threshold = nconf->rssi_threshold; } @@ -2407,7 +2407,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; - if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) + if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS)) return -EOPNOTSUPP; if (enabled == sdata->u.mgd.powersave && @@ -2422,7 +2422,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, __ieee80211_request_smps_mgd(sdata, sdata->u.mgd.req_smps); sdata_unlock(sdata); - if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) + if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); ieee80211_recalc_ps(local, -1); @@ -2466,7 +2466,7 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; - if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) { + if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { ret = drv_set_bitrate_mask(local, sdata, mask); if (ret) return ret; @@ -3451,7 +3451,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN | IEEE80211_TX_INTFL_OFFCHAN_TX_OK; - if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) + if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) IEEE80211_SKB_CB(skb)->hw_queue = local->hw.offchannel_tx_hw_queue; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 1b94d2704c27..3ea8b7de9633 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -91,56 +91,66 @@ static const struct file_operations reset_ops = { }; #endif +static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = { +#define FLAG(F) [IEEE80211_HW_##F] = #F + FLAG(HAS_RATE_CONTROL), + FLAG(RX_INCLUDES_FCS), + FLAG(HOST_BROADCAST_PS_BUFFERING), + FLAG(SIGNAL_UNSPEC), + FLAG(SIGNAL_DBM), + FLAG(NEED_DTIM_BEFORE_ASSOC), + FLAG(SPECTRUM_MGMT), + FLAG(AMPDU_AGGREGATION), + FLAG(SUPPORTS_PS), + FLAG(PS_NULLFUNC_STACK), + FLAG(SUPPORTS_DYNAMIC_PS), + FLAG(MFP_CAPABLE), + FLAG(WANT_MONITOR_VIF), + FLAG(NO_AUTO_VIF), + FLAG(SW_CRYPTO_CONTROL), + FLAG(SUPPORT_FAST_XMIT), + FLAG(REPORTS_TX_ACK_STATUS), + FLAG(CONNECTION_MONITOR), + FLAG(QUEUE_CONTROL), + FLAG(SUPPORTS_PER_STA_GTK), + FLAG(AP_LINK_PS), + FLAG(TX_AMPDU_SETUP_IN_HW), + FLAG(SUPPORTS_RC_TABLE), + FLAG(P2P_DEV_ADDR_FOR_INTF), + FLAG(TIMING_BEACON_ONLY), + FLAG(SUPPORTS_HT_CCK_RATES), + FLAG(CHANCTX_STA_CSA), + FLAG(SUPPORTS_CLONED_SKBS), + FLAG(SINGLE_SCAN_ON_ALL_BANDS), + + /* keep last for the build bug below */ + (void *)0x1 +#undef FLAG +}; + static ssize_t hwflags_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; - int mxln = 500; + size_t bufsz = 30 * NUM_IEEE80211_HW_FLAGS; + char *buf = kzalloc(bufsz, GFP_KERNEL); + char *pos = buf, *end = buf + bufsz - 1; ssize_t rv; - char *buf = kzalloc(mxln, GFP_KERNEL); - int sf = 0; /* how many written so far */ + int i; if (!buf) - return 0; - - sf += scnprintf(buf, mxln - sf, "0x%x\n", local->hw.flags); - if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) - sf += scnprintf(buf + sf, mxln - sf, "HAS_RATE_CONTROL\n"); - if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) - sf += scnprintf(buf + sf, mxln - sf, "RX_INCLUDES_FCS\n"); - if (local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING) - sf += scnprintf(buf + sf, mxln - sf, - "HOST_BCAST_PS_BUFFERING\n"); - if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) - sf += scnprintf(buf + sf, mxln - sf, "SIGNAL_UNSPEC\n"); - if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) - sf += scnprintf(buf + sf, mxln - sf, "SIGNAL_DBM\n"); - if (local->hw.flags & IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC) - sf += scnprintf(buf + sf, mxln - sf, - "NEED_DTIM_BEFORE_ASSOC\n"); - if (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT) - sf += scnprintf(buf + sf, mxln - sf, "SPECTRUM_MGMT\n"); - if (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) - sf += scnprintf(buf + sf, mxln - sf, "AMPDU_AGGREGATION\n"); - if (local->hw.flags & IEEE80211_HW_SUPPORTS_PS) - sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_PS\n"); - if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) - sf += scnprintf(buf + sf, mxln - sf, "PS_NULLFUNC_STACK\n"); - if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) - sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_DYNAMIC_PS\n"); - if (local->hw.flags & IEEE80211_HW_MFP_CAPABLE) - sf += scnprintf(buf + sf, mxln - sf, "MFP_CAPABLE\n"); - if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) - sf += scnprintf(buf + sf, mxln - sf, - "REPORTS_TX_ACK_STATUS\n"); - if (local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) - sf += scnprintf(buf + sf, mxln - sf, "CONNECTION_MONITOR\n"); - if (local->hw.flags & IEEE80211_HW_SUPPORTS_PER_STA_GTK) - sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_PER_STA_GTK\n"); - if (local->hw.flags & IEEE80211_HW_AP_LINK_PS) - sf += scnprintf(buf + sf, mxln - sf, "AP_LINK_PS\n"); - if (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW) - sf += scnprintf(buf + sf, mxln - sf, "TX_AMPDU_SETUP_IN_HW\n"); + return -ENOMEM; + + /* fail compilation if somebody adds or removes + * a flag without updating the name array above + */ + BUILD_BUG_ON(hw_flag_names[NUM_IEEE80211_HW_FLAGS] != (void *)0x1); + + for (i = 0; i < NUM_IEEE80211_HW_FLAGS; i++) { + if (test_bit(i, local->hw.flags)) + pos += scnprintf(pos, end - pos, "%s", + hw_flag_names[i]); + } rv = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); kfree(buf); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index c01e681b90fb..32a2e707e222 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -146,7 +146,7 @@ static inline int drv_add_interface(struct ieee80211_local *local, if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN || (sdata->vif.type == NL80211_IFTYPE_MONITOR && - !(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF) && + !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) && !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)))) return -EINVAL; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b2e85ffca7ed..ed1edac14372 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -338,7 +338,7 @@ static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata, if ((iftype != NL80211_IFTYPE_AP && iftype != NL80211_IFTYPE_P2P_GO && iftype != NL80211_IFTYPE_MESH_POINT) || - !(sdata->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)) { + !ieee80211_hw_check(&sdata->local->hw, QUEUE_CONTROL)) { sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE; return 0; } @@ -378,7 +378,7 @@ static void ieee80211_set_default_queues(struct ieee80211_sub_if_data *sdata) int i; for (i = 0; i < IEEE80211_NUM_ACS; i++) { - if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) + if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) sdata->vif.hw_queue[i] = IEEE80211_INVAL_HW_QUEUE; else if (local->hw.queues >= IEEE80211_NUM_ACS) sdata->vif.hw_queue[i] = i; @@ -393,7 +393,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) struct ieee80211_sub_if_data *sdata; int ret; - if (!(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF)) + if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) return 0; ASSERT_RTNL(); @@ -454,7 +454,7 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - if (!(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF)) + if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) return; ASSERT_RTNL(); @@ -1586,7 +1586,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: - if (local->hw.flags & IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF) { + if (ieee80211_hw_check(&local->hw, P2P_DEV_ADDR_FOR_INTF)) { list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) continue; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index b9aac809628f..8abc31ebcf61 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -147,7 +147,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) * is supported; if not, return. */ if (sta && !(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE) && - !(key->local->hw.flags & IEEE80211_HW_SUPPORTS_PER_STA_GTK)) + !ieee80211_hw_check(&key->local->hw, SUPPORTS_PER_STA_GTK)) goto out_unsupported; if (sta && !sta->uploaded) @@ -201,7 +201,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) /* all of these we can do in software - if driver can */ if (ret == 1) return 0; - if (key->local->hw.flags & IEEE80211_HW_SW_CRYPTO_CONTROL) + if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) return -EINVAL; return 0; default: diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 674164fe5cdb..3c63468b4dfb 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -661,7 +661,7 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) { bool have_wep = !(IS_ERR(local->wep_tx_tfm) || IS_ERR(local->wep_rx_tfm)); - bool have_mfp = local->hw.flags & IEEE80211_HW_MFP_CAPABLE; + bool have_mfp = ieee80211_hw_check(&local->hw, MFP_CAPABLE); int n_suites = 0, r = 0, w = 0; u32 *suites; static const u32 cipher_suites[] = { @@ -681,7 +681,7 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) WLAN_CIPHER_SUITE_BIP_GMAC_256, }; - if (local->hw.flags & IEEE80211_HW_SW_CRYPTO_CONTROL || + if (ieee80211_hw_check(&local->hw, SW_CRYPTO_CONTROL) || local->hw.wiphy->cipher_suites) { /* If the driver advertises, or doesn't support SW crypto, * we only need to remove WEP if necessary. @@ -797,7 +797,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) netdev_features_t feature_whitelist; struct cfg80211_chan_def dflt_chandef = {}; - if (hw->flags & IEEE80211_HW_QUEUE_CONTROL && + if (ieee80211_hw_check(hw, QUEUE_CONTROL) && (local->hw.offchannel_tx_hw_queue == IEEE80211_INVAL_HW_QUEUE || local->hw.offchannel_tx_hw_queue >= local->hw.queues)) return -EINVAL; @@ -945,9 +945,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) /* mac80211 supports control port protocol changing */ local->hw.wiphy->flags |= WIPHY_FLAG_CONTROL_PORT_PROTOCOL; - if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { + if (ieee80211_hw_check(&local->hw, SIGNAL_DBM)) { local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM; - } else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) { + } else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC)) { local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC; if (hw->max_signal <= 0) { result = -EINVAL; @@ -1001,7 +1001,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->hw.wiphy->flags |= WIPHY_FLAG_TDLS_EXTERNAL_SETUP; /* mac80211 supports eCSA, if the driver supports STA CSA at all */ - if (local->hw.flags & IEEE80211_HW_CHANCTX_STA_CSA) + if (ieee80211_hw_check(&local->hw, CHANCTX_STA_CSA)) local->ext_capa[0] |= WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING; local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM; @@ -1069,7 +1069,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) /* add one default STA interface if supported */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION) && - !(hw->flags & IEEE80211_HW_NO_AUTO_VIF)) { + !ieee80211_hw_check(hw, NO_AUTO_VIF)) { result = ieee80211_if_add(local, "wlan%d", NET_NAME_ENUM, NULL, NL80211_IFTYPE_STATION, NULL); if (result) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 235ed68dca36..9b2cc278ac2a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -118,7 +118,7 @@ void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata) if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER) return; - if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) + if (ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR)) return; mod_timer(&sdata->u.mgd.bcn_mon_timer, @@ -134,7 +134,7 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata) ifmgd->probe_send_count = 0; - if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) + if (ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR)) return; mod_timer(&sdata->u.mgd.conn_mon_timer, @@ -677,7 +677,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) capab |= WLAN_CAPABILITY_PRIVACY; if ((assoc_data->capability & WLAN_CAPABILITY_SPECTRUM_MGMT) && - (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT)) + ieee80211_hw_check(&local->hw, SPECTRUM_MGMT)) capab |= WLAN_CAPABILITY_SPECTRUM_MGMT; if (ifmgd->flags & IEEE80211_STA_ENABLE_RRM) @@ -885,7 +885,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) drv_mgd_prepare_tx(local, sdata); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; - if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS | IEEE80211_TX_INTFL_MLME_CONN_TX; ieee80211_tx_skb(sdata, skb); @@ -927,7 +927,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | IEEE80211_TX_INTFL_OFFCHAN_TX_OK; - if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL) @@ -1198,7 +1198,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, chanctx = container_of(conf, struct ieee80211_chanctx, conf); if (local->use_chanctx && - !(local->hw.flags & IEEE80211_HW_CHANCTX_STA_CSA)) { + !ieee80211_hw_check(&local->hw, CHANCTX_STA_CSA)) { sdata_info(sdata, "driver doesn't support chan-switch with channel contexts\n"); goto drop_connection; @@ -1407,15 +1407,15 @@ static void ieee80211_enable_ps(struct ieee80211_local *local, return; if (conf->dynamic_ps_timeout > 0 && - !(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)) { + !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) { mod_timer(&local->dynamic_ps_timer, jiffies + msecs_to_jiffies(conf->dynamic_ps_timeout)); } else { - if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) + if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK)) ieee80211_send_nullfunc(local, sdata, 1); - if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && - (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) + if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) && + ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) return; conf->flags |= IEEE80211_CONF_PS; @@ -1474,7 +1474,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) int count = 0; int timeout; - if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) { + if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS)) { local->ps_sdata = NULL; return; } @@ -1620,7 +1620,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } - if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && + if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) && !(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) { if (drv_tx_frames_pending(local)) { mod_timer(&local->dynamic_ps_timer, jiffies + @@ -1633,8 +1633,8 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) } } - if (!((local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) && - (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) || + if (!(ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS) && + ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK)) || (ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) { ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; local->hw.conf.flags |= IEEE80211_CONF_PS; @@ -2159,7 +2159,7 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) ieee80211_recalc_ps(local, -1); mutex_unlock(&local->iflist_mtx); - if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) + if (ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR)) goto out; /* @@ -2257,7 +2257,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) */ ifmgd->probe_send_count++; - if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { + if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) { ifmgd->nullfunc_failed = false; ieee80211_send_nullfunc(sdata->local, sdata, 0); } else { @@ -2562,7 +2562,7 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, return; auth_data->expected_transaction = 4; drv_mgd_prepare_tx(sdata->local, sdata); - if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | IEEE80211_TX_INTFL_MLME_CONN_TX; ieee80211_send_auth(sdata, 3, auth_data->algorithm, 0, @@ -3337,7 +3337,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } ifmgd->have_beacon = true; ifmgd->assoc_data->need_beacon = false; - if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { + if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) { sdata->vif.bss_conf.sync_tsf = le64_to_cpu(mgmt->u.beacon.timestamp); sdata->vif.bss_conf.sync_device_ts = @@ -3443,7 +3443,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, len - baselen, false, &elems, care_about_ies, ncrc); - if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) { + if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK)) { bool directed_tim = ieee80211_check_tim(elems.tim, elems.tim_len, ifmgd->aid); @@ -3511,7 +3511,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, * the driver will use them. The synchronized view is currently * guaranteed only in certain callbacks. */ - if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { + if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) { sdata->vif.bss_conf.sync_tsf = le64_to_cpu(mgmt->u.beacon.timestamp); sdata->vif.bss_conf.sync_device_ts = @@ -3749,7 +3749,7 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) auth_data->expected_transaction = trans; } - if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | IEEE80211_TX_INTFL_MLME_CONN_TX; @@ -3822,7 +3822,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) IEEE80211_ASSOC_MAX_TRIES); ieee80211_send_assoc(sdata); - if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { + if (!ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT; assoc_data->timeout_started = true; run_again(sdata, assoc_data->timeout); @@ -3936,7 +3936,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); - if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) max_tries = max_nullfunc_tries; else max_tries = max_probe_tries; @@ -3961,7 +3961,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) } } else if (time_is_after_jiffies(ifmgd->probe_timeout)) run_again(sdata, ifmgd->probe_timeout); - else if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { + else if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { mlme_dbg(sdata, "Failed to send nullfunc to AP %pM after %dms, disconnecting\n", bssid, probe_wait_ms); @@ -4030,14 +4030,11 @@ static void ieee80211_sta_monitor_work(struct work_struct *work) static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) { - u32 flags; - if (sdata->vif.type == NL80211_IFTYPE_STATION) { __ieee80211_stop_poll(sdata); /* let's probe the connection once */ - flags = sdata->local->hw.flags; - if (!(flags & IEEE80211_HW_CONNECTION_MONITOR)) + if (!ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR)) ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.monitor_work); /* and do all the other regular work too */ @@ -4450,8 +4447,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.sync_dtim_count = tim_ie[2]; else sdata->vif.bss_conf.sync_dtim_count = 0; - } else if (!(local->hw.flags & - IEEE80211_HW_TIMING_BEACON_ONLY)) { + } else if (!ieee80211_hw_check(&sdata->local->hw, + TIMING_BEACON_ONLY)) { ies = rcu_dereference(cbss->proberesp_ies); /* must be non-NULL since beacon IEs were NULL */ sdata->vif.bss_conf.sync_tsf = ies->tsf; @@ -4829,7 +4826,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); if (WARN((sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD) && - (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK), + ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK), "U-APSD not supported with HW_PS_NULLFUNC_STACK\n")) sdata->vif.driver_flags &= ~IEEE80211_VIF_SUPPORTS_UAPSD; @@ -4910,7 +4907,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); beacon_ies = rcu_dereference(req->bss->beacon_ies); - if (sdata->local->hw.flags & IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC && + if (ieee80211_hw_check(&sdata->local->hw, NEED_DTIM_BEFORE_ASSOC) && !beacon_ies) { /* * Wait up to one beacon interval ... @@ -4937,7 +4934,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, assoc_data->timeout = jiffies; assoc_data->timeout_started = true; - if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { + if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) { sdata->vif.bss_conf.sync_tsf = beacon_ies->tsf; sdata->vif.bss_conf.sync_device_ts = bss->device_ts_beacon; diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 683f0e3cb124..f2c75cf491fc 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -46,7 +46,7 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) } if (!local->offchannel_ps_enabled || - !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) + !ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK)) /* * If power save was enabled, no need to send a nullfunc * frame because AP knows that we are sleeping. But if the diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index ac6ad6238e3a..06b60980c62c 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -23,7 +23,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) ieee80211_del_virtual_monitor(local); - if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { + if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) { mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { set_sta_flag(sta, WLAN_STA_BLOCK_BA); @@ -82,7 +82,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) if (err < 0) { local->quiescing = false; local->wowlan = false; - if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { + if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) { mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index de69adf24f53..36ba7c4f0283 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -680,7 +680,7 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, info->control.rates[i].count = 0; } - if (sdata->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) + if (ieee80211_hw_check(&sdata->local->hw, HAS_RATE_CONTROL)) return; if (ista) { @@ -691,7 +691,7 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, ref->ops->get_rate(ref->priv, NULL, NULL, txrc); } - if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_RC_TABLE) + if (ieee80211_hw_check(&sdata->local->hw, SUPPORTS_RC_TABLE)) return; ieee80211_get_tx_rates(&sdata->vif, ista, txrc->skb, @@ -733,7 +733,7 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, if (local->open_count) return -EBUSY; - if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) { + if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { if (WARN_ON(!local->ops->set_rts_threshold)) return -EINVAL; return 0; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 7430a1df2ab1..543b67233535 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1070,7 +1070,7 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, if (sband->band != IEEE80211_BAND_2GHZ) return; - if (!(mp->hw->flags & IEEE80211_HW_SUPPORTS_HT_CCK_RATES)) + if (!ieee80211_hw_check(mp->hw, SUPPORTS_HT_CCK_RATES)) return; mi->cck_supported = 0; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7d85f7516324..5dae166cb7f5 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -52,7 +52,7 @@ static struct sk_buff *remove_monitor_info(struct ieee80211_local *local, struct sk_buff *skb, unsigned int rtap_vendor_space) { - if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) { + if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)) { if (likely(skb->len > FCS_LEN)) __pskb_trim(skb, skb->len - FCS_LEN); else { @@ -110,7 +110,7 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, len = ALIGN(len, 8); len += 8; } - if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) + if (ieee80211_hw_check(&local->hw, SIGNAL_DBM)) len += 1; /* antenna field, if we don't have per-chain info */ @@ -185,7 +185,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, } mpdulen = skb->len; - if (!(has_fcs && (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS))) + if (!(has_fcs && ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS))) mpdulen += FCS_LEN; rthdr = (struct ieee80211_radiotap_header *)skb_push(skb, rtap_len); @@ -239,7 +239,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, } /* IEEE80211_RADIOTAP_FLAGS */ - if (has_fcs && (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)) + if (has_fcs && ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)) *pos |= IEEE80211_RADIOTAP_F_FCS; if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) *pos |= IEEE80211_RADIOTAP_F_BADFCS; @@ -289,7 +289,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos += 2; /* IEEE80211_RADIOTAP_DBM_ANTSIGNAL */ - if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM && + if (ieee80211_hw_check(&local->hw, SIGNAL_DBM) && !(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { *pos = status->signal; rthdr->it_present |= @@ -458,7 +458,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, * the SKB because it has a bad FCS/PLCP checksum. */ - if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) + if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)) present_fcs_len = FCS_LEN; /* ensure hdr->frame_control and vendor radiotap data are in skb head */ @@ -1197,7 +1197,7 @@ static void sta_ps_start(struct sta_info *sta) atomic_inc(&ps->num_sta_ps); set_sta_flag(sta, WLAN_STA_PS_STA); - if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS)) + if (!ieee80211_hw_check(&local->hw, AP_LINK_PS)) drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta); ps_dbg(sdata, "STA %pM aid %d enters power save mode\n", sta->sta.addr, sta->sta.aid); @@ -1245,7 +1245,7 @@ int ieee80211_sta_ps_transition(struct ieee80211_sta *sta, bool start) struct sta_info *sta_inf = container_of(sta, struct sta_info, sta); bool in_ps; - WARN_ON(!(sta_inf->local->hw.flags & IEEE80211_HW_AP_LINK_PS)); + WARN_ON(!ieee80211_hw_check(&sta_inf->local->hw, AP_LINK_PS)); /* Don't let the same PS state be set twice */ in_ps = test_sta_flag(sta_inf, WLAN_STA_PS_STA); @@ -1281,7 +1281,7 @@ ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx) * uAPSD and PS-Poll frames (the latter shouldn't even come up from * it to mac80211 since they're handled.) */ - if (sdata->local->hw.flags & IEEE80211_HW_AP_LINK_PS) + if (ieee80211_hw_check(&sdata->local->hw, AP_LINK_PS)) return RX_CONTINUE; /* @@ -1413,7 +1413,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) * Change STA power saving mode only at the end of a frame * exchange sequence. */ - if (!(sta->local->hw.flags & IEEE80211_HW_AP_LINK_PS) && + if (!ieee80211_hw_check(&sta->local->hw, AP_LINK_PS) && !ieee80211_has_morefrags(hdr->frame_control) && !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) && (rx->sdata->vif.type == NL80211_IFTYPE_AP || @@ -2543,7 +2543,7 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) !(rx->flags & IEEE80211_RX_BEACON_REPORTED)) { int sig = 0; - if (rx->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) + if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM)) sig = status->signal; cfg80211_report_obss_beacon(rx->local->hw.wiphy, @@ -2874,7 +2874,7 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx) * it transmitted were processed or returned. */ - if (rx->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) + if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM)) sig = status->signal; if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig, @@ -2939,7 +2939,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) info->flags = IEEE80211_TX_CTL_TX_OFFCHAN | IEEE80211_TX_INTFL_OFFCHAN_TX_OK | IEEE80211_TX_CTL_NO_CCK_RATE; - if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) + if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) info->hw_queue = local->hw.offchannel_tx_hw_queue; } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 1454c1b7d06c..11d0901ebb7b 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -71,9 +71,9 @@ ieee80211_bss_info_update(struct ieee80211_local *local, s32 signal = 0; bool signal_valid; - if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) + if (ieee80211_hw_check(&local->hw, SIGNAL_DBM)) signal = rx_status->signal * 100; - else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) + else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC)) signal = (rx_status->signal * 100) / local->hw.max_signal; scan_width = NL80211_BSS_CHAN_WIDTH_20; @@ -263,7 +263,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) if (test_bit(SCAN_HW_CANCELLED, &local->scanning)) return false; - if (local->hw.flags & IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS) { + if (ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS)) { for (i = 0; i < req->n_channels; i++) { local->hw_scan_req->req.channels[i] = req->channels[i]; bands_used |= BIT(req->channels[i]->band); @@ -332,7 +332,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) return; if (hw_scan && !aborted && - !(local->hw.flags & IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS) && + !ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS) && ieee80211_prep_hw_scan(local)) { int rc; @@ -526,7 +526,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, local->hw_scan_ies_bufsize = local->scan_ies_len + req->ie_len; - if (local->hw.flags & IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS) { + if (ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS)) { int i, n_bands = 0; u8 bands_counted = 0; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index ce0c1662de42..666ddac3c87c 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -282,7 +282,7 @@ static void sta_deliver_ps_frames(struct work_struct *wk) static int sta_prepare_rate_control(struct ieee80211_local *local, struct sta_info *sta, gfp_t gfp) { - if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) + if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) return 0; sta->rate_ctrl = local->rate_ctrl; @@ -643,7 +643,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) } /* No need to do anything if the driver does all */ - if (local->hw.flags & IEEE80211_HW_AP_LINK_PS) + if (ieee80211_hw_check(&local->hw, AP_LINK_PS)) return; if (sta->dead) @@ -1148,7 +1148,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) sta->driver_buffered_tids = 0; sta->txq_buffered_tids = 0; - if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS)) + if (!ieee80211_hw_check(&local->hw, AP_LINK_PS)) drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta); if (sta->sta.txq[0]) { @@ -1879,8 +1879,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->rx_beacon_signal_avg = ieee80211_ave_rssi(&sdata->vif); } - if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || - (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { + if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) || + ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) { if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) { sinfo->signal = (s8)sta->last_signal; sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); @@ -1932,7 +1932,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) && - local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { + ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_RETRIES); tidstats->tx_msdu_retries = sta->tx_msdu_retries[i]; @@ -1940,7 +1940,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) && - local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { + ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_FAILED); tidstats->tx_msdu_failed = sta->tx_msdu_failed[i]; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 67c428735a51..45628f37c083 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -181,7 +181,7 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; - if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) sta->last_rx = jiffies; if (ieee80211_is_data_qos(mgmt->frame_control)) { @@ -414,8 +414,7 @@ static void ieee80211_tdls_td_tx_handle(struct ieee80211_local *local, if (is_teardown) { /* This mechanism relies on being able to get ACKs */ - WARN_ON(!(local->hw.flags & - IEEE80211_HW_REPORTS_TX_ACK_STATUS)); + WARN_ON(!ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)); /* Check if peer has ACKed */ if (flags & IEEE80211_TX_STAT_ACK) { @@ -731,7 +730,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) ieee80211_get_qos_ctl(hdr), sta, true, acked); - if ((local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) && + if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) && (ieee80211_is_data(hdr->frame_control)) && (rates_idx != -1)) sta->last_tx_rate = info->status.rates[rates_idx]; @@ -798,11 +797,11 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) ieee80211_frame_acked(sta, skb); if ((sta->sdata->vif.type == NL80211_IFTYPE_STATION) && - (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) + ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) ieee80211_sta_tx_notify(sta->sdata, (void *) skb->data, acked, info->status.tx_time); - if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { + if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { if (info->flags & IEEE80211_TX_STAT_ACK) { if (sta->lost_packets) sta->lost_packets = 0; @@ -853,7 +852,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) } if (ieee80211_is_nullfunc(fc) && ieee80211_has_pm(fc) && - (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) && + ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS) && !(info->flags & IEEE80211_TX_CTL_INJECTED) && local->ps_sdata && !(local->scanning)) { if (info->flags & IEEE80211_TX_STAT_ACK) { diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 28298cc50326..ad31b2dab4f5 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -935,7 +935,7 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, * packet through the AP. */ if ((action_code == WLAN_TDLS_TEARDOWN) && - (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { + ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) { bool try_resend; /* Should we keep skb for possible resend */ /* If not sending directly to peer - no point in keeping skb */ diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7fe528adc5ae..8410bb3bf5e8 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -211,11 +211,11 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) struct ieee80211_if_managed *ifmgd; /* driver doesn't support power save */ - if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) + if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS)) return TX_CONTINUE; /* hardware does dynamic power save */ - if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) + if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) return TX_CONTINUE; /* dynamic power save disabled */ @@ -431,7 +431,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) if (ieee80211_is_probe_req(hdr->frame_control)) return TX_CONTINUE; - if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) + if (ieee80211_hw_check(&tx->local->hw, QUEUE_CONTROL)) info->hw_queue = tx->sdata->vif.cab_queue; /* no stations in PS mode */ @@ -441,7 +441,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM; /* device releases frame after DTIM beacon */ - if (!(tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING)) + if (!ieee80211_hw_check(&tx->local->hw, HOST_BROADCAST_PS_BUFFERING)) return TX_CONTINUE; /* buffered in mac80211 */ @@ -1185,8 +1185,8 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) && !ieee80211_is_qos_nullfunc(hdr->frame_control) && - (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) && - !(local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW)) { + ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION) && + !ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW)) { struct tid_ampdu_tx *tid_tx; qc = ieee80211_get_qos_ctl(hdr); @@ -1429,7 +1429,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local, vif = &sdata->vif; info->hw_queue = vif->hw_queue[skb_get_queue_mapping(skb)]; - } else if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) { + } else if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) { dev_kfree_skb(skb); return true; } else @@ -1475,7 +1475,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) CALL_TXH(ieee80211_tx_h_ps_buf); CALL_TXH(ieee80211_tx_h_check_control_port_protocol); CALL_TXH(ieee80211_tx_h_select_key); - if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) + if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL)) CALL_TXH(ieee80211_tx_h_rate_ctrl); if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) { @@ -1490,7 +1490,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) /* handlers after fragment must be aware of tx info fragmentation! */ CALL_TXH(ieee80211_tx_h_stats); CALL_TXH(ieee80211_tx_h_encrypt); - if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) + if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL)) CALL_TXH(ieee80211_tx_h_calculate_duration); #undef CALL_TXH @@ -1580,7 +1580,7 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, /* set up hw_queue value early */ if (!(info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) || - !(local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)) + !ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; @@ -1607,7 +1607,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, } if (skb_cloned(skb) && - (!(local->hw.flags & IEEE80211_HW_SUPPORTS_CLONED_SKBS) || + (!ieee80211_hw_check(&local->hw, SUPPORTS_CLONED_SKBS) || !skb_clone_writable(skb, ETH_HLEN) || (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt))) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); @@ -2426,7 +2426,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) struct ieee80211_chanctx_conf *chanctx_conf; __le16 fc; - if (!(local->hw.flags & IEEE80211_HW_SUPPORT_FAST_XMIT)) + if (!ieee80211_hw_check(&local->hw, SUPPORT_FAST_XMIT)) return; /* Locking here protects both the pointer itself, and against concurrent @@ -2442,8 +2442,8 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) * cleared/changed already. */ spin_lock_bh(&sta->lock); - if (local->hw.flags & IEEE80211_HW_SUPPORTS_PS && - !(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) && + if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) && + !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS) && sdata->vif.type == NL80211_IFTYPE_STATION) goto out; @@ -2790,7 +2790,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, if (fast_tx->key) info->control.hw_key = &fast_tx->key->conf; - if (!(local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) { + if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { tx.skb = skb; r = ieee80211_tx_h_rate_ctrl(&tx); skb = tx.skb; @@ -3807,7 +3807,7 @@ int ieee80211_reserve_tid(struct ieee80211_sta *pubsta, u8 tid) synchronize_net(); /* Tear down BA sessions so we stop aggregating on this TID */ - if (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) { + if (ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION)) { set_sta_flag(sta, WLAN_STA_BLOCK_BA); __ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_LOCAL_REQUEST); @@ -3821,7 +3821,7 @@ int ieee80211_reserve_tid(struct ieee80211_sta *pubsta, u8 tid) ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_RESERVE_TID); - if (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) + if (ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION)) clear_sta_flag(sta, WLAN_STA_BLOCK_BA); ret = 0; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b864ebc6ab8f..43e5aadd7a89 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -564,7 +564,7 @@ ieee80211_get_vif_queues(struct ieee80211_local *local, { unsigned int queues; - if (sdata && local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) { + if (sdata && ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) { int ac; queues = 0; @@ -592,7 +592,7 @@ void __ieee80211_flush_queues(struct ieee80211_local *local, * If no queue was set, or if the HW doesn't support * IEEE80211_HW_QUEUE_CONTROL - flush all queues */ - if (!queues || !(local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)) + if (!queues || !ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) queues = ieee80211_get_vif_queues(local, sdata); ieee80211_stop_queues_by_reason(&local->hw, queues, @@ -2046,7 +2046,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) * about the sessions, but we and the AP still think they * are active. This is really a workaround though. */ - if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { + if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) { mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { -- cgit v1.2.3 From 37a9a8df8ce9de6ea73349c9ac8bdf6ba4ec4f70 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Wed, 10 Jun 2015 08:44:59 -0400 Subject: net/unix: support SCM_SECURITY for stream sockets SCM_SECURITY was originally only implemented for datagram sockets, not for stream sockets. However, SCM_CREDENTIALS is supported on Unix stream sockets. For consistency, implement Unix stream support for SCM_SECURITY as well. Also clean up the existing code and get rid of the superfluous UNIXSID macro. Motivated by https://bugzilla.redhat.com/show_bug.cgi?id=1224211, where systemd was using SCM_CREDENTIALS and assumed wrongly that SCM_SECURITY was also supported on Unix stream sockets. Signed-off-by: Stephen Smalley Acked-by: Paul Moore Signed-off-by: David S. Miller --- include/net/af_unix.h | 1 - net/unix/af_unix.c | 20 ++++++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index a175ba4a7adb..4a167b30a12f 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -39,7 +39,6 @@ struct unix_skb_parms { }; #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) -#define UNIXSID(skb) (&UNIXCB((skb)).secid) #define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) #define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f25e1675b865..03ee4d359f6a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -140,12 +140,17 @@ static struct hlist_head *unix_sockets_unbound(void *addr) #ifdef CONFIG_SECURITY_NETWORK static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) { - memcpy(UNIXSID(skb), &scm->secid, sizeof(u32)); + UNIXCB(skb).secid = scm->secid; } static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) { - scm->secid = *UNIXSID(skb); + scm->secid = UNIXCB(skb).secid; +} + +static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) +{ + return (scm->secid == UNIXCB(skb).secid); } #else static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) @@ -153,6 +158,11 @@ static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) { } + +static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) +{ + return true; +} #endif /* CONFIG_SECURITY_NETWORK */ /* @@ -1414,6 +1424,7 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen UNIXCB(skb).uid = scm->creds.uid; UNIXCB(skb).gid = scm->creds.gid; UNIXCB(skb).fp = NULL; + unix_get_secdata(scm, skb); if (scm->fp && send_fds) err = unix_attach_fds(scm, skb); @@ -1509,7 +1520,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, if (err < 0) goto out_free; max_level = err + 1; - unix_get_secdata(&scm, skb); skb_put(skb, len - data_len); skb->data_len = data_len; @@ -2118,11 +2128,13 @@ unlock: /* Never glue messages from different writers */ if ((UNIXCB(skb).pid != scm.pid) || !uid_eq(UNIXCB(skb).uid, scm.creds.uid) || - !gid_eq(UNIXCB(skb).gid, scm.creds.gid)) + !gid_eq(UNIXCB(skb).gid, scm.creds.gid) || + !unix_secdata_eq(&scm, skb)) break; } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); + unix_set_secdata(&scm, skb); check_creds = true; } -- cgit v1.2.3 From 4f822c625f9c68267ffee7519519e304858a46c3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 10 Jun 2015 18:07:57 -0700 Subject: net: phy: broadcom: include phy.h for brcmphy.h We utilize inline functions from the PHY library, make sure that we do include phy.h in brcmphy.h in order for the code including brcmphy.h not to have to resolve this inclusion dependency. Fixes: 705314797b8b ("net: phy: broadcom: move shadow 0x1C register accessors to brcmphy.h") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 656da2a12ffe..abb6106f839d 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -1,6 +1,8 @@ #ifndef _LINUX_BRCMPHY_H #define _LINUX_BRCMPHY_H +#include + #define PHY_ID_BCM50610 0x0143bd60 #define PHY_ID_BCM50610M 0x0143bd70 #define PHY_ID_BCM5241 0x0143bc30 -- cgit v1.2.3 From 8bc84b79265f66d5af736473db582e74b28e099d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 10 Jun 2015 18:07:58 -0700 Subject: net: phy: broadcom: define Broadcom pseudo-PHY address in brcmphy.h Define the pseudo-PHY address (30) which is used by all Broadcom Ethernet switches in a shared header file. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index abb6106f839d..697ca7795bd9 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -3,6 +3,11 @@ #include +/* All Broadcom Ethernet switches have a pseudo-PHY at address 30 which is used + * to configure the switch internal registers via MDIO accesses. + */ +#define BRCM_PSEUDO_PHY_ADDR 30 + #define PHY_ID_BCM50610 0x0143bd60 #define PHY_ID_BCM50610M 0x0143bd70 #define PHY_ID_BCM5241 0x0143bc30 -- cgit v1.2.3 From a4244b0cf58d56c171874e85228ba5deffeb017a Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 11 Jun 2015 10:28:16 +0300 Subject: net/ethtool: Add current supported tunable options Add strings array of the current supported tunable options. Signed-off-by: Hadar Hen Zion Reviewed-by: Amir Vadai Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 6 ++++++ net/core/ethtool.c | 12 ++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 0594933cdf55..cd67aec187d9 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -215,6 +215,11 @@ enum tunable_id { ETHTOOL_ID_UNSPEC, ETHTOOL_RX_COPYBREAK, ETHTOOL_TX_COPYBREAK, + /* + * Add your fresh new tubale attribute above and remember to update + * tunable_strings[] in net/core/ethtool.c + */ + __ETHTOOL_TUNABLE_COUNT, }; enum tunable_type_id { @@ -545,6 +550,7 @@ enum ethtool_stringset { ETH_SS_NTUPLE_FILTERS, ETH_SS_FEATURES, ETH_SS_RSS_HASH_FUNCS, + ETH_SS_TUNABLES, }; /** diff --git a/net/core/ethtool.c b/net/core/ethtool.c index eb0c3ace7458..b495ab1797fa 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -106,6 +106,13 @@ rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = { [ETH_RSS_HASH_XOR_BIT] = "xor", }; +static const char +tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = { + [ETHTOOL_ID_UNSPEC] = "Unspec", + [ETHTOOL_RX_COPYBREAK] = "rx-copybreak", + [ETHTOOL_TX_COPYBREAK] = "tx-copybreak", +}; + static int ethtool_get_features(struct net_device *dev, void __user *useraddr) { struct ethtool_gfeatures cmd = { @@ -194,6 +201,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset) if (sset == ETH_SS_RSS_HASH_FUNCS) return ARRAY_SIZE(rss_hash_func_strings); + if (sset == ETH_SS_TUNABLES) + return ARRAY_SIZE(tunable_strings); + if (ops->get_sset_count && ops->get_strings) return ops->get_sset_count(dev, sset); else @@ -211,6 +221,8 @@ static void __ethtool_get_strings(struct net_device *dev, else if (stringset == ETH_SS_RSS_HASH_FUNCS) memcpy(data, rss_hash_func_strings, sizeof(rss_hash_func_strings)); + else if (stringset == ETH_SS_TUNABLES) + memcpy(data, tunable_strings, sizeof(tunable_strings)); else /* ops->get_strings is valid because checked earlier */ ops->get_strings(dev, stringset, data); -- cgit v1.2.3 From 9961127d4bce6325e9a0b0fb105e0c85a6c62cb7 Mon Sep 17 00:00:00 2001 From: Vincent Cuissard Date: Thu, 11 Jun 2015 11:25:47 +0200 Subject: NFC: nci: add generic uart support Some NFC controller supports UART as host interface. As with SPI, a lot of code can be shared between vendor drivers. This patch add the generic support of UART and provides some extension API for vendor specific needs. This code is strongly inspired by the Bluetooth HCI ldisc implementation. NCI UART vendor drivers will have to register themselves to this layer via nci_uart_register. Underlying tty will have to be configured from user land thanks to an ioctl. Signed-off-by: Vincent Cuissard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci.h | 1 + include/net/nfc/nci_core.h | 47 +++++ include/uapi/linux/tty.h | 1 + net/nfc/nci/Kconfig | 7 + net/nfc/nci/Makefile | 3 + net/nfc/nci/uart.c | 495 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 554 insertions(+) create mode 100644 net/nfc/nci/uart.c (limited to 'include') diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index a2f2f3d3196d..75d2e1880059 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -35,6 +35,7 @@ #define NCI_MAX_NUM_RF_CONFIGS 10 #define NCI_MAX_NUM_CONN 10 #define NCI_MAX_PARAM_LEN 251 +#define NCI_MAX_PACKET_SIZE 258 /* NCI Status Codes */ #define NCI_STATUS_OK 0x00 diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 9d77ed556b78..01fc8c531115 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -391,4 +392,50 @@ int nci_spi_send(struct nci_spi *nspi, struct sk_buff *skb); struct sk_buff *nci_spi_read(struct nci_spi *nspi); +/* ----- NCI UART ---- */ + +/* Ioctl */ +#define NCIUARTSETDRIVER _IOW('U', 0, char *) + +enum nci_uart_driver { + NCI_UART_DRIVER_MARVELL = 0, + NCI_UART_DRIVER_MAX +}; + +struct nci_uart; + +struct nci_uart_ops { + int (*open)(struct nci_uart *nci_uart); + void (*close)(struct nci_uart *nci_uart); + int (*recv)(struct nci_uart *nci_uart, struct sk_buff *skb); + int (*recv_buf)(struct nci_uart *nci_uart, const u8 *data, char *flags, + int count); + int (*send)(struct nci_uart *nci_uart, struct sk_buff *skb); + void (*tx_start)(struct nci_uart *nci_uart); + void (*tx_done)(struct nci_uart *nci_uart); +}; + +struct nci_uart { + struct module *owner; + struct nci_uart_ops ops; + const char *name; + enum nci_uart_driver driver; + + /* Dynamic data */ + struct nci_dev *ndev; + spinlock_t rx_lock; + struct work_struct write_work; + struct tty_struct *tty; + unsigned long tx_state; + struct sk_buff_head tx_q; + struct sk_buff *tx_skb; + struct sk_buff *rx_skb; + int rx_packet_len; + void *drv_data; +}; + +int nci_uart_register(struct nci_uart *nu); +void nci_uart_unregister(struct nci_uart *nu); +void nci_uart_set_config(struct nci_uart *nu, int baudrate, int flow_ctrl); + #endif /* __NCI_CORE_H */ diff --git a/include/uapi/linux/tty.h b/include/uapi/linux/tty.h index dac199a2dba5..01c4410352ff 100644 --- a/include/uapi/linux/tty.h +++ b/include/uapi/linux/tty.h @@ -34,5 +34,6 @@ #define N_TI_WL 22 /* for TI's WL BT, FM, GPS combo chips */ #define N_TRACESINK 23 /* Trace data routing for MIPI P1149.7 */ #define N_TRACEROUTER 24 /* Trace data routing for MIPI P1149.7 */ +#define N_NCI 25 /* NFC NCI UART */ #endif /* _UAPI_LINUX_TTY_H */ diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig index a4f1e42e3481..901c1ddba841 100644 --- a/net/nfc/nci/Kconfig +++ b/net/nfc/nci/Kconfig @@ -19,3 +19,10 @@ config NFC_NCI_SPI an NFC Controller (NFCC) and a Device Host (DH). Say yes if you use an NCI driver that requires SPI link layer. + +config NFC_NCI_UART + depends on NFC_NCI && TTY + tristate "NCI over UART protocol support" + default n + help + Say yes if you use an NCI driver that requires UART link layer. diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile index 7ed8949266cc..b4b85b82e988 100644 --- a/net/nfc/nci/Makefile +++ b/net/nfc/nci/Makefile @@ -7,3 +7,6 @@ obj-$(CONFIG_NFC_NCI) += nci.o nci-objs := core.o data.o lib.o ntf.o rsp.o hci.o nci-$(CONFIG_NFC_NCI_SPI) += spi.o + +nci_uart-y += uart.o +obj-$(CONFIG_NFC_NCI_UART) += nci_uart.o diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c new file mode 100644 index 000000000000..70c279543074 --- /dev/null +++ b/net/nfc/nci/uart.c @@ -0,0 +1,495 @@ +/* + * Copyright (C) 2015, Marvell International Ltd. + * + * This software file (the "File") is distributed by Marvell International + * Ltd. under the terms of the GNU General Public License Version 2, June 1991 + * (the "License"). You may use, redistribute and/or modify this File in + * accordance with the terms and conditions of the License, a copy of which + * is available on the worldwide web at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. + * + * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE + * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE + * ARE EXPRESSLY DISCLAIMED. The License provides additional details about + * this warranty disclaimer. + + */ + +/* Inspired (hugely) by HCI LDISC implementation in Bluetooth. + * + * Copyright (C) 2000-2001 Qualcomm Incorporated + * Copyright (C) 2002-2003 Maxim Krasnyansky + * Copyright (C) 2004-2005 Marcel Holtmann + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* TX states */ +#define NCI_UART_SENDING 1 +#define NCI_UART_TX_WAKEUP 2 + +static struct nci_uart *nci_uart_drivers[NCI_UART_DRIVER_MAX]; + +static inline struct sk_buff *nci_uart_dequeue(struct nci_uart *nu) +{ + struct sk_buff *skb = nu->tx_skb; + + if (!skb) + skb = skb_dequeue(&nu->tx_q); + else + nu->tx_skb = NULL; + + return skb; +} + +static inline int nci_uart_queue_empty(struct nci_uart *nu) +{ + if (nu->tx_skb) + return 0; + + return skb_queue_empty(&nu->tx_q); +} + +static int nci_uart_tx_wakeup(struct nci_uart *nu) +{ + if (test_and_set_bit(NCI_UART_SENDING, &nu->tx_state)) { + set_bit(NCI_UART_TX_WAKEUP, &nu->tx_state); + return 0; + } + + schedule_work(&nu->write_work); + + return 0; +} + +static void nci_uart_write_work(struct work_struct *work) +{ + struct nci_uart *nu = container_of(work, struct nci_uart, write_work); + struct tty_struct *tty = nu->tty; + struct sk_buff *skb; + +restart: + clear_bit(NCI_UART_TX_WAKEUP, &nu->tx_state); + + if (nu->ops.tx_start) + nu->ops.tx_start(nu); + + while ((skb = nci_uart_dequeue(nu))) { + int len; + + set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); + len = tty->ops->write(tty, skb->data, skb->len); + skb_pull(skb, len); + if (skb->len) { + nu->tx_skb = skb; + break; + } + kfree_skb(skb); + } + + if (test_bit(NCI_UART_TX_WAKEUP, &nu->tx_state)) + goto restart; + + if (nu->ops.tx_done && nci_uart_queue_empty(nu)) + nu->ops.tx_done(nu); + + clear_bit(NCI_UART_SENDING, &nu->tx_state); +} + +static int nci_uart_set_driver(struct tty_struct *tty, unsigned int driver) +{ + struct nci_uart *nu = NULL; + int ret; + + if (driver >= NCI_UART_DRIVER_MAX) + return -EINVAL; + + if (!nci_uart_drivers[driver]) + return -ENOENT; + + nu = kzalloc(sizeof(*nu), GFP_KERNEL); + if (!nu) + return -ENOMEM; + + memcpy(nu, nci_uart_drivers[driver], sizeof(struct nci_uart)); + nu->tty = tty; + tty->disc_data = nu; + skb_queue_head_init(&nu->tx_q); + INIT_WORK(&nu->write_work, nci_uart_write_work); + spin_lock_init(&nu->rx_lock); + + ret = nu->ops.open(nu); + if (ret) { + tty->disc_data = NULL; + kfree(nu); + } else if (!try_module_get(nu->owner)) { + nu->ops.close(nu); + tty->disc_data = NULL; + kfree(nu); + return -ENOENT; + } + return ret; +} + +/* ------ LDISC part ------ */ + +/* nci_uart_tty_open + * + * Called when line discipline changed to NCI_UART. + * + * Arguments: + * tty pointer to tty info structure + * Return Value: + * 0 if success, otherwise error code + */ +static int nci_uart_tty_open(struct tty_struct *tty) +{ + /* Error if the tty has no write op instead of leaving an exploitable + * hole + */ + if (!tty->ops->write) + return -EOPNOTSUPP; + + tty->disc_data = NULL; + tty->receive_room = 65536; + + /* Flush any pending characters in the driver and line discipline. */ + + /* FIXME: why is this needed. Note don't use ldisc_ref here as the + * open path is before the ldisc is referencable. + */ + + if (tty->ldisc->ops->flush_buffer) + tty->ldisc->ops->flush_buffer(tty); + tty_driver_flush_buffer(tty); + + return 0; +} + +/* nci_uart_tty_close() + * + * Called when the line discipline is changed to something + * else, the tty is closed, or the tty detects a hangup. + */ +static void nci_uart_tty_close(struct tty_struct *tty) +{ + struct nci_uart *nu = (void *)tty->disc_data; + + /* Detach from the tty */ + tty->disc_data = NULL; + + if (!nu) + return; + + if (nu->tx_skb) + kfree_skb(nu->tx_skb); + if (nu->rx_skb) + kfree_skb(nu->rx_skb); + + skb_queue_purge(&nu->tx_q); + + nu->ops.close(nu); + nu->tty = NULL; + module_put(nu->owner); + + cancel_work_sync(&nu->write_work); + + kfree(nu); +} + +/* nci_uart_tty_wakeup() + * + * Callback for transmit wakeup. Called when low level + * device driver can accept more send data. + * + * Arguments: tty pointer to associated tty instance data + * Return Value: None + */ +static void nci_uart_tty_wakeup(struct tty_struct *tty) +{ + struct nci_uart *nu = (void *)tty->disc_data; + + if (!nu) + return; + + clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); + + if (tty != nu->tty) + return; + + nci_uart_tx_wakeup(nu); +} + +/* nci_uart_tty_receive() + * + * Called by tty low level driver when receive data is + * available. + * + * Arguments: tty pointer to tty isntance data + * data pointer to received data + * flags pointer to flags for data + * count count of received data in bytes + * + * Return Value: None + */ +static void nci_uart_tty_receive(struct tty_struct *tty, const u8 *data, + char *flags, int count) +{ + struct nci_uart *nu = (void *)tty->disc_data; + + if (!nu || tty != nu->tty) + return; + + spin_lock(&nu->rx_lock); + nu->ops.recv_buf(nu, (void *)data, flags, count); + spin_unlock(&nu->rx_lock); + + tty_unthrottle(tty); +} + +/* nci_uart_tty_ioctl() + * + * Process IOCTL system call for the tty device. + * + * Arguments: + * + * tty pointer to tty instance data + * file pointer to open file object for device + * cmd IOCTL command code + * arg argument for IOCTL call (cmd dependent) + * + * Return Value: Command dependent + */ +static int nci_uart_tty_ioctl(struct tty_struct *tty, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct nci_uart *nu = (void *)tty->disc_data; + int err = 0; + + switch (cmd) { + case NCIUARTSETDRIVER: + if (!nu) + return nci_uart_set_driver(tty, (unsigned int)arg); + else + return -EBUSY; + break; + default: + err = n_tty_ioctl_helper(tty, file, cmd, arg); + break; + } + + return err; +} + +/* We don't provide read/write/poll interface for user space. */ +static ssize_t nci_uart_tty_read(struct tty_struct *tty, struct file *file, + unsigned char __user *buf, size_t nr) +{ + return 0; +} + +static ssize_t nci_uart_tty_write(struct tty_struct *tty, struct file *file, + const unsigned char *data, size_t count) +{ + return 0; +} + +static unsigned int nci_uart_tty_poll(struct tty_struct *tty, + struct file *filp, poll_table *wait) +{ + return 0; +} + +static int nci_uart_send(struct nci_uart *nu, struct sk_buff *skb) +{ + /* Queue TX packet */ + skb_queue_tail(&nu->tx_q, skb); + + /* Try to start TX (if possible) */ + nci_uart_tx_wakeup(nu); + + return 0; +} + +/* -- Default recv_buf handler -- + * + * This handler supposes that NCI frames are sent over UART link without any + * framing. It reads NCI header, retrieve the packet size and once all packet + * bytes are received it passes it to nci_uart driver for processing. + */ +static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data, + char *flags, int count) +{ + int chunk_len; + + if (!nu->ndev) { + nfc_err(nu->tty->dev, + "receive data from tty but no NCI dev is attached yet, drop buffer\n"); + return 0; + } + + /* Decode all incoming data in packets + * and enqueue then for processing. + */ + while (count > 0) { + /* If this is the first data of a packet, allocate a buffer */ + if (!nu->rx_skb) { + nu->rx_packet_len = -1; + nu->rx_skb = nci_skb_alloc(nu->ndev, + NCI_MAX_PACKET_SIZE, + GFP_KERNEL); + if (!nu->rx_skb) + return -ENOMEM; + } + + /* Eat byte after byte till full packet header is received */ + if (nu->rx_skb->len < NCI_CTRL_HDR_SIZE) { + *skb_put(nu->rx_skb, 1) = *data++; + --count; + continue; + } + + /* Header was received but packet len was not read */ + if (nu->rx_packet_len < 0) + nu->rx_packet_len = NCI_CTRL_HDR_SIZE + + nci_plen(nu->rx_skb->data); + + /* Compute how many bytes are missing and how many bytes can + * be consumed. + */ + chunk_len = nu->rx_packet_len - nu->rx_skb->len; + if (count < chunk_len) + chunk_len = count; + memcpy(skb_put(nu->rx_skb, chunk_len), data, chunk_len); + data += chunk_len; + count -= chunk_len; + + /* Chcek if packet is fully received */ + if (nu->rx_packet_len == nu->rx_skb->len) { + /* Pass RX packet to driver */ + if (nu->ops.recv(nu, nu->rx_skb) != 0) + nfc_err(nu->tty->dev, "corrupted RX packet\n"); + /* Next packet will be a new one */ + nu->rx_skb = NULL; + } + } + + return 0; +} + +/* -- Default recv handler -- */ +static int nci_uart_default_recv(struct nci_uart *nu, struct sk_buff *skb) +{ + return nci_recv_frame(nu->ndev, skb); +} + +int nci_uart_register(struct nci_uart *nu) +{ + if (!nu || !nu->ops.open || + !nu->ops.recv || !nu->ops.close) + return -EINVAL; + + /* Set the send callback */ + nu->ops.send = nci_uart_send; + + /* Install default handlers if not overridden */ + if (!nu->ops.recv_buf) + nu->ops.recv_buf = nci_uart_default_recv_buf; + if (!nu->ops.recv) + nu->ops.recv = nci_uart_default_recv; + + /* Add this driver in the driver list */ + if (!nci_uart_drivers[nu->driver]) { + pr_err("driver %d is already registered\n", nu->driver); + return -EBUSY; + } + nci_uart_drivers[nu->driver] = nu; + + pr_info("NCI uart driver '%s [%d]' registered\n", nu->name, nu->driver); + + return 0; +} +EXPORT_SYMBOL_GPL(nci_uart_register); + +void nci_uart_unregister(struct nci_uart *nu) +{ + pr_info("NCI uart driver '%s [%d]' unregistered\n", nu->name, + nu->driver); + + /* Remove this driver from the driver list */ + nci_uart_drivers[nu->driver] = NULL; +} +EXPORT_SYMBOL_GPL(nci_uart_unregister); + +void nci_uart_set_config(struct nci_uart *nu, int baudrate, int flow_ctrl) +{ + struct ktermios new_termios; + + if (!nu->tty) + return; + + down_read(&nu->tty->termios_rwsem); + new_termios = nu->tty->termios; + up_read(&nu->tty->termios_rwsem); + tty_termios_encode_baud_rate(&new_termios, baudrate, baudrate); + + if (flow_ctrl) + new_termios.c_cflag |= CRTSCTS; + else + new_termios.c_cflag &= ~CRTSCTS; + + tty_set_termios(nu->tty, &new_termios); +} +EXPORT_SYMBOL_GPL(nci_uart_set_config); + +static struct tty_ldisc_ops nci_uart_ldisc = { + .magic = TTY_LDISC_MAGIC, + .owner = THIS_MODULE, + .name = "n_nci", + .open = nci_uart_tty_open, + .close = nci_uart_tty_close, + .read = nci_uart_tty_read, + .write = nci_uart_tty_write, + .poll = nci_uart_tty_poll, + .receive_buf = nci_uart_tty_receive, + .write_wakeup = nci_uart_tty_wakeup, + .ioctl = nci_uart_tty_ioctl, +}; + +static int __init nci_uart_init(void) +{ + memset(nci_uart_drivers, 0, sizeof(nci_uart_drivers)); + return tty_register_ldisc(N_NCI, &nci_uart_ldisc); +} + +static void __exit nci_uart_exit(void) +{ + tty_unregister_ldisc(N_NCI); +} + +module_init(nci_uart_init); +module_exit(nci_uart_exit); + +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_DESCRIPTION("NFC NCI UART driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_LDISC(N_NCI); -- cgit v1.2.3 From dc14bdef8762a8098b1da881b611d722e24fe787 Mon Sep 17 00:00:00 2001 From: Vincent Cuissard Date: Thu, 11 Jun 2015 14:00:19 +0200 Subject: NFC: nfcmrvl: add platform_data and DT configuration Declare nfcmrvl platform_data structure and few DT parameters for nfcmrvl driver. Signed-off-by: Vincent Cuissard Signed-off-by: Samuel Ortiz --- drivers/nfc/nfcmrvl/main.c | 66 +++++++++++++++++++++++++++-------- drivers/nfc/nfcmrvl/nfcmrvl.h | 36 +++++++++++++------ drivers/nfc/nfcmrvl/usb.c | 6 +++- include/linux/platform_data/nfcmrvl.h | 31 ++++++++++++++++ 4 files changed, 113 insertions(+), 26 deletions(-) create mode 100644 include/linux/platform_data/nfcmrvl.h (limited to 'include') diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c index 708aad28c567..e317a69a4560 100644 --- a/drivers/nfc/nfcmrvl/main.c +++ b/drivers/nfc/nfcmrvl/main.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -65,7 +66,7 @@ static int nfcmrvl_nci_send(struct nci_dev *ndev, struct sk_buff *skb) if (!test_bit(NFCMRVL_NCI_RUNNING, &priv->flags)) return -EBUSY; - if (priv->hci_muxed) { + if (priv->config.hci_muxed) { unsigned char *hdr; unsigned char len = skb->len; @@ -92,9 +93,9 @@ static struct nci_ops nfcmrvl_nci_ops = { }; struct nfcmrvl_private *nfcmrvl_nci_register_dev(void *drv_data, - struct nfcmrvl_if_ops *ops, - struct device *dev, - unsigned int flags) + struct nfcmrvl_if_ops *ops, + struct device *dev, + struct nfcmrvl_platform_data *pdata) { struct nfcmrvl_private *priv; int rc; @@ -108,23 +109,24 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(void *drv_data, priv->drv_data = drv_data; priv->if_ops = ops; priv->dev = dev; - priv->hci_muxed = (flags & NFCMRVL_DEV_FLAG_HCI_MUXED) ? 1 : 0; - priv->reset_n_io = NFCMRVL_DEV_FLAG_GET_RESET_N_IO(flags); - if (priv->reset_n_io) { + memcpy(&priv->config, pdata, sizeof(*pdata)); + + if (priv->config.reset_n_io) { rc = devm_gpio_request_one(dev, - priv->reset_n_io, + priv->config.reset_n_io, GPIOF_OUT_INIT_LOW, "nfcmrvl_reset_n"); if (rc < 0) nfc_err(dev, "failed to request reset_n io\n"); } - if (priv->hci_muxed) + if (priv->config.hci_muxed) headroom = NFCMRVL_HCI_EVENT_HEADER_SIZE; protocols = NFC_PROTO_JEWEL_MASK - | NFC_PROTO_MIFARE_MASK | NFC_PROTO_FELICA_MASK + | NFC_PROTO_MIFARE_MASK + | NFC_PROTO_FELICA_MASK | NFC_PROTO_ISO14443_MASK | NFC_PROTO_ISO14443_B_MASK | NFC_PROTO_NFC_DEP_MASK; @@ -169,7 +171,7 @@ EXPORT_SYMBOL_GPL(nfcmrvl_nci_unregister_dev); int nfcmrvl_nci_recv_frame(struct nfcmrvl_private *priv, struct sk_buff *skb) { - if (priv->hci_muxed) { + if (priv->config.hci_muxed) { if (skb->data[0] == NFCMRVL_HCI_EVENT_CODE && skb->data[1] == NFCMRVL_HCI_NFC_EVENT_CODE) { /* Data packet, let's extract NCI payload */ @@ -200,15 +202,51 @@ void nfcmrvl_chip_reset(struct nfcmrvl_private *priv) * To be improved. */ - if (priv->reset_n_io) { + if (priv->config.reset_n_io) { nfc_info(priv->dev, "reset the chip\n"); - gpio_set_value(priv->reset_n_io, 0); + gpio_set_value(priv->config.reset_n_io, 0); usleep_range(5000, 10000); - gpio_set_value(priv->reset_n_io, 1); + gpio_set_value(priv->config.reset_n_io, 1); } else nfc_info(priv->dev, "no reset available on this interface\n"); } +#ifdef CONFIG_OF + +int nfcmrvl_parse_dt(struct device_node *node, + struct nfcmrvl_platform_data *pdata) +{ + int reset_n_io; + + reset_n_io = of_get_named_gpio(node, "reset-n-io", 0); + if (reset_n_io < 0) { + pr_info("no reset-n-io config\n"); + reset_n_io = 0; + } else if (!gpio_is_valid(reset_n_io)) { + pr_err("invalid reset-n-io GPIO\n"); + return reset_n_io; + } + pdata->reset_n_io = reset_n_io; + + if (of_find_property(node, "hci-muxed", NULL)) + pdata->hci_muxed = 1; + else + pdata->hci_muxed = 0; + + return 0; +} + +#else + +int nfcmrvl_parse_dt(struct device_node *node, + struct nfcmrvl_platform_data *pdata) +{ + return -ENODEV; +} + +#endif +EXPORT_SYMBOL_GPL(nfcmrvl_parse_dt); + MODULE_AUTHOR("Marvell International Ltd."); MODULE_DESCRIPTION("Marvell NFC driver ver " VERSION); MODULE_VERSION(VERSION); diff --git a/drivers/nfc/nfcmrvl/nfcmrvl.h b/drivers/nfc/nfcmrvl/nfcmrvl.h index 2edae9a4b6bd..214412bd6110 100644 --- a/drivers/nfc/nfcmrvl/nfcmrvl.h +++ b/drivers/nfc/nfcmrvl/nfcmrvl.h @@ -16,6 +16,11 @@ * this warranty disclaimer. **/ +#ifndef _NFCMRVL_H_ +#define _NFCMRVL_H_ + +#include + /* Define private flags: */ #define NFCMRVL_NCI_RUNNING 1 @@ -38,22 +43,25 @@ #define NFCMRVL_HCI_OGF 0x81 #define NFCMRVL_HCI_OCF 0xFE -#define NFCMRVL_DEV_FLAG_HCI_MUXED (1 << 0) -#define NFCMRVL_DEV_FLAG_SET_RESET_N_IO(X) ((X) << 16) -#define NFCMRVL_DEV_FLAG_GET_RESET_N_IO(X) ((X) >> 16) struct nfcmrvl_private { - /* Tell if NCI packets are encapsulated in HCI ones */ - int hci_muxed; + unsigned long flags; + + /* Platform configuration */ + struct nfcmrvl_platform_data config; + struct nci_dev *ndev; - /* Reset IO (0 if not available) */ - int reset_n_io; + /* + ** PHY related information + */ - unsigned long flags; + /* PHY driver context */ void *drv_data; + /* PHY device */ struct device *dev; + /* Low level driver ops */ struct nfcmrvl_if_ops *if_ops; }; @@ -66,8 +74,14 @@ struct nfcmrvl_if_ops { void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv); int nfcmrvl_nci_recv_frame(struct nfcmrvl_private *priv, struct sk_buff *skb); struct nfcmrvl_private *nfcmrvl_nci_register_dev(void *drv_data, - struct nfcmrvl_if_ops *ops, - struct device *dev, - unsigned int flags); + struct nfcmrvl_if_ops *ops, + struct device *dev, + struct nfcmrvl_platform_data *pdata); + void nfcmrvl_chip_reset(struct nfcmrvl_private *priv); + +int nfcmrvl_parse_dt(struct device_node *node, + struct nfcmrvl_platform_data *pdata); + +#endif diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c index c4046b681bfa..aa3f3c1cff0a 100644 --- a/drivers/nfc/nfcmrvl/usb.c +++ b/drivers/nfc/nfcmrvl/usb.c @@ -302,6 +302,10 @@ static int nfcmrvl_probe(struct usb_interface *intf, struct nfcmrvl_private *priv; int i; struct usb_device *udev = interface_to_usbdev(intf); + struct nfcmrvl_platform_data config; + + /* No configuration for USB */ + memset(&config, 0, sizeof(config)); nfc_info(&udev->dev, "intf %p id %p\n", intf, id); @@ -339,7 +343,7 @@ static int nfcmrvl_probe(struct usb_interface *intf, init_usb_anchor(&drv_data->deferred); priv = nfcmrvl_nci_register_dev(drv_data, &usb_ops, - &drv_data->udev->dev, 0); + &drv_data->udev->dev, &config); if (IS_ERR(priv)) return PTR_ERR(priv); diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h new file mode 100644 index 000000000000..106cfe5ed589 --- /dev/null +++ b/include/linux/platform_data/nfcmrvl.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2015, Marvell International Ltd. + * + * This software file (the "File") is distributed by Marvell International + * Ltd. under the terms of the GNU General Public License Version 2, June 1991 + * (the "License"). You may use, redistribute and/or modify this File in + * accordance with the terms and conditions of the License, a copy of which + * is available on the worldwide web at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. + * + * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE + * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE + * ARE EXPRESSLY DISCLAIMED. The License provides additional details about + * this warranty disclaimer. + */ + +#ifndef _NFCMRVL_PTF_H_ +#define _NFCMRVL_PTF_H_ + +struct nfcmrvl_platform_data { + /* + * Generic + */ + + /* GPIO that is wired to RESET_N signal */ + unsigned int reset_n_io; + /* Tell if transport is muxed in HCI one */ + unsigned int hci_muxed; +}; + +#endif /* _NFCMRVL_PTF_H_ */ -- cgit v1.2.3 From e097dc624f784debbde49701a493bf920bc422c7 Mon Sep 17 00:00:00 2001 From: Vincent Cuissard Date: Thu, 11 Jun 2015 14:00:20 +0200 Subject: NFC: nfcmrvl: add UART driver Add support of Marvell NFC chip controlled over UART Signed-off-by: Vincent Cuissard Signed-off-by: Samuel Ortiz --- .../devicetree/bindings/net/nfc/nfcmrvl.txt | 29 +++ drivers/nfc/nfcmrvl/Kconfig | 11 + drivers/nfc/nfcmrvl/Makefile | 3 + drivers/nfc/nfcmrvl/nfcmrvl.h | 7 + drivers/nfc/nfcmrvl/uart.c | 225 +++++++++++++++++++++ include/linux/platform_data/nfcmrvl.h | 9 + net/nfc/nci/uart.c | 1 - 7 files changed, 284 insertions(+), 1 deletion(-) create mode 100644 Documentation/devicetree/bindings/net/nfc/nfcmrvl.txt create mode 100644 drivers/nfc/nfcmrvl/uart.c (limited to 'include') diff --git a/Documentation/devicetree/bindings/net/nfc/nfcmrvl.txt b/Documentation/devicetree/bindings/net/nfc/nfcmrvl.txt new file mode 100644 index 000000000000..7c4a0cc370cf --- /dev/null +++ b/Documentation/devicetree/bindings/net/nfc/nfcmrvl.txt @@ -0,0 +1,29 @@ +* Marvell International Ltd. NCI NFC Controller + +Required properties: +- compatible: Should be "mrvl,nfc-uart". + +Optional SoC specific properties: +- pinctrl-names: Contains only one value - "default". +- pintctrl-0: Specifies the pin control groups used for this controller. +- reset-n-io: Output GPIO pin used to reset the chip (active low). +- hci-muxed: Specifies that the chip is muxing NCI over HCI frames. + +Optional UART-based chip specific properties: +- flow-control: Specifies that the chip is using RTS/CTS. +- break-control: Specifies that the chip needs specific break management. + +Example (for ARM-based BeagleBoard Black with 88W8887 on UART5): + +&uart5 { + status = "okay"; + + nfcmrvluart: nfcmrvluart@5 { + compatible = "mrvl,nfc-uart"; + + reset-n-io = <&gpio3 16 0>; + + hci-muxed; + flow-control; + } +}; diff --git a/drivers/nfc/nfcmrvl/Kconfig b/drivers/nfc/nfcmrvl/Kconfig index 5e18afd9abe2..796be2411440 100644 --- a/drivers/nfc/nfcmrvl/Kconfig +++ b/drivers/nfc/nfcmrvl/Kconfig @@ -21,3 +21,14 @@ config NFC_MRVL_USB Say Y here to compile support for Marvell NFC-over-USB driver into the kernel or say M to compile it as module. + +config NFC_MRVL_UART + tristate "Marvell NFC-over-UART driver" + depends on NFC_MRVL && NFC_NCI_UART + help + Marvell NFC-over-UART driver. + + This driver provides support for Marvell NFC-over-UART devices + + Say Y here to compile support for Marvell NFC-over-UART driver + into the kernel or say M to compile it as module. diff --git a/drivers/nfc/nfcmrvl/Makefile b/drivers/nfc/nfcmrvl/Makefile index 97a0de72dc01..775196274d1f 100644 --- a/drivers/nfc/nfcmrvl/Makefile +++ b/drivers/nfc/nfcmrvl/Makefile @@ -7,3 +7,6 @@ obj-$(CONFIG_NFC_MRVL) += nfcmrvl.o nfcmrvl_usb-y += usb.o obj-$(CONFIG_NFC_MRVL_USB) += nfcmrvl_usb.o + +nfcmrvl_uart-y += uart.o +obj-$(CONFIG_NFC_MRVL_UART) += nfcmrvl_uart.o diff --git a/drivers/nfc/nfcmrvl/nfcmrvl.h b/drivers/nfc/nfcmrvl/nfcmrvl.h index 214412bd6110..09780d57c9b8 100644 --- a/drivers/nfc/nfcmrvl/nfcmrvl.h +++ b/drivers/nfc/nfcmrvl/nfcmrvl.h @@ -43,6 +43,11 @@ #define NFCMRVL_HCI_OGF 0x81 #define NFCMRVL_HCI_OCF 0xFE +enum nfcmrvl_phy { + NFCMRVL_PHY_USB = 0, + NFCMRVL_PHY_UART = 1, +}; + struct nfcmrvl_private { @@ -61,6 +66,8 @@ struct nfcmrvl_private { void *drv_data; /* PHY device */ struct device *dev; + /* PHY type */ + enum nfcmrvl_phy phy; /* Low level driver ops */ struct nfcmrvl_if_ops *if_ops; }; diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c new file mode 100644 index 000000000000..61442d6528a6 --- /dev/null +++ b/drivers/nfc/nfcmrvl/uart.c @@ -0,0 +1,225 @@ +/** + * Marvell NFC-over-UART driver + * + * Copyright (C) 2015, Marvell International Ltd. + * + * This software file (the "File") is distributed by Marvell International + * Ltd. under the terms of the GNU General Public License Version 2, June 1991 + * (the "License"). You may use, redistribute and/or modify this File in + * accordance with the terms and conditions of the License, a copy of which + * is available on the worldwide web at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. + * + * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE + * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE + * ARE EXPRESSLY DISCLAIMED. The License provides additional details about + * this warranty disclaimer. + */ + +#include +#include +#include +#include +#include +#include "nfcmrvl.h" + +static unsigned int hci_muxed; +static unsigned int flow_control; +static unsigned int break_control; +static unsigned int reset_n_io; + +/* +** NFCMRVL NCI OPS +*/ + +static int nfcmrvl_uart_nci_open(struct nfcmrvl_private *priv) +{ + return 0; +} + +static int nfcmrvl_uart_nci_close(struct nfcmrvl_private *priv) +{ + return 0; +} + +static int nfcmrvl_uart_nci_send(struct nfcmrvl_private *priv, + struct sk_buff *skb) +{ + struct nci_uart *nu = priv->drv_data; + + return nu->ops.send(nu, skb); +} + +static struct nfcmrvl_if_ops uart_ops = { + .nci_open = nfcmrvl_uart_nci_open, + .nci_close = nfcmrvl_uart_nci_close, + .nci_send = nfcmrvl_uart_nci_send, +}; + +#ifdef CONFIG_OF + +static int nfcmrvl_uart_parse_dt(struct device_node *node, + struct nfcmrvl_platform_data *pdata) +{ + struct device_node *matched_node; + int ret; + + matched_node = of_find_compatible_node(node, NULL, "mrvl,nfc-uart"); + if (!matched_node) + return -ENODEV; + + ret = nfcmrvl_parse_dt(matched_node, pdata); + if (ret < 0) { + pr_err("Failed to get generic entries\n"); + return ret; + } + + if (of_find_property(matched_node, "flow-control", NULL)) + pdata->flow_control = 1; + else + pdata->flow_control = 0; + + if (of_find_property(matched_node, "break-control", NULL)) + pdata->break_control = 1; + else + pdata->break_control = 0; + + return 0; +} + +#else + +static int nfcmrvl_uart_parse_dt(struct device_node *node, + struct nfcmrvl_platform_data *pdata) +{ + return -ENODEV; +} + +#endif + +/* +** NCI UART OPS +*/ + +static int nfcmrvl_nci_uart_open(struct nci_uart *nu) +{ + struct nfcmrvl_private *priv; + struct nfcmrvl_platform_data *pdata = NULL; + struct nfcmrvl_platform_data config; + + /* + * Platform data cannot be used here since usually it is already used + * by low level serial driver. We can try to retrieve serial device + * and check if DT entries were added. + */ + + if (nu->tty->dev->parent && nu->tty->dev->parent->of_node) + if (nfcmrvl_uart_parse_dt(nu->tty->dev->parent->of_node, + &config) == 0) + pdata = &config; + + if (!pdata) { + pr_info("No platform data / DT -> fallback to module params\n"); + config.hci_muxed = hci_muxed; + config.reset_n_io = reset_n_io; + config.flow_control = flow_control; + config.break_control = break_control; + pdata = &config; + } + + priv = nfcmrvl_nci_register_dev(nu, &uart_ops, nu->tty->dev, pdata); + if (IS_ERR(priv)) + return PTR_ERR(priv); + + priv->phy = NFCMRVL_PHY_UART; + + nu->drv_data = priv; + nu->ndev = priv->ndev; + + /* Set BREAK */ + if (priv->config.break_control && nu->tty->ops->break_ctl) + nu->tty->ops->break_ctl(nu->tty, -1); + + return 0; +} + +static void nfcmrvl_nci_uart_close(struct nci_uart *nu) +{ + nfcmrvl_nci_unregister_dev((struct nfcmrvl_private *)nu->drv_data); +} + +static int nfcmrvl_nci_uart_recv(struct nci_uart *nu, struct sk_buff *skb) +{ + return nfcmrvl_nci_recv_frame((struct nfcmrvl_private *)nu->drv_data, + skb); +} + +static void nfcmrvl_nci_uart_tx_start(struct nci_uart *nu) +{ + struct nfcmrvl_private *priv = (struct nfcmrvl_private *)nu->drv_data; + + /* Remove BREAK to wake up the NFCC */ + if (priv->config.break_control && nu->tty->ops->break_ctl) { + nu->tty->ops->break_ctl(nu->tty, 0); + usleep_range(3000, 5000); + } +} + +static void nfcmrvl_nci_uart_tx_done(struct nci_uart *nu) +{ + struct nfcmrvl_private *priv = (struct nfcmrvl_private *)nu->drv_data; + + /* + ** To ensure that if the NFCC goes in DEEP SLEEP sate we can wake him + ** up. we set BREAK. Once we will be ready to send again we will remove + ** it. + */ + if (priv->config.break_control && nu->tty->ops->break_ctl) + nu->tty->ops->break_ctl(nu->tty, -1); +} + +static struct nci_uart nfcmrvl_nci_uart = { + .owner = THIS_MODULE, + .name = "nfcmrvl_uart", + .driver = NCI_UART_DRIVER_MARVELL, + .ops = { + .open = nfcmrvl_nci_uart_open, + .close = nfcmrvl_nci_uart_close, + .recv = nfcmrvl_nci_uart_recv, + .tx_start = nfcmrvl_nci_uart_tx_start, + .tx_done = nfcmrvl_nci_uart_tx_done, + } +}; + +/* +** Module init +*/ + +static int nfcmrvl_uart_init_module(void) +{ + return nci_uart_register(&nfcmrvl_nci_uart); +} + +static void nfcmrvl_uart_exit_module(void) +{ + nci_uart_unregister(&nfcmrvl_nci_uart); +} + +module_init(nfcmrvl_uart_init_module); +module_exit(nfcmrvl_uart_exit_module); + +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_DESCRIPTION("Marvell NFC-over-UART"); +MODULE_LICENSE("GPL v2"); + +module_param(flow_control, uint, 0); +MODULE_PARM_DESC(flow_control, "Tell if UART needs flow control at init."); + +module_param(break_control, uint, 0); +MODULE_PARM_DESC(break_control, "Tell if UART driver must drive break signal."); + +module_param(hci_muxed, uint, 0); +MODULE_PARM_DESC(hci_muxed, "Tell if transport is muxed in HCI one."); + +module_param(reset_n_io, uint, 0); +MODULE_PARM_DESC(reset_n_io, "GPIO that is wired to RESET_N signal."); diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h index 106cfe5ed589..ac91707dabcb 100644 --- a/include/linux/platform_data/nfcmrvl.h +++ b/include/linux/platform_data/nfcmrvl.h @@ -26,6 +26,15 @@ struct nfcmrvl_platform_data { unsigned int reset_n_io; /* Tell if transport is muxed in HCI one */ unsigned int hci_muxed; + + /* + * UART specific + */ + + /* Tell if UART needs flow control at init */ + unsigned int flow_control; + /* Tell if firmware supports break control for power management */ + unsigned int break_control; }; #endif /* _NFCMRVL_PTF_H_ */ diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index 70c279543074..2c48810af5bb 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c @@ -12,7 +12,6 @@ * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE * ARE EXPRESSLY DISCLAIMED. The License provides additional details about * this warranty disclaimer. - */ /* Inspired (hugely) by HCI LDISC implementation in Bluetooth. -- cgit v1.2.3 From facc9699f0fe7d65a92cc09e175662659306066d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 11 Jun 2015 14:47:27 +0300 Subject: net/mlx5e: Fix HW MTU settings Previously we configured HW MTU to be netdev->mtu, actually we need to configure netdev->mtu + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN). Also, query MTU can not fail, hence make the relevant helper a void functionm, add mlx5e_set_dev_port_mtu, helper function to handle MTU setting. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx5/main.c | 8 +-- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 61 ++++++++++++----------- drivers/net/ethernet/mellanox/mlx5/core/port.c | 36 ++++++------- include/linux/mlx5/driver.h | 10 ++-- 5 files changed, 55 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d4dea86052d6..79dadd627e9c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -446,15 +446,11 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, if (err) goto out; - err = mlx5_query_port_max_mtu(mdev, &max_mtu, port); - if (err) - goto out; + mlx5_query_port_max_mtu(mdev, &max_mtu, port); props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu); - err = mlx5_query_port_oper_mtu(mdev, &oper_mtu, port); - if (err) - goto out; + mlx5_query_port_oper_mtu(mdev, &oper_mtu, port); props->active_mtu = mlx5_mtu_to_ib_mtu(oper_mtu); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index e9edb7210de1..71f38bb5dbfc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -57,7 +57,6 @@ #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 #define MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ 0x7 -#define MLX5E_PARAMS_MIN_MTU 46 #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 075e5175e779..22b2665e0328 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -277,6 +277,9 @@ static void mlx5e_send_nop(struct mlx5e_sq *sq) mlx5e_tx_notify_hw(sq, wqe); } +#define MLX5E_HW2SW_MTU(hwmtu) (hwmtu - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) +#define MLX5E_SW2HW_MTU(swmtu) (swmtu + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) + static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) @@ -305,7 +308,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, } rq->wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz : - priv->netdev->mtu + ETH_HLEN + VLAN_HLEN; + MLX5E_SW2HW_MTU(priv->netdev->mtu); for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); @@ -1367,11 +1370,30 @@ static void mlx5e_close_tirs(struct mlx5e_priv *priv) mlx5e_close_tir(priv, i); } -int mlx5e_open_locked(struct net_device *netdev) +static int mlx5e_set_dev_port_mtu(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - int actual_mtu; + int hw_mtu; + int err; + + err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(netdev->mtu), 1); + if (err) + return err; + + mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1); + + if (MLX5E_HW2SW_MTU(hw_mtu) != netdev->mtu) + netdev_warn(netdev, "%s: Port MTU %d is different than netdev mtu %d\n", + __func__, MLX5E_HW2SW_MTU(hw_mtu), netdev->mtu); + + netdev->mtu = MLX5E_HW2SW_MTU(hw_mtu); + return 0; +} + +int mlx5e_open_locked(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); int num_txqs; int err; @@ -1380,25 +1402,9 @@ int mlx5e_open_locked(struct net_device *netdev) netif_set_real_num_tx_queues(netdev, num_txqs); netif_set_real_num_rx_queues(netdev, priv->params.num_channels); - err = mlx5_set_port_mtu(mdev, netdev->mtu); - if (err) { - netdev_err(netdev, "%s: mlx5_set_port_mtu failed %d\n", - __func__, err); + err = mlx5e_set_dev_port_mtu(netdev); + if (err) return err; - } - - err = mlx5_query_port_oper_mtu(mdev, &actual_mtu, 1); - if (err) { - netdev_err(netdev, "%s: mlx5_query_port_oper_mtu failed %d\n", - __func__, err); - return err; - } - - if (actual_mtu != netdev->mtu) - netdev_warn(netdev, "%s: Failed to set MTU to %d\n", - __func__, netdev->mtu); - - netdev->mtu = actual_mtu; err = mlx5e_open_tises(priv); if (err) { @@ -1613,15 +1619,14 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; int max_mtu; - int err = 0; + int err; - err = mlx5_query_port_max_mtu(mdev, &max_mtu, 1); - if (err) - return err; + mlx5_query_port_max_mtu(mdev, &max_mtu, 1); - if (new_mtu > max_mtu || new_mtu < MLX5E_PARAMS_MIN_MTU) { - netdev_err(netdev, "%s: Bad MTU size, mtu must be [%d-%d]\n", - __func__, MLX5E_PARAMS_MIN_MTU, max_mtu); + if (new_mtu > max_mtu) { + netdev_err(netdev, + "%s: Bad MTU (%d) > (%d) Max\n", + __func__, new_mtu, max_mtu); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 619d3baf19ea..70147999f657 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -248,22 +248,18 @@ int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status) return err; } -static int mlx5_query_port_mtu(struct mlx5_core_dev *dev, - int *admin_mtu, int *max_mtu, int *oper_mtu, - u8 local_port) +static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, int *admin_mtu, + int *max_mtu, int *oper_mtu, u8 port) { u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; - int err; memset(in, 0, sizeof(in)); - MLX5_SET(pmtu_reg, in, local_port, local_port); + MLX5_SET(pmtu_reg, in, local_port, port); - err = mlx5_core_access_reg(dev, in, sizeof(in), out, - sizeof(out), MLX5_REG_PMTU, 0, 0); - if (err) - return err; + mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PMTU, 0, 0); if (max_mtu) *max_mtu = MLX5_GET(pmtu_reg, out, max_mtu); @@ -271,11 +267,9 @@ static int mlx5_query_port_mtu(struct mlx5_core_dev *dev, *oper_mtu = MLX5_GET(pmtu_reg, out, oper_mtu); if (admin_mtu) *admin_mtu = MLX5_GET(pmtu_reg, out, admin_mtu); - - return 0; } -int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu) +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port) { u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; @@ -283,24 +277,24 @@ int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu) memset(in, 0, sizeof(in)); MLX5_SET(pmtu_reg, in, admin_mtu, mtu); - MLX5_SET(pmtu_reg, in, local_port, 1); + MLX5_SET(pmtu_reg, in, local_port, port); - return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), - MLX5_REG_PMTU, 0, 1); + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PMTU, 0, 1); } EXPORT_SYMBOL_GPL(mlx5_set_port_mtu); -int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, - u8 local_port) +void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, + u8 port) { - return mlx5_query_port_mtu(dev, NULL, max_mtu, NULL, local_port); + mlx5_query_port_mtu(dev, NULL, max_mtu, NULL, port); } EXPORT_SYMBOL_GPL(mlx5_query_port_max_mtu); -int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, - u8 local_port) +void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, + u8 port) { - return mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, local_port); + mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, port); } EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6093bde16b94..c0930f8d7021 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -756,11 +756,11 @@ int mlx5_set_port_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status); -int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu); -int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, - u8 local_port); -int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, - u8 local_port); +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port); +void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port); +void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, + u8 port); + int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, u8 *vl_hw_cap, u8 local_port); -- cgit v1.2.3 From fc11fbf9a785b25c5d07f05a30d4169ec39818da Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 11 Jun 2015 14:47:28 +0300 Subject: net/mlx5e: Add HW cacheline start padding Enable HW cacheline start padding and align RX WQE size to cacheline while considering HW start padding. Also, fix dma_unmap call to use the correct SKB data buffer size. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 ++++- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 10 +++++----- include/linux/mlx5/device.h | 4 ++++ 3 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 22b2665e0328..1c62af69ca29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -309,12 +309,15 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz : MLX5E_SW2HW_MTU(priv->netdev->mtu); + rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz + MLX5E_NET_IP_ALIGN); for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); + u32 byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN; wqe->data.lkey = c->mkey_be; - wqe->data.byte_count = cpu_to_be32(rq->wqe_sz); + wqe->data.byte_count = + cpu_to_be32(byte_count | MLX5_HW_START_PADDING); } rq->pdev = c->pdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index ce1317cdabd7..06e7c744ed4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -45,18 +45,18 @@ static inline int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, if (unlikely(!skb)) return -ENOMEM; - skb_reserve(skb, MLX5E_NET_IP_ALIGN); - dma_addr = dma_map_single(rq->pdev, /* hw start padding */ - skb->data - MLX5E_NET_IP_ALIGN, - /* hw end padding */ + skb->data, + /* hw end padding */ rq->wqe_sz, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(rq->pdev, dma_addr))) goto err_free_skb; + skb_reserve(skb, MLX5E_NET_IP_ALIGN); + *((dma_addr_t *)skb->cb) = dma_addr; wqe->data.addr = cpu_to_be64(dma_addr + MLX5E_NET_IP_ALIGN); @@ -217,7 +217,7 @@ bool mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) dma_unmap_single(rq->pdev, *((dma_addr_t *)skb->cb), - skb_end_offset(skb), + rq->wqe_sz, DMA_FROM_DEVICE); if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index b2c43508a737..b943cd9e2097 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -131,6 +131,10 @@ enum { MLX5_INLINE_SEG = 0x80000000, }; +enum { + MLX5_HW_START_PADDING = MLX5_INLINE_SEG, +}; + enum { MLX5_MIN_PKEY_TABLE_SIZE = 128, MLX5_MAX_LOG_PKEY_TABLE = 5, -- cgit v1.2.3 From f69ad292cfd13aa7ee00847320c6bb9ba2154e87 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 Jun 2015 09:15:18 -0700 Subject: tcp: fill shinfo->gso_size at last moment In commit cd7d8498c9a5 ("tcp: change tcp_skb_pcount() location") we stored gso_segs in a temporary cache hot location. This patch does the same for gso_size. This allows to save 2 cache line misses in tcp xmit path for the last packet that is considered but not sent because of various conditions (cwnd, tso defer, receiver window, TSQ...) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 13 ++++++++----- net/ipv4/tcp_input.c | 8 ++++---- net/ipv4/tcp_output.c | 12 ++++-------- 3 files changed, 16 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 978cebedd3fc..950cfecaad3c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -730,11 +730,14 @@ struct tcp_skb_cb { /* Note : tcp_tw_isn is used in input path only * (isn chosen by tcp_timewait_state_process()) * - * tcp_gso_segs is used in write queue only, - * cf tcp_skb_pcount() + * tcp_gso_segs/size are used in write queue only, + * cf tcp_skb_pcount()/tcp_skb_mss() */ __u32 tcp_tw_isn; - __u32 tcp_gso_segs; + struct { + u16 tcp_gso_segs; + u16 tcp_gso_size; + }; }; __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ @@ -790,10 +793,10 @@ static inline void tcp_skb_pcount_add(struct sk_buff *skb, int segs) TCP_SKB_CB(skb)->tcp_gso_segs += segs; } -/* This is valid iff tcp_skb_pcount() > 1. */ +/* This is valid iff skb is in write queue and tcp_skb_pcount() > 1. */ static inline int tcp_skb_mss(const struct sk_buff *skb) { - return skb_shinfo(skb)->gso_size; + return TCP_SKB_CB(skb)->tcp_gso_size; } /* Events passed to congestion control interface */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 70a6fa8ecbd3..684f095d196e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1316,12 +1316,12 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, * code can come after this skb later on it's better to keep * setting gso_size to something. */ - if (!skb_shinfo(prev)->gso_size) - skb_shinfo(prev)->gso_size = mss; + if (!TCP_SKB_CB(prev)->tcp_gso_size) + TCP_SKB_CB(prev)->tcp_gso_size = mss; /* CHECKME: To clear or not to clear? Mimics normal skb currently */ if (tcp_skb_pcount(skb) <= 1) - skb_shinfo(skb)->gso_size = 0; + TCP_SKB_CB(skb)->tcp_gso_size = 0; /* Difference in this won't matter, both ACKed by the same cumul. ACK */ TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); @@ -2248,7 +2248,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) (oldcnt >= packets)) break; - mss = skb_shinfo(skb)->gso_size; + mss = tcp_skb_mss(skb); err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss, GFP_ATOMIC); if (err < 0) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d12888581337..787f57ff87c4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -402,8 +402,6 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, */ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) { - struct skb_shared_info *shinfo = skb_shinfo(skb); - skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; @@ -411,7 +409,6 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) TCP_SKB_CB(skb)->sacked = 0; tcp_skb_pcount_set(skb, 1); - shinfo->gso_size = 0; TCP_SKB_CB(skb)->seq = seq; if (flags & (TCPHDR_SYN | TCPHDR_FIN)) @@ -1028,8 +1025,9 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcp_skb_pcount(skb)); tp->segs_out += tcp_skb_pcount(skb); - /* OK, its time to fill skb_shinfo(skb)->gso_segs */ + /* OK, its time to fill skb_shinfo(skb)->gso_{segs|size} */ skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb); + skb_shinfo(skb)->gso_size = tcp_skb_mss(skb); /* Our usage of tstamp should remain private */ skb->tstamp.tv64 = 0; @@ -1068,8 +1066,6 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) /* Initialize TSO segments for a packet. */ static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now) { - struct skb_shared_info *shinfo = skb_shinfo(skb); - /* Make sure we own this skb before messing gso_size/gso_segs */ WARN_ON_ONCE(skb_cloned(skb)); @@ -1078,10 +1074,10 @@ static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now) * non-TSO case. */ tcp_skb_pcount_set(skb, 1); - shinfo->gso_size = 0; + TCP_SKB_CB(skb)->tcp_gso_size = 0; } else { tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now)); - shinfo->gso_size = mss_now; + TCP_SKB_CB(skb)->tcp_gso_size = mss_now; } } -- cgit v1.2.3 From 821f37666815c9f3a7a4d195ce9184ad4d084942 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 11 Jun 2015 13:52:29 +0300 Subject: Bluetooth: Read encryption key size for BR/EDR connections Since Bluetooth 3.0 there's a HCI command available for reading the encryption key size of an BR/EDR connection. This information is essential e.g. for generating an LTK using SMP over BR/EDR, so store it as part of struct hci_conn. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 10 ++++++ net/bluetooth/hci_event.c | 87 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index d95da83cb1b0..7ca6690355ea 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1202,6 +1202,16 @@ struct hci_rp_read_clock { __le16 accuracy; } __packed; +#define HCI_OP_READ_ENC_KEY_SIZE 0x1408 +struct hci_cp_read_enc_key_size { + __le16 handle; +} __packed; +struct hci_rp_read_enc_key_size { + __u8 status; + __le16 handle; + __u8 key_size; +} __packed; + #define HCI_OP_READ_LOCAL_AMP_INFO 0x1409 struct hci_rp_read_local_amp_info { __u8 status; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 62934151431b..88c57b12a222 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2603,6 +2603,64 @@ unlock: hci_dev_unlock(hdev); } +static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status, + u16 opcode, struct sk_buff *skb) +{ + const struct hci_rp_read_enc_key_size *rp; + struct hci_conn *conn; + u16 handle; + + BT_DBG("%s status 0x%02x", hdev->name, status); + + if (!skb || skb->len < sizeof(*rp)) { + BT_ERR("%s invalid HCI Read Encryption Key Size response", + hdev->name); + return; + } + + rp = (void *)skb->data; + handle = le16_to_cpu(rp->handle); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (!conn) + goto unlock; + + /* If we fail to read the encryption key size, assume maximum + * (which is the same we do also when this HCI command isn't + * supported. + */ + if (rp->status) { + BT_ERR("%s failed to read key size for handle %u", hdev->name, + handle); + conn->enc_key_size = HCI_LINK_KEY_SIZE; + } else { + conn->enc_key_size = rp->key_size; + } + + if (conn->state == BT_CONFIG) { + conn->state = BT_CONNECTED; + hci_connect_cfm(conn, 0); + hci_conn_drop(conn); + } else { + u8 encrypt; + + if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags)) + encrypt = 0x00; + else if (conn->type == ACL_LINK && + test_bit(HCI_CONN_AES_CCM, &conn->flags)) + encrypt = 0x02; + else + encrypt = 0x01; + + hci_encrypt_cfm(conn, 0, encrypt); + } + +unlock: + hci_dev_unlock(hdev); +} + static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_encrypt_change *ev = (void *) skb->data; @@ -2662,6 +2720,35 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) goto unlock; } + /* Try reading the encryption key size for encrypted ACL links */ + if (!ev->status && ev->encrypt && conn->type == ACL_LINK) { + struct hci_cp_read_enc_key_size cp; + struct hci_request req; + + /* Only send HCI_Read_Encryption_Key_Size if the + * controller really supports it. If it doesn't, assume + * the default size (16). + */ + if (!(hdev->commands[20] & 0x10)) { + conn->enc_key_size = HCI_LINK_KEY_SIZE; + goto notify; + } + + hci_req_init(&req, hdev); + + cp.handle = cpu_to_le16(conn->handle); + hci_req_add(&req, HCI_OP_READ_ENC_KEY_SIZE, sizeof(cp), &cp); + + if (hci_req_run_skb(&req, read_enc_key_size_complete)) { + BT_ERR("Sending HCI Read Encryption Key Size failed"); + conn->enc_key_size = HCI_LINK_KEY_SIZE; + goto notify; + } + + goto unlock; + } + +notify: if (conn->state == BT_CONFIG) { if (!ev->status) conn->state = BT_CONNECTED; -- cgit v1.2.3 From fe89e69050489884b304ea67b580056395dbd2b1 Mon Sep 17 00:00:00 2001 From: Varka Bhadram Date: Fri, 12 Jun 2015 09:13:49 +0530 Subject: mac802154: cleanup llsec param flags This patch changes the setting of llsec param flag with the BIT macro. Signed-off-by: Varka Bhadram Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/ieee802154_netdev.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 0a87975128ec..2c10a9f0c6d9 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -346,15 +346,15 @@ struct ieee802154_mac_params { struct wpan_phy; enum { - IEEE802154_LLSEC_PARAM_ENABLED = 1 << 0, - IEEE802154_LLSEC_PARAM_FRAME_COUNTER = 1 << 1, - IEEE802154_LLSEC_PARAM_OUT_LEVEL = 1 << 2, - IEEE802154_LLSEC_PARAM_OUT_KEY = 1 << 3, - IEEE802154_LLSEC_PARAM_KEY_SOURCE = 1 << 4, - IEEE802154_LLSEC_PARAM_PAN_ID = 1 << 5, - IEEE802154_LLSEC_PARAM_HWADDR = 1 << 6, - IEEE802154_LLSEC_PARAM_COORD_HWADDR = 1 << 7, - IEEE802154_LLSEC_PARAM_COORD_SHORTADDR = 1 << 8, + IEEE802154_LLSEC_PARAM_ENABLED = BIT(0), + IEEE802154_LLSEC_PARAM_FRAME_COUNTER = BIT(1), + IEEE802154_LLSEC_PARAM_OUT_LEVEL = BIT(2), + IEEE802154_LLSEC_PARAM_OUT_KEY = BIT(3), + IEEE802154_LLSEC_PARAM_KEY_SOURCE = BIT(4), + IEEE802154_LLSEC_PARAM_PAN_ID = BIT(5), + IEEE802154_LLSEC_PARAM_HWADDR = BIT(6), + IEEE802154_LLSEC_PARAM_COORD_HWADDR = BIT(7), + IEEE802154_LLSEC_PARAM_COORD_SHORTADDR = BIT(8), }; struct ieee802154_llsec_ops { -- cgit v1.2.3 From 70f36507d0c3277e4a5424ca0c6c2a002ae42768 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 12 Jun 2015 09:24:00 +0200 Subject: mac802154: fix flags BIT definitions order This patch fixes commits ("mac802154: cleanup ieee802154 hardware flags") bcbfd2078d9b11277d9c9ce0c30ba73c750503c9 and ("mac802154: cleanup address filtering flags") 6b70a43c7e0202cf285c864bc9f20f607c42e432 by starting the flags definitions at BIT(0) which is same like the previous behaviour. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/mac802154.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index de1cdde92edc..f534a46911dc 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -49,10 +49,10 @@ * do frame address filtering as a pan coordinator. */ enum ieee802154_hw_addr_filt_flags { - IEEE802154_AFILT_SADDR_CHANGED = BIT(1), - IEEE802154_AFILT_IEEEADDR_CHANGED = BIT(2), - IEEE802154_AFILT_PANID_CHANGED = BIT(3), - IEEE802154_AFILT_PANC_CHANGED = BIT(4), + IEEE802154_AFILT_SADDR_CHANGED = BIT(0), + IEEE802154_AFILT_IEEEADDR_CHANGED = BIT(1), + IEEE802154_AFILT_PANID_CHANGED = BIT(2), + IEEE802154_AFILT_PANC_CHANGED = BIT(3), }; /** @@ -133,14 +133,14 @@ struct ieee802154_hw { * frames with bad checksum. */ enum ieee802154_hw_flags { - IEEE802154_HW_TX_OMIT_CKSUM = BIT(1), - IEEE802154_HW_LBT = BIT(2), - IEEE802154_HW_CSMA_PARAMS = BIT(3), - IEEE802154_HW_FRAME_RETRIES = BIT(4), - IEEE802154_HW_AFILT = BIT(5), - IEEE802154_HW_PROMISCUOUS = BIT(6), - IEEE802154_HW_RX_OMIT_CKSUM = BIT(7), - IEEE802154_HW_RX_DROP_BAD_CKSUM = BIT(8), + IEEE802154_HW_TX_OMIT_CKSUM = BIT(0), + IEEE802154_HW_LBT = BIT(1), + IEEE802154_HW_CSMA_PARAMS = BIT(2), + IEEE802154_HW_FRAME_RETRIES = BIT(3), + IEEE802154_HW_AFILT = BIT(4), + IEEE802154_HW_PROMISCUOUS = BIT(5), + IEEE802154_HW_RX_OMIT_CKSUM = BIT(6), + IEEE802154_HW_RX_DROP_BAD_CKSUM = BIT(7), }; /* Indicates that receiver omits FCS and xmitter will add FCS on it's own. */ -- cgit v1.2.3 From 72b31f7271df34c6aab36c01305287924826678f Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 30 May 2015 15:27:40 +0200 Subject: netfilter: bridge: detect NAT66 correctly and change MAC address IPv4 iptables allows to REDIRECT/DNAT/SNAT any traffic over a bridge. e.g. REDIRECT $ sysctl -w net.bridge.bridge-nf-call-iptables=1 $ iptables -t nat -A PREROUTING -p tcp -m tcp --dport 8080 \ -j REDIRECT --to-ports 81 This does not work with ip6tables on a bridge in NAT66 scenario because the REDIRECT/DNAT/SNAT is not correctly detected. The bridge pre-routing (finish) netfilter hook has to check for a possible redirect and then fix the destination mac address. This allows to use the ip6tables rules for local REDIRECT/DNAT/SNAT REDIRECT similar to the IPv4 iptables version. e.g. REDIRECT $ sysctl -w net.bridge.bridge-nf-call-ip6tables=1 $ ip6tables -t nat -A PREROUTING -p tcp -m tcp --dport 8080 \ -j REDIRECT --to-ports 81 This patch makes it possible to use IPv6 NAT66 on a bridge. It was tested on a bridge with two interfaces using SNAT/DNAT NAT66 rules. Reported-by: Artie Hamilton Signed-off-by: Sven Eckelmann [bernhard.thaler@wvnet.at: rebased, add indirect call to ip6_route_input()] [bernhard.thaler@wvnet.at: rebased, split into separate patches] Signed-off-by: Bernhard Thaler Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 1 + include/linux/skbuff.h | 6 ++++- net/bridge/br_netfilter.c | 55 +++++++++++++++++++++++++++++++++++------- net/ipv6/netfilter.c | 1 + 4 files changed, 53 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 64dad1cc1a4b..e2d19694ee8f 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -25,6 +25,7 @@ void ipv6_netfilter_fini(void); struct nf_ipv6_ops { int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); + void (*route_input)(struct sk_buff *skb); }; extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cc612fc0a894..f70fc0e6bf7b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -36,6 +36,7 @@ #include #include #include +#include /* A. Checksumming of received packets by device. * @@ -179,7 +180,10 @@ struct nf_bridge_info { struct net_device *physoutdev; char neigh_header[8]; }; - __be32 ipv4_daddr; + union { + __be32 ipv4_daddr; + struct in6_addr ipv6_daddr; + }; }; #endif diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 6cb642c43451..9ac0c6417c77 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -326,30 +326,63 @@ free_skb: static bool daddr_was_changed(const struct sk_buff *skb, const struct nf_bridge_info *nf_bridge) { - return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; + switch (skb->protocol) { + case htons(ETH_P_IP): + return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; + case htons(ETH_P_IPV6): + return memcmp(&nf_bridge->ipv6_daddr, &ipv6_hdr(skb)->daddr, + sizeof(ipv6_hdr(skb)->daddr)) != 0; + default: + return false; + } } -/* PF_BRIDGE/PRE_ROUTING *********************************************/ -/* Undo the changes made for ip6tables PREROUTING and continue the - * bridge PRE_ROUTING hook. +/* PF_BRIDGE/PRE_ROUTING: Undo the changes made for ip6tables + * PREROUTING and continue the bridge PRE_ROUTING hook. See comment + * for br_nf_pre_routing_finish(), same logic is used here but + * equivalent IPv6 function ip6_route_input() called indirectly. */ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; + struct net_device *dev = skb->dev; + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->pkt_otherhost = false; } nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; + if (daddr_was_changed(skb, nf_bridge)) { + skb_dst_drop(skb); + v6ops->route_input(skb); - rt = bridge_parent_rtable(nf_bridge->physindev); - if (!rt) { - kfree_skb(skb); - return 0; + if (skb_dst(skb)->error) { + kfree_skb(skb); + return 0; + } + + if (skb_dst(skb)->dev == dev) { + skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, + sk, skb, skb->dev, NULL, + br_nf_pre_routing_finish_bridge, + 1); + return 0; + } + ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); + skb->pkt_type = PACKET_HOST; + } else { + rt = bridge_parent_rtable(nf_bridge->physindev); + if (!rt) { + kfree_skb(skb); + return 0; + } + skb_dst_set_noref(skb, &rt->dst); } - skb_dst_set_noref(skb, &rt->dst); skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); @@ -579,6 +612,7 @@ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { + struct nf_bridge_info *nf_bridge; const struct ipv6hdr *hdr; u32 pkt_len; @@ -610,6 +644,9 @@ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, if (!setup_pre_routing(skb)) return NF_DROP; + nf_bridge = nf_bridge_info_get(skb); + nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; + skb->protocol = htons(ETH_P_IPV6); NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb, skb->dev, NULL, diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d958718b5031..bbca09fe9553 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -191,6 +191,7 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, static const struct nf_ipv6_ops ipv6ops = { .chk_addr = ipv6_chk_addr, + .route_input = ip6_route_input }; static const struct nf_afinfo nf_ip6_afinfo = { -- cgit v1.2.3 From 411ffb4fde80705a9a8db4c2d38dbeef6f5bd689 Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 30 May 2015 15:28:28 +0200 Subject: netfilter: bridge: refactor frag_max_size Currently frag_max_size is member of br_input_skb_cb and copied back and forth using IPCB(skb) and BR_INPUT_SKB_CB(skb) each time it is changed or used. Attach frag_max_size to nf_bridge_info and set value in pre_routing and forward functions. Use its value in forward and xmit functions. Signed-off-by: Bernhard Thaler Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 1 + net/bridge/br_netfilter.c | 20 +++++++------------- net/bridge/br_private.h | 1 - 3 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f70fc0e6bf7b..32b105e682b3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -174,6 +174,7 @@ struct nf_bridge_info { BRNF_PROTO_PPPOE } orig_proto:8; bool pkt_otherhost; + __u16 frag_max_size; unsigned int mask; struct net_device *physindev; union { diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 9ac0c6417c77..1f30b28745de 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -440,10 +440,8 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; int err; - int frag_max_size; - frag_max_size = IPCB(skb)->frag_max_size; - BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size; + nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; @@ -738,11 +736,9 @@ static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) struct net_device *in; if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { - int frag_max_size; if (skb->protocol == htons(ETH_P_IP)) { - frag_max_size = IPCB(skb)->frag_max_size; - BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size; + nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; } in = nf_bridge->physindev; @@ -806,12 +802,9 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, } if (pf == NFPROTO_IPV4) { - int frag_max = BR_INPUT_SKB_CB(skb)->frag_max_size; - if (br_parse_ip_options(skb)) return NF_DROP; - - IPCB(skb)->frag_max_size = frag_max; + IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; } nf_bridge->physoutdev = skb->dev; @@ -904,7 +897,7 @@ static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) { int ret; - int frag_max_size; + struct nf_bridge_info *nf_bridge; unsigned int mtu_reserved; if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP)) { @@ -913,17 +906,18 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) } mtu_reserved = nf_bridge_mtu_reduction(skb); + nf_bridge = nf_bridge_info_get(skb); /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ if (skb->len + mtu_reserved > skb->dev->mtu) { struct brnf_frag_data *data; - frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; if (br_parse_ip_options(skb)) /* Drop invalid packet */ return NF_DROP; - IPCB(skb)->frag_max_size = frag_max_size; + + IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; nf_bridge_update_protocol(skb); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1f36fa70639b..8cde96e68fd5 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -304,7 +304,6 @@ struct br_input_skb_cb { int mrouters_only; #endif - u16 frag_max_size; bool proxyarp_replied; #ifdef CONFIG_BRIDGE_VLAN_FILTERING -- cgit v1.2.3 From efb6de9b4ba0092b2c55f6a52d16294a8a698edd Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 30 May 2015 15:30:16 +0200 Subject: netfilter: bridge: forward IPv6 fragmented packets IPv6 fragmented packets are not forwarded on an ethernet bridge with netfilter ip6_tables loaded. e.g. steps to reproduce 1) create a simple bridge like this modprobe br_netfilter brctl addbr br0 brctl addif br0 eth0 brctl addif br0 eth2 ifconfig eth0 up ifconfig eth2 up ifconfig br0 up 2) place a host with an IPv6 address on each side of the bridge set IPv6 address on host A: ip -6 addr add fd01:2345:6789:1::1/64 dev eth0 set IPv6 address on host B: ip -6 addr add fd01:2345:6789:1::2/64 dev eth0 3) run a simple ping command on host A with packets > MTU ping6 -s 4000 fd01:2345:6789:1::2 4) wait some time and run e.g. "ip6tables -t nat -nvL" on the bridge IPv6 fragmented packets traverse the bridge cleanly until somebody runs. "ip6tables -t nat -nvL". As soon as it is run (and netfilter modules are loaded) IPv6 fragmented packets do not traverse the bridge any more (you see no more responses in ping's output). After applying this patch IPv6 fragmented packets traverse the bridge cleanly in above scenario. Signed-off-by: Bernhard Thaler [pablo@netfilter.org: small changes to br_nf_dev_queue_xmit] Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 2 + net/bridge/br_netfilter.c | 139 +++++++++++++++++++++++++++++------------ net/bridge/br_private.h | 6 +- net/ipv6/netfilter.c | 3 +- 4 files changed, 108 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index e2d19694ee8f..8b7d28f3aada 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -26,6 +26,8 @@ struct nf_ipv6_ops { int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); void (*route_input)(struct sk_buff *skb); + int (*fragment)(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)); }; extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index d201ea4440c9..535f9dae743e 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -34,6 +34,7 @@ #include #include +#include #include #include @@ -320,6 +321,55 @@ bad: return -1; } +/* Equivalent to br_validate_ipv4 for IPv6 */ +static int br_validate_ipv6(struct sk_buff *skb) +{ + const struct ipv6hdr *hdr; + struct net_device *dev = skb->dev; + struct inet6_dev *idev = in6_dev_get(skb->dev); + u32 pkt_len; + u8 ip6h_len = sizeof(struct ipv6hdr); + + if (!pskb_may_pull(skb, ip6h_len)) + goto inhdr_error; + + if (skb->len < ip6h_len) + goto drop; + + hdr = ipv6_hdr(skb); + + if (hdr->version != 6) + goto inhdr_error; + + pkt_len = ntohs(hdr->payload_len); + + if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { + if (pkt_len + ip6h_len > skb->len) { + IP6_INC_STATS_BH(dev_net(dev), idev, + IPSTATS_MIB_INTRUNCATEDPKTS); + goto drop; + } + if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { + IP6_INC_STATS_BH(dev_net(dev), idev, + IPSTATS_MIB_INDISCARDS); + goto drop; + } + } + if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb)) + goto drop; + + memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); + /* No IP options in IPv6 header; however it should be + * checked if some next headers need special treatment + */ + return 0; + +inhdr_error: + IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS); +drop: + return -1; +} + static void nf_bridge_update_protocol(struct sk_buff *skb) { switch (skb->nf_bridge->orig_proto) { @@ -405,6 +455,8 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) struct net_device *dev = skb->dev; const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; + if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->pkt_otherhost = false; @@ -606,35 +658,15 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb) } /* Replicate the checks that IPv6 does on packet reception and pass the packet - * to ip6tables, which doesn't support NAT, so things are fairly simple. */ + * to ip6tables. + */ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_bridge_info *nf_bridge; - const struct ipv6hdr *hdr; - u32 pkt_len; - - if (skb->len < sizeof(struct ipv6hdr)) - return NF_DROP; - - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - return NF_DROP; - - hdr = ipv6_hdr(skb); - - if (hdr->version != 6) - return NF_DROP; - pkt_len = ntohs(hdr->payload_len); - - if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { - if (pkt_len + sizeof(struct ipv6hdr) > skb->len) - return NF_DROP; - if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) - return NF_DROP; - } - if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb)) + if (br_validate_ipv6(skb)) return NF_DROP; nf_bridge_put(skb->nf_bridge); @@ -738,9 +770,11 @@ static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { - if (skb->protocol == htons(ETH_P_IP)) { + if (skb->protocol == htons(ETH_P_IP)) nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; - } + + if (skb->protocol == htons(ETH_P_IPV6)) + nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; in = nf_bridge->physindev; if (nf_bridge->pkt_otherhost) { @@ -808,6 +842,12 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; } + if (pf == NFPROTO_IPV6) { + if (br_validate_ipv6(skb)) + return NF_DROP; + IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; + } + nf_bridge->physoutdev = skb->dev; if (pf == NFPROTO_IPV4) skb->protocol = htons(ETH_P_IP); @@ -855,7 +895,7 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, return NF_STOLEN; } -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) { struct brnf_frag_data *data; @@ -875,6 +915,7 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) nf_bridge_info_free(skb); return br_dev_queue_push_xmit(sk, skb); } +#endif static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, int (*output)(struct sock *, struct sk_buff *)) @@ -897,21 +938,23 @@ static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) { - int ret; struct nf_bridge_info *nf_bridge; unsigned int mtu_reserved; - if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP)) { + mtu_reserved = nf_bridge_mtu_reduction(skb); + + if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) { nf_bridge_info_free(skb); return br_dev_queue_push_xmit(sk, skb); } - mtu_reserved = nf_bridge_mtu_reduction(skb); nf_bridge = nf_bridge_info_get(skb); + +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ - if (skb->len + mtu_reserved > skb->dev->mtu) { + if (skb->protocol == htons(ETH_P_IP)) { struct brnf_frag_data *data; if (br_validate_ipv4(skb)) @@ -928,21 +971,37 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) skb_copy_from_linear_data_offset(skb, -data->size, data->mac, data->size); - ret = br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit); - } else { - nf_bridge_info_free(skb); - ret = br_dev_queue_push_xmit(sk, skb); + return br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit); } +#endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + if (skb->protocol == htons(ETH_P_IPV6)) { + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + struct brnf_frag_data *data; - return ret; -} -#else -static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) -{ + if (br_validate_ipv6(skb)) + return NF_DROP; + + IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; + + nf_bridge_update_protocol(skb); + + data = this_cpu_ptr(&brnf_frag_data_storage); + data->encap_size = nf_bridge_encap_header_len(skb); + data->size = ETH_HLEN + data->encap_size; + + skb_copy_from_linear_data_offset(skb, -data->size, data->mac, + data->size); + + if (v6ops) + return v6ops->fragment(sk, skb, br_nf_push_frag_xmit); + else + return -EMSGSIZE; + } +#endif nf_bridge_info_free(skb); return br_dev_queue_push_xmit(sk, skb); } -#endif /* PF_BRIDGE/POST_ROUTING ********************************************/ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 8cde96e68fd5..5dccced71269 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #define BR_HASH_BITS 8 @@ -214,7 +215,10 @@ struct net_bridge spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - struct rtable fake_rtable; + union { + struct rtable fake_rtable; + struct rt6_info fake_rt6_info; + }; bool nf_call_iptables; bool nf_call_ip6tables; bool nf_call_arptables; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index bbca09fe9553..b4de08a83e0b 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -191,7 +191,8 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, static const struct nf_ipv6_ops ipv6ops = { .chk_addr = ipv6_chk_addr, - .route_input = ip6_route_input + .route_input = ip6_route_input, + .fragment = ip6_fragment }; static const struct nf_afinfo nf_ip6_afinfo = { -- cgit v1.2.3 From 33b1f31392861947fa2a2a57c3a39ab63b8c9f9d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 5 Jun 2015 13:28:38 +0200 Subject: net: ip_fragment: remove BRIDGE_NETFILTER mtu special handling since commit d6b915e29f4adea9 ("ip_fragment: don't forward defragmented DF packet") the largest fragment size is available in the IPCB. Therefore we no longer need to care about 'encapsulation' overhead of stripped PPPOE/VLAN headers since ip_do_fragment doesn't use device mtu in such cases. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 7 ------- net/bridge/br_netfilter.c | 7 +++++++ net/ipv4/ip_output.c | 4 ---- 3 files changed, 7 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index f2fdb5a52070..6d80fc686323 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -20,13 +20,6 @@ enum nf_br_hook_priorities { #define BRNF_BRIDGED_DNAT 0x02 #define BRNF_NF_BRIDGE_PREROUTING 0x08 -static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) -{ - if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE) - return PPPOE_SES_HLEN; - return 0; -} - int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); static inline void br_drop_fake_rtable(struct sk_buff *skb) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 535f9dae743e..1e62ae5d8f4e 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -936,6 +936,13 @@ static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, return ip_do_fragment(sk, skb, output); } +static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) +{ + if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE) + return PPPOE_SES_HLEN; + return 0; +} + static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f5f5ef1cebd5..19d7e43b5370 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -549,10 +549,6 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb, hlen = iph->ihl * 4; mtu = mtu - hlen; /* Size of data space */ -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (skb->nf_bridge) - mtu -= nf_bridge_mtu_reduction(skb); -#endif IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; /* When frag_list is given, use it. First, check its validity: -- cgit v1.2.3 From 71ae0dff02d756e4d2ca710b79f2ff5390029a5f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Jun 2015 01:34:54 +0200 Subject: netfilter: xtables: use percpu rule counters The binary arp/ip/ip6tables ruleset is stored per cpu. The only reason left as to why we need percpu duplication are the rule counters embedded into ipt_entry et al -- since each cpu has its own copy of the rules, all counters can be lockless. The downside is that the more cpus are supported, the more memory is required. Rules are not just duplicated per online cpu but for each possible cpu, i.e. if maxcpu is 144, then rule is duplicated 144 times, not for the e.g. 64 cores present. To save some memory and also improve utilization of shared caches it would be preferable to only store the rule blob once. So we first need to separate counters and the rule blob. Instead of using entry->counters, allocate this percpu and store the percpu address in entry->counters.pcnt on CONFIG_SMP. This change makes no sense as-is; it is merely an intermediate step to remove the percpu duplication of the rule set in a followup patch. Suggested-by: Eric Dumazet Acked-by: Jesper Dangaard Brouer Reported-by: Marcelo Ricardo Leitner Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 49 ++++++++++++++++++++++++++++++++++++++ net/ipv4/netfilter/arp_tables.c | 32 +++++++++++++++++++++---- net/ipv4/netfilter/ip_tables.c | 31 ++++++++++++++++++++---- net/ipv6/netfilter/ip6_tables.c | 31 ++++++++++++++++++++---- 4 files changed, 129 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 09f38206c18f..b77ab9f17641 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -353,6 +353,55 @@ static inline unsigned long ifname_compare_aligned(const char *_a, return ret; } + +/* On SMP, ip(6)t_entry->counters.pcnt holds address of the + * real (percpu) counter. On !SMP, its just the packet count, + * so nothing needs to be done there. + * + * xt_percpu_counter_alloc returns the address of the percpu + * counter, or 0 on !SMP. + * + * Hence caller must use IS_ERR_VALUE to check for error, this + * allows us to return 0 for single core systems without forcing + * callers to deal with SMP vs. NONSMP issues. + */ +static inline u64 xt_percpu_counter_alloc(void) +{ + if (nr_cpu_ids > 1) { + void __percpu *res = alloc_percpu(struct xt_counters); + + if (res == NULL) + return (u64) -ENOMEM; + + return (__force u64) res; + } + + return 0; +} +static inline void xt_percpu_counter_free(u64 pcnt) +{ + if (nr_cpu_ids > 1) + free_percpu((void __percpu *) pcnt); +} + +static inline struct xt_counters * +xt_get_this_cpu_counter(struct xt_counters *cnt) +{ + if (nr_cpu_ids > 1) + return this_cpu_ptr((void __percpu *) cnt->pcnt); + + return cnt; +} + +static inline struct xt_counters * +xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) +{ + if (nr_cpu_ids > 1) + return per_cpu_ptr((void __percpu *) cnt->pcnt, cpu); + + return cnt; +} + struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *); void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index a61200754f4b..0ada09ae6e81 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -289,13 +289,15 @@ unsigned int arpt_do_table(struct sk_buff *skb, arp = arp_hdr(skb); do { const struct xt_entry_target *t; + struct xt_counters *counter; if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { e = arpt_next_entry(e); continue; } - ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1); + counter = xt_get_this_cpu_counter(&e->counters); + ADD_COUNTER(*counter, arp_hdr_len(skb->dev), 1); t = arpt_get_target_c(e); @@ -521,6 +523,10 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) if (ret) return ret; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return -ENOMEM; + t = arpt_get_target(e); target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, t->u.user.revision); @@ -538,6 +544,8 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) err: module_put(t->u.kernel.target->me); out: + xt_percpu_counter_free(e->counters.pcnt); + return ret; } @@ -614,6 +622,7 @@ static inline void cleanup_entry(struct arpt_entry *e) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); + xt_percpu_counter_free(e->counters.pcnt); } /* Checks and translates the user-supplied table segment (held in @@ -723,13 +732,15 @@ static void get_counters(const struct xt_table_info *t, i = 0; xt_entry_foreach(iter, t->entries[cpu], t->size) { + struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); do { start = read_seqcount_begin(s); - bcnt = iter->counters.bcnt; - pcnt = iter->counters.pcnt; + bcnt = tmp->bcnt; + pcnt = tmp->pcnt; } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); @@ -1186,7 +1197,10 @@ static int do_add_counters(struct net *net, const void __user *user, loc_cpu_entry = private->entries[curcpu]; addend = xt_write_recseq_begin(); xt_entry_foreach(iter, loc_cpu_entry, private->size) { - ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + struct xt_counters *tmp; + + tmp = xt_get_this_cpu_counter(&iter->counters); + ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt); ++i; } xt_write_recseq_end(addend); @@ -1416,9 +1430,17 @@ static int translate_compat_table(const char *name, i = 0; xt_entry_foreach(iter1, entry1, newinfo->size) { + iter1->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(iter1->counters.pcnt)) { + ret = -ENOMEM; + break; + } + ret = check_target(iter1, name); - if (ret != 0) + if (ret != 0) { + xt_percpu_counter_free(iter1->counters.pcnt); break; + } ++i; if (strcmp(arpt_get_target(iter1)->u.user.name, XT_ERROR_TARGET) == 0) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e7abf5145edc..d190b10dabfe 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -345,6 +345,7 @@ ipt_do_table(struct sk_buff *skb, do { const struct xt_entry_target *t; const struct xt_entry_match *ematch; + struct xt_counters *counter; IP_NF_ASSERT(e); if (!ip_packet_match(ip, indev, outdev, @@ -361,7 +362,8 @@ ipt_do_table(struct sk_buff *skb, goto no_match; } - ADD_COUNTER(e->counters, skb->len, 1); + counter = xt_get_this_cpu_counter(&e->counters); + ADD_COUNTER(*counter, skb->len, 1); t = ipt_get_target(e); IP_NF_ASSERT(t->u.kernel.target); @@ -665,6 +667,10 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, if (ret) return ret; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return -ENOMEM; + j = 0; mtpar.net = net; mtpar.table = name; @@ -691,6 +697,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, ret = check_target(e, net, name); if (ret) goto err; + return 0; err: module_put(t->u.kernel.target->me); @@ -700,6 +707,9 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, break; cleanup_match(ematch, net); } + + xt_percpu_counter_free(e->counters.pcnt); + return ret; } @@ -784,6 +794,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); + xt_percpu_counter_free(e->counters.pcnt); } /* Checks and translates the user-supplied table segment (held in @@ -888,13 +899,15 @@ get_counters(const struct xt_table_info *t, i = 0; xt_entry_foreach(iter, t->entries[cpu], t->size) { + struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); do { start = read_seqcount_begin(s); - bcnt = iter->counters.bcnt; - pcnt = iter->counters.pcnt; + bcnt = tmp->bcnt; + pcnt = tmp->pcnt; } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); @@ -1374,7 +1387,10 @@ do_add_counters(struct net *net, const void __user *user, loc_cpu_entry = private->entries[curcpu]; addend = xt_write_recseq_begin(); xt_entry_foreach(iter, loc_cpu_entry, private->size) { - ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + struct xt_counters *tmp; + + tmp = xt_get_this_cpu_counter(&iter->counters); + ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt); ++i; } xt_write_recseq_end(addend); @@ -1608,6 +1624,10 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) unsigned int j; int ret = 0; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return -ENOMEM; + j = 0; mtpar.net = net; mtpar.table = name; @@ -1632,6 +1652,9 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) break; cleanup_match(ematch, net); } + + xt_percpu_counter_free(e->counters.pcnt); + return ret; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index cdd085f8b770..a1190ee14723 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -367,6 +367,7 @@ ip6t_do_table(struct sk_buff *skb, do { const struct xt_entry_target *t; const struct xt_entry_match *ematch; + struct xt_counters *counter; IP_NF_ASSERT(e); acpar.thoff = 0; @@ -384,7 +385,8 @@ ip6t_do_table(struct sk_buff *skb, goto no_match; } - ADD_COUNTER(e->counters, skb->len, 1); + counter = xt_get_this_cpu_counter(&e->counters); + ADD_COUNTER(*counter, skb->len, 1); t = ip6t_get_target_c(e); IP_NF_ASSERT(t->u.kernel.target); @@ -679,6 +681,10 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, if (ret) return ret; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return -ENOMEM; + j = 0; mtpar.net = net; mtpar.table = name; @@ -714,6 +720,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, break; cleanup_match(ematch, net); } + + xt_percpu_counter_free(e->counters.pcnt); + return ret; } @@ -797,6 +806,8 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); + + xt_percpu_counter_free(e->counters.pcnt); } /* Checks and translates the user-supplied table segment (held in @@ -901,13 +912,15 @@ get_counters(const struct xt_table_info *t, i = 0; xt_entry_foreach(iter, t->entries[cpu], t->size) { + struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); do { start = read_seqcount_begin(s); - bcnt = iter->counters.bcnt; - pcnt = iter->counters.pcnt; + bcnt = tmp->bcnt; + pcnt = tmp->pcnt; } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); @@ -1374,7 +1387,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, goto free; } - local_bh_disable(); private = t->private; if (private->number != num_counters) { @@ -1388,7 +1400,10 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, addend = xt_write_recseq_begin(); loc_cpu_entry = private->entries[curcpu]; xt_entry_foreach(iter, loc_cpu_entry, private->size) { - ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + struct xt_counters *tmp; + + tmp = xt_get_this_cpu_counter(&iter->counters); + ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt); ++i; } xt_write_recseq_end(addend); @@ -1621,6 +1636,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return -ENOMEM; j = 0; mtpar.net = net; mtpar.table = name; @@ -1645,6 +1663,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, break; cleanup_match(ematch, net); } + + xt_percpu_counter_free(e->counters.pcnt); + return ret; } -- cgit v1.2.3 From 482cfc318559e2527dfd8513582d2fdb276e47c2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Jun 2015 01:34:55 +0200 Subject: netfilter: xtables: avoid percpu ruleset duplication We store the rule blob per (possible) cpu. Unfortunately this means we can waste lot of memory on big smp machines. ipt_entry structure ('rule head') is 112 byte, so e.g. with maxcpu=64 one single rule eats close to 8k RAM. Since previous patch made counters percpu it appears there is nothing left in the rule blob that needs to be percpu. On my test system (144 possible cpus, 400k dummy rules) this change saves close to 9 Gigabyte of RAM. Reported-by: Marcelo Ricardo Leitner Acked-by: Jesper Dangaard Brouer Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 4 +-- net/ipv4/netfilter/arp_tables.c | 50 +++++++++-------------------- net/ipv4/netfilter/ip_tables.c | 64 ++++++++++--------------------------- net/ipv6/netfilter/ip6_tables.c | 65 ++++++++++---------------------------- net/netfilter/x_tables.c | 23 +++++--------- 5 files changed, 57 insertions(+), 149 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index b77ab9f17641..9969d79dcde1 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -224,9 +224,9 @@ struct xt_table_info { unsigned int stacksize; unsigned int __percpu *stackptr; void ***jumpstack; - /* ipt_entry tables: one per CPU */ + /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */ - void *entries[1]; + void *entries; }; #define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \ diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 0ada09ae6e81..d75c139393fc 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -275,7 +275,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, * pointer. */ smp_read_barrier_depends(); - table_base = private->entries[smp_processor_id()]; + table_base = private->entries; e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); @@ -711,12 +711,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) { - if (newinfo->entries[i] && newinfo->entries[i] != entry0) - memcpy(newinfo->entries[i], entry0, newinfo->size); - } - return ret; } @@ -731,7 +725,7 @@ static void get_counters(const struct xt_table_info *t, seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; - xt_entry_foreach(iter, t->entries[cpu], t->size) { + xt_entry_foreach(iter, t->entries, t->size) { struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; @@ -785,7 +779,7 @@ static int copy_entries_to_user(unsigned int total_size, if (IS_ERR(counters)) return PTR_ERR(counters); - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; /* ... then copy entire thing ... */ if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; @@ -880,10 +874,10 @@ static int compat_table_info(const struct xt_table_info *info, if (!newinfo || !info) return -EINVAL; - /* we dont care about newinfo->entries[] */ + /* we dont care about newinfo->entries */ memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; - loc_cpu_entry = info->entries[raw_smp_processor_id()]; + loc_cpu_entry = info->entries; xt_compat_init_offsets(NFPROTO_ARP, info->number); xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); @@ -1048,7 +1042,7 @@ static int __do_replace(struct net *net, const char *name, get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ - loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; + loc_cpu_old_entry = oldinfo->entries; xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) cleanup_entry(iter); @@ -1095,8 +1089,7 @@ static int do_replace(struct net *net, const void __user *user, if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; @@ -1126,7 +1119,7 @@ static int do_replace(struct net *net, const void __user *user, static int do_add_counters(struct net *net, const void __user *user, unsigned int len, int compat) { - unsigned int i, curcpu; + unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1136,7 +1129,6 @@ static int do_add_counters(struct net *net, const void __user *user, struct xt_table *t; const struct xt_table_info *private; int ret = 0; - void *loc_cpu_entry; struct arpt_entry *iter; unsigned int addend; #ifdef CONFIG_COMPAT @@ -1192,11 +1184,9 @@ static int do_add_counters(struct net *net, const void __user *user, } i = 0; - /* Choose the copy that is on our node */ - curcpu = smp_processor_id(); - loc_cpu_entry = private->entries[curcpu]; + addend = xt_write_recseq_begin(); - xt_entry_foreach(iter, loc_cpu_entry, private->size) { + xt_entry_foreach(iter, private->entries, private->size) { struct xt_counters *tmp; tmp = xt_get_this_cpu_counter(&iter->counters); @@ -1410,7 +1400,7 @@ static int translate_compat_table(const char *name, newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } - entry1 = newinfo->entries[raw_smp_processor_id()]; + entry1 = newinfo->entries; pos = entry1; size = total_size; xt_entry_foreach(iter0, entry0, total_size) { @@ -1470,11 +1460,6 @@ static int translate_compat_table(const char *name, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) - if (newinfo->entries[i] && newinfo->entries[i] != entry1) - memcpy(newinfo->entries[i], entry1, newinfo->size); - *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1533,8 +1518,7 @@ static int compat_do_replace(struct net *net, void __user *user, if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; @@ -1631,7 +1615,6 @@ static int compat_copy_entries_to_user(unsigned int total_size, void __user *pos; unsigned int size; int ret = 0; - void *loc_cpu_entry; unsigned int i = 0; struct arpt_entry *iter; @@ -1639,11 +1622,9 @@ static int compat_copy_entries_to_user(unsigned int total_size, if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy on our node/cpu */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - xt_entry_foreach(iter, loc_cpu_entry, total_size) { + xt_entry_foreach(iter, private->entries, total_size) { ret = compat_copy_entry_to_user(iter, &pos, &size, counters, i++); if (ret != 0) @@ -1812,8 +1793,7 @@ struct xt_table *arpt_register_table(struct net *net, goto out; } - /* choose the copy on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(newinfo, loc_cpu_entry, repl); @@ -1844,7 +1824,7 @@ void arpt_unregister_table(struct xt_table *table) private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) cleanup_entry(iter); if (private->number > private->initial_entries) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index d190b10dabfe..6151500c2df8 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -254,15 +254,13 @@ static void trace_packet(const struct sk_buff *skb, const struct xt_table_info *private, const struct ipt_entry *e) { - const void *table_base; const struct ipt_entry *root; const char *hookname, *chainname, *comment; const struct ipt_entry *iter; unsigned int rulenum = 0; struct net *net = dev_net(in ? in : out); - table_base = private->entries[smp_processor_id()]; - root = get_entry(table_base, private->hook_entry[hook]); + root = get_entry(private->entries, private->hook_entry[hook]); hookname = chainname = hooknames[hook]; comment = comments[NF_IP_TRACE_COMMENT_RULE]; @@ -331,7 +329,7 @@ ipt_do_table(struct sk_buff *skb, * pointer. */ smp_read_barrier_depends(); - table_base = private->entries[cpu]; + table_base = private->entries; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; @@ -877,12 +875,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) { - if (newinfo->entries[i] && newinfo->entries[i] != entry0) - memcpy(newinfo->entries[i], entry0, newinfo->size); - } - return ret; } @@ -898,7 +890,7 @@ get_counters(const struct xt_table_info *t, seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; - xt_entry_foreach(iter, t->entries[cpu], t->size) { + xt_entry_foreach(iter, t->entries, t->size) { struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; @@ -946,17 +938,13 @@ copy_entries_to_user(unsigned int total_size, struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; - const void *loc_cpu_entry; + void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy that is on our node/cpu, ... - * This choice is lazy (because current thread is - * allowed to migrate to another cpu) - */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; goto free_counters; @@ -1070,10 +1058,10 @@ static int compat_table_info(const struct xt_table_info *info, if (!newinfo || !info) return -EINVAL; - /* we dont care about newinfo->entries[] */ + /* we dont care about newinfo->entries */ memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; - loc_cpu_entry = info->entries[raw_smp_processor_id()]; + loc_cpu_entry = info->entries; xt_compat_init_offsets(AF_INET, info->number); xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); @@ -1194,7 +1182,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table *t; struct xt_table_info *oldinfo; struct xt_counters *counters; - void *loc_cpu_old_entry; struct ipt_entry *iter; ret = 0; @@ -1237,8 +1224,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ - loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) cleanup_entry(iter, net); xt_free_table_info(oldinfo); @@ -1284,8 +1270,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; @@ -1316,7 +1301,7 @@ static int do_add_counters(struct net *net, const void __user *user, unsigned int len, int compat) { - unsigned int i, curcpu; + unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1326,7 +1311,6 @@ do_add_counters(struct net *net, const void __user *user, struct xt_table *t; const struct xt_table_info *private; int ret = 0; - void *loc_cpu_entry; struct ipt_entry *iter; unsigned int addend; #ifdef CONFIG_COMPAT @@ -1382,11 +1366,8 @@ do_add_counters(struct net *net, const void __user *user, } i = 0; - /* Choose the copy that is on our node */ - curcpu = smp_processor_id(); - loc_cpu_entry = private->entries[curcpu]; addend = xt_write_recseq_begin(); - xt_entry_foreach(iter, loc_cpu_entry, private->size) { + xt_entry_foreach(iter, private->entries, private->size) { struct xt_counters *tmp; tmp = xt_get_this_cpu_counter(&iter->counters); @@ -1739,7 +1720,7 @@ translate_compat_table(struct net *net, newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } - entry1 = newinfo->entries[raw_smp_processor_id()]; + entry1 = newinfo->entries; pos = entry1; size = total_size; xt_entry_foreach(iter0, entry0, total_size) { @@ -1791,11 +1772,6 @@ translate_compat_table(struct net *net, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) - if (newinfo->entries[i] && newinfo->entries[i] != entry1) - memcpy(newinfo->entries[i], entry1, newinfo->size); - *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1842,8 +1818,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; @@ -1914,7 +1889,6 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *pos; unsigned int size; int ret = 0; - const void *loc_cpu_entry; unsigned int i = 0; struct ipt_entry *iter; @@ -1922,14 +1896,9 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy that is on our node/cpu, ... - * This choice is lazy (because current thread is - * allowed to migrate to another cpu) - */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - xt_entry_foreach(iter, loc_cpu_entry, total_size) { + xt_entry_foreach(iter, private->entries, total_size) { ret = compat_copy_entry_to_user(iter, &pos, &size, counters, i++); if (ret != 0) @@ -2104,8 +2073,7 @@ struct xt_table *ipt_register_table(struct net *net, goto out; } - /* choose the copy on our node/cpu, but dont care about preemption */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(net, newinfo, loc_cpu_entry, repl); @@ -2136,7 +2104,7 @@ void ipt_unregister_table(struct net *net, struct xt_table *table) private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) cleanup_entry(iter, net); if (private->number > private->initial_entries) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index a1190ee14723..80a7f0d38aa4 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -283,15 +283,13 @@ static void trace_packet(const struct sk_buff *skb, const struct xt_table_info *private, const struct ip6t_entry *e) { - const void *table_base; const struct ip6t_entry *root; const char *hookname, *chainname, *comment; const struct ip6t_entry *iter; unsigned int rulenum = 0; struct net *net = dev_net(in ? in : out); - table_base = private->entries[smp_processor_id()]; - root = get_entry(table_base, private->hook_entry[hook]); + root = get_entry(private->entries, private->hook_entry[hook]); hookname = chainname = hooknames[hook]; comment = comments[NF_IP6_TRACE_COMMENT_RULE]; @@ -357,7 +355,7 @@ ip6t_do_table(struct sk_buff *skb, */ smp_read_barrier_depends(); cpu = smp_processor_id(); - table_base = private->entries[cpu]; + table_base = private->entries; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; @@ -890,12 +888,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) { - if (newinfo->entries[i] && newinfo->entries[i] != entry0) - memcpy(newinfo->entries[i], entry0, newinfo->size); - } - return ret; } @@ -911,7 +903,7 @@ get_counters(const struct xt_table_info *t, seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; - xt_entry_foreach(iter, t->entries[cpu], t->size) { + xt_entry_foreach(iter, t->entries, t->size) { struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; @@ -959,17 +951,13 @@ copy_entries_to_user(unsigned int total_size, struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; - const void *loc_cpu_entry; + void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy that is on our node/cpu, ... - * This choice is lazy (because current thread is - * allowed to migrate to another cpu) - */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; goto free_counters; @@ -1083,10 +1071,10 @@ static int compat_table_info(const struct xt_table_info *info, if (!newinfo || !info) return -EINVAL; - /* we dont care about newinfo->entries[] */ + /* we dont care about newinfo->entries */ memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; - loc_cpu_entry = info->entries[raw_smp_processor_id()]; + loc_cpu_entry = info->entries; xt_compat_init_offsets(AF_INET6, info->number); xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); @@ -1207,7 +1195,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table *t; struct xt_table_info *oldinfo; struct xt_counters *counters; - const void *loc_cpu_old_entry; struct ip6t_entry *iter; ret = 0; @@ -1250,8 +1237,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ - loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) cleanup_entry(iter, net); xt_free_table_info(oldinfo); @@ -1297,8 +1283,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; @@ -1329,7 +1314,7 @@ static int do_add_counters(struct net *net, const void __user *user, unsigned int len, int compat) { - unsigned int i, curcpu; + unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1339,7 +1324,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, struct xt_table *t; const struct xt_table_info *private; int ret = 0; - const void *loc_cpu_entry; struct ip6t_entry *iter; unsigned int addend; #ifdef CONFIG_COMPAT @@ -1395,11 +1379,8 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, } i = 0; - /* Choose the copy that is on our node */ - curcpu = smp_processor_id(); addend = xt_write_recseq_begin(); - loc_cpu_entry = private->entries[curcpu]; - xt_entry_foreach(iter, loc_cpu_entry, private->size) { + xt_entry_foreach(iter, private->entries, private->size) { struct xt_counters *tmp; tmp = xt_get_this_cpu_counter(&iter->counters); @@ -1407,7 +1388,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, ++i; } xt_write_recseq_end(addend); - unlock_up_free: local_bh_enable(); xt_table_unlock(t); @@ -1750,7 +1730,7 @@ translate_compat_table(struct net *net, newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } - entry1 = newinfo->entries[raw_smp_processor_id()]; + entry1 = newinfo->entries; pos = entry1; size = total_size; xt_entry_foreach(iter0, entry0, total_size) { @@ -1802,11 +1782,6 @@ translate_compat_table(struct net *net, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) - if (newinfo->entries[i] && newinfo->entries[i] != entry1) - memcpy(newinfo->entries[i], entry1, newinfo->size); - *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1853,8 +1828,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; @@ -1925,7 +1899,6 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *pos; unsigned int size; int ret = 0; - const void *loc_cpu_entry; unsigned int i = 0; struct ip6t_entry *iter; @@ -1933,14 +1906,9 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy that is on our node/cpu, ... - * This choice is lazy (because current thread is - * allowed to migrate to another cpu) - */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - xt_entry_foreach(iter, loc_cpu_entry, total_size) { + xt_entry_foreach(iter, private->entries, total_size) { ret = compat_copy_entry_to_user(iter, &pos, &size, counters, i++); if (ret != 0) @@ -2115,8 +2083,7 @@ struct xt_table *ip6t_register_table(struct net *net, goto out; } - /* choose the copy on our node/cpu, but dont care about preemption */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(net, newinfo, loc_cpu_entry, repl); @@ -2146,7 +2113,7 @@ void ip6t_unregister_table(struct net *net, struct xt_table *table) private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) cleanup_entry(iter, net); if (private->number > private->initial_entries) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 83032464a4bd..6062ce3e862c 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -659,7 +659,6 @@ EXPORT_SYMBOL_GPL(xt_compat_target_to_user); struct xt_table_info *xt_alloc_table_info(unsigned int size) { struct xt_table_info *newinfo; - int cpu; /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages) @@ -671,19 +670,14 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) newinfo->size = size; - for_each_possible_cpu(cpu) { - if (size <= PAGE_SIZE) - newinfo->entries[cpu] = kmalloc_node(size, - GFP_KERNEL, - cpu_to_node(cpu)); - else - newinfo->entries[cpu] = vmalloc_node(size, - cpu_to_node(cpu)); + if (size <= PAGE_SIZE) + newinfo->entries = kmalloc(size, GFP_KERNEL); + else + newinfo->entries = vmalloc(size); - if (newinfo->entries[cpu] == NULL) { - xt_free_table_info(newinfo); - return NULL; - } + if (newinfo->entries == NULL) { + xt_free_table_info(newinfo); + return NULL; } return newinfo; @@ -694,8 +688,7 @@ void xt_free_table_info(struct xt_table_info *info) { int cpu; - for_each_possible_cpu(cpu) - kvfree(info->entries[cpu]); + kvfree(info->entries); if (info->jumpstack != NULL) { for_each_possible_cpu(cpu) -- cgit v1.2.3 From 1e98a0f08abddde87f0f93237f10629ecb4880ef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 12 Jun 2015 19:31:32 -0700 Subject: flow_dissector: fix ipv6 dst, hop-by-hop and routing ext hdrs __skb_header_pointer() returns a pointer that must be checked. Fixes infinite loop reported by Alexei, and add __must_check to catch these errors earlier. Fixes: 6a74fcf426f5 ("flow_dissector: add support for dst, hop-by-hop and routing ext hdrs") Reported-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: Eric Dumazet Acked-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++---- net/core/flow_dissector.c | 6 ++++-- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cc612fc0a894..a7acc92aa668 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2743,8 +2743,9 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum); -static inline void *__skb_header_pointer(const struct sk_buff *skb, int offset, - int len, void *data, int hlen, void *buffer) +static inline void * __must_check +__skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *data, int hlen, void *buffer) { if (hlen - offset >= len) return data + offset; @@ -2756,8 +2757,8 @@ static inline void *__skb_header_pointer(const struct sk_buff *skb, int offset, return buffer; } -static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, - int len, void *buffer) +static inline void * __must_check +skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) { return __skb_header_pointer(skb, offset, len, skb->data, skb_headlen(skb), buffer); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 22e4dffa0c8b..476e5dda59e1 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -394,9 +394,11 @@ ip_proto_again: opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr), data, hlen, &_opthdr); + if (!opthdr) + return false; - ip_proto = _opthdr[0]; - nhoff += (_opthdr[1] + 1) << 3; + ip_proto = opthdr[0]; + nhoff += (opthdr[1] + 1) << 3; goto ip_proto_again; } -- cgit v1.2.3 From aaeb6e24f5b6cb6a664fbdec6e08b65c3173c1b3 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 12 Jun 2015 21:07:54 +0200 Subject: netfilter: ipset: Use MSEC_PER_SEC consistently Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set_timeout.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 83c2f9e0886c..3c8842bcedaa 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -61,7 +61,7 @@ ip_set_timeout_set(unsigned long *timeout, u32 t) return; } - *timeout = msecs_to_jiffies(t * 1000) + jiffies; + *timeout = msecs_to_jiffies(t * MSEC_PER_SEC) + jiffies; if (*timeout == IPSET_ELEM_PERMANENT) /* Bingo! :-) */ (*timeout)--; @@ -71,7 +71,7 @@ static inline u32 ip_set_timeout_get(unsigned long *timeout) { return *timeout == IPSET_ELEM_PERMANENT ? 0 : - jiffies_to_msecs(*timeout - jiffies)/1000; + jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC; } #endif /* __KERNEL__ */ -- cgit v1.2.3 From f690cbaed9fe4d77592e24139db7ad790641c4fd Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 12 Jun 2015 22:11:00 +0200 Subject: netfilter: ipset: Fix cidr handling for hash:*net* types Commit "Simplify cidr handling for hash:*net* types" broke the cidr handling for the hash:*net* types when the sets were used by the SET target: entries with invalid cidr values were added to the sets. Reported by Jonathan Johnson. Testsuite entry is added to verify the fix. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 2 -- net/netfilter/ipset/ip_set_hash_gen.h | 44 ++++++++++++++++------------ net/netfilter/ipset/ip_set_hash_ipportnet.c | 4 +-- net/netfilter/ipset/ip_set_hash_net.c | 4 +-- net/netfilter/ipset/ip_set_hash_netiface.c | 4 +-- net/netfilter/ipset/ip_set_hash_netnet.c | 8 ++--- net/netfilter/ipset/ip_set_hash_netport.c | 4 +-- net/netfilter/ipset/ip_set_hash_netportnet.c | 8 ++--- 8 files changed, 42 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index ffdfdc24952a..a6fe1ce96437 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -545,8 +545,6 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, { .bytes = ULLONG_MAX, .packets = ULLONG_MAX, \ .timeout = (set)->timeout } -#define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b)) - #define IPSET_CONCAT(a, b) a##b #define IPSET_TOKEN(a, b) IPSET_CONCAT(a, b) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 8dd82db05ed0..2f1985e71f6c 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -149,17 +149,21 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #endif /* cidr + 1 is stored in net_prefixes to support /0 */ -#define SCIDR(cidr, i) (__CIDR(cidr, i) + 1) +#define NCIDR_PUT(cidr) ((cidr) + 1) +#define NCIDR_GET(cidr) ((cidr) - 1) #ifdef IP_SET_HASH_WITH_NETS_PACKED /* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */ -#define GCIDR(cidr, i) (__CIDR(cidr, i) + 1) -#define NCIDR(cidr) (cidr) +#define DCIDR_PUT(cidr) ((cidr) - 1) +#define DCIDR_GET(cidr, i) (__CIDR(cidr, i) + 1) #else -#define GCIDR(cidr, i) (__CIDR(cidr, i)) -#define NCIDR(cidr) (cidr - 1) +#define DCIDR_PUT(cidr) (cidr) +#define DCIDR_GET(cidr, i) __CIDR(cidr, i) #endif +#define INIT_CIDR(cidr, host_mask) \ + DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask)) + #define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128) #ifdef IP_SET_HASH_WITH_NET0 @@ -303,7 +307,8 @@ struct htype { #ifdef IP_SET_HASH_WITH_NETS /* Network cidr size book keeping when the hash stores different - * sized networks */ + * sized networks. cidr == real cidr + 1 to support /0. + */ static void mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) { @@ -498,8 +503,10 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) pr_debug("expired %u/%u\n", i, j); #ifdef IP_SET_HASH_WITH_NETS for (k = 0; k < IPSET_NET_COUNT; k++) - mtype_del_cidr(h, SCIDR(data->cidr, k), - nets_length, k); + mtype_del_cidr(h, + NCIDR_PUT(DCIDR_GET(data->cidr, + k)), + nets_length, k); #endif ip_set_ext_destroy(set, data); if (j != n->pos - 1) @@ -692,9 +699,9 @@ reuse_slot: data = ahash_data(n, j, set->dsize); #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) { - mtype_del_cidr(h, SCIDR(data->cidr, i), + mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(data->cidr, i)), NLEN(set->family), i); - mtype_add_cidr(h, SCIDR(d->cidr, i), + mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), NLEN(set->family), i); } #endif @@ -711,8 +718,8 @@ reuse_slot: data = ahash_data(n, n->pos++, set->dsize); #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) - mtype_add_cidr(h, SCIDR(d->cidr, i), NLEN(set->family), - i); + mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), + NLEN(set->family), i); #endif h->elements++; } @@ -772,8 +779,8 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, h->elements--; #ifdef IP_SET_HASH_WITH_NETS for (j = 0; j < IPSET_NET_COUNT; j++) - mtype_del_cidr(h, SCIDR(d->cidr, j), NLEN(set->family), - j); + mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)), + NLEN(set->family), j); #endif ip_set_ext_destroy(set, data); if (n->pos + AHASH_INIT_SIZE < n->size) { @@ -836,12 +843,13 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, for (; j < nets_length && h->nets[j].cidr[0] && !multi; j++) { #if IPSET_NET_COUNT == 2 mtype_data_reset_elem(d, &orig); - mtype_data_netmask(d, NCIDR(h->nets[j].cidr[0]), false); + mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false); for (k = 0; k < nets_length && h->nets[k].cidr[1] && !multi; k++) { - mtype_data_netmask(d, NCIDR(h->nets[k].cidr[1]), true); + mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]), + true); #else - mtype_data_netmask(d, NCIDR(h->nets[j].cidr[0])); + mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0])); #endif key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); @@ -889,7 +897,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* If we test an IP address and not a network address, * try all possible network sizes */ for (i = 0; i < IPSET_NET_COUNT; i++) - if (GCIDR(d->cidr, i) != SET_HOST_MASK(set->family)) + if (DCIDR_GET(d->cidr, i) != SET_HOST_MASK(set->family)) break; if (i == IPSET_NET_COUNT) { ret = mtype_test_cidrs(set, d, ext, mext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index e38a029f3002..50248debdc8b 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -141,7 +141,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -389,7 +389,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index d19926ac3a03..089b23fd1a94 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -120,7 +120,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -288,7 +288,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net6_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 7b69fa28d774..aac20768f6f0 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -235,7 +235,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -469,7 +469,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 11eee0077b8b..ed9cc45084dd 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -141,8 +141,8 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); - e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); + e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; @@ -364,8 +364,8 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); - e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); + e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 7a6448cbd8fb..fbaf8138e5d4 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -136,7 +136,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -348,7 +348,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 6eb5f8798304..a828cbc8bed7 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -152,8 +152,8 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); - e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); + e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; @@ -418,8 +418,8 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netportnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); - e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); + e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK; -- cgit v1.2.3 From c4c997839cf92cb1037e43a85cdb4cbf44ed39a5 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Jun 2015 11:59:45 +0200 Subject: netfilter: ipset: Fix parallel resizing and listing of the same set When elements added to a hash:* type of set and resizing triggered, parallel listing could start to list the original set (before resizing) and "continue" with listing the new set. Fix it by references and using the original hash table for listing. Therefore the destroying of the original hash table may happen from the resizing or listing functions. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 13 ++++++---- net/netfilter/ipset/ip_set_core.c | 29 ++++++++++++---------- net/netfilter/ipset/ip_set_hash_gen.h | 44 +++++++++++++++++++++++++++++++--- 3 files changed, 66 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index a6fe1ce96437..5674b6ac6646 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -176,6 +176,9 @@ struct ip_set_type_variant { /* List elements */ int (*list)(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb); + /* Keep listing private when resizing runs parallel */ + void (*uref)(struct ip_set *set, struct netlink_callback *cb, + bool start); /* Return true if "b" set is the same as "a" * according to the create set parameters */ @@ -380,12 +383,12 @@ ip_set_init_counter(struct ip_set_counter *counter, /* Netlink CB args */ enum { - IPSET_CB_NET = 0, - IPSET_CB_DUMP, - IPSET_CB_INDEX, - IPSET_CB_ARG0, + IPSET_CB_NET = 0, /* net namespace */ + IPSET_CB_DUMP, /* dump single set/all sets */ + IPSET_CB_INDEX, /* set index */ + IPSET_CB_PRIVATE, /* set private data */ + IPSET_CB_ARG0, /* type specific */ IPSET_CB_ARG1, - IPSET_CB_ARG2, }; /* register and unregister set references */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 68ae551f2b39..777cac6fd64d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1211,12 +1211,16 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, static int ip_set_dump_done(struct netlink_callback *cb) { - struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET]; if (cb->args[IPSET_CB_ARG0]) { - pr_debug("release set %s\n", - ip_set(inst, cb->args[IPSET_CB_INDEX])->name); - __ip_set_put_byindex(inst, - (ip_set_id_t) cb->args[IPSET_CB_INDEX]); + struct ip_set_net *inst = + (struct ip_set_net *)cb->args[IPSET_CB_NET]; + ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX]; + struct ip_set *set = ip_set(inst, index); + + if (set->variant->uref) + set->variant->uref(set, cb, false); + pr_debug("release set %s\n", set->name); + __ip_set_put_byindex(inst, index); } return 0; } @@ -1247,12 +1251,6 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, ip_set_setname_policy); - /* cb->args[IPSET_CB_NET]: net namespace - * [IPSET_CB_DUMP]: dump single set/all sets - * [IPSET_CB_INDEX]: set index - * [IPSET_CB_ARG0]: type specific - */ - if (cda[IPSET_ATTR_SETNAME]) { struct ip_set *set; @@ -1359,6 +1357,8 @@ dump_last: goto release_refcount; if (dump_flags & IPSET_FLAG_LIST_HEADER) goto next_set; + if (set->variant->uref) + set->variant->uref(set, cb, true); /* Fall through and add elements */ default: read_lock_bh(&set->lock); @@ -1375,6 +1375,8 @@ dump_last: dump_type = DUMP_LAST; cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16); cb->args[IPSET_CB_INDEX] = 0; + if (set && set->variant->uref) + set->variant->uref(set, cb, false); goto dump_last; } goto out; @@ -1389,7 +1391,10 @@ next_set: release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[IPSET_CB_ARG0]) { - pr_debug("release set %s\n", ip_set(inst, index)->name); + set = ip_set(inst, index); + if (set->variant->uref) + set->variant->uref(set, cb, false); + pr_debug("release set %s\n", set->name); __ip_set_put_byindex(inst, index); cb->args[IPSET_CB_ARG0] = 0; } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 2f1985e71f6c..5fcf70b0ebc2 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -71,6 +71,8 @@ struct hbucket { /* The hash table: the table size stored here in order to make resizing easy */ struct htable { + atomic_t ref; /* References for resizing */ + atomic_t uref; /* References for dumping */ u8 htable_bits; /* size of hash table == 2^htable_bits */ struct hbucket bucket[0]; /* hashtable buckets */ }; @@ -207,6 +209,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #undef mtype_del #undef mtype_test_cidrs #undef mtype_test +#undef mtype_uref #undef mtype_expire #undef mtype_resize #undef mtype_head @@ -248,6 +251,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) #define mtype_test IPSET_TOKEN(MTYPE, _test) +#define mtype_uref IPSET_TOKEN(MTYPE, _uref) #define mtype_expire IPSET_TOKEN(MTYPE, _expire) #define mtype_resize IPSET_TOKEN(MTYPE, _resize) #define mtype_head IPSET_TOKEN(MTYPE, _head) @@ -595,6 +599,9 @@ retry: t->htable_bits = htable_bits; read_lock_bh(&set->lock); + /* There can't be another parallel resizing, but dumping is possible */ + atomic_set(&orig->ref, 1); + atomic_inc(&orig->uref); for (i = 0; i < jhash_size(orig->htable_bits); i++) { n = hbucket(orig, i); for (j = 0; j < n->pos; j++) { @@ -609,6 +616,8 @@ retry: #ifdef IP_SET_HASH_WITH_NETS mtype_data_reset_flags(data, &flags); #endif + atomic_set(&orig->ref, 0); + atomic_dec(&orig->uref); read_unlock_bh(&set->lock); mtype_ahash_destroy(set, t, false); if (ret == -EAGAIN) @@ -631,7 +640,11 @@ retry: pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, orig->htable_bits, orig, t->htable_bits, t); - mtype_ahash_destroy(set, orig, false); + /* If there's nobody else dumping the table, destroy it */ + if (atomic_dec_and_test(&orig->uref)) { + pr_debug("Table destroy by resize %p\n", orig); + mtype_ahash_destroy(set, orig, false); + } return 0; } @@ -961,13 +974,36 @@ nla_put_failure: return -EMSGSIZE; } +/* Make possible to run dumping parallel with resizing */ +static void +mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start) +{ + struct htype *h = set->data; + struct htable *t; + + if (start) { + rcu_read_lock_bh(); + t = rcu_dereference_bh_nfnl(h->table); + atomic_inc(&t->uref); + cb->args[IPSET_CB_PRIVATE] = (unsigned long)t; + rcu_read_unlock_bh(); + } else if (cb->args[IPSET_CB_PRIVATE]) { + t = (struct htable *)cb->args[IPSET_CB_PRIVATE]; + if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { + /* Resizing didn't destroy the hash table */ + pr_debug("Table destroy by dump: %p\n", t); + mtype_ahash_destroy(set, t, false); + } + cb->args[IPSET_CB_PRIVATE] = 0; + } +} + /* Reply a LIST/SAVE request: dump the elements of the specified set */ static int mtype_list(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb) { - const struct htype *h = set->data; - const struct htable *t = rcu_dereference_bh_nfnl(h->table); + const struct htable *t; struct nlattr *atd, *nested; const struct hbucket *n; const struct mtype_elem *e; @@ -980,6 +1016,7 @@ mtype_list(const struct ip_set *set, if (!atd) return -EMSGSIZE; pr_debug("list hash set %s\n", set->name); + t = (const struct htable *)cb->args[IPSET_CB_PRIVATE]; for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits); cb->args[IPSET_CB_ARG0]++) { incomplete = skb_tail_pointer(skb); @@ -1047,6 +1084,7 @@ static const struct ip_set_type_variant mtype_variant = { .flush = mtype_flush, .head = mtype_head, .list = mtype_list, + .uref = mtype_uref, .resize = mtype_resize, .same_set = mtype_same_set, }; -- cgit v1.2.3 From b57b2d1fa53fe8563bdfc66a33b844463b9af285 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Jun 2015 14:22:25 +0200 Subject: netfilter: ipset: Prepare the ipset core to use RCU at set level Replace rwlock_t with spinlock_t in "struct ip_set" and change the locking accordingly. Convert the comment extension into an rcu-avare object. Also, simplify the timeout routines. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 9 ++++-- include/linux/netfilter/ipset/ip_set_comment.h | 38 +++++++++++++++------- include/linux/netfilter/ipset/ip_set_timeout.h | 25 ++++++--------- net/netfilter/ipset/ip_set_core.c | 44 +++++++++++++------------- 4 files changed, 66 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 5674b6ac6646..19b4969a25fe 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -108,8 +108,13 @@ struct ip_set_counter { atomic64_t packets; }; +struct ip_set_comment_rcu { + struct rcu_head rcu; + char str[0]; +}; + struct ip_set_comment { - char *str; + struct ip_set_comment_rcu __rcu *c; }; struct ip_set_skbinfo { @@ -226,7 +231,7 @@ struct ip_set { /* The name of the set */ char name[IPSET_MAXNAMELEN]; /* Lock protecting the set data */ - rwlock_t lock; + spinlock_t lock; /* References to the set */ u32 ref; /* The core set type */ diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h index 21217ea008d7..8d0248525957 100644 --- a/include/linux/netfilter/ipset/ip_set_comment.h +++ b/include/linux/netfilter/ipset/ip_set_comment.h @@ -16,41 +16,57 @@ ip_set_comment_uget(struct nlattr *tb) return nla_data(tb); } +/* Called from uadd only, protected by the set spinlock. + * The kadt functions don't use the comment extensions in any way. + */ static inline void ip_set_init_comment(struct ip_set_comment *comment, const struct ip_set_ext *ext) { + struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1); size_t len = ext->comment ? strlen(ext->comment) : 0; - if (unlikely(comment->str)) { - kfree(comment->str); - comment->str = NULL; + if (unlikely(c)) { + kfree_rcu(c, rcu); + rcu_assign_pointer(comment->c, NULL); } if (!len) return; if (unlikely(len > IPSET_MAX_COMMENT_SIZE)) len = IPSET_MAX_COMMENT_SIZE; - comment->str = kzalloc(len + 1, GFP_ATOMIC); - if (unlikely(!comment->str)) + c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC); + if (unlikely(!c)) return; - strlcpy(comment->str, ext->comment, len + 1); + strlcpy(c->str, ext->comment, len + 1); + rcu_assign_pointer(comment->c, c); } +/* Used only when dumping a set, protected by rcu_read_lock_bh() */ static inline int ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment) { - if (!comment->str) + struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c); + + if (!c) return 0; - return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str); + return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str); } +/* Called from uadd/udel, flush or the garbage collectors protected + * by the set spinlock. + * Called when the set is destroyed and when there can't be any user + * of the set data anymore. + */ static inline void ip_set_comment_free(struct ip_set_comment *comment) { - if (unlikely(!comment->str)) + struct ip_set_comment_rcu *c; + + c = rcu_dereference_protected(comment->c, 1); + if (unlikely(!c)) return; - kfree(comment->str); - comment->str = NULL; + kfree_rcu(c, rcu); + rcu_assign_pointer(comment->c, NULL); } #endif diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 3c8842bcedaa..1d6a935c1ac5 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -40,31 +40,26 @@ ip_set_timeout_uget(struct nlattr *tb) } static inline bool -ip_set_timeout_test(unsigned long timeout) +ip_set_timeout_expired(unsigned long *t) { - return timeout == IPSET_ELEM_PERMANENT || - time_is_after_jiffies(timeout); -} - -static inline bool -ip_set_timeout_expired(unsigned long *timeout) -{ - return *timeout != IPSET_ELEM_PERMANENT && - time_is_before_jiffies(*timeout); + return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t); } static inline void -ip_set_timeout_set(unsigned long *timeout, u32 t) +ip_set_timeout_set(unsigned long *timeout, u32 value) { - if (!t) { + unsigned long t; + + if (!value) { *timeout = IPSET_ELEM_PERMANENT; return; } - *timeout = msecs_to_jiffies(t * MSEC_PER_SEC) + jiffies; - if (*timeout == IPSET_ELEM_PERMANENT) + t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies; + if (t == IPSET_ELEM_PERMANENT) /* Bingo! :-) */ - (*timeout)--; + t--; + *timeout = t; } static inline u32 diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 87b4182660ba..2b21a1983a98 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -209,15 +209,15 @@ ip_set_type_register(struct ip_set_type *type) pr_warn("ip_set type %s, family %s with revision min %u already registered!\n", type->name, family_name(type->family), type->revision_min); - ret = -EINVAL; - goto unlock; + ip_set_type_unlock(); + return -EINVAL; } list_add_rcu(&type->list, &ip_set_type_list); pr_debug("type %s, family %s, revision %u:%u registered.\n", type->name, family_name(type->family), type->revision_min, type->revision_max); -unlock: ip_set_type_unlock(); + return ret; } EXPORT_SYMBOL_GPL(ip_set_type_register); @@ -231,12 +231,12 @@ ip_set_type_unregister(struct ip_set_type *type) pr_warn("ip_set type %s, family %s with revision min %u not registered\n", type->name, family_name(type->family), type->revision_min); - goto unlock; + ip_set_type_unlock(); + return; } list_del_rcu(&type->list); pr_debug("type %s, family %s with revision min %u unregistered.\n", type->name, family_name(type->family), type->revision_min); -unlock: ip_set_type_unlock(); synchronize_rcu(); @@ -531,16 +531,16 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return 0; - read_lock_bh(&set->lock); + rcu_read_lock_bh(); ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt); - read_unlock_bh(&set->lock); + rcu_read_unlock_bh(); if (ret == -EAGAIN) { /* Type requests element to be completed */ pr_debug("element must be completed, ADD is triggered\n"); - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); set->variant->kadt(set, skb, par, IPSET_ADD, opt); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); ret = 1; } else { /* --return-nomatch: invert matched element */ @@ -570,9 +570,9 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return -IPSET_ERR_TYPE_MISMATCH; - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); return ret; } @@ -593,9 +593,9 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return -IPSET_ERR_TYPE_MISMATCH; - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); return ret; } @@ -880,7 +880,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, set = kzalloc(sizeof(struct ip_set), GFP_KERNEL); if (!set) return -ENOMEM; - rwlock_init(&set->lock); + spin_lock_init(&set->lock); strlcpy(set->name, name, IPSET_MAXNAMELEN); set->family = family; set->revision = revision; @@ -1062,9 +1062,9 @@ ip_set_flush_set(struct ip_set *set) { pr_debug("set: %s\n", set->name); - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); set->variant->flush(set); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); } static int @@ -1377,9 +1377,9 @@ dump_last: set->variant->uref(set, cb, true); /* Fall through and add elements */ default: - read_lock_bh(&set->lock); + rcu_read_lock_bh(); ret = set->variant->list(set, skb, cb); - read_unlock_bh(&set->lock); + rcu_read_unlock_bh(); if (!cb->args[IPSET_CB_ARG0]) /* Set is done, proceed with next one */ goto next_set; @@ -1462,9 +1462,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, bool eexist = flags & IPSET_FLAG_EXIST, retried = false; do { - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); retried = true; } while (ret == -EAGAIN && set->variant->resize && @@ -1644,9 +1644,9 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; - read_lock_bh(&set->lock); + rcu_read_lock_bh(); ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0); - read_unlock_bh(&set->lock); + rcu_read_unlock_bh(); /* Userspace can't trigger element to be re-added */ if (ret == -EAGAIN) ret = 1; -- cgit v1.2.3 From ca0f6a5cd99e0c6ba4bb78dc402817f636370f26 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Jun 2015 19:45:33 +0200 Subject: netfilter: ipset: Fix coding styles reported by checkpatch.pl Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 5 +- include/uapi/linux/netfilter/ipset/ip_set.h | 6 +- net/netfilter/ipset/ip_set_bitmap_gen.h | 11 +- net/netfilter/ipset/ip_set_bitmap_ip.c | 12 +- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 21 +-- net/netfilter/ipset/ip_set_bitmap_port.c | 7 +- net/netfilter/ipset/ip_set_core.c | 201 +++++++++++++-------------- net/netfilter/ipset/ip_set_getport.c | 13 +- net/netfilter/ipset/ip_set_hash_gen.h | 55 ++++---- net/netfilter/ipset/ip_set_hash_ip.c | 4 +- net/netfilter/ipset/ip_set_hash_ipmark.c | 9 +- net/netfilter/ipset/ip_set_hash_ipport.c | 14 +- net/netfilter/ipset/ip_set_hash_ipportip.c | 16 ++- net/netfilter/ipset/ip_set_hash_ipportnet.c | 19 ++- net/netfilter/ipset/ip_set_hash_mac.c | 6 +- net/netfilter/ipset/ip_set_hash_net.c | 8 +- net/netfilter/ipset/ip_set_hash_netiface.c | 25 ++-- net/netfilter/ipset/ip_set_hash_netnet.c | 46 +++--- net/netfilter/ipset/ip_set_hash_netport.c | 19 ++- net/netfilter/ipset/ip_set_hash_netportnet.c | 54 +++---- net/netfilter/ipset/ip_set_list_set.c | 11 +- net/netfilter/ipset/pfxlen.c | 16 +-- net/netfilter/xt_set.c | 44 +++--- 23 files changed, 327 insertions(+), 295 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 19b4969a25fe..48bb01edcf30 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -349,12 +349,11 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo) cpu_to_be64((u64)skbinfo->skbmark << 32 | skbinfo->skbmarkmask))) || (skbinfo->skbprio && - nla_put_net32(skb, IPSET_ATTR_SKBPRIO, + nla_put_net32(skb, IPSET_ATTR_SKBPRIO, cpu_to_be32(skbinfo->skbprio))) || (skbinfo->skbqueue && - nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, + nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, cpu_to_be16(skbinfo->skbqueue))); - } static inline void diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 5ab4e60894cf..63b2e34f1b60 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -15,12 +15,12 @@ /* The protocol version */ #define IPSET_PROTOCOL 6 -/* The maximum permissible comment length we will accept over netlink */ -#define IPSET_MAX_COMMENT_SIZE 255 - /* The max length of strings including NUL: set and type identifiers */ #define IPSET_MAXNAMELEN 32 +/* The maximum permissible comment length we will accept over netlink */ +#define IPSET_MAX_COMMENT_SIZE 255 + /* Message types and commands */ enum ipset_cmd { IPSET_CMD_NONE, diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 86429f369128..d05e759ed0fa 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -41,7 +41,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) struct mtype *map = set->data; init_timer(&map->gc); - map->gc.data = (unsigned long) set; + map->gc.data = (unsigned long)set; map->gc.function = gc; map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); @@ -223,7 +223,7 @@ mtype_list(const struct ip_set *set, if (!test_bit(id, map->members) || (SET_WITH_TIMEOUT(set) && #ifdef IP_SET_BITMAP_STORED_TIMEOUT - mtype_is_filled((const struct mtype_elem *) x) && + mtype_is_filled((const struct mtype_elem *)x) && #endif ip_set_timeout_expired(ext_timeout(x, set)))) continue; @@ -240,7 +240,7 @@ mtype_list(const struct ip_set *set, if (mtype_do_list(skb, map, id, set->dsize)) goto nla_put_failure; if (ip_set_put_extensions(skb, set, x, - mtype_is_filled((const struct mtype_elem *) x))) + mtype_is_filled((const struct mtype_elem *)x))) goto nla_put_failure; ipset_nest_end(skb, nested); } @@ -266,13 +266,14 @@ out: static void mtype_gc(unsigned long ul_set) { - struct ip_set *set = (struct ip_set *) ul_set; + struct ip_set *set = (struct ip_set *)ul_set; struct mtype *map = set->data; void *x; u32 id; /* We run parallel with other readers (test element) - * but adding/deleting new entries is locked out */ + * but adding/deleting new entries is locked out + */ spin_lock_bh(&set->lock); for (id = 0; id < map->elements; id++) if (mtype_gc_test(id, map, set->dsize)) { diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index b8ce474c038d..64a564334418 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -59,7 +59,7 @@ struct bitmap_ip_adt_elem { static inline u32 ip_to_id(const struct bitmap_ip *m, u32 ip) { - return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip)/m->hosts; + return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip) / m->hosts; } /* Common functions */ @@ -175,8 +175,9 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); - } else + } else { ip_to = ip; + } if (ip_to > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; @@ -187,8 +188,8 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } @@ -278,8 +279,9 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], if (cidr >= HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(first_ip, last_ip, cidr); - } else + } else { return -IPSET_ERR_PROTOCOL; + } if (tb[IPSET_ATTR_NETMASK]) { netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index fe00e87decc8..1430535118fb 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -90,7 +90,7 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, return 0; elem = get_elem(map->extensions, e->id, dsize); if (elem->filled == MAC_FILLED) - return e->ether == NULL || + return !e->ether || ether_addr_equal(e->ether, elem->ether); /* Trigger kernel to fill out the ethernet address */ return -EAGAIN; @@ -131,7 +131,8 @@ bitmap_ipmac_add_timeout(unsigned long *timeout, /* If MAC is unset yet, we store plain timeout value * because the timer is not activated yet * and we can reuse it later when MAC is filled out, - * possibly by the kernel */ + * possibly by the kernel + */ if (e->ether) ip_set_timeout_set(timeout, t); else @@ -155,7 +156,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, /* memcpy isn't atomic */ clear_bit(e->id, map->members); smp_mb__after_atomic(); - memcpy(elem->ether, e->ether, ETH_ALEN); + ether_addr_copy(elem->ether, e->ether); } return IPSET_ADD_FAILED; } else if (!e->ether) @@ -164,19 +165,18 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, /* Fill the MAC address and trigger the timer activation */ clear_bit(e->id, map->members); smp_mb__after_atomic(); - memcpy(elem->ether, e->ether, ETH_ALEN); + ether_addr_copy(elem->ether, e->ether); elem->filled = MAC_FILLED; return IPSET_ADD_START_STORED_TIMEOUT; } else if (e->ether) { /* We can store MAC too */ - memcpy(elem->ether, e->ether, ETH_ALEN); + ether_addr_copy(elem->ether, e->ether); elem->filled = MAC_FILLED; return 0; - } else { - elem->filled = MAC_UNSET; - /* MAC is not stored yet, don't start timer */ - return IPSET_ADD_STORE_PLAIN_TIMEOUT; } + elem->filled = MAC_UNSET; + /* MAC is not stored yet, don't start timer */ + return IPSET_ADD_STORE_PLAIN_TIMEOUT; } static inline int @@ -352,8 +352,9 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], if (cidr >= HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(first_ip, last_ip, cidr); - } else + } else { return -IPSET_ERR_PROTOCOL; + } elements = (u64)last_ip - first_ip + 1; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 2d360f951d18..5338ccd5da46 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -162,8 +162,9 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], if (port < map->first_port) return -IPSET_ERR_BITMAP_RANGE; } - } else + } else { port_to = port; + } if (port_to > map->last_port) return -IPSET_ERR_BITMAP_RANGE; @@ -174,8 +175,8 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 2b21a1983a98..338b4047776f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -35,6 +35,7 @@ struct ip_set_net { bool is_deleted; /* deleted by ip_set_net_exit */ bool is_destroyed; /* all sets are destroyed */ }; + static int ip_set_net_id __read_mostly; static inline struct ip_set_net *ip_set_pernet(struct net *net) @@ -60,8 +61,7 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); #define ip_set(inst, id) \ ip_set_dereference((inst)->ip_set_list)[id] -/* - * The set types are implemented in modules and registered set types +/* The set types are implemented in modules and registered set types * can be found in ip_set_type_list. Adding/deleting types is * serialized by ip_set_type_mutex. */ @@ -131,7 +131,8 @@ __find_set_type_get(const char *name, u8 family, u8 revision, goto unlock; } /* Make sure the type is already loaded - * but we don't support the revision */ + * but we don't support the revision + */ list_for_each_entry_rcu(type, &ip_set_type_list, list) if (STRNCMP(type->name, name)) { err = -IPSET_ERR_FIND_TYPE; @@ -290,7 +291,7 @@ static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = { int ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) { - struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1]; + struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1]; if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; @@ -307,7 +308,7 @@ EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4); int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) { - struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1]; + struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1]; if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; @@ -318,7 +319,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) return -IPSET_ERR_PROTOCOL; memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]), - sizeof(struct in6_addr)); + sizeof(struct in6_addr)); return 0; } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); @@ -467,8 +468,7 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, } EXPORT_SYMBOL_GPL(ip_set_put_extensions); -/* - * Creating/destroying/renaming/swapping affect the existence and +/* Creating/destroying/renaming/swapping affect the existence and * the properties of a set. All of these can be executed from userspace * only and serialized by the nfnl mutex indirectly from nfnetlink. * @@ -495,8 +495,7 @@ __ip_set_put(struct ip_set *set) write_unlock_bh(&ip_set_ref_lock); } -/* - * Add, del and test set entries from kernel. +/* Add, del and test set entries from kernel. * * The set behind the index must exist and must be referenced * so it can't be destroyed (or changed) under our foot. @@ -524,7 +523,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, dev_net(par->in ? par->in : par->out), index); int ret = 0; - BUG_ON(set == NULL); + BUG_ON(!set); pr_debug("set %s, index %u\n", set->name, index); if (opt->dim < set->type->dimension || @@ -563,7 +562,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, dev_net(par->in ? par->in : par->out), index); int ret; - BUG_ON(set == NULL); + BUG_ON(!set); pr_debug("set %s, index %u\n", set->name, index); if (opt->dim < set->type->dimension || @@ -586,7 +585,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, dev_net(par->in ? par->in : par->out), index); int ret = 0; - BUG_ON(set == NULL); + BUG_ON(!set); pr_debug("set %s, index %u\n", set->name, index); if (opt->dim < set->type->dimension || @@ -601,8 +600,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, } EXPORT_SYMBOL_GPL(ip_set_del); -/* - * Find set by name, reference it once. The reference makes sure the +/* Find set by name, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. * */ @@ -616,7 +614,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) rcu_read_lock(); for (i = 0; i < inst->ip_set_max; i++) { s = rcu_dereference(inst->ip_set_list)[i]; - if (s != NULL && STRNCMP(s->name, name)) { + if (s && STRNCMP(s->name, name)) { __ip_set_get(s); index = i; *set = s; @@ -629,8 +627,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) } EXPORT_SYMBOL_GPL(ip_set_get_byname); -/* - * If the given set pointer points to a valid set, decrement +/* If the given set pointer points to a valid set, decrement * reference count by 1. The caller shall not assume the index * to be valid, after calling this function. * @@ -643,7 +640,7 @@ __ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index) rcu_read_lock(); set = rcu_dereference(inst->ip_set_list)[index]; - if (set != NULL) + if (set) __ip_set_put(set); rcu_read_unlock(); } @@ -657,8 +654,7 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index) } EXPORT_SYMBOL_GPL(ip_set_put_byindex); -/* - * Get the name of a set behind a set index. +/* Get the name of a set behind a set index. * We assume the set is referenced, so it does exist and * can't be destroyed. The set cannot be renamed due to * the referencing either. @@ -669,7 +665,7 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index) { const struct ip_set *set = ip_set_rcu_get(net, index); - BUG_ON(set == NULL); + BUG_ON(!set); BUG_ON(set->ref == 0); /* Referenced, so it's safe */ @@ -677,13 +673,11 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index) } EXPORT_SYMBOL_GPL(ip_set_name_byindex); -/* - * Routines to call by external subsystems, which do not +/* Routines to call by external subsystems, which do not * call nfnl_lock for us. */ -/* - * Find set by index, reference it once. The reference makes sure the +/* Find set by index, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. * * The nfnl mutex is used in the function. @@ -709,8 +703,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) } EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex); -/* - * If the given set pointer points to a valid set, decrement +/* If the given set pointer points to a valid set, decrement * reference count by 1. The caller shall not assume the index * to be valid, after calling this function. * @@ -725,15 +718,14 @@ ip_set_nfnl_put(struct net *net, ip_set_id_t index) nfnl_lock(NFNL_SUBSYS_IPSET); if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */ set = ip_set(inst, index); - if (set != NULL) + if (set) __ip_set_put(set); } nfnl_unlock(NFNL_SUBSYS_IPSET); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); -/* - * Communication protocol with userspace over netlink. +/* Communication protocol with userspace over netlink. * * The commands are serialized by the nfnl mutex. */ @@ -760,7 +752,7 @@ start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags, nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), sizeof(*nfmsg), flags); - if (nlh == NULL) + if (!nlh) return NULL; nfmsg = nlmsg_data(nlh); @@ -793,7 +785,7 @@ find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id) *id = IPSET_INVALID_ID; for (i = 0; i < inst->ip_set_max; i++) { set = ip_set(inst, i); - if (set != NULL && STRNCMP(set->name, name)) { + if (set && STRNCMP(set->name, name)) { *id = i; break; } @@ -819,7 +811,7 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, *index = IPSET_INVALID_ID; for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s == NULL) { + if (!s) { if (*index == IPSET_INVALID_ID) *index = i; } else if (STRNCMP(name, s->name)) { @@ -851,18 +843,18 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *clash = NULL; ip_set_id_t index = IPSET_INVALID_ID; - struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {}; + struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1] = {}; const char *name, *typename; u8 family, revision; u32 flags = flag_exist(nlh); int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || - attr[IPSET_ATTR_TYPENAME] == NULL || - attr[IPSET_ATTR_REVISION] == NULL || - attr[IPSET_ATTR_FAMILY] == NULL || - (attr[IPSET_ATTR_DATA] != NULL && + !attr[IPSET_ATTR_SETNAME] || + !attr[IPSET_ATTR_TYPENAME] || + !attr[IPSET_ATTR_REVISION] || + !attr[IPSET_ATTR_FAMILY] || + (attr[IPSET_ATTR_DATA] && !flag_nested(attr[IPSET_ATTR_DATA])))) return -IPSET_ERR_PROTOCOL; @@ -873,11 +865,10 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n", name, typename, family_name(family), revision); - /* - * First, and without any locks, allocate and initialize + /* First, and without any locks, allocate and initialize * a normal base set structure. */ - set = kzalloc(sizeof(struct ip_set), GFP_KERNEL); + set = kzalloc(sizeof(*set), GFP_KERNEL); if (!set) return -ENOMEM; spin_lock_init(&set->lock); @@ -885,21 +876,18 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, set->family = family; set->revision = revision; - /* - * Next, check that we know the type, and take + /* Next, check that we know the type, and take * a reference on the type, to make sure it stays available * while constructing our new set. * * After referencing the type, we try to create the type * specific part of the set without holding any locks. */ - ret = find_set_type_get(typename, family, revision, &(set->type)); + ret = find_set_type_get(typename, family, revision, &set->type); if (ret) goto out; - /* - * Without holding any locks, create private part. - */ + /* Without holding any locks, create private part. */ if (attr[IPSET_ATTR_DATA] && nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], set->type->create_policy)) { @@ -913,8 +901,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, /* BTW, ret==0 here. */ - /* - * Here, we have a valid, constructed set and we are protected + /* Here, we have a valid, constructed set and we are protected * by the nfnl mutex. Find the first free index in ip_set_list * and check clashing. */ @@ -937,7 +924,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, /* Wraparound */ goto cleanup; - list = kzalloc(sizeof(struct ip_set *) * i, GFP_KERNEL); + list = kcalloc(i, sizeof(struct ip_set *), GFP_KERNEL); if (!list) goto cleanup; /* nfnl mutex is held, both lists are valid */ @@ -951,12 +938,11 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, inst->ip_set_max = i; kfree(tmp); ret = 0; - } else if (ret) + } else if (ret) { goto cleanup; + } - /* - * Finally! Add our shiny new set to the list, and be done. - */ + /* Finally! Add our shiny new set to the list, and be done. */ pr_debug("create: '%s' created with index %u!\n", set->name, index); ip_set(inst, index) = set; @@ -1018,7 +1004,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s != NULL && s->ref) { + if (s && s->ref) { ret = -IPSET_ERR_BUSY; goto out; } @@ -1037,7 +1023,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, } else { s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); - if (s == NULL) { + if (!s) { ret = -ENOENT; goto out; } else if (s->ref) { @@ -1082,12 +1068,12 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s != NULL) + if (s) ip_set_flush_set(s); } } else { s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (s == NULL) + if (!s) return -ENOENT; ip_set_flush_set(s); @@ -1119,12 +1105,12 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || - attr[IPSET_ATTR_SETNAME2] == NULL)) + !attr[IPSET_ATTR_SETNAME] || + !attr[IPSET_ATTR_SETNAME2])) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; read_lock_bh(&ip_set_ref_lock); @@ -1136,7 +1122,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s != NULL && STRNCMP(s->name, name2)) { + if (s && STRNCMP(s->name, name2)) { ret = -IPSET_ERR_EXIST_SETNAME2; goto out; } @@ -1168,23 +1154,24 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, char from_name[IPSET_MAXNAMELEN]; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || - attr[IPSET_ATTR_SETNAME2] == NULL)) + !attr[IPSET_ATTR_SETNAME] || + !attr[IPSET_ATTR_SETNAME2])) return -IPSET_ERR_PROTOCOL; from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &from_id); - if (from == NULL) + if (!from) return -ENOENT; to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id); - if (to == NULL) + if (!to) return -IPSET_ERR_EXIST_SETNAME2; /* Features must not change. - * Not an artificial restriction anymore, as we must prevent - * possible loops created by swapping in setlist type of sets. */ + * Not an artifical restriction anymore, as we must prevent + * possible loops created by swapping in setlist type of sets. + */ if (!(from->type->features == to->type->features && from->family == to->family)) return -IPSET_ERR_TYPE_MISMATCH; @@ -1246,7 +1233,7 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) { struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); - struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; + struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1]; struct nlattr *attr = (void *)nlh + min_len; u32 dump_type; ip_set_id_t index; @@ -1260,16 +1247,18 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]), &index); - if (set == NULL) + if (!set) return -ENOENT; dump_type = DUMP_ONE; cb->args[IPSET_CB_INDEX] = index; - } else + } else { dump_type = DUMP_ALL; + } if (cda[IPSET_ATTR_FLAGS]) { u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]); + dump_type |= (f << 16); } cb->args[IPSET_CB_NET] = (unsigned long)inst; @@ -1295,7 +1284,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) if (ret < 0) { nlh = nlmsg_hdr(cb->skb); /* We have to create and send the error message - * manually :-( */ + * manually :-( + */ if (nlh->nlmsg_flags & NLM_F_ACK) netlink_ack(cb->skb, nlh, ret); return ret; @@ -1313,7 +1303,7 @@ dump_last: pr_debug("dump type, flag: %u %u index: %ld\n", dump_type, dump_flags, cb->args[IPSET_CB_INDEX]); for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) { - index = (ip_set_id_t) cb->args[IPSET_CB_INDEX]; + index = (ip_set_id_t)cb->args[IPSET_CB_INDEX]; write_lock_bh(&ip_set_ref_lock); set = ip_set(inst, index); is_destroyed = inst->is_destroyed; @@ -1480,12 +1470,12 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, size_t payload = min(SIZE_MAX, sizeof(*errmsg) + nlmsg_len(nlh)); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); - struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; + struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1]; struct nlattr *cmdattr; u32 *errline; skb2 = nlmsg_new(payload, GFP_KERNEL); - if (skb2 == NULL) + if (!skb2) return -ENOMEM; rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); @@ -1502,7 +1492,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, *errline = lineno; - netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); + netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, + MSG_DONTWAIT); /* Signal netlink not to send its ACK/errmsg. */ return -EINTR; } @@ -1517,25 +1508,25 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, { struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; - struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; + struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; const struct nlattr *nla; u32 flags = flag_exist(nlh); bool use_lineno; int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || + !attr[IPSET_ATTR_SETNAME] || !((attr[IPSET_ATTR_DATA] != NULL) ^ (attr[IPSET_ATTR_ADT] != NULL)) || - (attr[IPSET_ATTR_DATA] != NULL && + (attr[IPSET_ATTR_DATA] && !flag_nested(attr[IPSET_ATTR_DATA])) || - (attr[IPSET_ATTR_ADT] != NULL && + (attr[IPSET_ATTR_ADT] && (!flag_nested(attr[IPSET_ATTR_ADT]) || - attr[IPSET_ATTR_LINENO] == NULL)))) + !attr[IPSET_ATTR_LINENO])))) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; use_lineno = !!attr[IPSET_ATTR_LINENO]; @@ -1572,25 +1563,25 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, { struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; - struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; + struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; const struct nlattr *nla; u32 flags = flag_exist(nlh); bool use_lineno; int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || + !attr[IPSET_ATTR_SETNAME] || !((attr[IPSET_ATTR_DATA] != NULL) ^ (attr[IPSET_ATTR_ADT] != NULL)) || - (attr[IPSET_ATTR_DATA] != NULL && + (attr[IPSET_ATTR_DATA] && !flag_nested(attr[IPSET_ATTR_DATA])) || - (attr[IPSET_ATTR_ADT] != NULL && + (attr[IPSET_ATTR_ADT] && (!flag_nested(attr[IPSET_ATTR_ADT]) || - attr[IPSET_ATTR_LINENO] == NULL)))) + !attr[IPSET_ATTR_LINENO])))) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; use_lineno = !!attr[IPSET_ATTR_LINENO]; @@ -1627,17 +1618,17 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, { struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; - struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; + struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || - attr[IPSET_ATTR_DATA] == NULL || + !attr[IPSET_ATTR_SETNAME] || + !attr[IPSET_ATTR_DATA] || !flag_nested(attr[IPSET_ATTR_DATA]))) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], @@ -1668,15 +1659,15 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL)) + !attr[IPSET_ATTR_SETNAME])) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) + if (!skb2) return -ENOMEM; nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, @@ -1725,8 +1716,8 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_TYPENAME] == NULL || - attr[IPSET_ATTR_FAMILY] == NULL)) + !attr[IPSET_ATTR_TYPENAME] || + !attr[IPSET_ATTR_FAMILY])) return -IPSET_ERR_PROTOCOL; family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); @@ -1736,7 +1727,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, return ret; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) + if (!skb2) return -ENOMEM; nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, @@ -1781,11 +1772,11 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, struct nlmsghdr *nlh2; int ret = 0; - if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL)) + if (unlikely(!attr[IPSET_ATTR_PROTOCOL])) return -IPSET_ERR_PROTOCOL; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) + if (!skb2) return -ENOMEM; nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, @@ -1913,7 +1904,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) ret = -EFAULT; goto done; } - op = (unsigned int *) data; + op = (unsigned int *)data; if (*op < IP_SET_OP_VERSION) { /* Check the version at the beginning of operations */ @@ -2025,7 +2016,7 @@ ip_set_net_init(struct net *net) if (inst->ip_set_max >= IPSET_INVALID_ID) inst->ip_set_max = IPSET_INVALID_ID - 1; - list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL); + list = kcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL); if (!list) return -ENOMEM; inst->is_deleted = false; @@ -2061,11 +2052,11 @@ static struct pernet_operations ip_set_net_ops = { .size = sizeof(struct ip_set_net) }; - static int __init ip_set_init(void) { int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); + if (ret != 0) { pr_err("ip_set: cannot register with nfnetlink.\n"); return ret; diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index 1981f021cc60..42c3e3ba1b94 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -30,7 +30,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const struct tcphdr *th; th = skb_header_pointer(skb, protooff, sizeof(_tcph), &_tcph); - if (th == NULL) + if (!th) /* No choice either */ return false; @@ -42,7 +42,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const sctp_sctphdr_t *sh; sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh); - if (sh == NULL) + if (!sh) /* No choice either */ return false; @@ -55,7 +55,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const struct udphdr *uh; uh = skb_header_pointer(skb, protooff, sizeof(_udph), &_udph); - if (uh == NULL) + if (!uh) /* No choice either */ return false; @@ -67,7 +67,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const struct icmphdr *ic; ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich); - if (ic == NULL) + if (!ic) return false; *port = (__force __be16)htons((ic->type << 8) | ic->code); @@ -78,7 +78,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const struct icmp6hdr *ic; ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich); - if (ic == NULL) + if (!ic) return false; *port = (__force __be16) @@ -116,7 +116,8 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src, return false; default: /* Other protocols doesn't have ports, - so we can match fragments */ + * so we can match fragments. + */ *proto = protocol; return true; } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index f352cc022010..afe905c208af 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -35,7 +35,7 @@ /* Number of elements to store in an initial array block */ #define AHASH_INIT_SIZE 4 /* Max number of elements to store in an array block */ -#define AHASH_MAX_SIZE (3*AHASH_INIT_SIZE) +#define AHASH_MAX_SIZE (3 * AHASH_INIT_SIZE) /* Max muber of elements in the array block when tuned */ #define AHASH_MAX_TUNED 64 @@ -57,6 +57,7 @@ tune_ahash_max(u8 curr, u32 multi) */ return n > curr && n <= AHASH_MAX_TUNED ? n : curr; } + #define TUNE_AHASH_MAX(h, multi) \ ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi)) #else @@ -256,7 +257,7 @@ htable_bits(u32 hashsize) #endif #define HKEY(data, initval, htable_bits) \ -(jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval) \ +(jhash2((u32 *)(data), HKEY_DATALEN / sizeof(u32), initval) \ & jhash_mask(htable_bits)) #ifndef htype @@ -299,11 +300,11 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) /* Add in increasing prefix order, so larger cidr first */ for (i = 0, j = -1; i < nets_length && h->nets[i].cidr[n]; i++) { - if (j != -1) + if (j != -1) { continue; - else if (h->nets[i].cidr[n] < cidr) + } else if (h->nets[i].cidr[n] < cidr) { j = i; - else if (h->nets[i].cidr[n] == cidr) { + } else if (h->nets[i].cidr[n] == cidr) { h->nets[cidr - 1].nets[n]++; return; } @@ -322,15 +323,15 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) u8 i, j, net_end = nets_length - 1; for (i = 0; i < nets_length; i++) { - if (h->nets[i].cidr[n] != cidr) - continue; + if (h->nets[i].cidr[n] != cidr) + continue; h->nets[cidr - 1].nets[n]--; if (h->nets[cidr - 1].nets[n] > 0) - return; + return; for (j = i; j < net_end && h->nets[j].cidr[n]; j++) - h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; + h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; h->nets[j].cidr[n] = 0; - return; + return; } } #endif @@ -426,8 +427,8 @@ mtype_destroy(struct ip_set *set) if (SET_WITH_TIMEOUT(set)) del_timer_sync(&h->gc); - mtype_ahash_destroy(set, __ipset_dereference_protected(h->table, 1), - true); + mtype_ahash_destroy(set, + __ipset_dereference_protected(h->table, 1), true); kfree(h); set->data = NULL; @@ -439,7 +440,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) struct htype *h = set->data; init_timer(&h->gc); - h->gc.data = (unsigned long) set; + h->gc.data = (unsigned long)set; h->gc.function = gc; h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&h->gc); @@ -530,7 +531,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) static void mtype_gc(unsigned long ul_set) { - struct ip_set *set = (struct ip_set *) ul_set; + struct ip_set *set = (struct ip_set *)ul_set; struct htype *h = set->data; pr_debug("called\n"); @@ -544,7 +545,8 @@ mtype_gc(unsigned long ul_set) /* Resize a hash: create a new hash table with doubling the hashsize * and inserting the elements to it. Repeat until we succeed or - * fail due to memory pressures. */ + * fail due to memory pressures. + */ static int mtype_resize(struct ip_set *set, bool retried) { @@ -687,7 +689,8 @@ cleanup: } /* Add an element to a hash and update the internal counters when succeeded, - * otherwise report the proper error code. */ + * otherwise report the proper error code. + */ static int mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) @@ -926,7 +929,8 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext, #ifdef IP_SET_HASH_WITH_NETS /* Special test function which takes into account the different network - * sizes added to the set */ + * sizes added to the set + */ static int mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, const struct ip_set_ext *ext, @@ -1004,7 +1008,8 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, t = rcu_dereference_bh(h->table); #ifdef IP_SET_HASH_WITH_NETS /* If we test an IP address and not a network address, - * try all possible network sizes */ + * try all possible network sizes + */ for (i = 0; i < IPSET_NET_COUNT; i++) if (DCIDR_GET(d->cidr, i) != SET_HOST_MASK(set->family)) break; @@ -1148,8 +1153,8 @@ mtype_list(const struct ip_set *set, nla_nest_cancel(skb, atd); ret = -EMSGSIZE; goto out; - } else - goto nla_put_failure; + } + goto nla_put_failure; } if (mtype_data_list(skb, e)) goto nla_put_failure; @@ -1171,8 +1176,9 @@ nla_put_failure: set->name); cb->args[IPSET_CB_ARG0] = 0; ret = -EMSGSIZE; - } else + } else { ipset_nest_end(skb, atd); + } out: rcu_read_unlock(); return ret; @@ -1180,12 +1186,13 @@ out: static int IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt); + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt); static int IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); + enum ipset_adt adt, u32 *lineno, u32 flags, + bool retried); static const struct ip_set_type_variant mtype_variant = { .kadt = mtype_kadt, diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index f54d7069d633..9d6bf19f7b78 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -158,8 +158,8 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index f8fbc325ad34..a0695a2ab585 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -155,8 +155,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } @@ -206,7 +206,6 @@ hash_ipmark6_data_next(struct hash_ipmark4_elem *next, #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" - static int hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, @@ -268,10 +267,8 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[], ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; - return ret; + return 0; } static struct ip_set_type hash_ipmark_type __read_mostly = { diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 9a31db8ccca6..9d84b3dff603 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -140,8 +140,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; @@ -187,8 +188,8 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } } return ret; @@ -305,8 +306,9 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; @@ -329,8 +331,8 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index fc42489f8795..215b7b942038 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -63,7 +63,7 @@ hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1, static bool hash_ipportip4_data_list(struct sk_buff *skb, - const struct hash_ipportip4_elem *data) + const struct hash_ipportip4_elem *data) { if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip2) || @@ -147,8 +147,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; @@ -194,8 +195,8 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } } return ret; @@ -320,8 +321,9 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; @@ -344,8 +346,8 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 2a69b9bf66b8..9ca719625ea3 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -209,14 +209,16 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -263,8 +265,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip2_from, ip2_to); if (ip2_from + UINT_MAX == ip2_to) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1); + } if (retried) ip = ntohl(h->next.ip); @@ -287,8 +290,8 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip2 = ip2_last + 1; } } @@ -466,14 +469,16 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -497,8 +502,8 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c index 112aff3cda96..f1e7d2c0f685 100644 --- a/net/netfilter/ipset/ip_set_hash_mac.c +++ b/net/netfilter/ipset/ip_set_hash_mac.c @@ -89,10 +89,10 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb, return 0; if (skb_mac_header(skb) < skb->head || - (skb_mac_header(skb) + ETH_HLEN) > skb->data) + (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; - memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); + ether_addr_copy(e.ether, eth_hdr(skb)->h_source); if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); @@ -116,7 +116,7 @@ hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); + ether_addr_copy(e.ether, nla_data(tb[IPSET_ATTR_ETHER])); if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0) return -IPSET_ERR_HASH_ELEM; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index e49b1d010d30..3e4bffdc1cc0 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -169,6 +169,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -176,7 +177,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { e.ip = htonl(ip & ip_set_hostmask(e.cidr)); ret = adtfn(set, &e, &ext, &ext, flags); - return ip_set_enomatch(ret, flags, adt, set) ? -ret: + return ip_set_enomatch(ret, flags, adt, set) ? -ret : ip_set_eexist(ret, flags) ? 0 : ret; } @@ -198,8 +199,8 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip = last + 1; } return ret; @@ -339,6 +340,7 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 42c893e08842..43d8c9896fa3 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -143,7 +143,7 @@ static const char *get_physindev_name(const struct sk_buff *skb) return dev ? dev->name : NULL; } -static const char *get_phyoutdev_name(const struct sk_buff *skb) +static const char *get_physoutdev_name(const struct sk_buff *skb) { struct net_device *dev = nf_bridge_get_physoutdev(skb); @@ -178,15 +178,16 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const char *eiface = SRCDIR ? get_physindev_name(skb) : - get_phyoutdev_name(skb); + get_physoutdev_name(skb); if (!eiface) return -EINVAL; STRLCPY(e.iface, eiface); e.physdev = 1; #endif - } else + } else { STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out)); + } if (strlen(e.iface) == 0) return -EINVAL; @@ -229,6 +230,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_PHYSDEV) e.physdev = 1; if (cadt_flags & IPSET_FLAG_NOMATCH) @@ -249,8 +251,9 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip, ip_to, e.cidr); + } if (retried) ip = ntohl(h->next.ip); @@ -261,8 +264,8 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip = last + 1; } return ret; @@ -385,15 +388,16 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const char *eiface = SRCDIR ? get_physindev_name(skb) : - get_phyoutdev_name(skb); + get_physoutdev_name(skb); + if (!eiface) return -EINVAL; - STRLCPY(e.iface, eiface); e.physdev = 1; #endif - } else + } else { STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out)); + } if (strlen(e.iface) == 0) return -EINVAL; @@ -403,7 +407,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 }; @@ -440,6 +444,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_PHYSDEV) e.physdev = 1; if (cadt_flags & IPSET_FLAG_NOMATCH) diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index b5428be1f159..3c862c0a76d1 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -57,8 +57,8 @@ struct hash_netnet4_elem { static inline bool hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1, - const struct hash_netnet4_elem *ip2, - u32 *multi) + const struct hash_netnet4_elem *ip2, + u32 *multi) { return ip1->ipcmp == ip2->ipcmp && ip1->ccmp == ip2->ccmp; @@ -84,7 +84,7 @@ hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags) static inline void hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem, - struct hash_netnet4_elem *orig) + struct hash_netnet4_elem *orig) { elem->ip[1] = orig->ip[1]; } @@ -103,7 +103,7 @@ hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner) static bool hash_netnet4_data_list(struct sk_buff *skb, - const struct hash_netnet4_elem *data) + const struct hash_netnet4_elem *data) { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; @@ -122,7 +122,7 @@ nla_put_failure: static inline void hash_netnet4_data_next(struct hash_netnet4_elem *next, - const struct hash_netnet4_elem *d) + const struct hash_netnet4_elem *d) { next->ipcmp = d->ipcmp; } @@ -133,8 +133,8 @@ hash_netnet4_data_next(struct hash_netnet4_elem *next, static int hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt) + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -156,7 +156,7 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -199,6 +199,7 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -221,8 +222,9 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (unlikely(ip + UINT_MAX == ip_to)) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip, ip_to, e.cidr[0]); + } ip2_to = ip2_from; if (tb[IPSET_ATTR_IP2_TO]) { @@ -233,8 +235,9 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip2_from, ip2_to); if (unlikely(ip2_from + UINT_MAX == ip2_to)) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); + } if (retried) ip = ntohl(h->next.ip[0]); @@ -251,8 +254,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip2 = last2 + 1; } ip = last + 1; @@ -276,8 +279,8 @@ struct hash_netnet6_elem { static inline bool hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1, - const struct hash_netnet6_elem *ip2, - u32 *multi) + const struct hash_netnet6_elem *ip2, + u32 *multi) { return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) && ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) && @@ -304,7 +307,7 @@ hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags) static inline void hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem, - struct hash_netnet6_elem *orig) + struct hash_netnet6_elem *orig) { elem->ip[1] = orig->ip[1]; } @@ -323,7 +326,7 @@ hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner) static bool hash_netnet6_data_list(struct sk_buff *skb, - const struct hash_netnet6_elem *data) + const struct hash_netnet6_elem *data) { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; @@ -342,7 +345,7 @@ nla_put_failure: static inline void hash_netnet6_data_next(struct hash_netnet4_elem *next, - const struct hash_netnet6_elem *d) + const struct hash_netnet6_elem *d) { } @@ -356,8 +359,8 @@ hash_netnet6_data_next(struct hash_netnet4_elem *next, static int hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt) + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -367,7 +370,7 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) - e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK; + e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK; ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6); ip6addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1].in6); @@ -379,7 +382,7 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, }; @@ -424,6 +427,7 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 27307d0a8a5d..731813e0f08c 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -198,8 +198,9 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; @@ -208,6 +209,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -233,8 +235,9 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip, ip_to, e.cidr + 1); + } if (retried) ip = ntohl(h->next.ip); @@ -250,8 +253,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } ip = last + 1; } @@ -413,14 +416,16 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -444,8 +449,8 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 1e0e47ae40a4..0c68734f5cc4 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -62,8 +62,8 @@ struct hash_netportnet4_elem { static inline bool hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1, - const struct hash_netportnet4_elem *ip2, - u32 *multi) + const struct hash_netportnet4_elem *ip2, + u32 *multi) { return ip1->ipcmp == ip2->ipcmp && ip1->ccmp == ip2->ccmp && @@ -91,7 +91,7 @@ hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags) static inline void hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem, - struct hash_netportnet4_elem *orig) + struct hash_netportnet4_elem *orig) { elem->ip[1] = orig->ip[1]; } @@ -111,7 +111,7 @@ hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem, static bool hash_netportnet4_data_list(struct sk_buff *skb, - const struct hash_netportnet4_elem *data) + const struct hash_netportnet4_elem *data) { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; @@ -132,7 +132,7 @@ nla_put_failure: static inline void hash_netportnet4_data_next(struct hash_netportnet4_elem *next, - const struct hash_netportnet4_elem *d) + const struct hash_netportnet4_elem *d) { next->ipcmp = d->ipcmp; next->port = d->port; @@ -144,8 +144,8 @@ hash_netportnet4_data_next(struct hash_netportnet4_elem *next, static int hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt) + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -171,7 +171,7 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -223,14 +223,16 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -254,8 +256,9 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (unlikely(ip + UINT_MAX == ip_to)) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip, ip_to, e.cidr[0]); + } port_to = port = ntohs(e.port); if (tb[IPSET_ATTR_PORT_TO]) { @@ -273,8 +276,9 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip2_from, ip2_to); if (unlikely(ip2_from + UINT_MAX == ip2_to)) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); + } if (retried) ip = ntohl(h->next.ip[0]); @@ -296,8 +300,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip2 = ip2_last + 1; } } @@ -324,8 +328,8 @@ struct hash_netportnet6_elem { static inline bool hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1, - const struct hash_netportnet6_elem *ip2, - u32 *multi) + const struct hash_netportnet6_elem *ip2, + u32 *multi) { return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) && ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) && @@ -354,7 +358,7 @@ hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags) static inline void hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem, - struct hash_netportnet6_elem *orig) + struct hash_netportnet6_elem *orig) { elem->ip[1] = orig->ip[1]; } @@ -374,7 +378,7 @@ hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem, static bool hash_netportnet6_data_list(struct sk_buff *skb, - const struct hash_netportnet6_elem *data) + const struct hash_netportnet6_elem *data) { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; @@ -395,7 +399,7 @@ nla_put_failure: static inline void hash_netportnet6_data_next(struct hash_netportnet4_elem *next, - const struct hash_netportnet6_elem *d) + const struct hash_netportnet6_elem *d) { next->port = d->port; } @@ -410,8 +414,8 @@ hash_netportnet6_data_next(struct hash_netportnet4_elem *next, static int hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt) + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -437,7 +441,7 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -493,14 +497,16 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -524,8 +530,8 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 9f624ee9a41e..a1fe5377a2b3 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -206,14 +206,15 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, continue; } - if (d->before == 0) + if (d->before == 0) { ret = 1; - else if (d->before > 0) { + } else if (d->before > 0) { next = list_next_entry(e, list); ret = !list_is_last(&e->list, &map->members) && next->id == d->refid; - } else + } else { ret = prev && prev->id == d->refid; + } return ret; } return 0; @@ -558,7 +559,7 @@ static const struct ip_set_type_variant set_variant = { static void list_set_gc(unsigned long ul_set) { - struct ip_set *set = (struct ip_set *) ul_set; + struct ip_set *set = (struct ip_set *)ul_set; struct list_set *map = set->data; spin_lock_bh(&set->lock); @@ -575,7 +576,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) struct list_set *map = set->data; init_timer(&map->gc); - map->gc.data = (unsigned long) set; + map->gc.data = (unsigned long)set; map->gc.function = gc; map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c index 04d15fdc99ee..1c8a42c1056c 100644 --- a/net/netfilter/ipset/pfxlen.c +++ b/net/netfilter/ipset/pfxlen.c @@ -1,9 +1,7 @@ #include #include -/* - * Prefixlen maps for fast conversions, by Jan Engelhardt. - */ +/* Prefixlen maps for fast conversions, by Jan Engelhardt. */ #define E(a, b, c, d) \ {.ip6 = { \ @@ -11,8 +9,7 @@ htonl(c), htonl(d), \ } } -/* - * This table works for both IPv4 and IPv6; +/* This table works for both IPv4 and IPv6; * just use prefixlen_netmask_map[prefixlength].ip. */ const union nf_inet_addr ip_set_netmask_map[] = { @@ -149,13 +146,12 @@ const union nf_inet_addr ip_set_netmask_map[] = { EXPORT_SYMBOL_GPL(ip_set_netmask_map); #undef E -#define E(a, b, c, d) \ - {.ip6 = { (__force __be32) a, (__force __be32) b, \ - (__force __be32) c, (__force __be32) d, \ +#define E(a, b, c, d) \ + {.ip6 = { (__force __be32)a, (__force __be32)b, \ + (__force __be32)c, (__force __be32)d, \ } } -/* - * This table works for both IPv4 and IPv6; +/* This table works for both IPv4 and IPv6; * just use prefixlen_hostmask_map[prefixlength].ip. */ const union nf_inet_addr ip_set_hostmask_map[] = { diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index b103e9627716..5669e5b453f4 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -9,7 +9,8 @@ */ /* Kernel module which implements the set match and SET target - * for netfilter/iptables. */ + * for netfilter/iptables. + */ #include #include @@ -53,6 +54,7 @@ static bool set_match_v0(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v0 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.u.compat.dim, info->match_set.u.compat.flags, 0, UINT_MAX); @@ -69,10 +71,10 @@ compat_flags(struct xt_set_info_v0 *info) info->u.compat.dim = IPSET_DIM_ZERO; if (info->u.flags[0] & IPSET_MATCH_INV) info->u.compat.flags |= IPSET_INV_MATCH; - for (i = 0; i < IPSET_DIM_MAX-1 && info->u.flags[i]; i++) { + for (i = 0; i < IPSET_DIM_MAX - 1 && info->u.flags[i]; i++) { info->u.compat.dim++; if (info->u.flags[i] & IPSET_SRC) - info->u.compat.flags |= (1<u.compat.dim); + info->u.compat.flags |= (1 << info->u.compat.dim); } } @@ -89,7 +91,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) info->match_set.index); return -ENOENT; } - if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) { + if (info->match_set.u.flags[IPSET_DIM_MAX - 1] != 0) { pr_warn("Protocol error: set match dimension is over the limit!\n"); ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; @@ -115,6 +117,7 @@ static bool set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v1 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.dim, info->match_set.flags, 0, UINT_MAX); @@ -179,9 +182,10 @@ static bool set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v3 *info = par->matchinfo; + int ret; + ADT_OPT(opt, par->family, info->match_set.dim, info->match_set.flags, info->flags, UINT_MAX); - int ret; if (info->packets.op != IPSET_COUNTER_NONE || info->bytes.op != IPSET_COUNTER_NONE) @@ -225,9 +229,10 @@ static bool set_match_v4(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v4 *info = par->matchinfo; + int ret; + ADT_OPT(opt, par->family, info->match_set.dim, info->match_set.flags, info->flags, UINT_MAX); - int ret; if (info->packets.op != IPSET_COUNTER_NONE || info->bytes.op != IPSET_COUNTER_NONE) @@ -253,6 +258,7 @@ static unsigned int set_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v0 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.u.compat.dim, info->add_set.u.compat.flags, 0, UINT_MAX); ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim, @@ -291,8 +297,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) return -ENOENT; } } - if (info->add_set.u.flags[IPSET_DIM_MAX-1] != 0 || - info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) { + if (info->add_set.u.flags[IPSET_DIM_MAX - 1] != 0 || + info->del_set.u.flags[IPSET_DIM_MAX - 1] != 0) { pr_warn("Protocol error: SET target dimension is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); @@ -325,6 +331,7 @@ static unsigned int set_target_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v1 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.dim, info->add_set.flags, 0, UINT_MAX); ADT_OPT(del_opt, par->family, info->del_set.dim, @@ -393,6 +400,7 @@ static unsigned int set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v2 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.dim, info->add_set.flags, info->flags, info->timeout); ADT_OPT(del_opt, par->family, info->del_set.dim, @@ -400,8 +408,8 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) /* Normalize to fit into jiffies */ if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && - add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC) - add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC; + add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC) + add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC; if (info->add_set.index != IPSET_INVALID_ID) ip_set_add(info->add_set.index, skb, par, &add_opt); if (info->del_set.index != IPSET_INVALID_ID) @@ -419,6 +427,8 @@ static unsigned int set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v3 *info = par->targinfo; + int ret; + ADT_OPT(add_opt, par->family, info->add_set.dim, info->add_set.flags, info->flags, info->timeout); ADT_OPT(del_opt, par->family, info->del_set.dim, @@ -426,12 +436,10 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) ADT_OPT(map_opt, par->family, info->map_set.dim, info->map_set.flags, 0, UINT_MAX); - int ret; - /* Normalize to fit into jiffies */ if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && - add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC) - add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC; + add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC) + add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC; if (info->add_set.index != IPSET_INVALID_ID) ip_set_add(info->add_set.index, skb, par, &add_opt); if (info->del_set.index != IPSET_INVALID_ID) @@ -457,7 +465,6 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } - static int set_target_v3_checkentry(const struct xt_tgchk_param *par) { @@ -497,8 +504,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) !(par->hook_mask & (1 << NF_INET_FORWARD | 1 << NF_INET_LOCAL_OUT | 1 << NF_INET_POST_ROUTING))) { - pr_warn("mapping of prio or/and queue is allowed only" - "from OUTPUT/FORWARD/POSTROUTING chains\n"); + pr_warn("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n"); return -EINVAL; } index = ip_set_nfnl_get_byindex(par->net, @@ -519,8 +525,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) if (info->add_set.dim > IPSET_DIM_MAX || info->del_set.dim > IPSET_DIM_MAX || info->map_set.dim > IPSET_DIM_MAX) { - pr_warn("Protocol error: SET target dimension " - "is over the limit!\n"); + pr_warn("Protocol error: SET target dimension is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) @@ -546,7 +551,6 @@ set_target_v3_destroy(const struct xt_tgdtor_param *par) ip_set_nfnl_put(par->net, info->map_set.index); } - static struct xt_match set_matches[] __read_mostly = { { .name = "set", -- cgit v1.2.3 From 2d45a02d0166caf2627fe91897c6ffc3b19514c4 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 12 Jun 2015 10:16:41 -0300 Subject: sctp: fix ASCONF list handling ->auto_asconf_splist is per namespace and mangled by functions like sctp_setsockopt_auto_asconf() which doesn't guarantee any serialization. Also, the call to inet_sk_copy_descendant() was backuping ->auto_asconf_list through the copy but was not honoring ->do_auto_asconf, which could lead to list corruption if it was different between both sockets. This commit thus fixes the list handling by using ->addr_wq_lock spinlock to protect the list. A special handling is done upon socket creation and destruction for that. Error handlig on sctp_init_sock() will never return an error after having initialized asconf, so sctp_destroy_sock() can be called without addrq_wq_lock. The lock now will be take on sctp_close_sock(), before locking the socket, so we don't do it in inverse order compared to sctp_addr_wq_timeout_handler(). Instead of taking the lock on sctp_sock_migrate() for copying and restoring the list values, it's preferred to avoid rewritting it by implementing sctp_copy_descendant(). Issue was found with a test application that kept flipping sysctl default_auto_asconf on and off, but one could trigger it by issuing simultaneous setsockopt() calls on multiple sockets or by creating/destroying sockets fast enough. This is only triggerable locally. Fixes: 9f7d653b67ae ("sctp: Add Auto-ASCONF support (core).") Reported-by: Ji Jianwen Suggested-by: Neil Horman Suggested-by: Hannes Frederic Sowa Acked-by: Hannes Frederic Sowa Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/netns/sctp.h | 1 + include/net/sctp/structs.h | 4 ++++ net/sctp/socket.c | 43 ++++++++++++++++++++++++++++++++----------- 3 files changed, 37 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 3573a81815ad..8ba379f9e467 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -31,6 +31,7 @@ struct netns_sctp { struct list_head addr_waitq; struct timer_list addr_wq_timer; struct list_head auto_asconf_splist; + /* Lock that protects both addr_waitq and auto_asconf_splist */ spinlock_t addr_wq_lock; /* Lock that protects the local_addr_list writers */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2bb2fcf5b11f..495c87e367b3 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -223,6 +223,10 @@ struct sctp_sock { atomic_t pd_mode; /* Receive to here while partial delivery is in effect. */ struct sk_buff_head pd_lobby; + + /* These must be the last fields, as they will skipped on copies, + * like on accept and peeloff operations + */ struct list_head auto_asconf_list; int do_auto_asconf; }; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f09de7fac2e6..5f6c4e61325b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1528,8 +1528,10 @@ static void sctp_close(struct sock *sk, long timeout) /* Supposedly, no process has access to the socket, but * the net layers still may. + * Also, sctp_destroy_sock() needs to be called with addr_wq_lock + * held and that should be grabbed before socket lock. */ - local_bh_disable(); + spin_lock_bh(&net->sctp.addr_wq_lock); bh_lock_sock(sk); /* Hold the sock, since sk_common_release() will put sock_put() @@ -1539,7 +1541,7 @@ static void sctp_close(struct sock *sk, long timeout) sk_common_release(sk); bh_unlock_sock(sk); - local_bh_enable(); + spin_unlock_bh(&net->sctp.addr_wq_lock); sock_put(sk); @@ -3580,6 +3582,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, if ((val && sp->do_auto_asconf) || (!val && !sp->do_auto_asconf)) return 0; + spin_lock_bh(&sock_net(sk)->sctp.addr_wq_lock); if (val == 0 && sp->do_auto_asconf) { list_del(&sp->auto_asconf_list); sp->do_auto_asconf = 0; @@ -3588,6 +3591,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, &sock_net(sk)->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; } + spin_unlock_bh(&sock_net(sk)->sctp.addr_wq_lock); return 0; } @@ -4121,18 +4125,28 @@ static int sctp_init_sock(struct sock *sk) local_bh_disable(); percpu_counter_inc(&sctp_sockets_allocated); sock_prot_inuse_add(net, sk->sk_prot, 1); + + /* Nothing can fail after this block, otherwise + * sctp_destroy_sock() will be called without addr_wq_lock held + */ if (net->sctp.default_auto_asconf) { + spin_lock(&sock_net(sk)->sctp.addr_wq_lock); list_add_tail(&sp->auto_asconf_list, &net->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; - } else + spin_unlock(&sock_net(sk)->sctp.addr_wq_lock); + } else { sp->do_auto_asconf = 0; + } + local_bh_enable(); return 0; } -/* Cleanup any SCTP per socket resources. */ +/* Cleanup any SCTP per socket resources. Must be called with + * sock_net(sk)->sctp.addr_wq_lock held if sp->do_auto_asconf is true + */ static void sctp_destroy_sock(struct sock *sk) { struct sctp_sock *sp; @@ -7195,6 +7209,19 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newinet->mc_list = NULL; } +static inline void sctp_copy_descendant(struct sock *sk_to, + const struct sock *sk_from) +{ + int ancestor_size = sizeof(struct inet_sock) + + sizeof(struct sctp_sock) - + offsetof(struct sctp_sock, auto_asconf_list); + + if (sk_from->sk_family == PF_INET6) + ancestor_size += sizeof(struct ipv6_pinfo); + + __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size); +} + /* Populate the fields of the newsk from the oldsk and migrate the assoc * and its messages to the newsk. */ @@ -7209,7 +7236,6 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, struct sk_buff *skb, *tmp; struct sctp_ulpevent *event; struct sctp_bind_hashbucket *head; - struct list_head tmplist; /* Migrate socket buffer sizes and all the socket level options to the * new socket. @@ -7217,12 +7243,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, newsk->sk_sndbuf = oldsk->sk_sndbuf; newsk->sk_rcvbuf = oldsk->sk_rcvbuf; /* Brute force copy old sctp opt. */ - if (oldsp->do_auto_asconf) { - memcpy(&tmplist, &newsp->auto_asconf_list, sizeof(tmplist)); - inet_sk_copy_descendant(newsk, oldsk); - memcpy(&newsp->auto_asconf_list, &tmplist, sizeof(tmplist)); - } else - inet_sk_copy_descendant(newsk, oldsk); + sctp_copy_descendant(newsk, oldsk); /* Restore the ep value that was overwritten with the above structure * copy. -- cgit v1.2.3 From 711bdde6a884354ddae8da2fcb495b2a9364cc90 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Jun 2015 09:57:30 -0700 Subject: netfilter: x_tables: remove XT_TABLE_INFO_SZ and a dereference. After Florian patches, there is no need for XT_TABLE_INFO_SZ anymore : Only one copy of table is kept, instead of one copy per cpu. We also can avoid a dereference if we put table data right after xt_table_info. It reduces register pressure and helps compiler. Then, we attempt a kmalloc() if total size is under order-3 allocation, to reduce TLB pressure, as in many cases, rules fit in 32 KB. Signed-off-by: Eric Dumazet Cc: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 5 +---- net/ipv4/netfilter/arp_tables.c | 4 ++-- net/ipv4/netfilter/ip_tables.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 4 ++-- net/netfilter/x_tables.c | 32 ++++++++++++-------------------- 5 files changed, 19 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 9969d79dcde1..95693c4cebdd 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -225,12 +225,9 @@ struct xt_table_info { unsigned int __percpu *stackptr; void ***jumpstack; - /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */ - void *entries; + unsigned char entries[0] __aligned(8); }; -#define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \ - + nr_cpu_ids * sizeof(char *)) int xt_register_target(struct xt_target *target); void xt_unregister_target(struct xt_target *target); int xt_register_targets(struct xt_target *target, unsigned int n); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index d75c139393fc..95c9b6eece25 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -256,7 +256,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, const struct arphdr *arp; struct arpt_entry *e, *back; const char *indev, *outdev; - void *table_base; + const void *table_base; const struct xt_table_info *private; struct xt_action_param acpar; unsigned int addend; @@ -868,7 +868,7 @@ static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { struct arpt_entry *iter; - void *loc_cpu_entry; + const void *loc_cpu_entry; int ret; if (!newinfo || !info) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 6151500c2df8..6c72fbb7b49e 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -938,7 +938,7 @@ copy_entries_to_user(unsigned int total_size, struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; - void *loc_cpu_entry; + const void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) @@ -1052,7 +1052,7 @@ static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { struct ipt_entry *iter; - void *loc_cpu_entry; + const void *loc_cpu_entry; int ret; if (!newinfo || !info) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 80a7f0d38aa4..3c35ced39b42 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -951,7 +951,7 @@ copy_entries_to_user(unsigned int total_size, struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; - void *loc_cpu_entry; + const void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) @@ -1065,7 +1065,7 @@ static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { struct ip6t_entry *iter; - void *loc_cpu_entry; + const void *loc_cpu_entry; int ret; if (!newinfo || !info) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 6062ce3e862c..d324fe71260c 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -658,29 +658,23 @@ EXPORT_SYMBOL_GPL(xt_compat_target_to_user); struct xt_table_info *xt_alloc_table_info(unsigned int size) { - struct xt_table_info *newinfo; + struct xt_table_info *info = NULL; + size_t sz = sizeof(*info) + size; /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages) return NULL; - newinfo = kzalloc(XT_TABLE_INFO_SZ, GFP_KERNEL); - if (!newinfo) - return NULL; - - newinfo->size = size; - - if (size <= PAGE_SIZE) - newinfo->entries = kmalloc(size, GFP_KERNEL); - else - newinfo->entries = vmalloc(size); - - if (newinfo->entries == NULL) { - xt_free_table_info(newinfo); - return NULL; + if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) + info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); + if (!info) { + info = vmalloc(sz); + if (!info) + return NULL; } - - return newinfo; + memset(info, 0, sizeof(*info)); + info->size = size; + return info; } EXPORT_SYMBOL(xt_alloc_table_info); @@ -688,8 +682,6 @@ void xt_free_table_info(struct xt_table_info *info) { int cpu; - kvfree(info->entries); - if (info->jumpstack != NULL) { for_each_possible_cpu(cpu) kvfree(info->jumpstack[cpu]); @@ -698,7 +690,7 @@ void xt_free_table_info(struct xt_table_info *info) free_percpu(info->stackptr); - kfree(info); + kvfree(info); } EXPORT_SYMBOL(xt_free_table_info); -- cgit v1.2.3 From 2cbce139fc57bc2625f88add055d0b94f00c3352 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 12 Jun 2015 13:55:41 +0200 Subject: netfilter: nf_tables: attach net_device to basechain The device is part of the hook configuration, so instead of a global configuration per table, set it to each of the basechain that we create. This patch reworks ebddf1a8d78a ("netfilter: nf_tables: allow to bind table to net_device"). Note that this adds a dev_name field in the nft_base_chain structure which is required the netdev notification subscription that follows up in a patch to handle gone net_devices. Suggested-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 +- include/uapi/linux/netfilter/nf_tables.h | 4 +- net/netfilter/nf_tables_api.c | 79 +++++++++++++++++--------------- 3 files changed, 46 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3d6f48ca40a7..09d6f8df60f0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -791,6 +791,7 @@ struct nft_stats { * @policy: default policy * @stats: per-cpu chain stats * @chain: the chain + * @dev_name: device name that this base chain is attached to (if any) */ struct nft_base_chain { struct nf_hook_ops ops[NFT_HOOK_OPS_MAX]; @@ -799,6 +800,7 @@ struct nft_base_chain { u8 policy; struct nft_stats __percpu *stats; struct nft_chain chain; + char dev_name[IFNAMSIZ]; }; static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain) @@ -819,7 +821,6 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) * @name: name of the table - * @dev: this table is bound to this device (if any) */ struct nft_table { struct list_head list; @@ -829,7 +830,6 @@ struct nft_table { u32 use; u16 flags; char name[NFT_TABLE_MAXNAMELEN]; - struct net_device *dev; }; enum nft_af_flags { diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 89a671e0f5e7..a99e6a997140 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -122,11 +122,13 @@ enum nft_list_attributes { * * @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32) * @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32) + * @NFTA_HOOK_DEV: netdevice name (NLA_STRING) */ enum nft_hook_attributes { NFTA_HOOK_UNSPEC, NFTA_HOOK_HOOKNUM, NFTA_HOOK_PRIORITY, + NFTA_HOOK_DEV, __NFTA_HOOK_MAX }; #define NFTA_HOOK_MAX (__NFTA_HOOK_MAX - 1) @@ -146,14 +148,12 @@ enum nft_table_flags { * @NFTA_TABLE_NAME: name of the table (NLA_STRING) * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32) * @NFTA_TABLE_USE: number of chains in this table (NLA_U32) - * @NFTA_TABLE_DEV: net device name (NLA_STRING) */ enum nft_table_attributes { NFTA_TABLE_UNSPEC, NFTA_TABLE_NAME, NFTA_TABLE_FLAGS, NFTA_TABLE_USE, - NFTA_TABLE_DEV, __NFTA_TABLE_MAX }; #define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4528f122bcd2..900c81a2f89a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -399,8 +399,6 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, - [NFTA_TABLE_DEV] = { .type = NLA_STRING, - .len = IFNAMSIZ - 1 }, }; static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, @@ -425,10 +423,6 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use))) goto nla_put_failure; - if (table->dev && - nla_put_string(skb, NFTA_TABLE_DEV, table->dev->name)) - goto nla_put_failure; - nlmsg_end(skb, nlh); return 0; @@ -614,11 +608,6 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if (flags == ctx->table->flags) return 0; - if ((ctx->afi->flags & NFT_AF_NEEDS_DEV) && - ctx->nla[NFTA_TABLE_DEV] && - nla_strcmp(ctx->nla[NFTA_TABLE_DEV], ctx->table->dev->name)) - return -EOPNOTSUPP; - trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, sizeof(struct nft_trans_table)); if (trans == NULL) @@ -656,7 +645,6 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, struct nft_table *table; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - struct net_device *dev = NULL; u32 flags = 0; struct nft_ctx ctx; int err; @@ -691,20 +679,6 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, return -EINVAL; } - if (afi->flags & NFT_AF_NEEDS_DEV) { - char ifname[IFNAMSIZ]; - - if (!nla[NFTA_TABLE_DEV]) - return -EOPNOTSUPP; - - nla_strlcpy(ifname, nla[NFTA_TABLE_DEV], IFNAMSIZ); - dev = dev_get_by_name(net, ifname); - if (!dev) - return -ENOENT; - } else if (nla[NFTA_TABLE_DEV]) { - return -EOPNOTSUPP; - } - err = -EAFNOSUPPORT; if (!try_module_get(afi->owner)) goto err1; @@ -718,7 +692,6 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); table->flags = flags; - table->dev = dev; nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); @@ -732,9 +705,6 @@ err3: err2: module_put(afi->owner); err1: - if (dev != NULL) - dev_put(dev); - return err; } @@ -838,9 +808,6 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx) { BUG_ON(ctx->table->use > 0); - if (ctx->table->dev) - dev_put(ctx->table->dev); - kfree(ctx->table); module_put(ctx->afi->owner); } @@ -916,6 +883,8 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { [NFTA_HOOK_HOOKNUM] = { .type = NLA_U32 }, [NFTA_HOOK_PRIORITY] = { .type = NLA_U32 }, + [NFTA_HOOK_DEV] = { .type = NLA_STRING, + .len = IFNAMSIZ - 1 }, }; static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) @@ -989,6 +958,9 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, goto nla_put_failure; if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority))) goto nla_put_failure; + if (basechain->dev_name[0] && + nla_put_string(skb, NFTA_HOOK_DEV, basechain->dev_name)) + goto nla_put_failure; nla_nest_end(skb, nest); if (nla_put_be32(skb, NFTA_CHAIN_POLICY, @@ -1200,9 +1172,13 @@ static void nf_tables_chain_destroy(struct nft_chain *chain) BUG_ON(chain->use > 0); if (chain->flags & NFT_BASE_CHAIN) { - module_put(nft_base_chain(chain)->type->owner); - free_percpu(nft_base_chain(chain)->stats); - kfree(nft_base_chain(chain)); + struct nft_base_chain *basechain = nft_base_chain(chain); + + module_put(basechain->type->owner); + free_percpu(basechain->stats); + if (basechain->ops[0].dev != NULL) + dev_put(basechain->ops[0].dev); + kfree(basechain); } else { kfree(chain); } @@ -1221,6 +1197,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, struct nlattr *ha[NFTA_HOOK_MAX + 1]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct net_device *dev = NULL; u8 policy = NF_ACCEPT; u64 handle = 0; unsigned int i; @@ -1360,17 +1337,43 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, return -ENOENT; hookfn = type->hooks[hooknum]; + if (afi->flags & NFT_AF_NEEDS_DEV) { + char ifname[IFNAMSIZ]; + + if (!ha[NFTA_HOOK_DEV]) { + module_put(type->owner); + return -EOPNOTSUPP; + } + + nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ); + dev = dev_get_by_name(net, ifname); + if (!dev) { + module_put(type->owner); + return -ENOENT; + } + } else if (ha[NFTA_HOOK_DEV]) { + module_put(type->owner); + return -EOPNOTSUPP; + } + basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); if (basechain == NULL) { module_put(type->owner); + if (dev != NULL) + dev_put(dev); return -ENOMEM; } + if (dev != NULL) + strncpy(basechain->dev_name, dev->name, IFNAMSIZ); + if (nla[NFTA_CHAIN_COUNTERS]) { stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); if (IS_ERR(stats)) { module_put(type->owner); kfree(basechain); + if (dev != NULL) + dev_put(dev); return PTR_ERR(stats); } basechain->stats = stats; @@ -1379,6 +1382,8 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (stats == NULL) { module_put(type->owner); kfree(basechain); + if (dev != NULL) + dev_put(dev); return -ENOMEM; } rcu_assign_pointer(basechain->stats, stats); @@ -1396,7 +1401,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, ops->priority = priority; ops->priv = chain; ops->hook = afi->hooks[ops->hooknum]; - ops->dev = table->dev; + ops->dev = dev; if (hookfn) ops->hook = hookfn; if (afi->hook_ops_init) -- cgit v1.2.3 From 835b803377f5f11f9ccf234f70ed667a82605c45 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 15 Jun 2015 12:12:01 +0200 Subject: netfilter: nf_tables_netdev: unregister hooks on net_device removal In case the net_device is gone, we have to unregister the hooks and put back the reference on the net_device object. Once it comes back, register them again. This also covers the device rename case. This patch also adds a new flag to indicate that the basechain is disabled, so their hooks are not registered. This flag is used by the netdev family to handle the case where the net_device object is gone. Currently this flag is not exposed to userspace. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 7 ++++ net/netfilter/nf_tables_api.c | 16 ++++++--- net/netfilter/nf_tables_netdev.c | 75 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 09d6f8df60f0..2a246680a6c3 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -781,6 +781,7 @@ struct nft_stats { }; #define NFT_HOOK_OPS_MAX 2 +#define NFT_BASECHAIN_DISABLED (1 << 0) /** * struct nft_base_chain - nf_tables base chain @@ -798,6 +799,7 @@ struct nft_base_chain { possible_net_t pnet; const struct nf_chain_type *type; u8 policy; + u8 flags; struct nft_stats __percpu *stats; struct nft_chain chain; char dev_name[IFNAMSIZ]; @@ -808,6 +810,11 @@ static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chai return container_of(chain, struct nft_base_chain, chain); } +int nft_register_basechain(struct nft_base_chain *basechain, + unsigned int hook_nops); +void nft_unregister_basechain(struct nft_base_chain *basechain, + unsigned int hook_nops); + unsigned int nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c66dc62afb46..cfe636808541 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -127,17 +127,25 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } -static int nft_register_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops) +int nft_register_basechain(struct nft_base_chain *basechain, + unsigned int hook_nops) { + if (basechain->flags & NFT_BASECHAIN_DISABLED) + return 0; + return nf_register_hooks(basechain->ops, hook_nops); } +EXPORT_SYMBOL_GPL(nft_register_basechain); -static void nft_unregister_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops) +void nft_unregister_basechain(struct nft_base_chain *basechain, + unsigned int hook_nops) { + if (basechain->flags & NFT_BASECHAIN_DISABLED) + return; + nf_unregister_hooks(basechain->ops, hook_nops); } +EXPORT_SYMBOL_GPL(nft_unregister_basechain); static int nf_tables_register_hooks(const struct nft_table *table, struct nft_chain *chain, diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 04cb17057f46..2cae4d4a03b7 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -157,6 +158,77 @@ static const struct nf_chain_type nft_filter_chain_netdev = { .hook_mask = (1 << NF_NETDEV_INGRESS), }; +static void nft_netdev_event(unsigned long event, struct nft_af_info *afi, + struct net_device *dev, struct nft_table *table, + struct nft_base_chain *basechain) +{ + switch (event) { + case NETDEV_REGISTER: + if (strcmp(basechain->dev_name, dev->name) != 0) + return; + + BUG_ON(!(basechain->flags & NFT_BASECHAIN_DISABLED)); + + dev_hold(dev); + basechain->ops[0].dev = dev; + basechain->flags &= ~NFT_BASECHAIN_DISABLED; + if (!(table->flags & NFT_TABLE_F_DORMANT)) + nft_register_basechain(basechain, afi->nops); + break; + case NETDEV_UNREGISTER: + if (strcmp(basechain->dev_name, dev->name) != 0) + return; + + BUG_ON(basechain->flags & NFT_BASECHAIN_DISABLED); + + if (!(table->flags & NFT_TABLE_F_DORMANT)) + nft_unregister_basechain(basechain, afi->nops); + + dev_put(basechain->ops[0].dev); + basechain->ops[0].dev = NULL; + basechain->flags |= NFT_BASECHAIN_DISABLED; + break; + case NETDEV_CHANGENAME: + if (dev->ifindex != basechain->ops[0].dev->ifindex) + return; + + strncpy(basechain->dev_name, dev->name, IFNAMSIZ); + break; + } +} + +static int nf_tables_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; + + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) { + if (afi->family != NFPROTO_NETDEV) + continue; + + list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry(chain, &table->chains, list) { + if (!(chain->flags & NFT_BASE_CHAIN)) + continue; + + nft_netdev_event(event, afi, dev, table, + nft_base_chain(chain)); + } + } + } + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + + return NOTIFY_DONE; +} + +static struct notifier_block nf_tables_netdev_notifier = { + .notifier_call = nf_tables_netdev_event, +}; + static int __init nf_tables_netdev_init(void) { int ret; @@ -166,11 +238,14 @@ static int __init nf_tables_netdev_init(void) if (ret < 0) nft_unregister_chain_type(&nft_filter_chain_netdev); + register_netdevice_notifier(&nf_tables_netdev_notifier); + return ret; } static void __exit nf_tables_netdev_exit(void) { + unregister_netdevice_notifier(&nf_tables_netdev_notifier); unregister_pernet_subsys(&nf_tables_netdev_net_ops); nft_unregister_chain_type(&nft_filter_chain_netdev); } -- cgit v1.2.3 From ffeedafbf0236f03aeb2e8db273b3e5ae5f5bc89 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 12 Jun 2015 19:39:12 -0700 Subject: bpf: introduce current->pid, tgid, uid, gid, comm accessors eBPF programs attached to kprobes need to filter based on current->pid, uid and other fields, so introduce helper functions: u64 bpf_get_current_pid_tgid(void) Return: current->tgid << 32 | current->pid u64 bpf_get_current_uid_gid(void) Return: current_gid << 32 | current_uid bpf_get_current_comm(char *buf, int size_of_buf) stores current->comm into buf They can be used from the programs attached to TC as well to classify packets based on current task fields. Update tracex2 example to print histogram of write syscalls for each process instead of aggregated for all. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 +++ include/uapi/linux/bpf.h | 19 +++++++++++++ kernel/bpf/core.c | 3 +++ kernel/bpf/helpers.c | 58 +++++++++++++++++++++++++++++++++++++++ kernel/trace/bpf_trace.c | 6 +++++ net/core/filter.c | 6 +++++ samples/bpf/bpf_helpers.h | 6 +++++ samples/bpf/tracex2_kern.c | 24 +++++++++++++---- samples/bpf/tracex2_user.c | 67 +++++++++++++++++++++++++++++++++++++++------- 9 files changed, 178 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2235aee8096a..1b9a3f5b27f6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -188,5 +188,8 @@ extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; extern const struct bpf_func_proto bpf_ktime_get_ns_proto; +extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; +extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; +extern const struct bpf_func_proto bpf_get_current_comm_proto; #endif /* _LINUX_BPF_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 602f05b7a275..29ef6f99e43d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -230,6 +230,25 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_clone_redirect, + + /** + * u64 bpf_get_current_pid_tgid(void) + * Return: current->tgid << 32 | current->pid + */ + BPF_FUNC_get_current_pid_tgid, + + /** + * u64 bpf_get_current_uid_gid(void) + * Return: current_gid << 32 | current_uid + */ + BPF_FUNC_get_current_uid_gid, + + /** + * bpf_get_current_comm(char *buf, int size_of_buf) + * stores current->comm into buf + * Return: 0 on success + */ + BPF_FUNC_get_current_comm, __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1e00aa3316dc..1fc45cc83076 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -730,6 +730,9 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; +const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; +const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; +const struct bpf_func_proto bpf_get_current_comm_proto __weak; /* Always built-in helper functions. */ const struct bpf_func_proto bpf_tail_call_proto = { diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 7ad5d8842d5b..1447ec09421e 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include /* If kernel subsystem is allowing eBPF programs to call this function, * inside its own verifier_ops->get_func_proto() callback it should return @@ -124,3 +126,59 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto = { .gpl_only = true, .ret_type = RET_INTEGER, }; + +static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct task_struct *task = current; + + if (!task) + return -EINVAL; + + return (u64) task->tgid << 32 | task->pid; +} + +const struct bpf_func_proto bpf_get_current_pid_tgid_proto = { + .func = bpf_get_current_pid_tgid, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct task_struct *task = current; + kuid_t uid; + kgid_t gid; + + if (!task) + return -EINVAL; + + current_uid_gid(&uid, &gid); + return (u64) from_kgid(&init_user_ns, gid) << 32 | + from_kuid(&init_user_ns, uid); +} + +const struct bpf_func_proto bpf_get_current_uid_gid_proto = { + .func = bpf_get_current_uid_gid, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5) +{ + struct task_struct *task = current; + char *buf = (char *) (long) r1; + + if (!task) + return -EINVAL; + + memcpy(buf, task->comm, min_t(size_t, size, sizeof(task->comm))); + return 0; +} + +const struct bpf_func_proto bpf_get_current_comm_proto = { + .func = bpf_get_current_comm, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_CONST_STACK_SIZE, +}; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 50c4015a8ad3..3a17638cdf46 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -162,6 +162,12 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_ktime_get_ns_proto; case BPF_FUNC_tail_call: return &bpf_tail_call_proto; + case BPF_FUNC_get_current_pid_tgid: + return &bpf_get_current_pid_tgid_proto; + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_get_current_comm: + return &bpf_get_current_comm_proto; case BPF_FUNC_trace_printk: /* diff --git a/net/core/filter.c b/net/core/filter.c index d271c06bf01f..20aa51ccbf9d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1459,6 +1459,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_l4_csum_replace_proto; case BPF_FUNC_clone_redirect: return &bpf_clone_redirect_proto; + case BPF_FUNC_get_current_pid_tgid: + return &bpf_get_current_pid_tgid_proto; + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_get_current_comm: + return &bpf_get_current_comm_proto; default: return sk_filter_func_proto(func_id); } diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index f531a0b3282d..bdf1c1607b80 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -25,6 +25,12 @@ static void (*bpf_tail_call)(void *ctx, void *map, int index) = (void *) BPF_FUNC_tail_call; static unsigned long long (*bpf_get_smp_processor_id)(void) = (void *) BPF_FUNC_get_smp_processor_id; +static unsigned long long (*bpf_get_current_pid_tgid)(void) = + (void *) BPF_FUNC_get_current_pid_tgid; +static unsigned long long (*bpf_get_current_uid_gid)(void) = + (void *) BPF_FUNC_get_current_uid_gid; +static int (*bpf_get_current_comm)(void *buf, int buf_size) = + (void *) BPF_FUNC_get_current_comm; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c index 19ec1cfc45db..dc50f4f2943f 100644 --- a/samples/bpf/tracex2_kern.c +++ b/samples/bpf/tracex2_kern.c @@ -62,11 +62,18 @@ static unsigned int log2l(unsigned long v) return log2(v); } +struct hist_key { + char comm[16]; + u64 pid_tgid; + u64 uid_gid; + u32 index; +}; + struct bpf_map_def SEC("maps") my_hist_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct hist_key), .value_size = sizeof(long), - .max_entries = 64, + .max_entries = 1024, }; SEC("kprobe/sys_write") @@ -75,11 +82,18 @@ int bpf_prog3(struct pt_regs *ctx) long write_size = ctx->dx; /* arg3 */ long init_val = 1; long *value; - u32 index = log2l(write_size); + struct hist_key key = {}; + + key.index = log2l(write_size); + key.pid_tgid = bpf_get_current_pid_tgid(); + key.uid_gid = bpf_get_current_uid_gid(); + bpf_get_current_comm(&key.comm, sizeof(key.comm)); - value = bpf_map_lookup_elem(&my_hist_map, &index); + value = bpf_map_lookup_elem(&my_hist_map, &key); if (value) __sync_fetch_and_add(value, 1); + else + bpf_map_update_elem(&my_hist_map, &key, &init_val, BPF_ANY); return 0; } char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index 91b8d0896fbb..cd0241c1447a 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "libbpf.h" #include "bpf_load.h" @@ -20,23 +21,42 @@ static void stars(char *str, long val, long max, int width) str[i] = '\0'; } -static void print_hist(int fd) +struct task { + char comm[16]; + __u64 pid_tgid; + __u64 uid_gid; +}; + +struct hist_key { + struct task t; + __u32 index; +}; + +#define SIZE sizeof(struct task) + +static void print_hist_for_pid(int fd, void *task) { - int key; + struct hist_key key = {}, next_key; + char starstr[MAX_STARS]; long value; long data[MAX_INDEX] = {}; - char starstr[MAX_STARS]; - int i; int max_ind = -1; long max_value = 0; + int i, ind; - for (key = 0; key < MAX_INDEX; key++) { - bpf_lookup_elem(fd, &key, &value); - data[key] = value; - if (value && key > max_ind) - max_ind = key; + while (bpf_get_next_key(fd, &key, &next_key) == 0) { + if (memcmp(&next_key, task, SIZE)) { + key = next_key; + continue; + } + bpf_lookup_elem(fd, &next_key, &value); + ind = next_key.index; + data[ind] = value; + if (value && ind > max_ind) + max_ind = ind; if (value > max_value) max_value = value; + key = next_key; } printf(" syscall write() stats\n"); @@ -48,6 +68,35 @@ static void print_hist(int fd) MAX_STARS, starstr); } } + +static void print_hist(int fd) +{ + struct hist_key key = {}, next_key; + static struct task tasks[1024]; + int task_cnt = 0; + int i; + + while (bpf_get_next_key(fd, &key, &next_key) == 0) { + int found = 0; + + for (i = 0; i < task_cnt; i++) + if (memcmp(&tasks[i], &next_key, SIZE) == 0) + found = 1; + if (!found) + memcpy(&tasks[task_cnt++], &next_key, SIZE); + key = next_key; + } + + for (i = 0; i < task_cnt; i++) { + printf("\npid %d cmd %s uid %d\n", + (__u32) tasks[i].pid_tgid, + tasks[i].comm, + (__u32) tasks[i].uid_gid); + print_hist_for_pid(fd, &tasks[i]); + } + +} + static void int_exit(int sig) { print_hist(map_fd[1]); -- cgit v1.2.3 From 0756ea3e85139d23a8148ebaa95411c2f0aa4f11 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 12 Jun 2015 19:39:13 -0700 Subject: bpf: allow networking programs to use bpf_trace_printk() for debugging bpf_trace_printk() is a helper function used to debug eBPF programs. Let socket and TC programs use it as well. Note, it's DEBUG ONLY helper. If it's used in the program, the kernel will print warning banner to make sure users don't use it in production. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + kernel/bpf/core.c | 4 ++++ kernel/trace/bpf_trace.c | 20 ++++++++++++-------- net/core/filter.c | 2 ++ 4 files changed, 19 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1b9a3f5b27f6..4383476a0d48 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -150,6 +150,7 @@ struct bpf_array { u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); void bpf_prog_array_map_clear(struct bpf_map *map); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); +const struct bpf_func_proto *bpf_get_trace_printk_proto(void); #ifdef CONFIG_BPF_SYSCALL void bpf_register_prog_type(struct bpf_prog_type_list *tl); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1fc45cc83076..c5bedc82bc1c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -733,6 +733,10 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; const struct bpf_func_proto bpf_get_current_comm_proto __weak; +const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) +{ + return NULL; +} /* Always built-in helper functions. */ const struct bpf_func_proto bpf_tail_call_proto = { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 3a17638cdf46..4f9b5d41869b 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -147,6 +147,17 @@ static const struct bpf_func_proto bpf_trace_printk_proto = { .arg2_type = ARG_CONST_STACK_SIZE, }; +const struct bpf_func_proto *bpf_get_trace_printk_proto(void) +{ + /* + * this program might be calling bpf_trace_printk, + * so allocate per-cpu printk buffers + */ + trace_printk_init_buffers(); + + return &bpf_trace_printk_proto; +} + static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -168,15 +179,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_get_current_uid_gid_proto; case BPF_FUNC_get_current_comm: return &bpf_get_current_comm_proto; - case BPF_FUNC_trace_printk: - /* - * this program might be calling bpf_trace_printk, - * so allocate per-cpu printk buffers - */ - trace_printk_init_buffers(); - - return &bpf_trace_printk_proto; + return bpf_get_trace_printk_proto(); default: return NULL; } diff --git a/net/core/filter.c b/net/core/filter.c index 20aa51ccbf9d..65ff107d3d29 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1442,6 +1442,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) return &bpf_tail_call_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; + case BPF_FUNC_trace_printk: + return bpf_get_trace_printk_proto(); default: return NULL; } -- cgit v1.2.3 From 9464ca650008615d28d9aaf7de99b4edf61f0109 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 12 Jun 2015 19:44:48 -0700 Subject: net: make u64_stats_init() a function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using a function instead of a macro is cleaner and remove following W=1 warnings (extract) In file included from net/ipv6/ip6_vti.c:29:0: net/ipv6/ip6_vti.c: In function ‘vti6_dev_init_gen’: include/linux/netdevice.h:2029:18: warning: variable ‘stat’ set but not used [-Wunused-but-set-variable] typeof(type) *stat; \ ^ net/ipv6/ip6_vti.c:862:16: note: in expansion of macro ‘netdev_alloc_pcpu_stats’ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); ^ CC [M] net/ipv6/sit.o In file included from net/ipv6/sit.c:30:0: net/ipv6/sit.c: In function ‘ipip6_tunnel_init’: include/linux/netdevice.h:2029:18: warning: variable ‘stat’ set but not used [-Wunused-but-set-variable] typeof(type) *stat; \ ^ Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/u64_stats_sync.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 4b4439e75f45..df89c9bcba7d 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -68,11 +68,12 @@ struct u64_stats_sync { }; +static inline void u64_stats_init(struct u64_stats_sync *syncp) +{ #if BITS_PER_LONG == 32 && defined(CONFIG_SMP) -# define u64_stats_init(syncp) seqcount_init(syncp.seq) -#else -# define u64_stats_init(syncp) do { } while (0) + seqcount_init(&syncp->seq); #endif +} static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { -- cgit v1.2.3 From 254cb6dbfd8894743fbf814ec856ccd0874af691 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sun, 14 Jun 2015 16:36:34 +0300 Subject: bonding: export slave's actor_oper_port_state via sysfs and netlink Export the actor_oper_port_state of each port via sysfs and netlink. In 802.3ad mode it is valuable for the user to be able to check the actor_oper state, it is already exported via bond's proc entry. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/bonding/bond_netlink.c | 10 +++++++++- drivers/net/bonding/bond_sysfs_slave.c | 16 ++++++++++++++++ include/uapi/linux/if_link.h | 1 + 3 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index f7015eb4f8db..a0e600db4236 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -28,6 +28,7 @@ static size_t bond_get_slave_size(const struct net_device *bond_dev, nla_total_size(MAX_ADDR_LEN) + /* IFLA_BOND_SLAVE_PERM_HWADDR */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_QUEUE_ID */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_AGGREGATOR_ID */ + nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE */ 0; } @@ -56,12 +57,19 @@ static int bond_fill_slave_info(struct sk_buff *skb, if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { const struct aggregator *agg; + const struct port *ad_port; + ad_port = &SLAVE_AD_INFO(slave)->port; agg = SLAVE_AD_INFO(slave)->port.aggregator; - if (agg) + if (agg) { if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, agg->aggregator_identifier)) goto nla_put_failure; + if (nla_put_u8(skb, + IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, + ad_port->actor_oper_port_state)) + goto nla_put_failure; + } } return 0; diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c index 23618a831612..f6c197cee669 100644 --- a/drivers/net/bonding/bond_sysfs_slave.c +++ b/drivers/net/bonding/bond_sysfs_slave.c @@ -80,6 +80,21 @@ static ssize_t ad_aggregator_id_show(struct slave *slave, char *buf) } static SLAVE_ATTR_RO(ad_aggregator_id); +static ssize_t ad_actor_oper_port_state_show(struct slave *slave, char *buf) +{ + const struct port *ad_port; + + if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { + ad_port = &SLAVE_AD_INFO(slave)->port; + if (ad_port->aggregator) + return sprintf(buf, "%u\n", + ad_port->actor_oper_port_state); + } + + return sprintf(buf, "N/A\n"); +} +static SLAVE_ATTR_RO(ad_actor_oper_port_state); + static const struct slave_attribute *slave_attrs[] = { &slave_attr_state, &slave_attr_mii_status, @@ -87,6 +102,7 @@ static const struct slave_attribute *slave_attrs[] = { &slave_attr_perm_hwaddr, &slave_attr_queue_id, &slave_attr_ad_aggregator_id, + &slave_attr_ad_actor_oper_port_state, NULL }; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 1737b7a8272b..1b3e357223f2 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -456,6 +456,7 @@ enum { IFLA_BOND_SLAVE_PERM_HWADDR, IFLA_BOND_SLAVE_QUEUE_ID, IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, + IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, __IFLA_BOND_SLAVE_MAX, }; -- cgit v1.2.3 From 46ea297ed67cdeeb0142244873458b911037d0ba Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sun, 14 Jun 2015 16:36:35 +0300 Subject: bonding: export slave's partner_oper_port_state via sysfs and netlink Export the partner_oper_port_state of each port via sysfs and netlink. In 802.3ad mode it is valuable for the user to be able to check the partner_oper state, it is already exported via bond's proc entry. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/bonding/bond_netlink.c | 5 +++++ drivers/net/bonding/bond_sysfs_slave.c | 16 ++++++++++++++++ include/uapi/linux/if_link.h | 1 + 3 files changed, 22 insertions(+) (limited to 'include') diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index a0e600db4236..5580fcde738f 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -29,6 +29,7 @@ static size_t bond_get_slave_size(const struct net_device *bond_dev, nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_QUEUE_ID */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_AGGREGATOR_ID */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE */ + nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE */ 0; } @@ -69,6 +70,10 @@ static int bond_fill_slave_info(struct sk_buff *skb, IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, ad_port->actor_oper_port_state)) goto nla_put_failure; + if (nla_put_u16(skb, + IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, + ad_port->partner_oper.port_state)) + goto nla_put_failure; } } diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c index f6c197cee669..7d16c51e6913 100644 --- a/drivers/net/bonding/bond_sysfs_slave.c +++ b/drivers/net/bonding/bond_sysfs_slave.c @@ -95,6 +95,21 @@ static ssize_t ad_actor_oper_port_state_show(struct slave *slave, char *buf) } static SLAVE_ATTR_RO(ad_actor_oper_port_state); +static ssize_t ad_partner_oper_port_state_show(struct slave *slave, char *buf) +{ + const struct port *ad_port; + + if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { + ad_port = &SLAVE_AD_INFO(slave)->port; + if (ad_port->aggregator) + return sprintf(buf, "%u\n", + ad_port->partner_oper.port_state); + } + + return sprintf(buf, "N/A\n"); +} +static SLAVE_ATTR_RO(ad_partner_oper_port_state); + static const struct slave_attribute *slave_attrs[] = { &slave_attr_state, &slave_attr_mii_status, @@ -103,6 +118,7 @@ static const struct slave_attribute *slave_attrs[] = { &slave_attr_queue_id, &slave_attr_ad_aggregator_id, &slave_attr_ad_actor_oper_port_state, + &slave_attr_ad_partner_oper_port_state, NULL }; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 1b3e357223f2..510efb360580 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -457,6 +457,7 @@ enum { IFLA_BOND_SLAVE_QUEUE_ID, IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, + IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, __IFLA_BOND_SLAVE_MAX, }; -- cgit v1.2.3 From 47d8417f5914012c794684f651213ffae1b91619 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 15 Jun 2015 17:58:58 +0300 Subject: net/mlx4_core: Add sink counter Reserve the last valid counter index for "sink" counter, when a new counter cannot be allocated, the driver will use this counter. In order to avoid allocating this counter on any other flow, fix the indices bitmap allocation range, and reserve the sink counter index. Add macro for the sink counter index and replace all appearences of the index with the macro. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/qp.c | 3 ++- drivers/net/ethernet/mellanox/mlx4/en_resources.c | 2 +- drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++++---- include/linux/mlx4/device.h | 1 + 4 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 02fc91c68027..c7d916fab68a 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1544,7 +1544,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, dev->counters[qp->port - 1]; optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX; } else - context->pri_path.counter_index = 0xff; + context->pri_path.counter_index = + MLX4_SINK_COUNTER_INDEX(dev->dev); if (qp->flags & MLX4_IB_QP_NETIF) { mlx4_ib_steer_qp_reg(dev, qp, 1); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index 34f2fdf4fe5d..97bcb9135f8d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -66,7 +66,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, context->pri_path.sched_queue |= user_prio << 3; context->pri_path.feup = MLX4_FEUP_FORCE_ETH_UP; } - context->pri_path.counter_index = 0xff; + context->pri_path.counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev); context->cqn_send = cpu_to_be32(cqn); context->cqn_recv = cpu_to_be32(cqn); context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 1072dc1054dd..2bf54687f9fb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -479,7 +479,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) } } - dev->caps.max_counters = 1 << ilog2(dev_cap->max_counters); + dev->caps.max_counters = dev_cap->max_counters; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] = @@ -2193,13 +2193,16 @@ err_free_icm: static int mlx4_init_counters_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - int nent; + int nent_pow2; if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) return -ENOENT; - nent = dev->caps.max_counters; - return mlx4_bitmap_init(&priv->counters_bitmap, nent, nent - 1, 0, 0); + nent_pow2 = roundup_pow_of_two(dev->caps.max_counters); + /* reserve last counter index for sink counter */ + return mlx4_bitmap_init(&priv->counters_bitmap, nent_pow2, + nent_pow2 - 1, 0, + nent_pow2 - dev->caps.max_counters + 1); } static void mlx4_cleanup_counters_table(struct mlx4_dev *dev) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ad31e476873f..312c50420dde 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -957,6 +957,7 @@ struct mlx4_mad_ifc { ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) #define MLX4_INVALID_SLAVE_ID 0xFF +#define MLX4_SINK_COUNTER_INDEX(dev) (dev->caps.max_counters - 1) void handle_port_mgmt_change_event(struct work_struct *work); -- cgit v1.2.3 From 6de5f7f6a1fa2288552d46b3effbb6d5571413e5 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 15 Jun 2015 17:59:02 +0300 Subject: net/mlx4_core: Allocate default counter per port Default counter per port will be allocated at the mlx4 core driver load. Every QP opened by the Ethernet driver will be attached to the port's default counter. This is an infrastructure step to collect VF statistics from the PF. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 5 ++ drivers/net/ethernet/mellanox/mlx4/en_resources.c | 2 +- drivers/net/ethernet/mellanox/mlx4/main.c | 71 ++++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 + drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 + include/linux/mlx4/device.h | 1 + 6 files changed, 77 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 98efb5842fca..048fca0ca9a4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1597,6 +1597,9 @@ int mlx4_en_start_port(struct net_device *dev) } mdev->mac_removed[priv->port] = 0; + priv->counter_index = + mlx4_get_default_counter_index(mdev->dev, priv->port); + err = mlx4_en_config_rss_steer(priv); if (err) { en_err(priv, "Failed configuring rss steering\n"); @@ -1755,6 +1758,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) /* Set port as not active */ priv->port_up = false; + priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev); /* Promsicuous mode */ if (mdev->dev->caps.steering_mode == @@ -2778,6 +2782,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv = netdev_priv(dev); memset(priv, 0, sizeof(struct mlx4_en_priv)); + priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev); spin_lock_init(&priv->stats_lock); INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode); INIT_WORK(&priv->watchdog_task, mlx4_en_restart); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index 97bcb9135f8d..e482fa1bb741 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -66,7 +66,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, context->pri_path.sched_queue |= user_prio << 3; context->pri_path.feup = MLX4_FEUP_FORCE_ETH_UP; } - context->pri_path.counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev); + context->pri_path.counter_index = priv->counter_index; context->cqn_send = cpu_to_be32(cqn); context->cqn_recv = cpu_to_be32(cqn); context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 8b27bfcf809d..4e69cf52a579 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2219,6 +2219,47 @@ static void mlx4_cleanup_counters_table(struct mlx4_dev *dev) mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap); } +static void mlx4_cleanup_default_counters(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int port; + + for (port = 0; port < dev->caps.num_ports; port++) + if (priv->def_counter[port] != -1) + mlx4_counter_free(dev, priv->def_counter[port]); +} + +static int mlx4_allocate_default_counters(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int port, err = 0; + u32 idx; + + for (port = 0; port < dev->caps.num_ports; port++) + priv->def_counter[port] = -1; + + for (port = 0; port < dev->caps.num_ports; port++) { + err = mlx4_counter_alloc(dev, &idx); + + if (!err || err == -ENOSPC) { + priv->def_counter[port] = idx; + } else if (err == -ENOENT) { + err = 0; + continue; + } else { + mlx4_err(dev, "%s: failed to allocate default counter port %d err %d\n", + __func__, port + 1, err); + mlx4_cleanup_default_counters(dev); + return err; + } + + mlx4_dbg(dev, "%s: default counter index %d for port %d\n", + __func__, priv->def_counter[port], port + 1); + } + + return err; +} + int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -2227,8 +2268,10 @@ int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) return -ENOENT; *idx = mlx4_bitmap_alloc(&priv->counters_bitmap); - if (*idx == -1) - return -ENOMEM; + if (*idx == -1) { + *idx = MLX4_SINK_COUNTER_INDEX(dev); + return -ENOSPC; + } return 0; } @@ -2275,6 +2318,9 @@ void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx) if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) return; + if (idx == MLX4_SINK_COUNTER_INDEX(dev)) + return; + __mlx4_clear_if_stat(dev, idx); mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR); @@ -2296,6 +2342,14 @@ void mlx4_counter_free(struct mlx4_dev *dev, u32 idx) } EXPORT_SYMBOL_GPL(mlx4_counter_free); +int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + return priv->def_counter[port - 1]; +} +EXPORT_SYMBOL_GPL(mlx4_get_default_counter_index); + void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -2439,6 +2493,12 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) } } + err = mlx4_allocate_default_counters(dev); + if (err) { + mlx4_err(dev, "Failed to allocate default counters, aborting\n"); + goto err_counters_table_free; + } + if (!mlx4_is_slave(dev)) { for (port = 1; port <= dev->caps.num_ports; port++) { ib_port_default_caps = 0; @@ -2470,13 +2530,16 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) if (err) { mlx4_err(dev, "Failed to set port %d, aborting\n", port); - goto err_counters_table_free; + goto err_default_countes_free; } } } return 0; +err_default_countes_free: + mlx4_cleanup_default_counters(dev); + err_counters_table_free: if (!mlx4_is_slave(dev)) mlx4_cleanup_counters_table(dev); @@ -3212,6 +3275,7 @@ err_port: for (--port; port >= 1; --port) mlx4_cleanup_port_info(&priv->port[port]); + mlx4_cleanup_default_counters(dev); if (!mlx4_is_slave(dev)) mlx4_cleanup_counters_table(dev); mlx4_cleanup_qp_table(dev); @@ -3511,6 +3575,7 @@ static void mlx4_unload_one(struct pci_dev *pdev) mlx4_free_resource_tracker(dev, RES_TR_FREE_SLAVES_ONLY); + mlx4_cleanup_default_counters(dev); if (!mlx4_is_slave(dev)) mlx4_cleanup_counters_table(dev); mlx4_cleanup_qp_table(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 8d2d3594bf8f..e9e94bc0e75d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -876,6 +876,7 @@ struct mlx4_priv { struct mlx4_qp_table qp_table; struct mlx4_mcg_table mcg_table; struct mlx4_bitmap counters_bitmap; + int def_counter[MLX4_MAX_PORTS]; struct mlx4_catas_err catas_err; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index edd8fd69ec9a..f6c3e128cc0c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -582,6 +582,7 @@ struct mlx4_en_priv { int base_tx_qpn; struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE]; struct hwtstamp_config hwtstamp_config; + u32 counter_index; #ifdef CONFIG_MLX4_EN_DCB struct ieee_ets ets; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 312c50420dde..4820080ac394 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1348,6 +1348,7 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port); int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); void mlx4_counter_free(struct mlx4_dev *dev, u32 idx); +int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port); void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port); -- cgit v1.2.3 From 9616982f3fcc9e6577d7f41009c4ef2df19a71ec Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 15 Jun 2015 17:59:05 +0300 Subject: net/mlx4_core: Add helper to query counters This is an infrastructure step for querying VF and PF counters. This code was in the IB driver, move it to the mlx4 core driver so it will be accessible for more use cases. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/mad.c | 32 +++++------------- drivers/net/ethernet/mellanox/mlx4/cmd.c | 57 ++++++++++++++++++++++++++++++++ include/linux/mlx4/cmd.h | 3 ++ include/linux/mlx4/device.h | 8 +++++ 4 files changed, 76 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index bc698b14683f..bc09b4e1f57c 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -64,14 +64,6 @@ enum { #define GUID_TBL_BLK_NUM_ENTRIES 8 #define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES) -/* Counters should be saturate once they reach their maximum value */ -#define ASSIGN_32BIT_COUNTER(counter, value) do {\ - if ((value) > U32_MAX) \ - counter = cpu_to_be32(U32_MAX); \ - else \ - counter = cpu_to_be32(value); \ -} while (0) - struct mlx4_mad_rcv_buf { struct ib_grh grh; u8 payload[256]; @@ -828,31 +820,25 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad) { - struct mlx4_cmd_mailbox *mailbox; + struct mlx4_counter counter_stats; struct mlx4_ib_dev *dev = to_mdev(ibdev); int err; - u32 inmod = dev->counters[port_num - 1].index & 0xffff; - u8 mode; if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) return -EINVAL; - mailbox = mlx4_alloc_cmd_mailbox(dev->dev); - if (IS_ERR(mailbox)) - return IB_MAD_RESULT_FAILURE; - - err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0, - MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, - MLX4_CMD_WRAPPED); + memset(&counter_stats, 0, sizeof(counter_stats)); + err = mlx4_get_counter_stats(dev->dev, + dev->counters[port_num - 1].index, + &counter_stats, 0); if (err) err = IB_MAD_RESULT_FAILURE; else { memset(out_mad->data, 0, sizeof out_mad->data); - mode = ((struct mlx4_counter *)mailbox->buf)->counter_mode; - switch (mode & 0xf) { + switch (counter_stats.counter_mode & 0xf) { case 0: - edit_counter(mailbox->buf, - (void *)(out_mad->data + 40)); + edit_counter(&counter_stats, + (void *)(out_mad->data + 40)); err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; break; default: @@ -860,8 +846,6 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, } } - mlx4_free_cmd_mailbox(dev->dev, mailbox); - return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 68ae765873a9..44b8f7715ade 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -49,6 +49,7 @@ #include "mlx4.h" #include "fw.h" #include "fw_qos.h" +#include "mlx4_stats.h" #define CMD_POLL_TOKEN 0xffff #define INBOX_MASK 0xffffffffffffff00ULL @@ -3166,6 +3167,62 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat } EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state); +int mlx4_get_counter_stats(struct mlx4_dev *dev, int counter_index, + struct mlx4_counter *counter_stats, int reset) +{ + struct mlx4_cmd_mailbox *mailbox = NULL; + struct mlx4_counter *tmp_counter; + int err; + u32 if_stat_in_mod; + + if (!counter_stats) + return -EINVAL; + + if (counter_index == MLX4_SINK_COUNTER_INDEX(dev)) + return 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memset(mailbox->buf, 0, sizeof(struct mlx4_counter)); + if_stat_in_mod = counter_index; + if (reset) + if_stat_in_mod |= MLX4_QUERY_IF_STAT_RESET; + err = mlx4_cmd_box(dev, 0, mailbox->dma, + if_stat_in_mod, 0, + MLX4_CMD_QUERY_IF_STAT, + MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + if (err) { + mlx4_dbg(dev, "%s: failed to read statistics for counter index %d\n", + __func__, counter_index); + goto if_stat_out; + } + tmp_counter = (struct mlx4_counter *)mailbox->buf; + counter_stats->counter_mode = tmp_counter->counter_mode; + if (counter_stats->counter_mode == 0) { + counter_stats->rx_frames = + cpu_to_be64(be64_to_cpu(counter_stats->rx_frames) + + be64_to_cpu(tmp_counter->rx_frames)); + counter_stats->tx_frames = + cpu_to_be64(be64_to_cpu(counter_stats->tx_frames) + + be64_to_cpu(tmp_counter->tx_frames)); + counter_stats->rx_bytes = + cpu_to_be64(be64_to_cpu(counter_stats->rx_bytes) + + be64_to_cpu(tmp_counter->rx_bytes)); + counter_stats->tx_bytes = + cpu_to_be64(be64_to_cpu(counter_stats->tx_bytes) + + be64_to_cpu(tmp_counter->tx_bytes)); + } + +if_stat_out: + mlx4_free_cmd_mailbox(dev, mailbox); + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_get_counter_stats); + int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index f62e7cf227c6..5dffc869988b 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -35,6 +35,7 @@ #include #include +#include enum { /* initialization and general commands */ @@ -300,6 +301,8 @@ static inline int mlx4_cmd_imm(struct mlx4_dev *dev, u64 in_param, u64 *out_para struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev); void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox); +int mlx4_get_counter_stats(struct mlx4_dev *dev, int counter_index, + struct mlx4_counter *counter_stats, int reset); u32 mlx4_comm_get_version(void); int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 4820080ac394..efe80c754b2f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -771,6 +771,14 @@ union mlx4_ext_av { struct mlx4_eth_av eth; }; +/* Counters should be saturate once they reach their maximum value */ +#define ASSIGN_32BIT_COUNTER(counter, value) do { \ + if ((value) > U32_MAX) \ + counter = cpu_to_be32(U32_MAX); \ + else \ + counter = cpu_to_be32(value); \ +} while (0) + struct mlx4_counter { u8 reserved1[3]; u8 counter_mode; -- cgit v1.2.3 From 3b766cd832328fcb87db3507e7b98cf42f21689d Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 15 Jun 2015 17:59:07 +0300 Subject: net/core: Add reading VF statistics through the PF netdevice Add ndo_get_vf_stats where the PF retrieves and fills the VFs traffic statistics. We encode the VF stats in a nested manner to allow for future extensions. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/if_link.h | 9 ++++++++ include/linux/netdevice.h | 4 ++++ include/uapi/linux/if_link.h | 13 +++++++++++ net/core/rtnetlink.c | 51 ++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 75 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index da4929927f69..ae5d0d22955d 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -5,6 +5,15 @@ /* We don't want this structure exposed to user space */ +struct ifla_vf_stats { + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 broadcast; + __u64 multicast; +}; + struct ifla_vf_info { __u32 vf; __u8 mac[32]; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6f5f71ff5169..e20979dfd6a9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1100,6 +1100,10 @@ struct net_device_ops { struct ifla_vf_info *ivf); int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state); + int (*ndo_get_vf_stats)(struct net_device *dev, + int vf, + struct ifla_vf_stats + *vf_stats); int (*ndo_set_vf_port)(struct net_device *dev, int vf, struct nlattr *port[]); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 510efb360580..2c7e8e3d3981 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -484,6 +484,7 @@ enum { IFLA_VF_RSS_QUERY_EN, /* RSS Redirection Table and Hash Key query * on/off switch */ + IFLA_VF_STATS, /* network device statistics */ __IFLA_VF_MAX, }; @@ -533,6 +534,18 @@ struct ifla_vf_rss_query_en { __u32 setting; }; +enum { + IFLA_VF_STATS_RX_PACKETS, + IFLA_VF_STATS_TX_PACKETS, + IFLA_VF_STATS_RX_BYTES, + IFLA_VF_STATS_TX_BYTES, + IFLA_VF_STATS_BROADCAST, + IFLA_VF_STATS_MULTICAST, + __IFLA_VF_STATS_MAX, +}; + +#define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1) + /* VF ports management section * * Nested layout of set/get msg is: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 077b6d280371..2d102ce1474f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -819,7 +819,19 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, nla_total_size(sizeof(struct ifla_vf_spoofchk)) + nla_total_size(sizeof(struct ifla_vf_rate)) + nla_total_size(sizeof(struct ifla_vf_link_state)) + - nla_total_size(sizeof(struct ifla_vf_rss_query_en))); + nla_total_size(sizeof(struct ifla_vf_rss_query_en)) + + /* IFLA_VF_STATS_RX_PACKETS */ + nla_total_size(sizeof(__u64)) + + /* IFLA_VF_STATS_TX_PACKETS */ + nla_total_size(sizeof(__u64)) + + /* IFLA_VF_STATS_RX_BYTES */ + nla_total_size(sizeof(__u64)) + + /* IFLA_VF_STATS_TX_BYTES */ + nla_total_size(sizeof(__u64)) + + /* IFLA_VF_STATS_BROADCAST */ + nla_total_size(sizeof(__u64)) + + /* IFLA_VF_STATS_MULTICAST */ + nla_total_size(sizeof(__u64))); return size; } else return 0; @@ -1123,7 +1135,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, && (ext_filter_mask & RTEXT_FILTER_VF)) { int i; - struct nlattr *vfinfo, *vf; + struct nlattr *vfinfo, *vf, *vfstats; int num_vfs = dev_num_vf(dev->dev.parent); vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); @@ -1138,6 +1150,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_spoofchk vf_spoofchk; struct ifla_vf_link_state vf_linkstate; struct ifla_vf_rss_query_en vf_rss_query_en; + struct ifla_vf_stats vf_stats; /* * Not all SR-IOV capable drivers support the @@ -1190,6 +1203,30 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, sizeof(vf_rss_query_en), &vf_rss_query_en)) goto nla_put_failure; + memset(&vf_stats, 0, sizeof(vf_stats)); + if (dev->netdev_ops->ndo_get_vf_stats) + dev->netdev_ops->ndo_get_vf_stats(dev, i, + &vf_stats); + vfstats = nla_nest_start(skb, IFLA_VF_STATS); + if (!vfstats) { + nla_nest_cancel(skb, vf); + nla_nest_cancel(skb, vfinfo); + goto nla_put_failure; + } + if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS, + vf_stats.rx_packets) || + nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS, + vf_stats.tx_packets) || + nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES, + vf_stats.rx_bytes) || + nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES, + vf_stats.tx_bytes) || + nla_put_u64(skb, IFLA_VF_STATS_BROADCAST, + vf_stats.broadcast) || + nla_put_u64(skb, IFLA_VF_STATS_MULTICAST, + vf_stats.multicast)) + goto nla_put_failure; + nla_nest_end(skb, vfstats); nla_nest_end(skb, vf); } nla_nest_end(skb, vfinfo); @@ -1303,6 +1340,16 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) }, [IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) }, [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) }, + [IFLA_VF_STATS] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = { + [IFLA_VF_STATS_RX_PACKETS] = { .type = NLA_U64 }, + [IFLA_VF_STATS_TX_PACKETS] = { .type = NLA_U64 }, + [IFLA_VF_STATS_RX_BYTES] = { .type = NLA_U64 }, + [IFLA_VF_STATS_TX_BYTES] = { .type = NLA_U64 }, + [IFLA_VF_STATS_BROADCAST] = { .type = NLA_U64 }, + [IFLA_VF_STATS_MULTICAST] = { .type = NLA_U64 }, }; static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { -- cgit v1.2.3 From 62a890557f57e6cbebe9cc6c32aef045405d4fa2 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 15 Jun 2015 17:59:08 +0300 Subject: net/mlx4_en: Support ndo_get_vf_stats Implement the ndo to gather VF statistics through the PF. All counters related to this VF are stored in a per slave list, run over the slave's list and collect all statistics. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 30 ++++++++++++++ drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 10 +++++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 + .../net/ethernet/mellanox/mlx4/resource_tracker.c | 48 ++++++++++++++++++++++ include/linux/mlx4/cmd.h | 3 ++ 5 files changed, 93 insertions(+) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 44b8f7715ade..82040137d7d9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -3223,6 +3223,36 @@ if_stat_out: } EXPORT_SYMBOL_GPL(mlx4_get_counter_stats); +int mlx4_get_vf_stats(struct mlx4_dev *dev, int port, int vf_idx, + struct ifla_vf_stats *vf_stats) +{ + struct mlx4_counter tmp_vf_stats; + int slave; + int err = 0; + + if (!vf_stats) + return -EINVAL; + + if (!mlx4_is_master(dev)) + return -EPROTONOSUPPORT; + + slave = mlx4_get_slave_indx(dev, vf_idx); + if (slave < 0) + return -EINVAL; + + port = mlx4_slaves_closest_port(dev, slave, port); + err = mlx4_calc_vf_counters(dev, slave, port, &tmp_vf_stats); + if (!err && tmp_vf_stats.counter_mode == 0) { + vf_stats->rx_packets = be64_to_cpu(tmp_vf_stats.rx_frames); + vf_stats->tx_packets = be64_to_cpu(tmp_vf_stats.tx_frames); + vf_stats->rx_bytes = be64_to_cpu(tmp_vf_stats.rx_bytes); + vf_stats->tx_bytes = be64_to_cpu(tmp_vf_stats.tx_bytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_get_vf_stats); + int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index f9142f22d630..77179d7ae4cc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2292,6 +2292,15 @@ static int mlx4_en_set_vf_link_state(struct net_device *dev, int vf, int link_st return mlx4_set_vf_link_state(mdev->dev, en_priv->port, vf, link_state); } +static int mlx4_en_get_vf_stats(struct net_device *dev, int vf, + struct ifla_vf_stats *vf_stats) +{ + struct mlx4_en_priv *en_priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = en_priv->mdev; + + return mlx4_get_vf_stats(mdev->dev, en_priv->port, vf, vf_stats); +} + #define PORT_ID_BYTE_LEN 8 static int mlx4_en_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid) @@ -2489,6 +2498,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_set_vf_rate = mlx4_en_set_vf_rate, .ndo_set_vf_spoofchk = mlx4_en_set_vf_spoofchk, .ndo_set_vf_link_state = mlx4_en_set_vf_link_state, + .ndo_get_vf_stats = mlx4_en_get_vf_stats, .ndo_get_vf_config = mlx4_en_get_vf_config, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = mlx4_en_netpoll, diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index e9e94bc0e75d..a092c5c34d43 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1010,6 +1010,8 @@ int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx); +int mlx4_calc_vf_counters(struct mlx4_dev *dev, int slave, int port, + struct mlx4_counter *data); int __mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn); void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 73db584bc240..731423ca575d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -46,6 +46,7 @@ #include "mlx4.h" #include "fw.h" +#include "mlx4_stats.h" #define MLX4_MAC_VALID (1ull << 63) #define MLX4_PF_COUNTERS_PER_PORT 2 @@ -1147,6 +1148,53 @@ static struct res_common *alloc_tr(u64 id, enum mlx4_resource type, int slave, return ret; } +int mlx4_calc_vf_counters(struct mlx4_dev *dev, int slave, int port, + struct mlx4_counter *data) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct res_common *tmp; + struct res_counter *counter; + int *counters_arr; + int i = 0, err = 0; + + memset(data, 0, sizeof(*data)); + + counters_arr = kmalloc_array(dev->caps.max_counters, + sizeof(*counters_arr), GFP_KERNEL); + if (!counters_arr) + return -ENOMEM; + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry(tmp, + &tracker->slave_list[slave].res_list[RES_COUNTER], + list) { + counter = container_of(tmp, struct res_counter, com); + if (counter->port == port) { + counters_arr[i] = (int)tmp->res_id; + i++; + } + } + spin_unlock_irq(mlx4_tlock(dev)); + counters_arr[i] = -1; + + i = 0; + + while (counters_arr[i] != -1) { + err = mlx4_get_counter_stats(dev, counters_arr[i], data, + 0); + if (err) { + memset(data, 0, sizeof(*data)); + goto table_changed; + } + i++; + } + +table_changed: + kfree(counters_arr); + return 0; +} + static int add_res_range(struct mlx4_dev *dev, int slave, u64 base, int count, enum mlx4_resource type, int extra) { diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 5dffc869988b..58391f2e0414 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -36,6 +36,7 @@ #include #include #include +#include enum { /* initialization and general commands */ @@ -303,6 +304,8 @@ void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbo int mlx4_get_counter_stats(struct mlx4_dev *dev, int counter_index, struct mlx4_counter *counter_stats, int reset); +int mlx4_get_vf_stats(struct mlx4_dev *dev, int port, int vf_idx, + struct ifla_vf_stats *vf_stats); u32 mlx4_comm_get_version(void); int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); -- cgit v1.2.3 From eb4cb008529ca08e0d8c0fa54e8f739520197a65 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 15 Jun 2015 11:26:18 -0400 Subject: sock_diag: define destruction multicast groups These groups will contain socket-destruction events for AF_INET/AF_INET6, IPPROTO_TCP/IPPROTO_UDP. Near the end of socket destruction, a check for listeners is performed. In the presence of a listener, rather than completely cleanup the socket, a unit of work will be added to a private work queue which will first broadcast information about the socket and then finish the cleanup operation. Signed-off-by: Craig Gallek Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 42 +++++++++++++++++++++ include/net/sock.h | 1 + include/uapi/linux/sock_diag.h | 10 +++++ net/core/sock.c | 11 +++++- net/core/sock_diag.c | 85 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 148 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 083ac388098e..fddebc617469 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -1,7 +1,10 @@ #ifndef __SOCK_DIAG_H__ #define __SOCK_DIAG_H__ +#include #include +#include +#include #include struct sk_buff; @@ -11,6 +14,7 @@ struct sock; struct sock_diag_handler { __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); + int (*get_info)(struct sk_buff *skb, struct sock *sk); }; int sock_diag_register(const struct sock_diag_handler *h); @@ -26,4 +30,42 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, struct sk_buff *skb, int attrtype); +static inline +enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk) +{ + switch (sk->sk_family) { + case AF_INET: + switch (sk->sk_protocol) { + case IPPROTO_TCP: + return SKNLGRP_INET_TCP_DESTROY; + case IPPROTO_UDP: + return SKNLGRP_INET_UDP_DESTROY; + default: + return SKNLGRP_NONE; + } + case AF_INET6: + switch (sk->sk_protocol) { + case IPPROTO_TCP: + return SKNLGRP_INET6_TCP_DESTROY; + case IPPROTO_UDP: + return SKNLGRP_INET6_UDP_DESTROY; + default: + return SKNLGRP_NONE; + } + default: + return SKNLGRP_NONE; + } +} + +static inline +bool sock_diag_has_destroy_listeners(const struct sock *sk) +{ + const struct net *n = sock_net(sk); + const enum sknetlink_groups group = sock_diag_destroy_group(sk); + + return group != SKNLGRP_NONE && n->diag_nlsk && + netlink_has_listeners(n->diag_nlsk, group); +} +void sock_diag_broadcast_destroy(struct sock *sk); + #endif diff --git a/include/net/sock.h b/include/net/sock.h index 26c1c3171e00..3e8258699270 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1518,6 +1518,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern); void sk_free(struct sock *sk); +void sk_destruct(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h index b00e29efb161..49230d36f9ce 100644 --- a/include/uapi/linux/sock_diag.h +++ b/include/uapi/linux/sock_diag.h @@ -23,4 +23,14 @@ enum { SK_MEMINFO_VARS, }; +enum sknetlink_groups { + SKNLGRP_NONE, + SKNLGRP_INET_TCP_DESTROY, + SKNLGRP_INET_UDP_DESTROY, + SKNLGRP_INET6_TCP_DESTROY, + SKNLGRP_INET6_UDP_DESTROY, + __SKNLGRP_MAX, +}; +#define SKNLGRP_MAX (__SKNLGRP_MAX - 1) + #endif /* _UAPI__SOCK_DIAG_H__ */ diff --git a/net/core/sock.c b/net/core/sock.c index 7063c329c1b6..1e1fe9a68d83 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -131,6 +131,7 @@ #include #include #include +#include #include @@ -1423,7 +1424,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, } EXPORT_SYMBOL(sk_alloc); -static void __sk_free(struct sock *sk) +void sk_destruct(struct sock *sk) { struct sk_filter *filter; @@ -1451,6 +1452,14 @@ static void __sk_free(struct sock *sk) sk_prot_free(sk->sk_prot_creator, sk); } +static void __sk_free(struct sock *sk) +{ + if (unlikely(sock_diag_has_destroy_listeners(sk))) + sock_diag_broadcast_destroy(sk); + else + sk_destruct(sk); +} + void sk_free(struct sock *sk) { /* diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 74dddf84adcd..d79866c5f8bc 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -5,6 +5,9 @@ #include #include #include +#include +#include +#include #include #include @@ -12,6 +15,7 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); +static struct workqueue_struct *broadcast_wq; static u64 sock_gen_cookie(struct sock *sk) { @@ -101,6 +105,62 @@ out: } EXPORT_SYMBOL(sock_diag_put_filterinfo); +struct broadcast_sk { + struct sock *sk; + struct work_struct work; +}; + +static size_t sock_diag_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct inet_diag_msg) + + nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */ + + nla_total_size(sizeof(struct tcp_info))); /* INET_DIAG_INFO */ +} + +static void sock_diag_broadcast_destroy_work(struct work_struct *work) +{ + struct broadcast_sk *bsk = + container_of(work, struct broadcast_sk, work); + struct sock *sk = bsk->sk; + const struct sock_diag_handler *hndl; + struct sk_buff *skb; + const enum sknetlink_groups group = sock_diag_destroy_group(sk); + int err = -1; + + WARN_ON(group == SKNLGRP_NONE); + + skb = nlmsg_new(sock_diag_nlmsg_size(), GFP_KERNEL); + if (!skb) + goto out; + + mutex_lock(&sock_diag_table_mutex); + hndl = sock_diag_handlers[sk->sk_family]; + if (hndl && hndl->get_info) + err = hndl->get_info(skb, sk); + mutex_unlock(&sock_diag_table_mutex); + + if (!err) + nlmsg_multicast(sock_net(sk)->diag_nlsk, skb, 0, group, + GFP_KERNEL); + else + kfree_skb(skb); +out: + sk_destruct(sk); + kfree(bsk); +} + +void sock_diag_broadcast_destroy(struct sock *sk) +{ + /* Note, this function is often called from an interrupt context. */ + struct broadcast_sk *bsk = + kmalloc(sizeof(struct broadcast_sk), GFP_ATOMIC); + if (!bsk) + return sk_destruct(sk); + bsk->sk = sk; + INIT_WORK(&bsk->work, sock_diag_broadcast_destroy_work); + queue_work(broadcast_wq, &bsk->work); +} + void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)) { mutex_lock(&sock_diag_table_mutex); @@ -211,10 +271,32 @@ static void sock_diag_rcv(struct sk_buff *skb) mutex_unlock(&sock_diag_mutex); } +static int sock_diag_bind(struct net *net, int group) +{ + switch (group) { + case SKNLGRP_INET_TCP_DESTROY: + case SKNLGRP_INET_UDP_DESTROY: + if (!sock_diag_handlers[AF_INET]) + request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, AF_INET); + break; + case SKNLGRP_INET6_TCP_DESTROY: + case SKNLGRP_INET6_UDP_DESTROY: + if (!sock_diag_handlers[AF_INET6]) + request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, AF_INET); + break; + } + return 0; +} + static int __net_init diag_net_init(struct net *net) { struct netlink_kernel_cfg cfg = { + .groups = SKNLGRP_MAX, .input = sock_diag_rcv, + .bind = sock_diag_bind, + .flags = NL_CFG_F_NONROOT_RECV, }; net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg); @@ -234,12 +316,15 @@ static struct pernet_operations diag_net_ops = { static int __init sock_diag_init(void) { + broadcast_wq = alloc_workqueue("sock_diag_events", 0, 0); + BUG_ON(!broadcast_wq); return register_pernet_subsys(&diag_net_ops); } static void __exit sock_diag_exit(void) { unregister_pernet_subsys(&diag_net_ops); + destroy_workqueue(broadcast_wq); } module_init(sock_diag_init); -- cgit v1.2.3 From 3fd22af808f4d7455ba91596d334438c7ee0f889 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 15 Jun 2015 11:26:19 -0400 Subject: sock_diag: specify info_size per inet protocol Previously, there was no clear distinction between the inet protocols that used struct tcp_info to report information and those that didn't. This change adds a specific size attribute to the inet_diag_handler struct which defines these interfaces. This will make dispatching sock_diag get_info requests identical for all inet protocols in a following patch. Tested: ss -au Tested: ss -at Signed-off-by: Craig Gallek Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 1 + net/dccp/diag.c | 1 + net/ipv4/inet_diag.c | 4 ++-- net/ipv4/tcp_diag.c | 1 + net/ipv4/udp_diag.c | 2 ++ 5 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index ac48b10c9395..0e707f0c1a3e 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -24,6 +24,7 @@ struct inet_diag_handler { struct inet_diag_msg *r, void *info); __u16 idiag_type; + __u16 idiag_info_size; }; struct inet_connection_sock; diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 5a45f8de5d99..2d84303ea6bf 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -66,6 +66,7 @@ static const struct inet_diag_handler dccp_diag_handler = { .dump_one = dccp_diag_dump_one, .idiag_get_info = dccp_diag_get_info, .idiag_type = IPPROTO_DCCP, + .idiag_info_size = sizeof(struct tcp_info), }; static int __init dccp_diag_init(void) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 4d32262c7502..b1f01174bf32 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -200,9 +200,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, } #undef EXPIRES_IN_MS - if (ext & (1 << (INET_DIAG_INFO - 1))) { + if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { attr = nla_reserve(skb, INET_DIAG_INFO, - sizeof(struct tcp_info)); + handler->idiag_info_size); if (!attr) goto errout; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 79b34a0f4a4a..423e3881a40b 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -50,6 +50,7 @@ static const struct inet_diag_handler tcp_diag_handler = { .dump_one = tcp_diag_dump_one, .idiag_get_info = tcp_diag_get_info, .idiag_type = IPPROTO_TCP, + .idiag_info_size = sizeof(struct tcp_info), }; static int __init tcp_diag_init(void) diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index b763c39ae1d7..6116604bf6e8 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -170,6 +170,7 @@ static const struct inet_diag_handler udp_diag_handler = { .dump_one = udp_diag_dump_one, .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDP, + .idiag_info_size = 0, }; static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, @@ -190,6 +191,7 @@ static const struct inet_diag_handler udplite_diag_handler = { .dump_one = udplite_diag_dump_one, .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDPLITE, + .idiag_info_size = 0, }; static int __init udp_diag_init(void) -- cgit v1.2.3 From 35ac838a9b96470f999db04320f53a2033642bfb Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 15 Jun 2015 11:26:20 -0400 Subject: sock_diag: implement a get_info handler for inet This get_info handler will simply dispatch to the appropriate existing inet protocol handler. This patch also includes a new netlink attribute (INET_DIAG_PROTOCOL). This attribute is currently only used for multicast messages. Without this attribute, there is no way of knowing the IP protocol used by the socket information being broadcast. This attribute is not necessary in the 'dump' variant of this protocol (though it could easily be added) because dump requests are issued for specific family/protocol pairs. Tested: ss -E (note, the -E option has not yet been merged into the upstream version of ss). Signed-off-by: Craig Gallek Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 3 ++- net/ipv4/inet_diag.c | 46 ++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp.c | 4 +++- net/ipv4/tcp_diag.c | 5 +++-- 4 files changed, 54 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index c7093c75bdd6..b629fc53b109 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -111,9 +111,10 @@ enum { INET_DIAG_SKMEMINFO, INET_DIAG_SHUTDOWN, INET_DIAG_DCTCPINFO, + INET_DIAG_PROTOCOL, /* response attribute only */ }; -#define INET_DIAG_MAX INET_DIAG_DCTCPINFO +#define INET_DIAG_MAX INET_DIAG_PROTOCOL /* INET_DIAG_MEM */ diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index b1f01174bf32..21985d8d41e7 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -1078,14 +1078,60 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) return inet_diag_get_exact(skb, h, nlmsg_data(h)); } +static +int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk) +{ + const struct inet_diag_handler *handler; + struct nlmsghdr *nlh; + struct nlattr *attr; + struct inet_diag_msg *r; + void *info = NULL; + int err = 0; + + nlh = nlmsg_put(skb, 0, 0, SOCK_DIAG_BY_FAMILY, sizeof(*r), 0); + if (!nlh) + return -ENOMEM; + + r = nlmsg_data(nlh); + memset(r, 0, sizeof(*r)); + inet_diag_msg_common_fill(r, sk); + r->idiag_state = sk->sk_state; + + if ((err = nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))) { + nlmsg_cancel(skb, nlh); + return err; + } + + handler = inet_diag_lock_handler(sk->sk_protocol); + if (IS_ERR(handler)) { + inet_diag_unlock_handler(handler); + nlmsg_cancel(skb, nlh); + return PTR_ERR(handler); + } + + attr = handler->idiag_info_size + ? nla_reserve(skb, INET_DIAG_INFO, handler->idiag_info_size) + : NULL; + if (attr) + info = nla_data(attr); + + handler->idiag_get_info(sk, r, info); + inet_diag_unlock_handler(handler); + + nlmsg_end(skb, nlh); + return 0; +} + static const struct sock_diag_handler inet_diag_handler = { .family = AF_INET, .dump = inet_diag_handler_dump, + .get_info = inet_diag_handler_get_info, }; static const struct sock_diag_handler inet6_diag_handler = { .family = AF_INET6, .dump = inet_diag_handler_dump, + .get_info = inet_diag_handler_get_info, }; int inet_diag_register(const struct inet_diag_handler *h) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 65f791f74845..697b86dd45b3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2624,13 +2624,15 @@ EXPORT_SYMBOL(compat_tcp_setsockopt); /* Return information about state of tcp endpoint in API format. */ void tcp_get_info(struct sock *sk, struct tcp_info *info) { - const struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; unsigned int start; u32 rate; memset(info, 0, sizeof(*info)); + if (sk->sk_type != SOCK_STREAM) + return; info->tcpi_state = sk->sk_state; info->tcpi_ca_state = icsk->icsk_ca_state; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 423e3881a40b..479f34946177 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -19,13 +19,14 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *_info) { - const struct tcp_sock *tp = tcp_sk(sk); struct tcp_info *info = _info; if (sk->sk_state == TCP_LISTEN) { r->idiag_rqueue = sk->sk_ack_backlog; r->idiag_wqueue = sk->sk_max_ack_backlog; - } else { + } else if (sk->sk_type == SOCK_STREAM) { + const struct tcp_sock *tp = tcp_sk(sk); + r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); r->idiag_wqueue = tp->write_seq - tp->snd_una; } -- cgit v1.2.3 From ef493bd930ae482c608c5999b40d79dda3f2a674 Mon Sep 17 00:00:00 2001 From: Roman Kubiak Date: Fri, 12 Jun 2015 12:32:57 +0200 Subject: netfilter: nfnetlink_queue: add security context information This patch adds an additional attribute when sending packet information via netlink in netfilter_queue module. It will send additional security context data, so that userspace applications can verify this context against their own security databases. Signed-off-by: Roman Kubiak Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_queue.h | 4 ++- net/netfilter/nfnetlink_queue_core.c | 35 +++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h index 8dd819e2b5fe..b67a853638ff 100644 --- a/include/uapi/linux/netfilter/nfnetlink_queue.h +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h @@ -49,6 +49,7 @@ enum nfqnl_attr_type { NFQA_EXP, /* nf_conntrack_netlink.h */ NFQA_UID, /* __u32 sk uid */ NFQA_GID, /* __u32 sk gid */ + NFQA_SECCTX, /* security context string */ __NFQA_MAX }; @@ -102,7 +103,8 @@ enum nfqnl_attr_config { #define NFQA_CFG_F_CONNTRACK (1 << 1) #define NFQA_CFG_F_GSO (1 << 2) #define NFQA_CFG_F_UID_GID (1 << 3) -#define NFQA_CFG_F_MAX (1 << 4) +#define NFQA_CFG_F_SECCTX (1 << 4) +#define NFQA_CFG_F_MAX (1 << 5) /* flags for NFQA_SKB_INFO */ /* packet appears to have wrong checksums, but they are ok */ diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 22a5ac76683e..6eccf0fcdc63 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -278,6 +278,23 @@ nla_put_failure: return -1; } +static u32 nfqnl_get_sk_secctx(struct sk_buff *skb, char **secdata) +{ + u32 seclen = 0; +#if IS_ENABLED(CONFIG_NETWORK_SECMARK) + if (!skb || !sk_fullsock(skb->sk)) + return 0; + + read_lock_bh(&skb->sk->sk_callback_lock); + + if (skb->secmark) + security_secid_to_secctx(skb->secmark, secdata, &seclen); + + read_unlock_bh(&skb->sk->sk_callback_lock); +#endif + return seclen; +} + static struct sk_buff * nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_queue_entry *entry, @@ -297,6 +314,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_conn *ct = NULL; enum ip_conntrack_info uninitialized_var(ctinfo); bool csum_verify; + char *secdata = NULL; + u32 seclen = 0; size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) @@ -352,6 +371,12 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, + nla_total_size(sizeof(u_int32_t))); /* gid */ } + if ((queue->flags & NFQA_CFG_F_SECCTX) && entskb->sk) { + seclen = nfqnl_get_sk_secctx(entskb, &secdata); + if (seclen) + size += nla_total_size(seclen); + } + skb = nfnetlink_alloc_skb(net, size, queue->peer_portid, GFP_ATOMIC); if (!skb) { @@ -479,6 +504,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, nfqnl_put_sk_uidgid(skb, entskb->sk) < 0) goto nla_put_failure; + if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata)) + goto nla_put_failure; + if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) goto nla_put_failure; @@ -1142,7 +1170,12 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ret = -EOPNOTSUPP; goto err_out_unlock; } - +#if !IS_ENABLED(CONFIG_NETWORK_SECMARK) + if (flags & mask & NFQA_CFG_F_SECCTX) { + ret = -EOPNOTSUPP; + goto err_out_unlock; + } +#endif spin_lock_bh(&queue->lock); queue->flags &= ~mask; queue->flags |= flags & mask; -- cgit v1.2.3 From 01555e74bde51444c6898ef1800fb2bc697d479e Mon Sep 17 00:00:00 2001 From: Harout Hedeshian Date: Mon, 15 Jun 2015 18:40:43 -0600 Subject: netfilter: xt_socket: add XT_SOCKET_RESTORESKMARK flag xt_socket is useful for matching sockets with IP_TRANSPARENT and taking some action on the matching packets. However, it lacks the ability to match only a small subset of transparent sockets. Suppose there are 2 applications, each with its own set of transparent sockets. The first application wants all matching packets dropped, while the second application wants them forwarded somewhere else. Add the ability to retore the skb->mark from the sk_mark. The mark is only restored if a matching socket is found and the transparent / nowildcard conditions are satisfied. Now the 2 hypothetical applications can differentiate their sockets based on a mark value set with SO_MARK. iptables -t mangle -I PREROUTING -m socket --transparent \ --restore-skmark -j action iptables -t mangle -A action -m mark --mark 10 -j action2 iptables -t mangle -A action -m mark --mark 11 -j action3 Signed-off-by: Harout Hedeshian Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_socket.h | 8 +++++ net/netfilter/xt_socket.c | 59 ++++++++++++++++++++++++++++---- 2 files changed, 61 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/xt_socket.h b/include/uapi/linux/netfilter/xt_socket.h index 6315e2ac3474..87644f832494 100644 --- a/include/uapi/linux/netfilter/xt_socket.h +++ b/include/uapi/linux/netfilter/xt_socket.h @@ -6,6 +6,7 @@ enum { XT_SOCKET_TRANSPARENT = 1 << 0, XT_SOCKET_NOWILDCARD = 1 << 1, + XT_SOCKET_RESTORESKMARK = 1 << 2, }; struct xt_socket_mtinfo1 { @@ -18,4 +19,11 @@ struct xt_socket_mtinfo2 { }; #define XT_SOCKET_FLAGS_V2 (XT_SOCKET_TRANSPARENT | XT_SOCKET_NOWILDCARD) +struct xt_socket_mtinfo3 { + __u8 flags; +}; +#define XT_SOCKET_FLAGS_V3 (XT_SOCKET_TRANSPARENT \ + | XT_SOCKET_NOWILDCARD \ + | XT_SOCKET_RESTORESKMARK) + #endif /* _XT_SOCKET_H */ diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index e092cb046326..43e26c881100 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -205,6 +205,7 @@ static bool socket_match(const struct sk_buff *skb, struct xt_action_param *par, const struct xt_socket_mtinfo1 *info) { + struct sk_buff *pskb = (struct sk_buff *)skb; struct sock *sk = skb->sk; if (!sk) @@ -226,6 +227,10 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, if (info->flags & XT_SOCKET_TRANSPARENT) transparent = xt_socket_sk_is_transparent(sk); + if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && + transparent) + pskb->mark = sk->sk_mark; + if (sk != skb->sk) sock_gen_put(sk); @@ -247,7 +252,7 @@ socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par) } static bool -socket_mt4_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) +socket_mt4_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) { return socket_match(skb, par, par->matchinfo); } @@ -371,9 +376,10 @@ static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb, } static bool -socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) +socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; + struct sk_buff *pskb = (struct sk_buff *)skb; struct sock *sk = skb->sk; if (!sk) @@ -395,6 +401,10 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) if (info->flags & XT_SOCKET_TRANSPARENT) transparent = xt_socket_sk_is_transparent(sk); + if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && + transparent) + pskb->mark = sk->sk_mark; + if (sk != skb->sk) sock_gen_put(sk); @@ -428,6 +438,19 @@ static int socket_mt_v2_check(const struct xt_mtchk_param *par) return 0; } +static int socket_mt_v3_check(const struct xt_mtchk_param *par) +{ + const struct xt_socket_mtinfo3 *info = + (struct xt_socket_mtinfo3 *)par->matchinfo; + + if (info->flags & ~XT_SOCKET_FLAGS_V3) { + pr_info("unknown flags 0x%x\n", + info->flags & ~XT_SOCKET_FLAGS_V3); + return -EINVAL; + } + return 0; +} + static struct xt_match socket_mt_reg[] __read_mostly = { { .name = "socket", @@ -442,7 +465,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 1, .family = NFPROTO_IPV4, - .match = socket_mt4_v1_v2, + .match = socket_mt4_v1_v2_v3, .checkentry = socket_mt_v1_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | @@ -454,7 +477,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 1, .family = NFPROTO_IPV6, - .match = socket_mt6_v1_v2, + .match = socket_mt6_v1_v2_v3, .checkentry = socket_mt_v1_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | @@ -466,7 +489,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 2, .family = NFPROTO_IPV4, - .match = socket_mt4_v1_v2, + .match = socket_mt4_v1_v2_v3, .checkentry = socket_mt_v2_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | @@ -478,13 +501,37 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 2, .family = NFPROTO_IPV6, - .match = socket_mt6_v1_v2, + .match = socket_mt6_v1_v2_v3, .checkentry = socket_mt_v2_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, +#endif + { + .name = "socket", + .revision = 3, + .family = NFPROTO_IPV4, + .match = socket_mt4_v1_v2_v3, + .checkentry = socket_mt_v3_check, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, +#ifdef XT_SOCKET_HAVE_IPV6 + { + .name = "socket", + .revision = 3, + .family = NFPROTO_IPV6, + .match = socket_mt6_v1_v2_v3, + .checkentry = socket_mt_v3_check, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, #endif }; -- cgit v1.2.3 From a1a56aaa0735c7821e85c37268a7c12e132415cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Jun 2015 18:10:13 -0700 Subject: netfilter: x_tables: align per cpu xt_counter Let's force a 16 bytes alignment on xt_counter percpu allocations, so that bytes and packets sit in same cache line. xt_counter being exported to user space, we cannot add __align(16) on the structure itself. Signed-off-by: Eric Dumazet Cc: Florian Westphal Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 95693c4cebdd..1c97a2204379 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -356,7 +356,8 @@ static inline unsigned long ifname_compare_aligned(const char *_a, * so nothing needs to be done there. * * xt_percpu_counter_alloc returns the address of the percpu - * counter, or 0 on !SMP. + * counter, or 0 on !SMP. We force an alignment of 16 bytes + * so that bytes/packets share a common cache line. * * Hence caller must use IS_ERR_VALUE to check for error, this * allows us to return 0 for single core systems without forcing @@ -365,7 +366,8 @@ static inline unsigned long ifname_compare_aligned(const char *_a, static inline u64 xt_percpu_counter_alloc(void) { if (nr_cpu_ids > 1) { - void __percpu *res = alloc_percpu(struct xt_counters); + void __percpu *res = __alloc_percpu(sizeof(struct xt_counters), + sizeof(struct xt_counters)); if (res == NULL) return (u64) -ENOMEM; -- cgit v1.2.3 From d2609b345ebf0547015a78588c4d7ad68c9ccf26 Mon Sep 17 00:00:00 2001 From: Florian Grandel Date: Thu, 18 Jun 2015 03:16:34 +0200 Subject: Bluetooth: hci_core/mgmt: Introduce multi-adv list The current hci dev structure only supports a single advertising instance. To support multi-instance advertising it is necessary to introduce a linked list of advertising instances so that multiple advertising instances can be dynamically added and/or removed. In a first step, the existing adv_instance member of the hci_dev struct is supplemented by a linked list of advertising instances. This patch introduces the list and supporting list management infrastructure. The list is not being used yet. Signed-off-by: Florian Grandel Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 17 ++++++ net/bluetooth/hci_core.c | 117 +++++++++++++++++++++++++++++++++++++++ net/bluetooth/mgmt.c | 2 +- 3 files changed, 135 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 3fbb793e634d..4242dbfb4cf5 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -157,15 +157,20 @@ struct oob_data { struct adv_info { struct delayed_work timeout_exp; + struct list_head list; __u8 instance; __u32 flags; __u16 timeout; + __u16 duration; __u16 adv_data_len; __u8 adv_data[HCI_MAX_AD_LENGTH]; __u16 scan_rsp_len; __u8 scan_rsp_data[HCI_MAX_AD_LENGTH]; }; +#define HCI_MAX_ADV_INSTANCES 1 +#define HCI_DEFAULT_ADV_DURATION 2 + #define HCI_MAX_SHORT_NAME_LENGTH 10 /* Default LE RPA expiry time, 15 minutes */ @@ -374,6 +379,9 @@ struct hci_dev { __u8 scan_rsp_data_len; struct adv_info adv_instance; + struct list_head adv_instances; + unsigned int adv_instance_cnt; + __u8 cur_adv_instance; __u8 irk[16]; __u32 rpa_timeout; @@ -1019,6 +1027,15 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type); +void hci_adv_instances_clear(struct hci_dev *hdev); +struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance); +struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance); +int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, + u16 adv_data_len, u8 *adv_data, + u16 scan_rsp_len, u8 *scan_rsp_data, + u16 timeout, u16 duration); +int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance); + void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb); int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 573711c2d09e..ebf37ebcfd12 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2610,6 +2610,119 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, return 0; } +/* This function requires the caller holds hdev->lock */ +struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance) +{ + struct adv_info *adv_instance; + + list_for_each_entry(adv_instance, &hdev->adv_instances, list) { + if (adv_instance->instance == instance) + return adv_instance; + } + + return NULL; +} + +/* This function requires the caller holds hdev->lock */ +struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance) { + struct adv_info *cur_instance; + + cur_instance = hci_find_adv_instance(hdev, instance); + if (!cur_instance) + return NULL; + + if (cur_instance == list_last_entry(&hdev->adv_instances, + struct adv_info, list)) + return list_first_entry(&hdev->adv_instances, + struct adv_info, list); + else + return list_next_entry(cur_instance, list); +} + +/* This function requires the caller holds hdev->lock */ +int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance) +{ + struct adv_info *adv_instance; + + adv_instance = hci_find_adv_instance(hdev, instance); + if (!adv_instance) + return -ENOENT; + + BT_DBG("%s removing %dMR", hdev->name, instance); + + list_del(&adv_instance->list); + kfree(adv_instance); + + hdev->adv_instance_cnt--; + + return 0; +} + +/* This function requires the caller holds hdev->lock */ +void hci_adv_instances_clear(struct hci_dev *hdev) +{ + struct adv_info *adv_instance, *n; + + list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) { + list_del(&adv_instance->list); + kfree(adv_instance); + } + + hdev->adv_instance_cnt = 0; +} + +/* This function requires the caller holds hdev->lock */ +int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, + u16 adv_data_len, u8 *adv_data, + u16 scan_rsp_len, u8 *scan_rsp_data, + u16 timeout, u16 duration) +{ + struct adv_info *adv_instance; + + adv_instance = hci_find_adv_instance(hdev, instance); + if (adv_instance) { + memset(adv_instance->adv_data, 0, + sizeof(adv_instance->adv_data)); + memset(adv_instance->scan_rsp_data, 0, + sizeof(adv_instance->scan_rsp_data)); + } else { + if (hdev->adv_instance_cnt >= HCI_MAX_ADV_INSTANCES || + instance < 1 || instance > HCI_MAX_ADV_INSTANCES) + return -EOVERFLOW; + + adv_instance = kmalloc(sizeof(*adv_instance), GFP_KERNEL); + if (!adv_instance) + return -ENOMEM; + + memset(adv_instance, 0, sizeof(*adv_instance)); + adv_instance->instance = instance; + list_add(&adv_instance->list, &hdev->adv_instances); + hdev->adv_instance_cnt++; + } + + adv_instance->flags = flags; + adv_instance->adv_data_len = adv_data_len; + adv_instance->scan_rsp_len = scan_rsp_len; + + if (adv_data_len) + memcpy(adv_instance->adv_data, adv_data, adv_data_len); + + if (scan_rsp_len) + memcpy(adv_instance->scan_rsp_data, + scan_rsp_data, scan_rsp_len); + + adv_instance->timeout = timeout; + + if (duration == 0) + adv_instance->duration = HCI_DEFAULT_ADV_DURATION; + else + adv_instance->duration = duration; + + BT_DBG("%s for %dMR", hdev->name, instance); + + return 0; +} + struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list, bdaddr_t *bdaddr, u8 type) { @@ -3015,6 +3128,8 @@ struct hci_dev *hci_alloc_dev(void) hdev->manufacturer = 0xffff; /* Default to internal use */ hdev->inq_tx_power = HCI_TX_POWER_INVALID; hdev->adv_tx_power = HCI_TX_POWER_INVALID; + hdev->adv_instance_cnt = 0; + hdev->cur_adv_instance = 0x00; hdev->sniff_max_interval = 800; hdev->sniff_min_interval = 80; @@ -3056,6 +3171,7 @@ struct hci_dev *hci_alloc_dev(void) INIT_LIST_HEAD(&hdev->pend_le_conns); INIT_LIST_HEAD(&hdev->pend_le_reports); INIT_LIST_HEAD(&hdev->conn_hash.list); + INIT_LIST_HEAD(&hdev->adv_instances); INIT_WORK(&hdev->rx_work, hci_rx_work); INIT_WORK(&hdev->cmd_work, hci_cmd_work); @@ -3249,6 +3365,7 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_smp_ltks_clear(hdev); hci_smp_irks_clear(hdev); hci_remote_oob_data_clear(hdev); + hci_adv_instances_clear(hdev); hci_bdaddr_list_clear(&hdev->le_white_list); hci_conn_params_clear_all(hdev); hci_discovery_filter_clear(hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e41bbe28a36e..92c50a17fdf9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6813,7 +6813,7 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, rp->supported_flags = cpu_to_le32(supported_flags); rp->max_adv_data_len = HCI_MAX_AD_LENGTH; rp->max_scan_rsp_len = HCI_MAX_AD_LENGTH; - rp->max_instances = 1; + rp->max_instances = HCI_MAX_ADV_INSTANCES; /* Currently only one instance is supported, so simply return the * current instance number. -- cgit v1.2.3 From 5d900e4601391576a3c0644d7fcad1ebf41a516e Mon Sep 17 00:00:00 2001 From: Florian Grandel Date: Thu, 18 Jun 2015 03:16:35 +0200 Subject: Bluetooth: hci_core/mgmt: move adv timeout to hdev Currently the delayed work managing advertising duration and timeout is part of the advertising instance structure. This is not correct as only a single instance can be advertised at any given time. To implement round robin advertising a single delayed work structure is needed. To fix this the delayed work structure is being moved to the hci_dev structure. The instance specific variable is renamed to "remaining_time" to make it clear that this is the remaining lifetime of the instance and not the current advertising timeout. Signed-off-by: Florian Grandel Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 5 ++++- net/bluetooth/hci_core.c | 29 +++++++++++++++++++++++++++++ net/bluetooth/mgmt.c | 27 +++++++++++---------------- 3 files changed, 44 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 4242dbfb4cf5..b53e1b113621 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -156,11 +156,11 @@ struct oob_data { }; struct adv_info { - struct delayed_work timeout_exp; struct list_head list; __u8 instance; __u32 flags; __u16 timeout; + __u16 remaining_time; __u16 duration; __u16 adv_data_len; __u8 adv_data[HCI_MAX_AD_LENGTH]; @@ -382,6 +382,8 @@ struct hci_dev { struct list_head adv_instances; unsigned int adv_instance_cnt; __u8 cur_adv_instance; + __u16 adv_instance_timeout; + struct delayed_work adv_instance_expire; __u8 irk[16]; __u32 rpa_timeout; @@ -1379,6 +1381,7 @@ void mgmt_set_powered_failed(struct hci_dev *hdev, int err); int mgmt_powered(struct hci_dev *hdev, u8 powered); int mgmt_update_adv_data(struct hci_dev *hdev); void mgmt_discoverable_timeout(struct hci_dev *hdev); +void mgmt_adv_timeout_expired(struct hci_dev *hdev); void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent); void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ebf37ebcfd12..d1110db3b0d4 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1591,6 +1591,11 @@ static int hci_dev_do_close(struct hci_dev *hdev) if (hci_dev_test_flag(hdev, HCI_MGMT)) cancel_delayed_work_sync(&hdev->rpa_expired); + if (hdev->adv_instance_timeout) { + cancel_delayed_work_sync(&hdev->adv_instance_expire); + hdev->adv_instance_timeout = 0; + } + /* Avoid potential lockdep warnings from the *_flush() calls by * ensuring the workqueue is empty up front. */ @@ -2147,6 +2152,17 @@ static void hci_discov_off(struct work_struct *work) mgmt_discoverable_timeout(hdev); } +static void hci_adv_timeout_expire(struct work_struct *work) +{ + struct hci_dev *hdev; + + hdev = container_of(work, struct hci_dev, adv_instance_expire.work); + + BT_DBG("%s", hdev->name); + + mgmt_adv_timeout_expired(hdev); +} + void hci_uuids_clear(struct hci_dev *hdev) { struct bt_uuid *uuid, *tmp; @@ -2650,6 +2666,11 @@ int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance) BT_DBG("%s removing %dMR", hdev->name, instance); + if (hdev->cur_adv_instance == instance && hdev->adv_instance_timeout) { + cancel_delayed_work(&hdev->adv_instance_expire); + hdev->adv_instance_timeout = 0; + } + list_del(&adv_instance->list); kfree(adv_instance); @@ -2663,6 +2684,11 @@ void hci_adv_instances_clear(struct hci_dev *hdev) { struct adv_info *adv_instance, *n; + if (hdev->adv_instance_timeout) { + cancel_delayed_work(&hdev->adv_instance_expire); + hdev->adv_instance_timeout = 0; + } + list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) { list_del(&adv_instance->list); kfree(adv_instance); @@ -2712,6 +2738,7 @@ int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, scan_rsp_data, scan_rsp_len); adv_instance->timeout = timeout; + adv_instance->remaining_time = timeout; if (duration == 0) adv_instance->duration = HCI_DEFAULT_ADV_DURATION; @@ -3130,6 +3157,7 @@ struct hci_dev *hci_alloc_dev(void) hdev->adv_tx_power = HCI_TX_POWER_INVALID; hdev->adv_instance_cnt = 0; hdev->cur_adv_instance = 0x00; + hdev->adv_instance_timeout = 0; hdev->sniff_max_interval = 800; hdev->sniff_min_interval = 80; @@ -3183,6 +3211,7 @@ struct hci_dev *hci_alloc_dev(void) INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); + INIT_DELAYED_WORK(&hdev->adv_instance_expire, hci_adv_timeout_expire); skb_queue_head_init(&hdev->rx_q); skb_queue_head_init(&hdev->cmd_q); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 92c50a17fdf9..a8319f6cfa65 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1466,8 +1466,8 @@ static void clear_adv_instance(struct hci_dev *hdev) if (!hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) return; - if (hdev->adv_instance.timeout) - cancel_delayed_work(&hdev->adv_instance.timeout_exp); + if (hdev->adv_instance_timeout) + cancel_delayed_work(&hdev->adv_instance_expire); memset(&hdev->adv_instance, 0, sizeof(hdev->adv_instance)); advertising_removed(NULL, hdev, 1); @@ -1497,7 +1497,7 @@ static int clean_up_hci_state(struct hci_dev *hdev) hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } - if (hdev->adv_instance.timeout) + if (hdev->adv_instance_timeout) clear_adv_instance(hdev); if (hci_dev_test_flag(hdev, HCI_LE_ADV)) @@ -6914,12 +6914,9 @@ unlock: hci_dev_unlock(hdev); } -static void adv_timeout_expired(struct work_struct *work) +void mgmt_adv_timeout_expired(struct hci_dev *hdev) { - struct hci_dev *hdev = container_of(work, struct hci_dev, - adv_instance.timeout_exp.work); - - hdev->adv_instance.timeout = 0; + hdev->adv_instance_timeout = 0; hci_dev_lock(hdev); clear_adv_instance(hdev); @@ -6981,8 +6978,6 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, goto unlock; } - INIT_DELAYED_WORK(&hdev->adv_instance.timeout_exp, adv_timeout_expired); - hdev->adv_instance.flags = flags; hdev->adv_instance.adv_data_len = cp->adv_data_len; hdev->adv_instance.scan_rsp_len = cp->scan_rsp_len; @@ -6994,14 +6989,14 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, memcpy(hdev->adv_instance.scan_rsp_data, cp->data + cp->adv_data_len, cp->scan_rsp_len); - if (hdev->adv_instance.timeout) - cancel_delayed_work(&hdev->adv_instance.timeout_exp); + if (hdev->adv_instance_timeout) + cancel_delayed_work(&hdev->adv_instance_expire); - hdev->adv_instance.timeout = timeout; + hdev->adv_instance_timeout = timeout; if (timeout) queue_delayed_work(hdev->workqueue, - &hdev->adv_instance.timeout_exp, + &hdev->adv_instance_expire, msecs_to_jiffies(timeout * 1000)); if (!hci_dev_test_and_set_flag(hdev, HCI_ADVERTISING_INSTANCE)) @@ -7106,8 +7101,8 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (hdev->adv_instance.timeout) - cancel_delayed_work(&hdev->adv_instance.timeout_exp); + if (hdev->adv_instance_timeout) + cancel_delayed_work(&hdev->adv_instance_expire); memset(&hdev->adv_instance, 0, sizeof(hdev->adv_instance)); -- cgit v1.2.3 From fffd38bca51c9a1c00508b754ab66edb6f39cf37 Mon Sep 17 00:00:00 2001 From: Florian Grandel Date: Thu, 18 Jun 2015 03:16:47 +0200 Subject: Bluetooth: mgmt/hci_core: multi-adv for add_advertising*() The add_advertising() and add_advertising_complete() functions reference the now obsolete hdev->adv_instance struct. Both methods are being refactored to access the dynamic advertising instance list instead. This patch also introduces all logic necessary to actually deal with multiple instance advertising. Notably the mgmt_adv_inst_expired() and schedule_adv_inst() method are being referenced to schedule instances in a round robin fashion. This patch also introduces a "pending" flag into the adv_info struct. This is necessary to identify and remove recently added advertising instances when the HCI commands return with an error status code. Otherwise new advertising instances could be leaked without properly informing userspace about their existence. Signed-off-by: Florian Grandel Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 1 + net/bluetooth/mgmt.c | 108 ++++++++++++++++++++++++++------------- 3 files changed, 75 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index b53e1b113621..4f58a0e6e0d3 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -157,6 +157,7 @@ struct oob_data { struct adv_info { struct list_head list; + bool pending; __u8 instance; __u32 flags; __u16 timeout; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d1110db3b0d4..e50f7c3c67f8 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2721,6 +2721,7 @@ int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, return -ENOMEM; memset(adv_instance, 0, sizeof(*adv_instance)); + adv_instance->pending = true; adv_instance->instance = instance; list_add(&adv_instance->list, &hdev->adv_instances); hdev->adv_instance_cnt++; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ac5fc357c757..0cc685495510 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7033,7 +7033,10 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct mgmt_pending_cmd *cmd; + struct mgmt_cp_add_advertising *cp; struct mgmt_rp_add_advertising rp; + struct adv_info *adv_instance, *n; + u8 instance; BT_DBG("status %d", status); @@ -7041,16 +7044,32 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status, cmd = pending_find(MGMT_OP_ADD_ADVERTISING, hdev); - if (status) { + if (status) hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); - memset(&hdev->adv_instance, 0, sizeof(hdev->adv_instance)); - advertising_removed(cmd ? cmd->sk : NULL, hdev, 1); + + list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) { + if (!adv_instance->pending) + continue; + + if (!status) { + adv_instance->pending = false; + continue; + } + + instance = adv_instance->instance; + + if (hdev->cur_adv_instance == instance) + cancel_adv_timeout(hdev); + + hci_remove_adv_instance(hdev, instance); + advertising_removed(cmd ? cmd->sk : NULL, hdev, instance); } if (!cmd) goto unlock; - rp.instance = 0x01; + cp = cmd->param; + rp.instance = cp->instance; if (status) mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, @@ -7098,7 +7117,10 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, u32 flags; u32 supported_flags; u8 status; - u16 timeout; + u16 timeout, duration; + unsigned int prev_instance_cnt = hdev->adv_instance_cnt; + u8 schedule_instance = 0; + struct adv_info *next_instance; int err; struct mgmt_pending_cmd *cmd; struct hci_request req; @@ -7112,12 +7134,13 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, flags = __le32_to_cpu(cp->flags); timeout = __le16_to_cpu(cp->timeout); + duration = __le16_to_cpu(cp->duration); - /* The current implementation only supports adding one instance and only - * a subset of the specified flags. + /* The current implementation only supports a subset of the specified + * flags. */ supported_flags = get_supported_adv_flags(hdev); - if (cp->instance != 0x01 || (flags & ~supported_flags)) + if (flags & ~supported_flags) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); @@ -7145,36 +7168,51 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, goto unlock; } - hdev->adv_instance.flags = flags; - hdev->adv_instance.adv_data_len = cp->adv_data_len; - hdev->adv_instance.scan_rsp_len = cp->scan_rsp_len; - - if (cp->adv_data_len) - memcpy(hdev->adv_instance.adv_data, cp->data, cp->adv_data_len); - - if (cp->scan_rsp_len) - memcpy(hdev->adv_instance.scan_rsp_data, - cp->data + cp->adv_data_len, cp->scan_rsp_len); + err = hci_add_adv_instance(hdev, cp->instance, flags, + cp->adv_data_len, cp->data, + cp->scan_rsp_len, + cp->data + cp->adv_data_len, + timeout, duration); + if (err < 0) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_FAILED); + goto unlock; + } - if (hdev->adv_instance_timeout) - cancel_delayed_work(&hdev->adv_instance_expire); + /* Only trigger an advertising added event if a new instance was + * actually added. + */ + if (hdev->adv_instance_cnt > prev_instance_cnt) + advertising_added(sk, hdev, cp->instance); - hdev->adv_instance_timeout = timeout; + hci_dev_set_flag(hdev, HCI_ADVERTISING_INSTANCE); - if (timeout) - queue_delayed_work(hdev->workqueue, - &hdev->adv_instance_expire, - msecs_to_jiffies(timeout * 1000)); + if (hdev->cur_adv_instance == cp->instance) { + /* If the currently advertised instance is being changed then + * cancel the current advertising and schedule the next + * instance. If there is only one instance then the overridden + * advertising data will be visible right away. + */ + cancel_adv_timeout(hdev); - if (!hci_dev_test_and_set_flag(hdev, HCI_ADVERTISING_INSTANCE)) - advertising_added(sk, hdev, 1); + next_instance = hci_get_next_instance(hdev, cp->instance); + if (next_instance) + schedule_instance = next_instance->instance; + } else if (!hdev->adv_instance_timeout) { + /* Immediately advertise the new instance if no other + * instance is currently being advertised. + */ + schedule_instance = cp->instance; + } - /* If the HCI_ADVERTISING flag is set or the device isn't powered then - * we have no HCI communication to make. Simply return. + /* If the HCI_ADVERTISING flag is set or the device isn't powered or + * there is no instance to be advertised then we have no HCI + * communication to make. Simply return. */ if (!hdev_is_powered(hdev) || - hci_dev_test_flag(hdev, HCI_ADVERTISING)) { - rp.instance = 0x01; + hci_dev_test_flag(hdev, HCI_ADVERTISING) || + !schedule_instance) { + rp.instance = cp->instance; err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); goto unlock; @@ -7192,11 +7230,11 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); - update_adv_data(&req); - update_scan_rsp_data(&req); - enable_advertising(&req); + err = schedule_adv_instance(&req, schedule_instance, true); + + if (!err) + err = hci_req_run(&req, add_advertising_complete); - err = hci_req_run(&req, add_advertising_complete); if (err < 0) mgmt_pending_remove(cmd); -- cgit v1.2.3 From d4c5af8f71c8104504a83f7c71911550ebe43ac3 Mon Sep 17 00:00:00 2001 From: Florian Grandel Date: Thu, 18 Jun 2015 03:16:52 +0200 Subject: Bluetooth: hci_core: remove obsolete adv_instance Now that the obsolete adv_instance is no longer being referenced anywhere in the code it can be removed without breaking the build. Signed-off-by: Florian Grandel Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 6 ------ net/bluetooth/hci_core.c | 1 - 2 files changed, 7 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 4f58a0e6e0d3..a6cec6d21aff 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -379,7 +379,6 @@ struct hci_dev { __u8 scan_rsp_data[HCI_MAX_AD_LENGTH]; __u8 scan_rsp_data_len; - struct adv_info adv_instance; struct list_head adv_instances; unsigned int adv_instance_cnt; __u8 cur_adv_instance; @@ -584,11 +583,6 @@ static inline void hci_discovery_filter_clear(struct hci_dev *hdev) hdev->discovery.scan_duration = 0; } -static inline void adv_info_init(struct hci_dev *hdev) -{ - memset(&hdev->adv_instance, 0, sizeof(struct adv_info)); -} - bool hci_discovery_active(struct hci_dev *hdev); void hci_discovery_set_state(struct hci_dev *hdev, int state); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index e50f7c3c67f8..86ed44e39649 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3224,7 +3224,6 @@ struct hci_dev *hci_alloc_dev(void) hci_init_sysfs(hdev); discovery_init(hdev); - adv_info_init(hdev); return hdev; } -- cgit v1.2.3 From db25be6657a56ba2d68aae1f90d796f527f65689 Mon Sep 17 00:00:00 2001 From: Florian Grandel Date: Thu, 18 Jun 2015 03:16:53 +0200 Subject: Bluetooth: hci_core: increase max adv inst Now that all preconditions are present for actual multi-advertising, the number of allowed advertising instances can be larger than one. This patch increases the number of allowed advertising instances to 5. Signed-off-by: Florian Grandel Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a6cec6d21aff..3bd618d3e55d 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -169,7 +169,7 @@ struct adv_info { __u8 scan_rsp_data[HCI_MAX_AD_LENGTH]; }; -#define HCI_MAX_ADV_INSTANCES 1 +#define HCI_MAX_ADV_INSTANCES 5 #define HCI_DEFAULT_ADV_DURATION 2 #define HCI_MAX_SHORT_NAME_LENGTH 10 -- cgit v1.2.3 From 230ac490f7fba2aea52914c69d14b15dd515e49c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 16 Jun 2015 14:07:03 +0200 Subject: netfilter: bridge: split ipv6 code into separated file Resolve compilation breakage when CONFIG_IPV6 is not set by moving the IPv6 code into a separated br_netfilter_ipv6.c file. Fixes: efb6de9b4ba0 ("netfilter: bridge: forward IPv6 fragmented packets") Reported-by: kbuild test robot Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/br_netfilter.h | 60 +++++++++ net/bridge/Makefile | 1 + net/bridge/br_netfilter_hooks.c | 248 ++--------------------------------- net/bridge/br_netfilter_ipv6.c | 245 ++++++++++++++++++++++++++++++++++ 4 files changed, 315 insertions(+), 239 deletions(-) create mode 100644 net/bridge/br_netfilter_ipv6.c (limited to 'include') diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index 2aa6048a55c1..bab824bde92c 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -1,6 +1,66 @@ #ifndef _BR_NETFILTER_H_ #define _BR_NETFILTER_H_ +#include "../../../net/bridge/br_private.h" + +static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) +{ + skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC); + + if (likely(skb->nf_bridge)) + atomic_set(&(skb->nf_bridge->use), 1); + + return skb->nf_bridge; +} + +void nf_bridge_update_protocol(struct sk_buff *skb); + +static inline struct nf_bridge_info * +nf_bridge_info_get(const struct sk_buff *skb) +{ + return skb->nf_bridge; +} + +unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb); + +static inline void nf_bridge_push_encap_header(struct sk_buff *skb) +{ + unsigned int len = nf_bridge_encap_header_len(skb); + + skb_push(skb, len); + skb->network_header -= len; +} + +int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb); + +static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) +{ + struct net_bridge_port *port; + + port = br_port_get_rcu(dev); + return port ? &port->br->fake_rtable : NULL; +} + +struct net_device *setup_pre_routing(struct sk_buff *skb); void br_netfilter_enable(void); +#if IS_ENABLED(CONFIG_IPV6) +int br_validate_ipv6(struct sk_buff *skb); +unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state); +#else +static inline int br_validate_ipv6(struct sk_buff *skb) +{ + return -1; +} + +static inline unsigned int +br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return NF_DROP; +} +#endif + #endif /* _BR_NETFILTER_H_ */ diff --git a/net/bridge/Makefile b/net/bridge/Makefile index c52577ac718e..a1cda5d4718d 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -13,6 +13,7 @@ bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o bridge-$(subst m,y,$(CONFIG_BRIDGE_NETFILTER)) += br_nf_core.o br_netfilter-y := br_netfilter_hooks.o +br_netfilter-$(subst m,y,$(CONFIG_IPV6)) += br_netfilter_ipv6.o obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index e4e5f2f29173..d89f4fac0bc5 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -123,11 +123,6 @@ struct brnf_frag_data { static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); #endif -static struct nf_bridge_info *nf_bridge_info_get(const struct sk_buff *skb) -{ - return skb->nf_bridge; -} - static void nf_bridge_info_free(struct sk_buff *skb) { if (skb->nf_bridge) { @@ -136,14 +131,6 @@ static void nf_bridge_info_free(struct sk_buff *skb) } } -static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) -{ - struct net_bridge_port *port; - - port = br_port_get_rcu(dev); - return port ? &port->br->fake_rtable : NULL; -} - static inline struct net_device *bridge_parent(const struct net_device *dev) { struct net_bridge_port *port; @@ -152,15 +139,6 @@ static inline struct net_device *bridge_parent(const struct net_device *dev) return port ? port->br->dev : NULL; } -static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) -{ - skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC); - if (likely(skb->nf_bridge)) - atomic_set(&(skb->nf_bridge->use), 1); - - return skb->nf_bridge; -} - static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; @@ -178,7 +156,7 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) return nf_bridge; } -static unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) +unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) { switch (skb->protocol) { case __cpu_to_be16(ETH_P_8021Q): @@ -190,14 +168,6 @@ static unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) } } -static inline void nf_bridge_push_encap_header(struct sk_buff *skb) -{ - unsigned int len = nf_bridge_encap_header_len(skb); - - skb_push(skb, len); - skb->network_header -= len; -} - static inline void nf_bridge_pull_encap_header(struct sk_buff *skb) { unsigned int len = nf_bridge_encap_header_len(skb); @@ -267,112 +237,7 @@ drop: return -1; } -/* We only check the length. A bridge shouldn't do any hop-by-hop stuff - * anyway - */ -static int check_hbh_len(struct sk_buff *skb) -{ - unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1); - u32 pkt_len; - const unsigned char *nh = skb_network_header(skb); - int off = raw - nh; - int len = (raw[1] + 1) << 3; - - if ((raw + len) - skb->data > skb_headlen(skb)) - goto bad; - - off += 2; - len -= 2; - - while (len > 0) { - int optlen = nh[off + 1] + 2; - - switch (nh[off]) { - case IPV6_TLV_PAD1: - optlen = 1; - break; - - case IPV6_TLV_PADN: - break; - - case IPV6_TLV_JUMBO: - if (nh[off + 1] != 4 || (off & 3) != 2) - goto bad; - pkt_len = ntohl(*(__be32 *)(nh + off + 2)); - if (pkt_len <= IPV6_MAXPLEN || - ipv6_hdr(skb)->payload_len) - goto bad; - if (pkt_len > skb->len - sizeof(struct ipv6hdr)) - goto bad; - if (pskb_trim_rcsum(skb, - pkt_len + sizeof(struct ipv6hdr))) - goto bad; - nh = skb_network_header(skb); - break; - default: - if (optlen > len) - goto bad; - break; - } - off += optlen; - len -= optlen; - } - if (len == 0) - return 0; -bad: - return -1; -} - -/* Equivalent to br_validate_ipv4 for IPv6 */ -static int br_validate_ipv6(struct sk_buff *skb) -{ - const struct ipv6hdr *hdr; - struct net_device *dev = skb->dev; - struct inet6_dev *idev = in6_dev_get(skb->dev); - u32 pkt_len; - u8 ip6h_len = sizeof(struct ipv6hdr); - - if (!pskb_may_pull(skb, ip6h_len)) - goto inhdr_error; - - if (skb->len < ip6h_len) - goto drop; - - hdr = ipv6_hdr(skb); - - if (hdr->version != 6) - goto inhdr_error; - - pkt_len = ntohs(hdr->payload_len); - - if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { - if (pkt_len + ip6h_len > skb->len) { - IP6_INC_STATS_BH(dev_net(dev), idev, - IPSTATS_MIB_INTRUNCATEDPKTS); - goto drop; - } - if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { - IP6_INC_STATS_BH(dev_net(dev), idev, - IPSTATS_MIB_INDISCARDS); - goto drop; - } - } - if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb)) - goto drop; - - memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); - /* No IP options in IPv6 header; however it should be - * checked if some next headers need special treatment - */ - return 0; - -inhdr_error: - IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS); -drop: - return -1; -} - -static void nf_bridge_update_protocol(struct sk_buff *skb) +void nf_bridge_update_protocol(struct sk_buff *skb) { switch (skb->nf_bridge->orig_proto) { case BRNF_PROTO_8021Q: @@ -391,7 +256,7 @@ static void nf_bridge_update_protocol(struct sk_buff *skb) * don't, we use the neighbour framework to find out. In both cases, we make * sure that br_handle_frame_finish() is called afterwards. */ -static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) +int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) { struct neighbour *neigh; struct dst_entry *dst; @@ -431,77 +296,11 @@ free_skb: return 0; } -static bool daddr_was_changed(const struct sk_buff *skb, - const struct nf_bridge_info *nf_bridge) +static inline bool +br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb, + const struct nf_bridge_info *nf_bridge) { - switch (skb->protocol) { - case htons(ETH_P_IP): - return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; - case htons(ETH_P_IPV6): - return memcmp(&nf_bridge->ipv6_daddr, &ipv6_hdr(skb)->daddr, - sizeof(ipv6_hdr(skb)->daddr)) != 0; - default: - return false; - } -} - -/* PF_BRIDGE/PRE_ROUTING: Undo the changes made for ip6tables - * PREROUTING and continue the bridge PRE_ROUTING hook. See comment - * for br_nf_pre_routing_finish(), same logic is used here but - * equivalent IPv6 function ip6_route_input() called indirectly. - */ -static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) -{ - struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); - struct rtable *rt; - struct net_device *dev = skb->dev; - const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); - - nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; - - if (nf_bridge->pkt_otherhost) { - skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->pkt_otherhost = false; - } - nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; - if (daddr_was_changed(skb, nf_bridge)) { - skb_dst_drop(skb); - v6ops->route_input(skb); - - if (skb_dst(skb)->error) { - kfree_skb(skb); - return 0; - } - - if (skb_dst(skb)->dev == dev) { - skb->dev = nf_bridge->physindev; - nf_bridge_update_protocol(skb); - nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - sk, skb, skb->dev, NULL, - br_nf_pre_routing_finish_bridge, - 1); - return 0; - } - ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); - skb->pkt_type = PACKET_HOST; - } else { - rt = bridge_parent_rtable(nf_bridge->physindev); - if (!rt) { - kfree_skb(skb); - return 0; - } - skb_dst_set_noref(skb, &rt->dst); - } - - skb->dev = nf_bridge->physindev; - nf_bridge_update_protocol(skb); - nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, - skb->dev, NULL, - br_handle_frame_finish, 1); - - return 0; + return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; } /* This requires some explaining. If DNAT has taken place, @@ -558,7 +357,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) nf_bridge->pkt_otherhost = false; } nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; - if (daddr_was_changed(skb, nf_bridge)) { + if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) { if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -636,7 +435,7 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct } /* Some common code for IPv4/IPv6 */ -static struct net_device *setup_pre_routing(struct sk_buff *skb) +struct net_device *setup_pre_routing(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); @@ -659,35 +458,6 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb) return skb->dev; } -/* Replicate the checks that IPv6 does on packet reception and pass the packet - * to ip6tables. - */ -static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nf_bridge_info *nf_bridge; - - if (br_validate_ipv6(skb)) - return NF_DROP; - - nf_bridge_put(skb->nf_bridge); - if (!nf_bridge_alloc(skb)) - return NF_DROP; - if (!setup_pre_routing(skb)) - return NF_DROP; - - nf_bridge = nf_bridge_info_get(skb); - nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; - - skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb, - skb->dev, NULL, - br_nf_pre_routing_finish_ipv6); - - return NF_STOLEN; -} - /* Direct IPv6 traffic to br_nf_pre_routing_ipv6. * Replicate the checks that IPv4 does on packet reception. * Set skb->dev to the bridge device (i.e. parent of the diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c new file mode 100644 index 000000000000..6d12d2675c80 --- /dev/null +++ b/net/bridge/br_netfilter_ipv6.c @@ -0,0 +1,245 @@ +/* + * Handle firewalling + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek + * Bart De Schuymer + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Lennert dedicates this file to Kerstin Wurdinger. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include "br_private.h" +#ifdef CONFIG_SYSCTL +#include +#endif + +/* We only check the length. A bridge shouldn't do any hop-by-hop stuff + * anyway + */ +static int br_nf_check_hbh_len(struct sk_buff *skb) +{ + unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1); + u32 pkt_len; + const unsigned char *nh = skb_network_header(skb); + int off = raw - nh; + int len = (raw[1] + 1) << 3; + + if ((raw + len) - skb->data > skb_headlen(skb)) + goto bad; + + off += 2; + len -= 2; + + while (len > 0) { + int optlen = nh[off + 1] + 2; + + switch (nh[off]) { + case IPV6_TLV_PAD1: + optlen = 1; + break; + + case IPV6_TLV_PADN: + break; + + case IPV6_TLV_JUMBO: + if (nh[off + 1] != 4 || (off & 3) != 2) + goto bad; + pkt_len = ntohl(*(__be32 *)(nh + off + 2)); + if (pkt_len <= IPV6_MAXPLEN || + ipv6_hdr(skb)->payload_len) + goto bad; + if (pkt_len > skb->len - sizeof(struct ipv6hdr)) + goto bad; + if (pskb_trim_rcsum(skb, + pkt_len + sizeof(struct ipv6hdr))) + goto bad; + nh = skb_network_header(skb); + break; + default: + if (optlen > len) + goto bad; + break; + } + off += optlen; + len -= optlen; + } + if (len == 0) + return 0; +bad: + return -1; +} + +int br_validate_ipv6(struct sk_buff *skb) +{ + const struct ipv6hdr *hdr; + struct net_device *dev = skb->dev; + struct inet6_dev *idev = in6_dev_get(skb->dev); + u32 pkt_len; + u8 ip6h_len = sizeof(struct ipv6hdr); + + if (!pskb_may_pull(skb, ip6h_len)) + goto inhdr_error; + + if (skb->len < ip6h_len) + goto drop; + + hdr = ipv6_hdr(skb); + + if (hdr->version != 6) + goto inhdr_error; + + pkt_len = ntohs(hdr->payload_len); + + if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { + if (pkt_len + ip6h_len > skb->len) { + IP6_INC_STATS_BH(dev_net(dev), idev, + IPSTATS_MIB_INTRUNCATEDPKTS); + goto drop; + } + if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { + IP6_INC_STATS_BH(dev_net(dev), idev, + IPSTATS_MIB_INDISCARDS); + goto drop; + } + } + if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb)) + goto drop; + + memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); + /* No IP options in IPv6 header; however it should be + * checked if some next headers need special treatment + */ + return 0; + +inhdr_error: + IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS); +drop: + return -1; +} + +static inline bool +br_nf_ipv6_daddr_was_changed(const struct sk_buff *skb, + const struct nf_bridge_info *nf_bridge) +{ + return memcmp(&nf_bridge->ipv6_daddr, &ipv6_hdr(skb)->daddr, + sizeof(ipv6_hdr(skb)->daddr)) != 0; +} + +/* PF_BRIDGE/PRE_ROUTING: Undo the changes made for ip6tables + * PREROUTING and continue the bridge PRE_ROUTING hook. See comment + * for br_nf_pre_routing_finish(), same logic is used here but + * equivalent IPv6 function ip6_route_input() called indirectly. + */ +static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + struct rtable *rt; + struct net_device *dev = skb->dev; + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + + nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; + + if (nf_bridge->pkt_otherhost) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->pkt_otherhost = false; + } + nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; + if (br_nf_ipv6_daddr_was_changed(skb, nf_bridge)) { + skb_dst_drop(skb); + v6ops->route_input(skb); + + if (skb_dst(skb)->error) { + kfree_skb(skb); + return 0; + } + + if (skb_dst(skb)->dev == dev) { + skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, + sk, skb, skb->dev, NULL, + br_nf_pre_routing_finish_bridge, + 1); + return 0; + } + ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); + skb->pkt_type = PACKET_HOST; + } else { + rt = bridge_parent_rtable(nf_bridge->physindev); + if (!rt) { + kfree_skb(skb); + return 0; + } + skb_dst_set_noref(skb, &rt->dst); + } + + skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, + skb->dev, NULL, + br_handle_frame_finish, 1); + + return 0; +} + +/* Replicate the checks that IPv6 does on packet reception and pass the packet + * to ip6tables. + */ +unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_bridge_info *nf_bridge; + + if (br_validate_ipv6(skb)) + return NF_DROP; + + nf_bridge_put(skb->nf_bridge); + if (!nf_bridge_alloc(skb)) + return NF_DROP; + if (!setup_pre_routing(skb)) + return NF_DROP; + + nf_bridge = nf_bridge_info_get(skb); + nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; + + skb->protocol = htons(ETH_P_IPV6); + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb, + skb->dev, NULL, + br_nf_pre_routing_finish_ipv6); + + return NF_STOLEN; +} -- cgit v1.2.3 From 04c52dec1473c5dff9d07cd39a68c9b23def6c42 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 17 Jun 2015 10:28:25 -0500 Subject: net: include missing headers in net/net_namespace.h Include linux/idr.h and linux/skbuff.h since they are required by objects that are declared in the net structure. struct net { ... struct idr netns_ids; ... struct sk_buff_head wext_nlevents; ... Signed-off-by: Pablo Neira Ayuso Signed-off-by: Eric W. Biederman --- include/net/net_namespace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 72eb23723294..e951453e0a23 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -28,6 +28,8 @@ #include #include #include +#include +#include struct user_namespace; struct proc_dir_entry; -- cgit v1.2.3 From 10c04a8e715cca824f96bcbf4af07f5a40985357 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 17 Jun 2015 10:28:26 -0500 Subject: netfilter: use forward declaration instead of including linux/proc_fs.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need to pull the full definitions in that file, a simple forward declaration is enough. Moreover, include linux/procfs.h from nf_synproxy_core, otherwise this hits a compilation error due to missing declarations, ie. net/netfilter/nf_synproxy_core.c: In function ‘synproxy_proc_init’: net/netfilter/nf_synproxy_core.c:326:2: error: implicit declaration of function ‘proc_create’ [-Werror=implicit-function-declaration] if (!proc_create("synproxy", S_IRUGO, net->proc_net_stat, ^ Signed-off-by: Pablo Neira Ayuso Signed-off-by: Eric W. Biederman --- include/net/netns/netfilter.h | 2 +- net/netfilter/nf_synproxy_core.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 88740024ccf3..cf25b5e35f3c 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -1,9 +1,9 @@ #ifndef __NETNS_NETFILTER_H #define __NETNS_NETFILTER_H -#include #include +struct proc_dir_entry; struct nf_logger; struct netns_nf { diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 52e20c9a46a5..789feeae6c44 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From a263653ed798216c0069922d7b5237ca49436007 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 17 Jun 2015 10:28:27 -0500 Subject: netfilter: don't pull include/linux/netfilter.h from netns headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This pulls the full hook netfilter definitions from all those that include net_namespace.h. Instead let's just include the bare minimum required in the new linux/netfilter_defs.h file, and use it from the netfilter netns header files. I also needed to include in.h and in6.h from linux/netfilter.h otherwise we hit this compilation error: In file included from include/linux/netfilter_defs.h:4:0, from include/net/netns/netfilter.h:4, from include/net/net_namespace.h:22, from include/linux/netdevice.h:43, from net/netfilter/nfnetlink_queue_core.c:23: include/uapi/linux/netfilter.h:76:17: error: field ‘in’ has incomplete type struct in_addr in; And also explicit include linux/netfilter.h in several spots. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Eric W. Biederman --- include/linux/netfilter.h | 6 ++---- include/linux/netfilter_defs.h | 9 +++++++++ include/net/netns/netfilter.h | 2 +- include/net/netns/x_tables.h | 2 +- include/uapi/linux/netfilter.h | 3 ++- net/ipv6/output_core.c | 1 + 6 files changed, 16 insertions(+), 7 deletions(-) create mode 100644 include/linux/netfilter_defs.h (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index f5ff5d156da8..00050dfd9f23 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -10,7 +10,8 @@ #include #include #include -#include +#include + #ifdef CONFIG_NETFILTER static inline int NF_DROP_GETERR(int verdict) { @@ -38,9 +39,6 @@ static inline void nf_inet_addr_mask(const union nf_inet_addr *a1, int netfilter_init(void); -/* Largest hook number + 1 */ -#define NF_MAX_HOOKS 8 - struct sk_buff; struct nf_hook_ops; diff --git a/include/linux/netfilter_defs.h b/include/linux/netfilter_defs.h new file mode 100644 index 000000000000..d3a7f8597e82 --- /dev/null +++ b/include/linux/netfilter_defs.h @@ -0,0 +1,9 @@ +#ifndef __LINUX_NETFILTER_CORE_H_ +#define __LINUX_NETFILTER_CORE_H_ + +#include + +/* Largest hook number + 1, see uapi/linux/netfilter_decnet.h */ +#define NF_MAX_HOOKS 8 + +#endif diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index cf25b5e35f3c..532e4ba64f49 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -1,7 +1,7 @@ #ifndef __NETNS_NETFILTER_H #define __NETNS_NETFILTER_H -#include +#include struct proc_dir_entry; struct nf_logger; diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h index 4d6597ad6067..c8a7681efa6a 100644 --- a/include/net/netns/x_tables.h +++ b/include/net/netns/x_tables.h @@ -2,7 +2,7 @@ #define __NETNS_X_TABLES_H #include -#include +#include struct ebt_table; diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index 177027cce6b3..d93f949d1d9a 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -4,7 +4,8 @@ #include #include #include - +#include +#include /* Responses from hook functions. */ #define NF_DROP 0 diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 21678acd4521..928a0fb0b744 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -8,6 +8,7 @@ #include #include #include +#include static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, const struct in6_addr *dst, -- cgit v1.2.3 From dcb8f5c8139ef945cdfd55900fae265c4dbefc02 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 17 Jun 2015 23:58:28 +0200 Subject: netfilter: xtables: fix warnings on 32bit platforms On 32bit archs gcc complains due to cast from void* to u64. Add intermediate casts to long to silence these warnings. include/linux/netfilter/x_tables.h:376:10: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] include/linux/netfilter/x_tables.h:384:15: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] include/linux/netfilter/x_tables.h:391:23: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] include/linux/netfilter/x_tables.h:400:22: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] Fixes: 71ae0dff02d756e ("netfilter: xtables: use percpu rule counters") Reported-by: kbuild test robot Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1c97a2204379..286098a5667f 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -372,7 +372,7 @@ static inline u64 xt_percpu_counter_alloc(void) if (res == NULL) return (u64) -ENOMEM; - return (__force u64) res; + return (u64) (__force unsigned long) res; } return 0; @@ -380,14 +380,14 @@ static inline u64 xt_percpu_counter_alloc(void) static inline void xt_percpu_counter_free(u64 pcnt) { if (nr_cpu_ids > 1) - free_percpu((void __percpu *) pcnt); + free_percpu((void __percpu *) (unsigned long) pcnt); } static inline struct xt_counters * xt_get_this_cpu_counter(struct xt_counters *cnt) { if (nr_cpu_ids > 1) - return this_cpu_ptr((void __percpu *) cnt->pcnt); + return this_cpu_ptr((void __percpu *) (unsigned long) cnt->pcnt); return cnt; } @@ -396,7 +396,7 @@ static inline struct xt_counters * xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) { if (nr_cpu_ids > 1) - return per_cpu_ptr((void __percpu *) cnt->pcnt, cpu); + return per_cpu_ptr((void __percpu *) (unsigned long) cnt->pcnt, cpu); return cnt; } -- cgit v1.2.3 From 638579f00a9b810ae37c7086b0d20634c1e0234e Mon Sep 17 00:00:00 2001 From: Zhaowei Yuan Date: Wed, 17 Jun 2015 17:56:27 +0800 Subject: net: Update out-of-date comment Struct inet_proto no longer exists, so update the comment which is out of date. Signed-off-by: Zhaowei Yuan Signed-off-by: David S. Miller --- include/net/sock.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 3e8258699270..14d539c040d7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -926,7 +926,6 @@ static inline void sk_prot_clear_nulls(struct sock *sk, int size) /* Networking protocol blocks we attach to sockets. * socket layer -> transport layer interface - * transport -> network interface is defined by struct inet_proto */ struct proto { void (*close)(struct sock *sk, -- cgit v1.2.3 From b42be38b2778eda2237fc759e55e3b698b05b315 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Wed, 17 Jun 2015 17:14:33 +0200 Subject: netlink: add API to retrieve all group memberships This patch adds getsockopt(SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS) to retrieve all groups a socket is a member of. Currently, we have to use getsockname() and look at the nl.nl_groups bitmask. However, this mask is limited to 32 groups. Hence, similar to NETLINK_ADD_MEMBERSHIP and NETLINK_DROP_MEMBERSHIP, this adds a separate sockopt to manager higher groups IDs than 32. This new NETLINK_LIST_MEMBERSHIPS option takes a pointer to __u32 and the size of the array. The array is filled with the full membership-set of the socket, and the required array size is returned in optlen. Hence, user-space can retry with a properly sized array in case it was too small. Signed-off-by: David Herrmann Signed-off-by: David S. Miller --- include/uapi/linux/netlink.h | 17 +++++++++-------- net/netlink/af_netlink.c | 22 ++++++++++++++++++++++ 2 files changed, 31 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 3e34b7d702f8..cf6a65cccbdf 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -101,14 +101,15 @@ struct nlmsgerr { struct nlmsghdr msg; }; -#define NETLINK_ADD_MEMBERSHIP 1 -#define NETLINK_DROP_MEMBERSHIP 2 -#define NETLINK_PKTINFO 3 -#define NETLINK_BROADCAST_ERROR 4 -#define NETLINK_NO_ENOBUFS 5 -#define NETLINK_RX_RING 6 -#define NETLINK_TX_RING 7 -#define NETLINK_LISTEN_ALL_NSID 8 +#define NETLINK_ADD_MEMBERSHIP 1 +#define NETLINK_DROP_MEMBERSHIP 2 +#define NETLINK_PKTINFO 3 +#define NETLINK_BROADCAST_ERROR 4 +#define NETLINK_NO_ENOBUFS 5 +#define NETLINK_RX_RING 6 +#define NETLINK_TX_RING 7 +#define NETLINK_LISTEN_ALL_NSID 8 +#define NETLINK_LIST_MEMBERSHIPS 9 struct nl_pktinfo { __u32 group; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 69d67c300b80..dea925388a5b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2290,6 +2290,28 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, return -EFAULT; err = 0; break; + case NETLINK_LIST_MEMBERSHIPS: { + int pos, idx, shift; + + err = 0; + netlink_table_grab(); + for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) { + if (len - pos < sizeof(u32)) + break; + + idx = pos / sizeof(unsigned long); + shift = (pos % sizeof(unsigned long)) * 8; + if (put_user((u32)(nlk->groups[idx] >> shift), + (u32 __user *)(optval + pos))) { + err = -EFAULT; + break; + } + } + if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen)) + err = -EFAULT; + netlink_table_ungrab(); + break; + } default: err = -ENOPROTOOPT; } -- cgit v1.2.3 From dfe816c5e37272f2f3c1311f0e9934e1b4229261 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Fri, 19 Jun 2015 19:47:53 +0530 Subject: macvtap: Increase limit of macvtap queues Macvtap should be compatible with tuntap for maximum number of queues. commit 'baf71c5c1f80d82e92924050a60b5baaf97e3094 (tuntap: Increase the number of queues in tun.)' removes the limitations and increases number of queues in tuntap. Now, Its safe to increase number of queues in Macvtap as well. This patch also modifies 'macvtap_del_queues' function to avoid extra memory allocation in stack. Changes from v1->v2 : Michael S. Tsirkin, Jason Wang : Better way to use linked list to avoid use of extra memory in stack. Sergei Shtylyov : Specify dependent commit's summary. Signed-off-by: Pankaj Gupta Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 10 ++-------- include/linux/if_macvlan.h | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 483afb19596c..6a64197f5bce 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -263,27 +263,21 @@ out: static void macvtap_del_queues(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); - struct macvtap_queue *q, *tmp, *qlist[MAX_MACVTAP_QUEUES]; - int i, j = 0; + struct macvtap_queue *q, *tmp; ASSERT_RTNL(); list_for_each_entry_safe(q, tmp, &vlan->queue_list, next) { list_del_init(&q->next); - qlist[j++] = q; RCU_INIT_POINTER(q->vlan, NULL); if (q->enabled) vlan->numvtaps--; vlan->numqueues--; + sock_put(&q->sk); } - for (i = 0; i < vlan->numvtaps; i++) - RCU_INIT_POINTER(vlan->taps[i], NULL); BUG_ON(vlan->numvtaps); BUG_ON(vlan->numqueues); /* guarantee that any future macvtap_set_queue will fail */ vlan->numvtaps = MAX_MACVTAP_QUEUES; - - for (--j; j >= 0; j--) - sock_put(&qlist[j]->sk); } static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 6f6929ea8a0c..a4ccc3122f93 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -29,7 +29,7 @@ struct macvtap_queue; * Maximum times a macvtap device can be opened. This can be used to * configure the number of receive queue, e.g. for multiqueue virtio. */ -#define MAX_MACVTAP_QUEUES 16 +#define MAX_MACVTAP_QUEUES 256 #define MACVLAN_MC_FILTER_BITS 8 #define MACVLAN_MC_FILTER_SZ (1 << MACVLAN_MC_FILTER_BITS) -- cgit v1.2.3 From 8405a8fff3f8545c888a872d6e3c0c8eecd4d348 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 19 Jun 2015 14:03:39 -0500 Subject: netfilter: nf_qeueue: Drop queue entries on nf_unregister_hook Add code to nf_unregister_hook to flush the nf_queue when a hook is unregistered. This guarantees that the pointer that the nf_queue code retains into the nf_hook list will remain valid while a packet is queued. I tested what would happen if we do not flush queued packets and was trivially able to obtain the oops below. All that was required was to stop the nf_queue listening process, to delete all of the nf_tables, and to awaken the nf_queue listening process. > BUG: unable to handle kernel paging request at 0000000100000001 > IP: [<0000000100000001>] 0x100000001 > PGD b9c35067 PUD 0 > Oops: 0010 [#1] SMP > Modules linked in: > CPU: 0 PID: 519 Comm: lt-nfqnl_test Not tainted > task: ffff8800b9c8c050 ti: ffff8800ba9d8000 task.ti: ffff8800ba9d8000 > RIP: 0010:[<0000000100000001>] [<0000000100000001>] 0x100000001 > RSP: 0018:ffff8800ba9dba40 EFLAGS: 00010a16 > RAX: ffff8800bab48a00 RBX: ffff8800ba9dba90 RCX: ffff8800ba9dba90 > RDX: ffff8800b9c10128 RSI: ffff8800ba940900 RDI: ffff8800bab48a00 > RBP: ffff8800b9c10128 R08: ffffffff82976660 R09: ffff8800ba9dbb28 > R10: dead000000100100 R11: dead000000200200 R12: ffff8800ba940900 > R13: ffffffff8313fd50 R14: ffff8800b9c95200 R15: 0000000000000000 > FS: 00007fb91fc34700(0000) GS:ffff8800bfa00000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 0000000100000001 CR3: 00000000babfb000 CR4: 00000000000007f0 > Stack: > ffffffff8206ab0f ffffffff82982240 ffff8800bab48a00 ffff8800b9c100a8 > ffff8800b9c10100 0000000000000001 ffff8800ba940900 ffff8800b9c10128 > ffffffff8206bd65 ffff8800bfb0d5e0 ffff8800bab48a00 0000000000014dc0 > Call Trace: > [] ? nf_iterate+0x4f/0xa0 > [] ? nf_reinject+0x125/0x190 > [] ? nfqnl_recv_verdict+0x255/0x360 > [] ? nla_parse+0x80/0xf0 > [] ? nfnetlink_rcv_msg+0x13c/0x240 > [] ? __memcg_kmem_get_cache+0x4c/0x150 > [] ? nfnl_lock+0x20/0x20 > [] ? netlink_rcv_skb+0xa9/0xc0 > [] ? netlink_unicast+0x12f/0x1c0 > [] ? netlink_sendmsg+0x28e/0x650 > [] ? sock_sendmsg+0x44/0x50 > [] ? ___sys_sendmsg+0x2ab/0x2c0 > [] ? __wake_up+0x43/0x70 > [] ? tty_write+0x1c4/0x2a0 > [] ? __sys_sendmsg+0x44/0x80 > [] ? system_call_fastpath+0x12/0x6a > Code: Bad RIP value. > RIP [<0000000100000001>] 0x100000001 > RSP > CR2: 0000000100000001 > ---[ end trace 08eb65d42362793f ]--- Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/net/netfilter/nf_queue.h | 2 ++ net/netfilter/core.c | 1 + net/netfilter/nf_internals.h | 1 + net/netfilter/nf_queue.c | 17 +++++++++++++++++ net/netfilter/nfnetlink_queue_core.c | 24 +++++++++++++++++++++++- 5 files changed, 44 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index d81d584157e1..e8635854a55b 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -24,6 +24,8 @@ struct nf_queue_entry { struct nf_queue_handler { int (*outfn)(struct nf_queue_entry *entry, unsigned int queuenum); + void (*nf_hook_drop)(struct net *net, + struct nf_hook_ops *ops); }; void nf_register_queue_handler(const struct nf_queue_handler *qh); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 653e32eac08c..a0e54974e2c9 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -118,6 +118,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg) static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); + nf_queue_nf_hook_drop(reg); } EXPORT_SYMBOL(nf_unregister_hook); diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index ea7f36784b3d..399210693c2a 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -19,6 +19,7 @@ unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, /* nf_queue.c */ int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, struct nf_hook_state *state, unsigned int queuenum); +void nf_queue_nf_hook_drop(struct nf_hook_ops *ops); int __init netfilter_queue_init(void); /* nf_log.c */ diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 2e88032cd5ad..cd60d397fe05 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -105,6 +105,23 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); +void nf_queue_nf_hook_drop(struct nf_hook_ops *ops) +{ + const struct nf_queue_handler *qh; + struct net *net; + + rtnl_lock(); + rcu_read_lock(); + qh = rcu_dereference(queue_handler); + if (qh) { + for_each_net(net) { + qh->nf_hook_drop(net, ops); + } + } + rcu_read_unlock(); + rtnl_unlock(); +} + /* * Any packet that leaves via this function must come back * through nf_reinject(). diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index e26a46ef19ba..685cc6a17163 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -850,6 +850,27 @@ static struct notifier_block nfqnl_dev_notifier = { .notifier_call = nfqnl_rcv_dev_event, }; +static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long ops_ptr) +{ + return entry->elem == (struct nf_hook_ops *)ops_ptr; +} + +static void nfqnl_nf_hook_drop(struct net *net, struct nf_hook_ops *hook) +{ + struct nfnl_queue_net *q = nfnl_queue_pernet(net); + int i; + + rcu_read_lock(); + for (i = 0; i < INSTANCE_BUCKETS; i++) { + struct nfqnl_instance *inst; + struct hlist_head *head = &q->instance_table[i]; + + hlist_for_each_entry_rcu(inst, head, hlist) + nfqnl_flush(inst, nf_hook_cmp, (unsigned long)hook); + } + rcu_read_unlock(); +} + static int nfqnl_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) @@ -1057,7 +1078,8 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { }; static const struct nf_queue_handler nfqh = { - .outfn = &nfqnl_enqueue_packet, + .outfn = &nfqnl_enqueue_packet, + .nf_hook_drop = &nfqnl_nf_hook_drop, }; static int -- cgit v1.2.3 From 3e3a78b49508e58f798cf519876bbb9ca0f931af Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Mon, 22 Jun 2015 00:27:16 -0700 Subject: switchdev: rename vlan vid_start to vid_begin Use vid_begin/end to be consistent with BRIDGE_VLAN_INFO_RANGE_BEGIN/END. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.c | 4 ++-- include/net/switchdev.h | 2 +- net/bridge/br_vlan.c | 4 ++-- net/switchdev/switchdev.c | 12 ++++++------ 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index d4ec660bb3b7..7d5d92a10284 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4295,7 +4295,7 @@ static int rocker_port_vlans_add(struct rocker_port *rocker_port, u16 vid; int err; - for (vid = vlan->vid_start; vid <= vlan->vid_end; vid++) { + for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { err = rocker_port_vlan_add(rocker_port, trans, vid, vlan->flags); if (err) @@ -4378,7 +4378,7 @@ static int rocker_port_vlans_del(struct rocker_port *rocker_port, u16 vid; int err; - for (vid = vlan->vid_start; vid <= vlan->vid_end; vid++) { + for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { err = rocker_port_vlan_del(rocker_port, vid, vlan->flags); if (err) return err; diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 437f8fe75705..d5671f118bfc 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -57,7 +57,7 @@ struct switchdev_obj { union { struct switchdev_obj_vlan { /* PORT_VLAN */ u16 flags; - u16 vid_start; + u16 vid_begin; u16 vid_end; } vlan; struct switchdev_obj_ipv4_fib { /* IPV4_FIB */ diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 17fc358a5432..574feea6a8cc 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -54,7 +54,7 @@ static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, .id = SWITCHDEV_OBJ_PORT_VLAN, .u.vlan = { .flags = flags, - .vid_start = vid, + .vid_begin = vid, .vid_end = vid, }, }; @@ -132,7 +132,7 @@ static void __vlan_vid_del(struct net_device *dev, struct net_bridge *br, struct switchdev_obj vlan_obj = { .id = SWITCHDEV_OBJ_PORT_VLAN, .u.vlan = { - .vid_start = vid, + .vid_begin = vid, .vid_end = vid, }, }; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index c29f2327f2e6..448d9199cea2 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -510,23 +510,23 @@ static int switchdev_port_br_afspec(struct net_device *dev, vinfo = nla_data(attr); vlan->flags = vinfo->flags; if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { - if (vlan->vid_start) + if (vlan->vid_begin) return -EINVAL; - vlan->vid_start = vinfo->vid; + vlan->vid_begin = vinfo->vid; } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) { - if (!vlan->vid_start) + if (!vlan->vid_begin) return -EINVAL; vlan->vid_end = vinfo->vid; - if (vlan->vid_end <= vlan->vid_start) + if (vlan->vid_end <= vlan->vid_begin) return -EINVAL; err = f(dev, &obj); if (err) return err; memset(vlan, 0, sizeof(*vlan)); } else { - if (vlan->vid_start) + if (vlan->vid_begin) return -EINVAL; - vlan->vid_start = vinfo->vid; + vlan->vid_begin = vinfo->vid; vlan->vid_end = vinfo->vid; err = f(dev, &obj); if (err) -- cgit v1.2.3 From 7d4f8d871ab15bd50a5771382ca2c9355b38d73c Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Mon, 22 Jun 2015 00:27:17 -0700 Subject: switchdev; add VLAN support for port's bridge_getlink One more missing piece of the puzzle. Add vlan dump support to switchdev port's bridge_getlink. iproute2 "bridge vlan show" cmd already knows how to show the vlans installed on the bridge and the device , but (until now) no one implemented the port vlan part of the netlink PF_BRIDGE:RTM_GETLINK msg. Before this patch, "bridge vlan show": $ bridge -c vlan show port vlan ids sw1p1 30-34 << bridge side vlans 57 sw1p1 << device side vlans (missing) sw1p2 57 sw1p2 sw1p3 sw1p4 br0 None (When the port is bridged, the output repeats the vlan list for the vlans on the bridge side of the port and the vlans on the device side of the port. The listing above show no vlans for the device side even though they are installed). After this patch: $ bridge -c vlan show port vlan ids sw1p1 30-34 << bridge side vlan 57 sw1p1 30-34 << device side vlans 57 3840 PVID sw1p2 57 sw1p2 57 3840 PVID sw1p3 3842 PVID sw1p4 3843 PVID br0 None I re-used ndo_dflt_bridge_getlink to add vlan fill call-back func. switchdev support adds an obj dump for VLAN objects, using the same call-back scheme as FDB dump. Support included for both compressed and un-compressed vlan dumps. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +- drivers/net/ethernet/rocker/rocker.c | 25 ++++++ include/linux/rtnetlink.h | 6 +- net/core/rtnetlink.c | 18 +++- net/switchdev/switchdev.c | 123 +++++++++++++++++++++++++- 7 files changed, 172 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index c0f34845cf59..6f642426308c 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5096,7 +5096,7 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return ndo_dflt_bridge_getlink(skb, pid, seq, dev, hsw_mode == PORT_FWD_TYPE_VEPA ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB, - 0, 0, nlflags); + 0, 0, nlflags, filter_mask, NULL); } #ifdef CONFIG_BE2NET_VXLAN diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 52d7d8b8f1f9..48a52b35b614 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8069,7 +8069,7 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, #ifdef HAVE_BRIDGE_FILTER static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, - u32 __always_unused filter_mask, int nlflags) + u32 filter_mask, int nlflags) #else static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, int nlflags) @@ -8095,7 +8095,7 @@ static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return 0; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, veb->bridge_mode, - nlflags); + nlflags, 0, 0, filter_mask, NULL); } #endif /* HAVE_BRIDGE_ATTRIBS */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 3bf2f3cfd9f6..9aa6104e34ea 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8095,7 +8095,8 @@ static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return 0; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, - adapter->bridge_mode, 0, 0, nlflags); + adapter->bridge_mode, 0, 0, nlflags, + filter_mask, NULL); } static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 7d5d92a10284..83e913b60d9c 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4456,6 +4456,28 @@ static int rocker_port_fdb_dump(const struct rocker_port *rocker_port, return err; } +static int rocker_port_vlan_dump(const struct rocker_port *rocker_port, + struct switchdev_obj *obj) +{ + struct switchdev_obj_vlan *vlan = &obj->u.vlan; + u16 vid; + int err = 0; + + for (vid = 1; vid < VLAN_N_VID; vid++) { + if (!test_bit(vid, rocker_port->vlan_bitmap)) + continue; + vlan->flags = 0; + if (rocker_vlan_id_is_internal(htons(vid))) + vlan->flags |= BRIDGE_VLAN_INFO_PVID; + vlan->vid_begin = vlan->vid_end = vid; + err = obj->cb(rocker_port->dev, obj); + if (err) + break; + } + + return err; +} + static int rocker_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj) { @@ -4466,6 +4488,9 @@ static int rocker_port_obj_dump(struct net_device *dev, case SWITCHDEV_OBJ_PORT_FDB: err = rocker_port_fdb_dump(rocker_port, obj); break; + case SWITCHDEV_OBJ_PORT_VLAN: + err = rocker_port_vlan_dump(rocker_port, obj); + break; default: err = -EOPNOTSUPP; break; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index a2324fb45cf4..39adaa9529eb 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -114,5 +114,9 @@ extern int ndo_dflt_fdb_del(struct ndmsg *ndm, extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, - u32 flags, u32 mask, int nlflags); + u32 flags, u32 mask, int nlflags, + u32 filter_mask, + int (*vlan_fill)(struct sk_buff *skb, + struct net_device *dev, + u32 filter_mask)); #endif /* __LINUX_RTNETLINK_H */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2d102ce1474f..01ced4a889e0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2908,7 +2908,11 @@ static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask, int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, - u32 flags, u32 mask, int nlflags) + u32 flags, u32 mask, int nlflags, + u32 filter_mask, + int (*vlan_fill)(struct sk_buff *skb, + struct net_device *dev, + u32 filter_mask)) { struct nlmsghdr *nlh; struct ifinfomsg *ifm; @@ -2916,6 +2920,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct nlattr *protinfo; u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; struct net_device *br_dev = netdev_master_upper_dev_get(dev); + int err = 0; nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), nlflags); if (nlh == NULL) @@ -2956,6 +2961,13 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, goto nla_put_failure; } } + if (vlan_fill) { + err = vlan_fill(skb, dev, filter_mask); + if (err) { + nla_nest_cancel(skb, br_afspec); + goto nla_put_failure; + } + } nla_nest_end(skb, br_afspec); protinfo = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); @@ -2989,9 +3001,9 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return 0; nla_put_failure: nlmsg_cancel(skb, nlh); - return -EMSGSIZE; + return err ? err : -EMSGSIZE; } -EXPORT_SYMBOL(ndo_dflt_bridge_getlink); +EXPORT_SYMBOL_GPL(ndo_dflt_bridge_getlink); static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) { diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 448d9199cea2..3e4b35a0c8cb 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -391,6 +391,126 @@ int call_switchdev_notifiers(unsigned long val, struct net_device *dev, } EXPORT_SYMBOL_GPL(call_switchdev_notifiers); +struct switchdev_vlan_dump { + struct switchdev_obj obj; + struct sk_buff *skb; + u32 filter_mask; + u16 flags; + u16 begin; + u16 end; +}; + +static int switchdev_port_vlan_dump_put(struct net_device *dev, + struct switchdev_vlan_dump *dump) +{ + struct bridge_vlan_info vinfo; + + vinfo.flags = dump->flags; + + if (dump->begin == 0 && dump->end == 0) { + return 0; + } else if (dump->begin == dump->end) { + vinfo.vid = dump->begin; + if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + return -EMSGSIZE; + } else { + vinfo.vid = dump->begin; + vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN; + if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + return -EMSGSIZE; + vinfo.vid = dump->end; + vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN; + vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END; + if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + return -EMSGSIZE; + } + + return 0; +} + +static int switchdev_port_vlan_dump_cb(struct net_device *dev, + struct switchdev_obj *obj) +{ + struct switchdev_vlan_dump *dump = + container_of(obj, struct switchdev_vlan_dump, obj); + struct switchdev_obj_vlan *vlan = &dump->obj.u.vlan; + int err = 0; + + if (vlan->vid_begin > vlan->vid_end) + return -EINVAL; + + if (dump->filter_mask & RTEXT_FILTER_BRVLAN) { + dump->flags = vlan->flags; + for (dump->begin = dump->end = vlan->vid_begin; + dump->begin <= vlan->vid_end; + dump->begin++, dump->end++) { + err = switchdev_port_vlan_dump_put(dev, dump); + if (err) + return err; + } + } else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) { + if (dump->begin > vlan->vid_begin && + dump->begin >= vlan->vid_end) { + if ((dump->begin - 1) == vlan->vid_end && + dump->flags == vlan->flags) { + /* prepend */ + dump->begin = vlan->vid_begin; + } else { + err = switchdev_port_vlan_dump_put(dev, dump); + dump->flags = vlan->flags; + dump->begin = vlan->vid_begin; + dump->end = vlan->vid_end; + } + } else if (dump->end <= vlan->vid_begin && + dump->end < vlan->vid_end) { + if ((dump->end + 1) == vlan->vid_begin && + dump->flags == vlan->flags) { + /* append */ + dump->end = vlan->vid_end; + } else { + err = switchdev_port_vlan_dump_put(dev, dump); + dump->flags = vlan->flags; + dump->begin = vlan->vid_begin; + dump->end = vlan->vid_end; + } + } else { + err = -EINVAL; + } + } + + return err; +} + +static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev, + u32 filter_mask) +{ + struct switchdev_vlan_dump dump = { + .obj = { + .id = SWITCHDEV_OBJ_PORT_VLAN, + .cb = switchdev_port_vlan_dump_cb, + }, + .skb = skb, + .filter_mask = filter_mask, + }; + int err = 0; + + if ((filter_mask & RTEXT_FILTER_BRVLAN) || + (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { + err = switchdev_port_obj_dump(dev, &dump.obj); + if (err) + goto err_out; + if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) + /* last one */ + err = switchdev_port_vlan_dump_put(dev, &dump); + } + +err_out: + return err == -EOPNOTSUPP ? 0 : err; +} + /** * switchdev_port_bridge_getlink - Get bridge port attributes * @@ -415,7 +535,8 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return err; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, - attr.u.brport_flags, mask, nlflags); + attr.u.brport_flags, mask, nlflags, + filter_mask, switchdev_port_vlan_fill); } EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink); -- cgit v1.2.3 From 42bcce87d763b4d22dc6d3a0c0b60c6b49820de8 Mon Sep 17 00:00:00 2001 From: Anish Bhatt Date: Mon, 22 Jun 2015 17:44:35 -0700 Subject: dcb : Fix incorrect documentation for struct dcb_app While IEEE and CEE use the same structure to store apps, the selector and priority fields for both are different. Only the priority field is explained, add documentation explaining how the selector field differs for both. cgdcbxd code shows an example of how selector fields differ. Signed-off-by: Anish Bhatt Signed-off-by: David S. Miller --- include/uapi/linux/dcbnl.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h index 6497d7933d5b..3ea470f35e40 100644 --- a/include/uapi/linux/dcbnl.h +++ b/include/uapi/linux/dcbnl.h @@ -207,8 +207,7 @@ struct cee_pfc { #define IEEE_8021QAZ_APP_SEL_ANY 4 /* This structure contains the IEEE 802.1Qaz APP managed object. This - * object is also used for the CEE std as well. There is no difference - * between the objects. + * object is also used for the CEE std as well. * * @selector: protocol identifier type * @protocol: protocol of type indicated @@ -216,13 +215,18 @@ struct cee_pfc { * 8-bit 802.1p user priority bitmap for CEE * * ---- - * Selector field values + * Selector field values for IEEE 802.1Qaz * 0 Reserved * 1 Ethertype * 2 Well known port number over TCP or SCTP * 3 Well known port number over UDP or DCCP * 4 Well known port number over TCP, SCTP, UDP, or DCCP * 5-7 Reserved + * + * Selector field values for CEE + * 0 Ethertype + * 1 Well known port number over TCP or UDP + * 2-3 Reserved */ struct dcb_app { __u8 selector; -- cgit v1.2.3 From 8a3d03166f19329b46c6f9e900f93a89f446077b Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Tue, 23 Jun 2015 13:45:36 -0400 Subject: net: track link-status of ipv4 nexthops Add a fib flag called RTNH_F_LINKDOWN to any ipv4 nexthops that are reachable via an interface where carrier is off. No action is taken, but additional flags are passed to userspace to indicate carrier status. This also includes a cleanup to fib_disable_ip to more clearly indicate what event made the function call to replace the more cryptic force option previously used. v2: Split out kernel functionality into 2 patches, this patch simply sets and clears new nexthop flag RTNH_F_LINKDOWN. v3: Cleanups suggested by Alex as well as a bug noticed in fib_sync_down_dev and fib_sync_up when multipath was not enabled. v5: Whitespace and variable declaration fixups suggested by Dave. v6: Style fixups noticed by Dave; ran checkpatch to be sure I got them all. Signed-off-by: Andy Gospodarek Signed-off-by: Dinesh Dutt Acked-by: Scott Feldman Signed-off-by: David S. Miller --- include/net/ip_fib.h | 4 +-- include/uapi/linux/rtnetlink.h | 3 +++ net/ipv4/fib_frontend.c | 23 ++++++++++------ net/ipv4/fib_semantics.c | 60 +++++++++++++++++++++++++++++++++--------- 4 files changed, 67 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 54271ed0ed45..f73d27c5575a 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -305,9 +305,9 @@ void fib_flush_external(struct net *net); /* Exported by fib_semantics.c */ int ip_fib_check_default(__be32 gw, struct net_device *dev); -int fib_sync_down_dev(struct net_device *dev, int force); +int fib_sync_down_dev(struct net_device *dev, unsigned long event); int fib_sync_down_addr(struct net *net, __be32 local); -int fib_sync_up(struct net_device *dev); +int fib_sync_up(struct net_device *dev, unsigned int nh_flags); void fib_select_multipath(struct fib_result *res); /* Exported by fib_trie.c */ diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 17fb02f488da..fdd8f07f1d34 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -338,6 +338,9 @@ struct rtnexthop { #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ #define RTNH_F_ONLINK 4 /* Gateway is forced on link */ #define RTNH_F_OFFLOAD 8 /* offloaded route */ +#define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */ + +#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN) /* Macros to handle hexthops */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 872494e6e6eb..534eb1485045 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1063,9 +1063,9 @@ static void nl_fib_lookup_exit(struct net *net) net->ipv4.fibnl = NULL; } -static void fib_disable_ip(struct net_device *dev, int force) +static void fib_disable_ip(struct net_device *dev, unsigned long event) { - if (fib_sync_down_dev(dev, force)) + if (fib_sync_down_dev(dev, event)) fib_flush(dev_net(dev)); rt_cache_flush(dev_net(dev)); arp_ifdown(dev); @@ -1081,7 +1081,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, case NETDEV_UP: fib_add_ifaddr(ifa); #ifdef CONFIG_IP_ROUTE_MULTIPATH - fib_sync_up(dev); + fib_sync_up(dev, RTNH_F_DEAD); #endif atomic_inc(&net->ipv4.dev_addr_genid); rt_cache_flush(dev_net(dev)); @@ -1093,7 +1093,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, /* Last address was deleted from this interface. * Disable IP. */ - fib_disable_ip(dev, 1); + fib_disable_ip(dev, event); } else { rt_cache_flush(dev_net(dev)); } @@ -1107,9 +1107,10 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct in_device *in_dev; struct net *net = dev_net(dev); + unsigned int flags; if (event == NETDEV_UNREGISTER) { - fib_disable_ip(dev, 2); + fib_disable_ip(dev, event); rt_flush_dev(dev); return NOTIFY_DONE; } @@ -1124,16 +1125,22 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_add_ifaddr(ifa); } endfor_ifa(in_dev); #ifdef CONFIG_IP_ROUTE_MULTIPATH - fib_sync_up(dev); + fib_sync_up(dev, RTNH_F_DEAD); #endif atomic_inc(&net->ipv4.dev_addr_genid); rt_cache_flush(net); break; case NETDEV_DOWN: - fib_disable_ip(dev, 0); + fib_disable_ip(dev, event); break; - case NETDEV_CHANGEMTU: case NETDEV_CHANGE: + flags = dev_get_flags(dev); + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + fib_sync_up(dev, RTNH_F_LINKDOWN); + else + fib_sync_down_dev(dev, event); + /* fall through */ + case NETDEV_CHANGEMTU: rt_cache_flush(net); break; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 28ec3c1823bf..b1b305b1e340 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -266,7 +266,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid != onh->nh_tclassid || #endif - ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) + ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK)) return -1; onh++; } endfor_nexthops(fi); @@ -318,7 +318,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) nfi->fib_type == fi->fib_type && memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(u32) * RTAX_MAX) == 0 && - ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 && + !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) && (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) return fi; } @@ -604,6 +604,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, return -ENODEV; if (!(dev->flags & IFF_UP)) return -ENETDOWN; + if (!netif_carrier_ok(dev)) + nh->nh_flags |= RTNH_F_LINKDOWN; nh->nh_dev = dev; dev_hold(dev); nh->nh_scope = RT_SCOPE_LINK; @@ -636,6 +638,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, if (!dev) goto out; dev_hold(dev); + if (!netif_carrier_ok(dev)) + nh->nh_flags |= RTNH_F_LINKDOWN; err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN; } else { struct in_device *in_dev; @@ -654,6 +658,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, nh->nh_dev = in_dev->dev; dev_hold(nh->nh_dev); nh->nh_scope = RT_SCOPE_HOST; + if (!netif_carrier_ok(nh->nh_dev)) + nh->nh_flags |= RTNH_F_LINKDOWN; err = 0; } out: @@ -920,11 +926,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (!nh->nh_dev) goto failure; } else { + int linkdown = 0; + change_nexthops(fi) { err = fib_check_nh(cfg, fi, nexthop_nh); if (err != 0) goto failure; + if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN) + linkdown++; } endfor_nexthops(fi) + if (linkdown == fi->fib_nhs) + fi->fib_flags |= RTNH_F_LINKDOWN; } if (fi->fib_prefsrc) { @@ -1103,7 +1115,7 @@ int fib_sync_down_addr(struct net *net, __be32 local) return ret; } -int fib_sync_down_dev(struct net_device *dev, int force) +int fib_sync_down_dev(struct net_device *dev, unsigned long event) { int ret = 0; int scope = RT_SCOPE_NOWHERE; @@ -1112,7 +1124,8 @@ int fib_sync_down_dev(struct net_device *dev, int force) struct hlist_head *head = &fib_info_devhash[hash]; struct fib_nh *nh; - if (force) + if (event == NETDEV_UNREGISTER || + event == NETDEV_DOWN) scope = -1; hlist_for_each_entry(nh, head, nh_hash) { @@ -1129,7 +1142,15 @@ int fib_sync_down_dev(struct net_device *dev, int force) dead++; else if (nexthop_nh->nh_dev == dev && nexthop_nh->nh_scope != scope) { - nexthop_nh->nh_flags |= RTNH_F_DEAD; + switch (event) { + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + nexthop_nh->nh_flags |= RTNH_F_DEAD; + /* fall through */ + case NETDEV_CHANGE: + nexthop_nh->nh_flags |= RTNH_F_LINKDOWN; + break; + } #ifdef CONFIG_IP_ROUTE_MULTIPATH spin_lock_bh(&fib_multipath_lock); fi->fib_power -= nexthop_nh->nh_power; @@ -1139,14 +1160,23 @@ int fib_sync_down_dev(struct net_device *dev, int force) dead++; } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (force > 1 && nexthop_nh->nh_dev == dev) { + if (event == NETDEV_UNREGISTER && + nexthop_nh->nh_dev == dev) { dead = fi->fib_nhs; break; } #endif } endfor_nexthops(fi) if (dead == fi->fib_nhs) { - fi->fib_flags |= RTNH_F_DEAD; + switch (event) { + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + fi->fib_flags |= RTNH_F_DEAD; + /* fall through */ + case NETDEV_CHANGE: + fi->fib_flags |= RTNH_F_LINKDOWN; + break; + } ret++; } } @@ -1210,13 +1240,11 @@ out: return; } -#ifdef CONFIG_IP_ROUTE_MULTIPATH - /* * Dead device goes up. We wake up dead nexthops. * It takes sense only on multipath routes. */ -int fib_sync_up(struct net_device *dev) +int fib_sync_up(struct net_device *dev, unsigned int nh_flags) { struct fib_info *prev_fi; unsigned int hash; @@ -1243,7 +1271,7 @@ int fib_sync_up(struct net_device *dev) prev_fi = fi; alive = 0; change_nexthops(fi) { - if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) { + if (!(nexthop_nh->nh_flags & nh_flags)) { alive++; continue; } @@ -1254,14 +1282,18 @@ int fib_sync_up(struct net_device *dev) !__in_dev_get_rtnl(dev)) continue; alive++; +#ifdef CONFIG_IP_ROUTE_MULTIPATH spin_lock_bh(&fib_multipath_lock); nexthop_nh->nh_power = 0; - nexthop_nh->nh_flags &= ~RTNH_F_DEAD; + nexthop_nh->nh_flags &= ~nh_flags; spin_unlock_bh(&fib_multipath_lock); +#else + nexthop_nh->nh_flags &= ~nh_flags; +#endif } endfor_nexthops(fi) if (alive > 0) { - fi->fib_flags &= ~RTNH_F_DEAD; + fi->fib_flags &= ~nh_flags; ret++; } } @@ -1269,6 +1301,8 @@ int fib_sync_up(struct net_device *dev) return ret; } +#ifdef CONFIG_IP_ROUTE_MULTIPATH + /* * The algorithm is suboptimal, but it provides really * fair weighted route distribution. -- cgit v1.2.3 From 0eeb075fad736fb92620af995c47c204bbb5e829 Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Tue, 23 Jun 2015 13:45:37 -0400 Subject: net: ipv4 sysctl option to ignore routes when nexthop link is down This feature is only enabled with the new per-interface or ipv4 global sysctls called 'ignore_routes_with_linkdown'. net.ipv4.conf.all.ignore_routes_with_linkdown = 0 net.ipv4.conf.default.ignore_routes_with_linkdown = 0 net.ipv4.conf.lo.ignore_routes_with_linkdown = 0 ... When the above sysctls are set, will report to userspace that a route is dead and will no longer resolve to this nexthop when performing a fib lookup. This will signal to userspace that the route will not be selected. The signalling of a RTNH_F_DEAD is only passed to userspace if the sysctl is enabled and link is down. This was done as without it the netlink listeners would have no idea whether or not a nexthop would be selected. The kernel only sets RTNH_F_DEAD internally if the interface has IFF_UP cleared. With the new sysctl set, the following behavior can be observed (interface p8p1 is link-down): default via 10.0.5.2 dev p9p1 10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15 70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1 80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 dead linkdown 90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 dead linkdown 90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2 90.0.0.1 via 70.0.0.2 dev p7p1 src 70.0.0.1 cache local 80.0.0.1 dev lo src 80.0.0.1 cache 80.0.0.2 via 10.0.5.2 dev p9p1 src 10.0.5.15 cache While the route does remain in the table (so it can be modified if needed rather than being wiped away as it would be if IFF_UP was cleared), the proper next-hop is chosen automatically when the link is down. Now interface p8p1 is linked-up: default via 10.0.5.2 dev p9p1 10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15 70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1 80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2 192.168.56.0/24 dev p2p1 proto kernel scope link src 192.168.56.2 90.0.0.1 via 80.0.0.2 dev p8p1 src 80.0.0.1 cache local 80.0.0.1 dev lo src 80.0.0.1 cache 80.0.0.2 dev p8p1 src 80.0.0.1 cache and the output changes to what one would expect. If the sysctl is not set, the following output would be expected when p8p1 is down: default via 10.0.5.2 dev p9p1 10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15 70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1 80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 linkdown 90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 linkdown 90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2 Since the dead flag does not appear, there should be no expectation that the kernel would skip using this route due to link being down. v2: Split kernel changes into 2 patches, this actually makes a behavioral change if the sysctl is set. Also took suggestion from Alex to simplify code by only checking sysctl during fib lookup and suggestion from Scott to add a per-interface sysctl. v3: Code clean-ups to make it more readable and efficient as well as a reverse path check fix. v4: Drop binary sysctl v5: Whitespace fixups from Dave v6: Style changes from Dave and checkpatch suggestions v7: One more checkpatch fixup Signed-off-by: Andy Gospodarek Signed-off-by: Dinesh Dutt Acked-by: Scott Feldman Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 3 +++ include/net/fib_rules.h | 3 ++- include/net/ip_fib.h | 16 +++++++++------- include/uapi/linux/ip.h | 1 + net/ipv4/devinet.c | 2 ++ net/ipv4/fib_frontend.c | 6 +++--- net/ipv4/fib_rules.c | 5 +++-- net/ipv4/fib_semantics.c | 33 ++++++++++++++++++++++++++++----- net/ipv4/fib_trie.c | 6 ++++++ net/ipv4/netfilter/ipt_rpfilter.c | 2 +- net/ipv4/route.c | 10 +++++----- 11 files changed, 63 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 0a21fbefdfbe..a4328cea376a 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -120,6 +120,9 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) || (!IN_DEV_FORWARD(in_dev) && \ IN_DEV_ORCONF((in_dev), ACCEPT_REDIRECTS))) +#define IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) \ + IN_DEV_CONF_GET((in_dev), IGNORE_ROUTES_WITH_LINKDOWN) + #define IN_DEV_ARPFILTER(in_dev) IN_DEV_ORCONF((in_dev), ARPFILTER) #define IN_DEV_ARP_ACCEPT(in_dev) IN_DEV_ORCONF((in_dev), ARP_ACCEPT) #define IN_DEV_ARP_ANNOUNCE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 6d67383a5114..903a55efbffe 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -36,7 +36,8 @@ struct fib_lookup_arg { void *result; struct fib_rule *rule; int flags; -#define FIB_LOOKUP_NOREF 1 +#define FIB_LOOKUP_NOREF 1 +#define FIB_LOOKUP_IGNORE_LINKSTATE 2 }; struct fib_rules_ops { diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f73d27c5575a..49c142bdf01e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -226,7 +226,7 @@ static inline struct fib_table *fib_new_table(struct net *net, u32 id) } static inline int fib_lookup(struct net *net, const struct flowi4 *flp, - struct fib_result *res) + struct fib_result *res, unsigned int flags) { struct fib_table *tb; int err = -ENETUNREACH; @@ -234,7 +234,7 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp, rcu_read_lock(); tb = fib_get_table(net, RT_TABLE_MAIN); - if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF)) + if (tb && !fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF)) err = 0; rcu_read_unlock(); @@ -249,16 +249,18 @@ void __net_exit fib4_rules_exit(struct net *net); struct fib_table *fib_new_table(struct net *net, u32 id); struct fib_table *fib_get_table(struct net *net, u32 id); -int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res); +int __fib_lookup(struct net *net, struct flowi4 *flp, + struct fib_result *res, unsigned int flags); static inline int fib_lookup(struct net *net, struct flowi4 *flp, - struct fib_result *res) + struct fib_result *res, unsigned int flags) { struct fib_table *tb; int err; + flags |= FIB_LOOKUP_NOREF; if (net->ipv4.fib_has_custom_rules) - return __fib_lookup(net, flp, res); + return __fib_lookup(net, flp, res, flags); rcu_read_lock(); @@ -266,11 +268,11 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, for (err = 0; !err; err = -ENETUNREACH) { tb = rcu_dereference_rtnl(net->ipv4.fib_main); - if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF)) + if (tb && !fib_table_lookup(tb, flp, res, flags)) break; tb = rcu_dereference_rtnl(net->ipv4.fib_default); - if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF)) + if (tb && !fib_table_lookup(tb, flp, res, flags)) break; } diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index 411959405ab6..08f894d2ddbd 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -164,6 +164,7 @@ enum IPV4_DEVCONF_ROUTE_LOCALNET, IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL, IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL, + IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, __IPV4_DEVCONF_MAX }; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 419d23c53ec7..7498716e8f54 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2169,6 +2169,8 @@ static struct devinet_sysctl_table { "igmpv2_unsolicited_report_interval"), DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL, "igmpv3_unsolicited_report_interval"), + DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN, + "ignore_routes_with_linkdown"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 534eb1485045..6bbc54940eb4 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -280,7 +280,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_scope = scope; fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; - if (!fib_lookup(net, &fl4, &res)) + if (!fib_lookup(net, &fl4, &res, 0)) return FIB_RES_PREFSRC(net, res); } else { scope = RT_SCOPE_LINK; @@ -319,7 +319,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0; net = dev_net(dev); - if (fib_lookup(net, &fl4, &res)) + if (fib_lookup(net, &fl4, &res, 0)) goto last_resort; if (res.type != RTN_UNICAST && (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) @@ -354,7 +354,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_oif = dev->ifindex; ret = 0; - if (fib_lookup(net, &fl4, &res) == 0) { + if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) { if (res.type == RTN_UNICAST) ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 56151982f74e..18123d50f576 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -47,11 +47,12 @@ struct fib4_rule { #endif }; -int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) +int __fib_lookup(struct net *net, struct flowi4 *flp, + struct fib_result *res, unsigned int flags) { struct fib_lookup_arg arg = { .result = res, - .flags = FIB_LOOKUP_NOREF, + .flags = flags, }; int err; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b1b305b1e340..3bfccd83551c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -623,7 +623,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, /* It is not necessary, but requires a bit of thinking */ if (fl4.flowi4_scope < RT_SCOPE_LINK) fl4.flowi4_scope = RT_SCOPE_LINK; - err = fib_lookup(net, &fl4, &res); + err = fib_lookup(net, &fl4, &res, + FIB_LOOKUP_IGNORE_LINKSTATE); if (err) { rcu_read_unlock(); return err; @@ -1035,12 +1036,20 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) goto nla_put_failure; if (fi->fib_nhs == 1) { + struct in_device *in_dev; + if (fi->fib_nh->nh_gw && nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) goto nla_put_failure; if (fi->fib_nh->nh_oif && nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) goto nla_put_failure; + if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) { + in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev); + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) + rtm->rtm_flags |= RTNH_F_DEAD; + } #ifdef CONFIG_IP_ROUTE_CLASSID if (fi->fib_nh[0].nh_tclassid && nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) @@ -1057,11 +1066,19 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; for_nexthops(fi) { + struct in_device *in_dev; + rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); if (!rtnh) goto nla_put_failure; rtnh->rtnh_flags = nh->nh_flags & 0xFF; + if (nh->nh_flags & RTNH_F_LINKDOWN) { + in_dev = __in_dev_get_rcu(nh->nh_dev); + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) + rtnh->rtnh_flags |= RTNH_F_DEAD; + } rtnh->rtnh_hops = nh->nh_weight - 1; rtnh->rtnh_ifindex = nh->nh_oif; @@ -1310,16 +1327,22 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) void fib_select_multipath(struct fib_result *res) { struct fib_info *fi = res->fi; + struct in_device *in_dev; int w; spin_lock_bh(&fib_multipath_lock); if (fi->fib_power <= 0) { int power = 0; change_nexthops(fi) { - if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) { - power += nexthop_nh->nh_weight; - nexthop_nh->nh_power = nexthop_nh->nh_weight; - } + in_dev = __in_dev_get_rcu(nexthop_nh->nh_dev); + if (nexthop_nh->nh_flags & RTNH_F_DEAD) + continue; + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + nexthop_nh->nh_flags & RTNH_F_LINKDOWN) + continue; + power += nexthop_nh->nh_weight; + nexthop_nh->nh_power = nexthop_nh->nh_weight; } endfor_nexthops(fi); fi->fib_power = power; if (power <= 0) { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 6c666a9f1bd5..15d32612e3c6 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1412,9 +1412,15 @@ found: continue; for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { const struct fib_nh *nh = &fi->fib_nh[nhsel]; + struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev); if (nh->nh_flags & RTNH_F_DEAD) continue; + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + nh->nh_flags & RTNH_F_LINKDOWN && + !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) + continue; if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) continue; diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 4bfaedf9b34e..8618fd150c96 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -40,7 +40,7 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4, struct net *net = dev_net(dev); int ret __maybe_unused; - if (fib_lookup(net, fl4, &res)) + if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) return false; if (res.type != RTN_UNICAST) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f6055984c307..d0362a2de3d3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -747,7 +747,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow if (!(n->nud_state & NUD_VALID)) { neigh_event_send(n, NULL); } else { - if (fib_lookup(net, fl4, &res) == 0) { + if (fib_lookup(net, fl4, &res, 0) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, new_gw, @@ -975,7 +975,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) return; rcu_read_lock(); - if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { + if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, 0, mtu, @@ -1186,7 +1186,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) fl4.flowi4_mark = skb->mark; rcu_read_lock(); - if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) + if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0) src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); else src = inet_select_addr(rt->dst.dev, @@ -1716,7 +1716,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.daddr = daddr; fl4.saddr = saddr; - err = fib_lookup(net, &fl4, &res); + err = fib_lookup(net, &fl4, &res, 0); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) err = -EHOSTUNREACH; @@ -2123,7 +2123,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) goto make_route; } - if (fib_lookup(net, fl4, &res)) { + if (fib_lookup(net, fl4, &res, 0)) { res.fi = NULL; res.table = NULL; if (fl4->flowi4_oif) { -- cgit v1.2.3 From 204621551b2a0060a013b92f7add4d5c452fa7cb Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 24 Jun 2015 11:02:51 +0200 Subject: net: inet_diag: export IPV6_V6ONLY sockopt For AF_INET6 sockets, the value of struct ipv6_pinfo.ipv6only is exported to userspace. It indicates whether a socket bound to in6addr_any listens on IPv4 as well as IPv6. Since the socket is natively IPv6, it is not listed by e.g. 'ss -l -4'. This patch is accompanied by an appropriate one for iproute2 to enable the additional information in 'ss -e'. Signed-off-by: Phil Sutter Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 3 ++- net/ipv4/inet_diag.c | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index b629fc53b109..68a1f71fde9f 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -112,9 +112,10 @@ enum { INET_DIAG_SHUTDOWN, INET_DIAG_DCTCPINFO, INET_DIAG_PROTOCOL, /* response attribute only */ + INET_DIAG_SKV6ONLY, }; -#define INET_DIAG_MAX INET_DIAG_PROTOCOL +#define INET_DIAG_MAX INET_DIAG_SKV6ONLY /* INET_DIAG_MEM */ diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 516a1f6fbdd3..9bc26677058e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -151,6 +151,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, if (nla_put_u8(skb, INET_DIAG_TCLASS, inet6_sk(sk)->tclass) < 0) goto errout; + + if (ipv6_only_sock(sk) && + nla_put_u8(skb, INET_DIAG_SKV6ONLY, 1)) + goto errout; } #endif -- cgit v1.2.3