kernel: sfp: re-attempt probing for phy

Add patches that retry probing for the PHY. This restores support for PHYs
that take longer to initialize, without breaking modules that have no PHY.

Patches taken from http://git.armlinux.org.uk/cgit/linux-arm.git/log/?h=phy

Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
This commit is contained in:
Jonas Gorski 2019-12-04 23:11:34 +01:00
parent 10b12b5d68
commit 947163b9d2
4 changed files with 344 additions and 0 deletions

View File

@@ -0,0 +1,55 @@
From 4d6bfb6fbb00af38402db4d1ce464e22def9fd9e Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 28 Nov 2019 14:24:40 +0000
Subject: [PATCH 1/4] net: sfp: use a definition for the fault recovery
attempts
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
drivers/net/phy/sfp.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -160,6 +160,14 @@ static const enum gpiod_flags gpio_flags
#define T_RESET_US 10
#define T_FAULT_RECOVER msecs_to_jiffies(1000)
+/* N_FAULT_INIT is the number of recovery attempts at module initialisation
+ * time. If the TX_FAULT signal is not deasserted after this number of
+ * attempts at clearing it, we decide that the module is faulty.
+ * N_FAULT is the same but after the module has initialised.
+ */
+#define N_FAULT_INIT 5
+#define N_FAULT 5
+
/* SFP module presence detection is poor: the three MOD DEF signals are
* the same length on the PCB, which means it's possible for MOD DEF 0 to
* connect before the I2C bus on MOD DEF 1/2.
@@ -1803,7 +1811,7 @@ static void sfp_sm_main(struct sfp *sfp,
sfp_module_tx_enable(sfp);
/* Initialise the fault clearance retries */
- sfp->sm_retries = 5;
+ sfp->sm_retries = N_FAULT_INIT;
/* We need to check the TX_FAULT state, which is not defined
* while TX_DISABLE is asserted. The earliest we want to do
@@ -1842,7 +1850,7 @@ static void sfp_sm_main(struct sfp *sfp,
* there is a fault.
*/
sfp_sm_fault(sfp, SFP_S_INIT_TX_FAULT,
- sfp->sm_retries == 5);
+ sfp->sm_retries == N_FAULT_INIT);
} else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) {
init_done: /* TX_FAULT deasserted or we timed out with TX_FAULT
* clear. Probe for the PHY and check the LOS state.
@@ -1855,7 +1863,7 @@ static void sfp_sm_main(struct sfp *sfp,
sfp_sm_link_check_los(sfp);
/* Reset the fault retry count */
- sfp->sm_retries = 5;
+ sfp->sm_retries = N_FAULT;
}
break;

View File

@@ -0,0 +1,60 @@
From bfa3cbb01c7ea34d7369c9bd2ec1b2dc67082b04 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Mon, 2 Dec 2019 18:06:44 +0000
Subject: [PATCH 2/4] net: sfp: rename sm_retries
Rename sm_retries to sm_fault_retries, since this member tracks the
number of remaining fault recovery retries.
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
drivers/net/phy/sfp.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -222,7 +222,7 @@ struct sfp {
unsigned char sm_mod_tries;
unsigned char sm_dev_state;
unsigned short sm_state;
- unsigned int sm_retries;
+ unsigned char sm_fault_retries;
struct sfp_eeprom_id id;
unsigned int module_power_mW;
@@ -1414,7 +1414,7 @@ static bool sfp_los_event_inactive(struc
static void sfp_sm_fault(struct sfp *sfp, unsigned int next_state, bool warn)
{
- if (sfp->sm_retries && !--sfp->sm_retries) {
+ if (sfp->sm_fault_retries && !--sfp->sm_fault_retries) {
dev_err(sfp->dev,
"module persistently indicates fault, disabling\n");
sfp_sm_next(sfp, SFP_S_TX_DISABLE, 0);
@@ -1811,7 +1811,7 @@ static void sfp_sm_main(struct sfp *sfp,
sfp_module_tx_enable(sfp);
/* Initialise the fault clearance retries */
- sfp->sm_retries = N_FAULT_INIT;
+ sfp->sm_fault_retries = N_FAULT_INIT;
/* We need to check the TX_FAULT state, which is not defined
* while TX_DISABLE is asserted. The earliest we want to do
@@ -1850,7 +1850,7 @@ static void sfp_sm_main(struct sfp *sfp,
* there is a fault.
*/
sfp_sm_fault(sfp, SFP_S_INIT_TX_FAULT,
- sfp->sm_retries == N_FAULT_INIT);
+ sfp->sm_fault_retries == N_FAULT_INIT);
} else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) {
init_done: /* TX_FAULT deasserted or we timed out with TX_FAULT
* clear. Probe for the PHY and check the LOS state.
@@ -1863,7 +1863,7 @@ static void sfp_sm_main(struct sfp *sfp,
sfp_sm_link_check_los(sfp);
/* Reset the fault retry count */
- sfp->sm_retries = N_FAULT;
+ sfp->sm_fault_retries = N_FAULT;
}
break;

View File

@@ -0,0 +1,97 @@
From 1fba543dc8edf4a43bff3276306648bb27c1e207 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Fri, 29 Nov 2019 00:30:08 +0000
Subject: [PATCH 3/4] net: sfp: error handling for phy probe
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
drivers/net/phy/sfp.c | 26 +++++++++++++++++---------
1 file changed, 17 insertions(+), 9 deletions(-)
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -1334,7 +1334,7 @@ static void sfp_sm_phy_detach(struct sfp
sfp->mod_phy = NULL;
}
-static void sfp_sm_probe_phy(struct sfp *sfp, bool is_c45)
+static int sfp_sm_probe_phy(struct sfp *sfp, bool is_c45)
{
struct phy_device *phy;
int err;
@@ -1342,18 +1342,18 @@ static void sfp_sm_probe_phy(struct sfp
phy = get_phy_device(sfp->i2c_mii, SFP_PHY_ADDR, is_c45);
if (phy == ERR_PTR(-ENODEV)) {
dev_info(sfp->dev, "no PHY detected\n");
- return;
+ return 0;
}
if (IS_ERR(phy)) {
dev_err(sfp->dev, "mdiobus scan returned %ld\n", PTR_ERR(phy));
- return;
+ return PTR_ERR(phy);
}
err = phy_device_register(phy);
if (err) {
phy_device_free(phy);
dev_err(sfp->dev, "phy_device_register failed: %d\n", err);
- return;
+ return err;
}
err = sfp_add_phy(sfp->sfp_bus, phy);
@@ -1361,10 +1361,12 @@ static void sfp_sm_probe_phy(struct sfp
phy_device_remove(phy);
phy_device_free(phy);
dev_err(sfp->dev, "sfp_add_phy failed: %d\n", err);
- return;
+ return err;
}
sfp->mod_phy = phy;
+
+ return 0;
}
static void sfp_sm_link_up(struct sfp *sfp)
@@ -1437,21 +1439,24 @@ static void sfp_sm_fault(struct sfp *sfp
* Clause 45 copper SFP+ modules (10G) appear to switch their interface
* mode according to the negotiated line speed.
*/
-static void sfp_sm_probe_for_phy(struct sfp *sfp)
+static int sfp_sm_probe_for_phy(struct sfp *sfp)
{
+ int err = 0;
+
switch (sfp->id.base.extended_cc) {
case SFF8024_ECC_10GBASE_T_SFI:
case SFF8024_ECC_10GBASE_T_SR:
case SFF8024_ECC_5GBASE_T:
case SFF8024_ECC_2_5GBASE_T:
- sfp_sm_probe_phy(sfp, true);
+ err = sfp_sm_probe_phy(sfp, true);
break;
default:
if (sfp->id.base.e1000_base_t)
- sfp_sm_probe_phy(sfp, false);
+ err = sfp_sm_probe_phy(sfp, false);
break;
}
+ return err;
}
static int sfp_module_parse_power(struct sfp *sfp)
@@ -1855,7 +1860,10 @@ static void sfp_sm_main(struct sfp *sfp,
init_done: /* TX_FAULT deasserted or we timed out with TX_FAULT
* clear. Probe for the PHY and check the LOS state.
*/
- sfp_sm_probe_for_phy(sfp);
+ if (sfp_sm_probe_for_phy(sfp)) {
+ sfp_sm_next(sfp, SFP_S_FAIL, 0);
+ break;
+ }
if (sfp_module_start(sfp->sfp_bus)) {
sfp_sm_next(sfp, SFP_S_FAIL, 0);
break;

View File

@@ -0,0 +1,132 @@
From 6c4efe83a0acf6f06c89ae17b885fa5739eb5be7 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Mon, 2 Dec 2019 18:20:22 +0000
Subject: [PATCH 4/4] net: sfp: re-attempt probing for phy
On some 1000BASE-T SFP modules, the PHY takes a while to wake up.
Retry the probe a number of times (up to 12 attempts, 50ms apart) before
deciding that the module has no PHY.
Tested with:
Sourcephotonics SPGBTXCNFC - PHY takes less than 50ms to respond.
Champion One 1000SFPT - PHY takes about 200ms to respond.
Mikrotik S-RJ01 - no PHY
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
drivers/net/phy/sfp.c | 59 ++++++++++++++++++++++++++++++++++++---------------
1 file changed, 42 insertions(+), 17 deletions(-)
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -60,6 +60,7 @@ enum {
SFP_S_FAIL,
SFP_S_WAIT,
SFP_S_INIT,
+ SFP_S_INIT_PHY,
SFP_S_INIT_TX_FAULT,
SFP_S_WAIT_LOS,
SFP_S_LINK_UP,
@@ -124,6 +125,7 @@ static const char * const sm_state_strin
[SFP_S_FAIL] = "fail",
[SFP_S_WAIT] = "wait",
[SFP_S_INIT] = "init",
+ [SFP_S_INIT_PHY] = "init_phy",
[SFP_S_INIT_TX_FAULT] = "init_tx_fault",
[SFP_S_WAIT_LOS] = "wait_los",
[SFP_S_LINK_UP] = "link_up",
@@ -168,6 +170,12 @@ static const enum gpiod_flags gpio_flags
#define N_FAULT_INIT 5
#define N_FAULT 5
+/* T_PHY_RETRY is the time interval between attempts to probe the PHY.
+ * R_PHY_RETRY is the number of attempts.
+ */
+#define T_PHY_RETRY msecs_to_jiffies(50)
+#define R_PHY_RETRY 12
+
/* SFP module presence detection is poor: the three MOD DEF signals are
* the same length on the PCB, which means it's possible for MOD DEF 0 to
* connect before the I2C bus on MOD DEF 1/2.
@@ -223,6 +231,7 @@ struct sfp {
unsigned char sm_dev_state;
unsigned short sm_state;
unsigned char sm_fault_retries;
+ unsigned char sm_phy_retries;
struct sfp_eeprom_id id;
unsigned int module_power_mW;
@@ -1340,10 +1349,8 @@ static int sfp_sm_probe_phy(struct sfp *
int err;
phy = get_phy_device(sfp->i2c_mii, SFP_PHY_ADDR, is_c45);
- if (phy == ERR_PTR(-ENODEV)) {
- dev_info(sfp->dev, "no PHY detected\n");
- return 0;
- }
+ if (phy == ERR_PTR(-ENODEV))
+ return PTR_ERR(phy);
if (IS_ERR(phy)) {
dev_err(sfp->dev, "mdiobus scan returned %ld\n", PTR_ERR(phy));
return PTR_ERR(phy);
@@ -1785,6 +1792,7 @@ static void sfp_sm_module(struct sfp *sf
static void sfp_sm_main(struct sfp *sfp, unsigned int event)
{
unsigned long timeout;
+ int ret;
/* Some events are global */
if (sfp->sm_state != SFP_S_DOWN &&
@@ -1857,22 +1865,39 @@ static void sfp_sm_main(struct sfp *sfp,
sfp_sm_fault(sfp, SFP_S_INIT_TX_FAULT,
sfp->sm_fault_retries == N_FAULT_INIT);
} else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) {
- init_done: /* TX_FAULT deasserted or we timed out with TX_FAULT
- * clear. Probe for the PHY and check the LOS state.
- */
- if (sfp_sm_probe_for_phy(sfp)) {
- sfp_sm_next(sfp, SFP_S_FAIL, 0);
- break;
- }
- if (sfp_module_start(sfp->sfp_bus)) {
- sfp_sm_next(sfp, SFP_S_FAIL, 0);
+ init_done:
+ sfp->sm_phy_retries = R_PHY_RETRY;
+ goto phy_probe;
+ }
+ break;
+
+ case SFP_S_INIT_PHY:
+ if (event != SFP_E_TIMEOUT)
+ break;
+ phy_probe:
+ /* TX_FAULT deasserted or we timed out with TX_FAULT
+ * clear. Probe for the PHY and check the LOS state.
+ */
+ ret = sfp_sm_probe_for_phy(sfp);
+ if (ret == -ENODEV) {
+ if (--sfp->sm_phy_retries) {
+ sfp_sm_next(sfp, SFP_S_INIT_PHY, T_PHY_RETRY);
break;
+ } else {
+ dev_info(sfp->dev, "no PHY detected\n");
}
- sfp_sm_link_check_los(sfp);
-
- /* Reset the fault retry count */
- sfp->sm_fault_retries = N_FAULT;
+ } else if (ret) {
+ sfp_sm_next(sfp, SFP_S_FAIL, 0);
+ break;
}
+ if (sfp_module_start(sfp->sfp_bus)) {
+ sfp_sm_next(sfp, SFP_S_FAIL, 0);
+ break;
+ }
+ sfp_sm_link_check_los(sfp);
+
+ /* Reset the fault retry count */
+ sfp->sm_fault_retries = N_FAULT;
break;
case SFP_S_INIT_TX_FAULT: