public inbox for drm-ai-reviews@public-inbox.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH V1] accel/amdxdna: Read real-time clock frequencies
@ 2026-04-06 22:05 Lizhi Hou
  2026-04-11  3:33 ` Mario Limonciello
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Lizhi Hou @ 2026-04-06 22:05 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
	maciej.falkowski
  Cc: Lizhi Hou, linux-kernel, max.zhen, sonal.santan

Add support for reading real-time clock frequencies through the PMF
interface.

Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie2_pci.c  |  4 +++-
 drivers/accel/amdxdna/aie2_pci.h  | 12 ++++++++--
 drivers/accel/amdxdna/aie2_pm.c   |  6 ++---
 drivers/accel/amdxdna/npu1_regs.c |  2 +-
 drivers/accel/amdxdna/npu4_regs.c | 39 +++++++++++++++++++++----------
 drivers/accel/amdxdna/npu5_regs.c |  4 +---
 drivers/accel/amdxdna/npu6_regs.c |  4 +---
 7 files changed, 46 insertions(+), 25 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 041cbc8cd7e5..c9c23c889c78 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -284,7 +284,7 @@ static struct xrs_action_ops aie2_xrs_actions = {
 
 static void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
 {
-	ndev->priv->hw_ops.set_dpm(ndev, 0);
+	ndev->priv->hw_ops->set_dpm(ndev, 0);
 	aie_smu_fini(ndev->aie.smu_hdl);
 }
 
@@ -765,6 +765,7 @@ static int aie2_get_clock_metadata(struct amdxdna_client *client,
 	if (!clock)
 		return -ENOMEM;
 
+	aie2_update_counters(ndev);
 	snprintf(clock->mp_npu_clock.name, sizeof(clock->mp_npu_clock.name),
 		 "MP-NPU Clock");
 	clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq;
@@ -925,6 +926,7 @@ static int aie2_query_resource_info(struct amdxdna_client *client,
 	ndev = xdna->dev_handle;
 	priv = ndev->priv;
 
+	aie2_update_counters(ndev);
 	res_info.npu_clk_max = priv->dpm_clk_tbl[ndev->max_dpm_level].hclk;
 	res_info.npu_tops_max = ndev->max_tops;
 	res_info.npu_task_max = priv->hwctx_limit;
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index 7c308672b5fe..77ba125e4d72 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -201,8 +201,16 @@ struct amdxdna_dev_hdl {
 
 struct aie2_hw_ops {
 	int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+	int (*update_counters)(struct amdxdna_dev_hdl *ndev);
 };
 
+#define aie2_update_counters(ndev)				\
+({								\
+	typeof(ndev) _ndev = ndev;				\
+	if (_ndev->priv->hw_ops->update_counters)		\
+		_ndev->priv->hw_ops->update_counters(_ndev);	\
+})
+
 enum aie2_fw_feature {
 	AIE2_NPU_COMMAND,
 	AIE2_PREEMPT,
@@ -229,7 +237,7 @@ struct amdxdna_dev_priv {
 	struct aie_bar_off_pair		sram_offs[SRAM_MAX_INDEX];
 	struct aie_bar_off_pair		psp_regs_off[PSP_MAX_REGS];
 	struct aie_bar_off_pair		smu_regs_off[SMU_MAX_REGS];
-	struct aie2_hw_ops		hw_ops;
+	const struct aie2_hw_ops	*hw_ops;
 };
 
 extern const struct amdxdna_dev_ops aie2_ops;
@@ -243,7 +251,7 @@ extern const struct dpm_clk_freq npu4_dpm_clk_table[];
 extern const struct rt_config npu1_default_rt_cfg[];
 extern const struct rt_config npu4_default_rt_cfg[];
 extern const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[];
-int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+extern const struct aie2_hw_ops npu4_hw_ops;
 
 /* aie2_pm.c */
 int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c
index 5ec6728d04fd..786d688bd82c 100644
--- a/drivers/accel/amdxdna/aie2_pm.c
+++ b/drivers/accel/amdxdna/aie2_pm.c
@@ -35,7 +35,7 @@ int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
 	if (ret)
 		return ret;
 
-	ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
+	ret = ndev->priv->hw_ops->set_dpm(ndev, dpm_level);
 	if (!ret)
 		ndev->dpm_level = dpm_level;
 	amdxdna_pm_suspend_put(ndev->aie.xdna);
@@ -49,7 +49,7 @@ int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
 
 	if (ndev->dev_status != AIE2_DEV_UNINIT) {
 		/* Resume device */
-		ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level);
+		ret = ndev->priv->hw_ops->set_dpm(ndev, ndev->dpm_level);
 		if (ret)
 			return ret;
 
@@ -64,7 +64,7 @@ int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
 		ndev->max_dpm_level++;
 	ndev->max_dpm_level--;
 
-	ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
+	ret = ndev->priv->hw_ops->set_dpm(ndev, ndev->max_dpm_level);
 	if (ret)
 		return ret;
 	ndev->dpm_level = ndev->max_dpm_level;
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
index a83e44f378ad..f1141a65e64d 100644
--- a/drivers/accel/amdxdna/npu1_regs.c
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -122,7 +122,7 @@ static const struct amdxdna_dev_priv npu1_dev_priv = {
 		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6),
 		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU1_SMU, MPNPU_PUB_SCRATCH7),
 	},
-	.hw_ops		= {
+	.hw_ops		= &(const struct aie2_hw_ops) {
 		.set_dpm = npu1_set_dpm,
 	},
 };
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index 5d68171f4ec2..a3b6df56abd0 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -6,6 +6,7 @@
 #include <drm/amdxdna_accel.h>
 #include <drm/drm_device.h>
 #include <drm/gpu_scheduler.h>
+#include <linux/amd-pmf-io.h>
 #include <linux/bits.h>
 #include <linux/sizes.h>
 
@@ -63,12 +64,7 @@
 #define NPU4_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
 #define NPU4_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
 
-#define NPU4_DPM_TOPS(ndev, dpm_level) \
-({ \
-	typeof(ndev) _ndev = ndev; \
-	(4096 * (_ndev)->total_col * \
-	 (_ndev)->priv->dpm_clk_tbl[dpm_level].hclk / 1000000); \
-})
+#define NPU4_DPM_TOPS(ndev, hclk) (4096 * (ndev)->total_col * (hclk) / 1000000)
 
 const struct rt_config npu4_default_rt_cfg[] = {
 	{ 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
@@ -105,7 +101,7 @@ const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[] = {
 	{ 0 }
 };
 
-int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
+static int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
 {
 	int ret;
 
@@ -115,8 +111,8 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
 
 	ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
 	ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
-	ndev->max_tops = NPU4_DPM_TOPS(ndev, ndev->max_dpm_level);
-	ndev->curr_tops = NPU4_DPM_TOPS(ndev, dpm_level);
+	ndev->max_tops = NPU4_DPM_TOPS(ndev, ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk);
+	ndev->curr_tops = NPU4_DPM_TOPS(ndev, ndev->hclk_freq);
 
 	XDNA_DBG(ndev->aie.xdna, "MP-NPU clock %d, H clock %d\n",
 		 ndev->npuclk_freq, ndev->hclk_freq);
@@ -124,6 +120,27 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
 	return 0;
 }
 
+static int npu4_update_counters(struct amdxdna_dev_hdl *ndev)
+{
+	struct amd_pmf_npu_metrics npu_metrics;
+	int ret;
+
+	ret = AIE2_GET_PMF_NPU_METRICS(&npu_metrics);
+	if (ret)
+		return ret;
+
+	ndev->npuclk_freq = npu_metrics.mpnpuclk_freq;
+	ndev->hclk_freq = npu_metrics.npuclk_freq;
+	ndev->curr_tops = NPU4_DPM_TOPS(ndev, ndev->hclk_freq);
+
+	return 0;
+}
+
+const struct aie2_hw_ops npu4_hw_ops = {
+	.set_dpm = npu4_set_dpm,
+	.update_counters = npu4_update_counters,
+};
+
 static const struct amdxdna_dev_priv npu4_dev_priv = {
 	.fw_path        = "amdnpu/17f0_10/",
 	.rt_config	= npu4_default_rt_cfg,
@@ -154,9 +171,7 @@ static const struct amdxdna_dev_priv npu4_dev_priv = {
 		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
 		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU4_SMU, MP1_C2PMSG_60),
 	},
-	.hw_ops		= {
-		.set_dpm = npu4_set_dpm,
-	},
+	.hw_ops		= &npu4_hw_ops
 };
 
 const struct amdxdna_dev_info dev_npu4_info = {
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
index 98ee8780f3f5..6d4596b9e61e 100644
--- a/drivers/accel/amdxdna/npu5_regs.c
+++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -92,9 +92,7 @@ static const struct amdxdna_dev_priv npu5_dev_priv = {
 		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61),
 		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU5_SMU, MP1_C2PMSG_60),
 	},
-	.hw_ops		= {
-		.set_dpm = npu4_set_dpm,
-	},
+	.hw_ops		= &npu4_hw_ops
 };
 
 const struct amdxdna_dev_info dev_npu5_info = {
diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
index 31400cca5ec4..76181345b6d1 100644
--- a/drivers/accel/amdxdna/npu6_regs.c
+++ b/drivers/accel/amdxdna/npu6_regs.c
@@ -92,9 +92,7 @@ static const struct amdxdna_dev_priv npu6_dev_priv = {
 		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
 		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU6_SMU, MP1_C2PMSG_60),
 	},
-	.hw_ops         = {
-		.set_dpm = npu4_set_dpm,
-	},
+	.hw_ops         = &npu4_hw_ops
 
 };
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH V1] accel/amdxdna: Read real-time clock frequencies
  2026-04-06 22:05 [PATCH V1] accel/amdxdna: Read real-time clock frequencies Lizhi Hou
@ 2026-04-11  3:33 ` Mario Limonciello
  2026-04-12  4:21 ` Claude review: " Claude Code Review Bot
  2026-04-12  4:21 ` Claude Code Review Bot
  2 siblings, 0 replies; 4+ messages in thread
From: Mario Limonciello @ 2026-04-11  3:33 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, maciej.falkowski
  Cc: linux-kernel, max.zhen, sonal.santan



On 4/6/26 17:05, Lizhi Hou wrote:
> Add support for reading real-time clock frequencies through the PMF
> interface.
> 
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> ---
>   drivers/accel/amdxdna/aie2_pci.c  |  4 +++-
>   drivers/accel/amdxdna/aie2_pci.h  | 12 ++++++++--
>   drivers/accel/amdxdna/aie2_pm.c   |  6 ++---
>   drivers/accel/amdxdna/npu1_regs.c |  2 +-
>   drivers/accel/amdxdna/npu4_regs.c | 39 +++++++++++++++++++++----------
>   drivers/accel/amdxdna/npu5_regs.c |  4 +---
>   drivers/accel/amdxdna/npu6_regs.c |  4 +---
>   7 files changed, 46 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
> index 041cbc8cd7e5..c9c23c889c78 100644
> --- a/drivers/accel/amdxdna/aie2_pci.c
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -284,7 +284,7 @@ static struct xrs_action_ops aie2_xrs_actions = {
>   
>   static void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
>   {
> -	ndev->priv->hw_ops.set_dpm(ndev, 0);
> +	ndev->priv->hw_ops->set_dpm(ndev, 0);
>   	aie_smu_fini(ndev->aie.smu_hdl);
>   }
>   
> @@ -765,6 +765,7 @@ static int aie2_get_clock_metadata(struct amdxdna_client *client,
>   	if (!clock)
>   		return -ENOMEM;
>   
> +	aie2_update_counters(ndev);
>   	snprintf(clock->mp_npu_clock.name, sizeof(clock->mp_npu_clock.name),
>   		 "MP-NPU Clock");
>   	clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq;
> @@ -925,6 +926,7 @@ static int aie2_query_resource_info(struct amdxdna_client *client,
>   	ndev = xdna->dev_handle;
>   	priv = ndev->priv;
>   
> +	aie2_update_counters(ndev);
>   	res_info.npu_clk_max = priv->dpm_clk_tbl[ndev->max_dpm_level].hclk;
>   	res_info.npu_tops_max = ndev->max_tops;
>   	res_info.npu_task_max = priv->hwctx_limit;
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> index 7c308672b5fe..77ba125e4d72 100644
> --- a/drivers/accel/amdxdna/aie2_pci.h
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -201,8 +201,16 @@ struct amdxdna_dev_hdl {
>   
>   struct aie2_hw_ops {
>   	int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
> +	int (*update_counters)(struct amdxdna_dev_hdl *ndev);
>   };
>   
> +#define aie2_update_counters(ndev)				\
> +({								\
> +	typeof(ndev) _ndev = ndev;				\
> +	if (_ndev->priv->hw_ops->update_counters)		\
> +		_ndev->priv->hw_ops->update_counters(_ndev);	\
> +})
> +
>   enum aie2_fw_feature {
>   	AIE2_NPU_COMMAND,
>   	AIE2_PREEMPT,
> @@ -229,7 +237,7 @@ struct amdxdna_dev_priv {
>   	struct aie_bar_off_pair		sram_offs[SRAM_MAX_INDEX];
>   	struct aie_bar_off_pair		psp_regs_off[PSP_MAX_REGS];
>   	struct aie_bar_off_pair		smu_regs_off[SMU_MAX_REGS];
> -	struct aie2_hw_ops		hw_ops;
> +	const struct aie2_hw_ops	*hw_ops;
>   };
>   
>   extern const struct amdxdna_dev_ops aie2_ops;
> @@ -243,7 +251,7 @@ extern const struct dpm_clk_freq npu4_dpm_clk_table[];
>   extern const struct rt_config npu1_default_rt_cfg[];
>   extern const struct rt_config npu4_default_rt_cfg[];
>   extern const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[];
> -int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
> +extern const struct aie2_hw_ops npu4_hw_ops;
>   
>   /* aie2_pm.c */
>   int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
> diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c
> index 5ec6728d04fd..786d688bd82c 100644
> --- a/drivers/accel/amdxdna/aie2_pm.c
> +++ b/drivers/accel/amdxdna/aie2_pm.c
> @@ -35,7 +35,7 @@ int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>   	if (ret)
>   		return ret;
>   
> -	ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
> +	ret = ndev->priv->hw_ops->set_dpm(ndev, dpm_level);
>   	if (!ret)
>   		ndev->dpm_level = dpm_level;
>   	amdxdna_pm_suspend_put(ndev->aie.xdna);
> @@ -49,7 +49,7 @@ int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
>   
>   	if (ndev->dev_status != AIE2_DEV_UNINIT) {
>   		/* Resume device */
> -		ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level);
> +		ret = ndev->priv->hw_ops->set_dpm(ndev, ndev->dpm_level);
>   		if (ret)
>   			return ret;
>   
> @@ -64,7 +64,7 @@ int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
>   		ndev->max_dpm_level++;
>   	ndev->max_dpm_level--;
>   
> -	ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
> +	ret = ndev->priv->hw_ops->set_dpm(ndev, ndev->max_dpm_level);
>   	if (ret)
>   		return ret;
>   	ndev->dpm_level = ndev->max_dpm_level;
> diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
> index a83e44f378ad..f1141a65e64d 100644
> --- a/drivers/accel/amdxdna/npu1_regs.c
> +++ b/drivers/accel/amdxdna/npu1_regs.c
> @@ -122,7 +122,7 @@ static const struct amdxdna_dev_priv npu1_dev_priv = {
>   		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6),
>   		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU1_SMU, MPNPU_PUB_SCRATCH7),
>   	},
> -	.hw_ops		= {
> +	.hw_ops		= &(const struct aie2_hw_ops) {
>   		.set_dpm = npu1_set_dpm,
>   	},
>   };
> diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
> index 5d68171f4ec2..a3b6df56abd0 100644
> --- a/drivers/accel/amdxdna/npu4_regs.c
> +++ b/drivers/accel/amdxdna/npu4_regs.c
> @@ -6,6 +6,7 @@
>   #include <drm/amdxdna_accel.h>
>   #include <drm/drm_device.h>
>   #include <drm/gpu_scheduler.h>
> +#include <linux/amd-pmf-io.h>
>   #include <linux/bits.h>
>   #include <linux/sizes.h>
>   
> @@ -63,12 +64,7 @@
>   #define NPU4_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
>   #define NPU4_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
>   
> -#define NPU4_DPM_TOPS(ndev, dpm_level) \
> -({ \
> -	typeof(ndev) _ndev = ndev; \
> -	(4096 * (_ndev)->total_col * \
> -	 (_ndev)->priv->dpm_clk_tbl[dpm_level].hclk / 1000000); \
> -})
> +#define NPU4_DPM_TOPS(ndev, hclk) (4096 * (ndev)->total_col * (hclk) / 1000000)
>   
>   const struct rt_config npu4_default_rt_cfg[] = {
>   	{ 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
> @@ -105,7 +101,7 @@ const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[] = {
>   	{ 0 }
>   };
>   
> -int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
> +static int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>   {
>   	int ret;
>   
> @@ -115,8 +111,8 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>   
>   	ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
>   	ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
> -	ndev->max_tops = NPU4_DPM_TOPS(ndev, ndev->max_dpm_level);
> -	ndev->curr_tops = NPU4_DPM_TOPS(ndev, dpm_level);
> +	ndev->max_tops = NPU4_DPM_TOPS(ndev, ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk);
> +	ndev->curr_tops = NPU4_DPM_TOPS(ndev, ndev->hclk_freq);
>   
>   	XDNA_DBG(ndev->aie.xdna, "MP-NPU clock %d, H clock %d\n",
>   		 ndev->npuclk_freq, ndev->hclk_freq);
> @@ -124,6 +120,27 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>   	return 0;
>   }
>   
> +static int npu4_update_counters(struct amdxdna_dev_hdl *ndev)
> +{
> +	struct amd_pmf_npu_metrics npu_metrics;
> +	int ret;
> +
> +	ret = AIE2_GET_PMF_NPU_METRICS(&npu_metrics);
> +	if (ret)
> +		return ret;
> +
> +	ndev->npuclk_freq = npu_metrics.mpnpuclk_freq;
> +	ndev->hclk_freq = npu_metrics.npuclk_freq;
> +	ndev->curr_tops = NPU4_DPM_TOPS(ndev, ndev->hclk_freq);
> +
> +	return 0;
> +}
> +
> +const struct aie2_hw_ops npu4_hw_ops = {
> +	.set_dpm = npu4_set_dpm,
> +	.update_counters = npu4_update_counters,
> +};
> +
>   static const struct amdxdna_dev_priv npu4_dev_priv = {
>   	.fw_path        = "amdnpu/17f0_10/",
>   	.rt_config	= npu4_default_rt_cfg,
> @@ -154,9 +171,7 @@ static const struct amdxdna_dev_priv npu4_dev_priv = {
>   		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
>   		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU4_SMU, MP1_C2PMSG_60),
>   	},
> -	.hw_ops		= {
> -		.set_dpm = npu4_set_dpm,
> -	},
> +	.hw_ops		= &npu4_hw_ops
>   };
>   
>   const struct amdxdna_dev_info dev_npu4_info = {
> diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
> index 98ee8780f3f5..6d4596b9e61e 100644
> --- a/drivers/accel/amdxdna/npu5_regs.c
> +++ b/drivers/accel/amdxdna/npu5_regs.c
> @@ -92,9 +92,7 @@ static const struct amdxdna_dev_priv npu5_dev_priv = {
>   		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61),
>   		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU5_SMU, MP1_C2PMSG_60),
>   	},
> -	.hw_ops		= {
> -		.set_dpm = npu4_set_dpm,
> -	},
> +	.hw_ops		= &npu4_hw_ops
>   };
>   
>   const struct amdxdna_dev_info dev_npu5_info = {
> diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
> index 31400cca5ec4..76181345b6d1 100644
> --- a/drivers/accel/amdxdna/npu6_regs.c
> +++ b/drivers/accel/amdxdna/npu6_regs.c
> @@ -92,9 +92,7 @@ static const struct amdxdna_dev_priv npu6_dev_priv = {
>   		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
>   		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU6_SMU, MP1_C2PMSG_60),
>   	},
> -	.hw_ops         = {
> -		.set_dpm = npu4_set_dpm,
> -	},
> +	.hw_ops         = &npu4_hw_ops
>   
>   };
>   


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Claude review: accel/amdxdna: Read real-time clock frequencies
  2026-04-06 22:05 [PATCH V1] accel/amdxdna: Read real-time clock frequencies Lizhi Hou
  2026-04-11  3:33 ` Mario Limonciello
@ 2026-04-12  4:21 ` Claude Code Review Bot
  2026-04-12  4:21 ` Claude Code Review Bot
  2 siblings, 0 replies; 4+ messages in thread
From: Claude Code Review Bot @ 2026-04-12  4:21 UTC (permalink / raw)
  To: dri-devel-reviews

Overall Series Review

Subject: accel/amdxdna: Read real-time clock frequencies
Author: Lizhi Hou <lizhi.hou@amd.com>
Patches: 2
Reviewed: 2026-04-12T14:21:09.757535

---

This is a single patch that bundles two logically distinct changes:

1. **Refactoring `hw_ops` from an embedded struct to a const pointer** — enabling sharing of the ops struct across NPU generations and making `npu4_set_dpm` static.
2. **Adding a new `update_counters` callback** that reads real-time clock frequencies from the AMD PMF (Platform Management Framework) interface.

The refactoring is clean and well-motivated: npu5 and npu6 already reused `npu4_set_dpm`, so sharing a single `npu4_hw_ops` const struct is the right consolidation. However, kernel convention strongly favors splitting these into at least two patches (refactor first, new feature second) for easier bisection and review.

The core concern is the **field name mapping** in `npu4_update_counters` between PMF metrics fields and driver fields, which is confusing and needs explicit justification. There are also issues around silent error suppression and a missing `max_tops` update in the new callback.

**Overall: Needs revision.** The approach is sound but the naming confusion warrants clarification, the error handling in the macro should be addressed, and the patch should ideally be split.

---

---
Generated by Claude Code Patch Reviewer

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Claude review: accel/amdxdna: Read real-time clock frequencies
  2026-04-06 22:05 [PATCH V1] accel/amdxdna: Read real-time clock frequencies Lizhi Hou
  2026-04-11  3:33 ` Mario Limonciello
  2026-04-12  4:21 ` Claude review: " Claude Code Review Bot
@ 2026-04-12  4:21 ` Claude Code Review Bot
  2 siblings, 0 replies; 4+ messages in thread
From: Claude Code Review Bot @ 2026-04-12  4:21 UTC (permalink / raw)
  To: dri-devel-reviews

Patch Review

**1. Potential field-name swap in `npu4_update_counters` (npu4_regs.c) — needs clarification**

```c
ndev->npuclk_freq = npu_metrics.mpnpuclk_freq;
ndev->hclk_freq = npu_metrics.npuclk_freq;
```

From `include/linux/amd-pmf-io.h`, the PMF struct documents:
- `npuclk_freq` — "NPU clock frequency [MHz]"
- `mpnpuclk_freq` — "MPNPU [MHz]"

In the driver (`npu4_set_dpm`), the existing mapping is:
```c
ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;  // MP-NPU clock
ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;      // H clock
```

So this maps PMF's `mpnpuclk_freq` → driver's `npuclk_freq` (MP-NPU), and PMF's `npuclk_freq` → driver's `hclk_freq` (H clock). The cross-wiring of names is extremely confusing. Even if this mapping is hardware-correct, a comment explaining why `npu_metrics.npuclk_freq` maps to `ndev->hclk_freq` (and not `npuclk_freq`) would prevent future maintenance mistakes.

**2. `aie2_update_counters` macro silently discards errors (aie2_pci.h)**

```c
#define aie2_update_counters(ndev)                  \
({                                                  \
    typeof(ndev) _ndev = ndev;                      \
    if (_ndev->priv->hw_ops->update_counters)       \
        _ndev->priv->hw_ops->update_counters(_ndev);\
})
```

This macro has two problems:

- The return value of `update_counters()` is silently discarded. If PMF is unavailable (`CONFIG_AMD_PMF=n`), `AIE2_GET_PMF_NPU_METRICS` returns `-EOPNOTSUPP`, and the caller proceeds to report stale/previous frequency data to userspace with no indication it's stale.
- The macro itself is a statement expression but its value is the result of the `if` statement, which is `void`. Callers cannot check for errors even if they wanted to.

At minimum, consider making this return `int` and propagating the error, or at least adding `dev_dbg` on failure so the silent fallback to stale data is discoverable during debugging:

```c
#define aie2_update_counters(ndev)                          \
({                                                          \
    typeof(ndev) _ndev = ndev;                              \
    int _ret = 0;                                           \
    if (_ndev->priv->hw_ops->update_counters)               \
        _ret = _ndev->priv->hw_ops->update_counters(_ndev); \
    _ret;                                                   \
})
```

**3. `npu4_update_counters` doesn't update `max_tops` (npu4_regs.c)**

Compare with `npu4_set_dpm`:
```c
ndev->max_tops = NPU4_DPM_TOPS(ndev, ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk);
ndev->curr_tops = NPU4_DPM_TOPS(ndev, ndev->hclk_freq);
```

But `npu4_update_counters` only updates `curr_tops`:
```c
ndev->curr_tops = NPU4_DPM_TOPS(ndev, ndev->hclk_freq);
```

This is probably intentional (max TOPS shouldn't change at runtime), but `aie2_query_resource_info` reads `ndev->max_tops` right after calling `aie2_update_counters`. If this is deliberate, a brief comment explaining that `max_tops` is set once in `set_dpm` and doesn't need runtime refresh would help reviewers.

**4. No locking around field updates in `npu4_update_counters`**

`npu4_update_counters` modifies `ndev->npuclk_freq`, `ndev->hclk_freq`, and `ndev->curr_tops` without any visible locking. These same fields are also written by `npu4_set_dpm` (called via `aie2_pm_set_dpm` from ioctl paths). If a DPM-level change races with a clock metadata query, the reported values could be inconsistent (e.g., `npuclk_freq` from PMF but `curr_tops` computed from a DPM-table hclk). This may be an existing issue, but the new call sites in `aie2_get_clock_metadata` and `aie2_query_resource_info` widen the race window.

**5. Compound literal vs named constant inconsistency (npu1_regs.c)**

```c
.hw_ops = &(const struct aie2_hw_ops) {
    .set_dpm = npu1_set_dpm,
},
```

NPU1 uses an anonymous compound literal while NPU4/5/6 use the named `npu4_hw_ops`. For consistency and debuggability, consider defining a `npu1_hw_ops` named constant as well.

**6. Missing trailing commas (npu4_regs.c, npu5_regs.c, npu6_regs.c)**

```c
.hw_ops = &npu4_hw_ops
```

The kernel coding style convention is to include trailing commas after the last struct member initializer, to reduce future diff noise. The original code had them.

**7. NPU5 and NPU6 inherit `update_counters` (npu5_regs.c, npu6_regs.c)**

By pointing `.hw_ops = &npu4_hw_ops`, npu5 and npu6 now also get `npu4_update_counters`. This means PMF metrics queries will be made for these NPU generations too. Is the PMF interface guaranteed to work the same way across NPU4/5/6? The commit message doesn't address this.

**8. Patch should be split**

This patch bundles a mechanical refactor (struct → pointer, making `npu4_set_dpm` static, sharing `npu4_hw_ops`) with a functional change (new `update_counters` callback and its call sites). Kernel convention is to separate these:
- Patch 1: Refactor `hw_ops` from embedded struct to const pointer
- Patch 2: Add `update_counters` callback and PMF integration

This aids bisection if the PMF integration causes issues.

**9. `NPU4_DPM_TOPS` macro simplification is correct (npu4_regs.c)**

The old macro:
```c
#define NPU4_DPM_TOPS(ndev, dpm_level) \
({ \
    typeof(ndev) _ndev = ndev; \
    (4096 * (_ndev)->total_col * \
     (_ndev)->priv->dpm_clk_tbl[dpm_level].hclk / 1000000); \
})
```

The new macro:
```c
#define NPU4_DPM_TOPS(ndev, hclk) (4096 * (ndev)->total_col * (hclk) / 1000000)
```

This is a good simplification. Taking the raw hclk value as input rather than a DPM level index makes the macro more reusable (needed for `update_counters` which gets hclk from PMF, not from the DPM table). The removal of `typeof` guard is safe since `ndev` is only dereferenced once and all callers pass a simple pointer.

---
Generated by Claude Code Patch Reviewer

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-04-12  4:21 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-06 22:05 [PATCH V1] accel/amdxdna: Read real-time clock frequencies Lizhi Hou
2026-04-11  3:33 ` Mario Limonciello
2026-04-12  4:21 ` Claude review: " Claude Code Review Bot
2026-04-12  4:21 ` Claude Code Review Bot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox