From 7af23ebe93fe52a30b2338450c8cd40a4f5210d4 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Wed, 8 May 2019 16:13:03 +0800 Subject: drm/amdgpu: Issue ras TA disable/enable cmd forcely on boot Check ras TA error code and return EAGAIN. Issue ras enable/disable cmd without checking currect state. Looks like ras TA will handle current state == target state case. Now driver might need do a reset to satisfy ras TA. Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 34 +++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 22bd21efe6b1..5f8e1163a75d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -521,6 +521,8 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, enable ? "enable":"disable", ras_block_str(head->block), ret); + if (ret == TA_RAS_STATUS__RESET_NEEDED) + return -EAGAIN; return -EINVAL; } @@ -541,16 +543,32 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, return -EINVAL; if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) { - /* If ras is enabled by vbios, we set up ras object first in - * both case. For enable, that is all what we need do. For - * disable, we need perform a ras TA disable cmd after that. - */ - ret = __amdgpu_ras_feature_enable(adev, head, 1); - if (ret) - return ret; + if (enable) { + /* There is no harm to issue a ras TA cmd regardless of + * the currecnt ras state. + * If current state == target state, it will do nothing + * But sometimes it requests driver to reset and repost + * with error code -EAGAIN. + */ + ret = amdgpu_ras_feature_enable(adev, head, 1); + /* With old ras TA, we might fail to enable ras. + * Log it and just setup the object. + * TODO need remove this WA in the future. + */ + if (ret == -EINVAL) { + ret = __amdgpu_ras_feature_enable(adev, head, 1); + if (!ret) + DRM_INFO("RAS INFO: %s setup object\n", + ras_block_str(head->block)); + } + } else { + /* setup the object then issue a ras TA disable cmd.*/ + ret = __amdgpu_ras_feature_enable(adev, head, 1); + if (ret) + return ret; - if (!enable) ret = amdgpu_ras_feature_enable(adev, head, 0); + } } else ret = amdgpu_ras_feature_enable(adev, head, enable); -- cgit v1.2.3 From a564808e7f5b19b3621a1dc4ff2a3042171ae167 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Wed, 8 May 2019 19:12:24 +0800 Subject: drm/amdgpu: handle ras reset add another flag to allow IP do a gpu reset after device init. Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 36 +++++++++++++++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 +++ 2 files changed, 37 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 5f8e1163a75d..37cb3de08494 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -118,7 +118,8 @@ const char *ras_block_string[] = { #define ras_err_str(i) (ras_error_string[ffs(i)]) #define ras_block_str(i) (ras_block_string[i]) -#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1 +#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1 +#define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS) static void amdgpu_ras_self_test(struct amdgpu_device *adev) @@ -1358,6 +1359,19 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) } /* recovery end */ +/* return 0 if ras will reset gpu and repost.*/ +int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, + unsigned int block) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + if (!ras) + return -EINVAL; + + ras->flags |= AMDGPU_RAS_FLAG_INIT_NEED_RESET; + return 0; +} + /* * check hardware's ras ability which will be saved in hw_supported. * if hardware does not support ras, we can skip some ras initializtion and @@ -1433,7 +1447,12 @@ recovery_out: return -EINVAL; } -/* do some init work after IP late init as dependence */ +/* do some init work after IP late init as dependence. + * TODO + * gpu reset will re-enable ras, need fint out one way to run it again. + * for now, if a gpu reset happened, unless IP enable its ras, the ras state + * will be showed as disabled. + */ void amdgpu_ras_post_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1462,6 +1481,19 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev) } } } + + if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) { + con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET; + /* setup ras obj state as disabled. + * for init_by_vbios case. + * if we want to enable ras, just enable it in a normal way. + * If we want do disable it, need setup ras obj as enabled, + * then issue another TA disable cmd. + * See feature_enable_on_boot + */ + amdgpu_ras_disable_all_features(adev, 1); + amdgpu_ras_reset_gpu(adev, 0); + } } /* do some fini work before IP fini as dependence */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index e60a554656ca..06ef325b61b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -175,6 +175,9 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, return ras && (ras->supported & (1 << block)); } +int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, + unsigned int block); + int amdgpu_ras_query_error_count(struct amdgpu_device *adev, bool is_ce); -- cgit v1.2.3 From 466b179346094e01deccd051a215fe782b59ca68 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Tue, 7 May 2019 11:53:31 +0800 Subject: drm/amdgpu: add badpages sysfs interafce add badpages node. it will output badpages list in format gpu pfn : gpu page size : flags example 0x00000000 : 0x00001000 : R 0x00000001 : 0x00001000 : R 0x00000002 : 0x00001000 : R 0x00000003 : 0x00001000 : R 0x00000004 : 0x00001000 : R 0x00000005 : 0x00001000 : R 0x00000006 : 0x00001000 : R 0x00000007 : 0x00001000 : P 0x00000008 : 0x00001000 : P 0x00000009 : 0x00001000 : P flags can be one of below characters R: reserved. P: pending for reserve. F: failed to reserve for some reasons. Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 146 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 + 2 files changed, 147 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 37cb3de08494..49c71cfc7fc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -90,6 +90,12 @@ struct ras_manager { struct ras_err_data err_data; }; +struct ras_badpage { + unsigned int bp; + unsigned int size; + unsigned int flags; +}; + const char *ras_error_string[] = { "none", "parity", @@ -710,6 +716,77 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev, /* sysfs begin */ +static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, + struct ras_badpage **bps, unsigned int *count); + +static char *amdgpu_ras_badpage_flags_str(unsigned int flags) +{ + switch (flags) { + case 0: + return "R"; + case 1: + return "P"; + case 2: + default: + return "F"; + }; +} + +/* + * DOC: ras sysfs gpu_vram_bad_pages interface + * + * It allows user to read the bad pages of vram on the gpu through + * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages + * + * It outputs multiple lines, and each line stands for one gpu page. + * + * The format of one line is below, + * gpu pfn : gpu page size : flags + * + * gpu pfn and gpu page size are printed in hex format. + * flags can be one of below character, + * R: reserved, this gpu page is reserved and not able to use. + * P: pending for reserve, this gpu page is marked as bad, will be reserved + * in next window of page_reserve. + * F: unable to reserve. this gpu page can't be reserved due to some reasons. + * + * examples: + * 0x00000001 : 0x00001000 : R + * 0x00000002 : 0x00001000 : P + */ + +static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, + struct kobject *kobj, struct bin_attribute *attr, + char *buf, loff_t ppos, size_t count) +{ + struct amdgpu_ras *con = + container_of(attr, struct amdgpu_ras, badpages_attr); + struct amdgpu_device *adev = con->adev; + const unsigned int element_size = + sizeof("0xabcdabcd : 0x12345678 : R\n") - 1; + unsigned int start = (ppos + element_size - 1) / element_size; + unsigned int end = (ppos + count - 1) / element_size; + ssize_t s = 0; + struct ras_badpage *bps = NULL; + unsigned int bps_count = 0; + + memset(buf, 0, count); + + if (amdgpu_ras_badpages_read(adev, &bps, &bps_count)) + return 0; + + for (; start < end && start < bps_count; start++) + s += scnprintf(&buf[s], element_size + 1, + "0x%08x : 0x%08x : %1s\n", + bps[start].bp, + bps[start].size, + amdgpu_ras_badpage_flags_str(bps[start].flags)); + + kfree(bps); + + return s; +} + static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev, struct device_attribute *attr, char *buf) { @@ -750,9 +827,14 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev) &con->features_attr.attr, NULL }; + struct bin_attribute *bin_attrs[] = { + &con->badpages_attr, + NULL + }; struct attribute_group group = { .name = "ras", .attrs = attrs, + .bin_attrs = bin_attrs, }; con->features_attr = (struct device_attribute) { @@ -762,7 +844,19 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev) }, .show = amdgpu_ras_sysfs_features_read, }; + + con->badpages_attr = (struct bin_attribute) { + .attr = { + .name = "gpu_vram_bad_pages", + .mode = S_IRUGO, + }, + .size = 0, + .private = NULL, + .read = amdgpu_ras_sysfs_badpages_read, + }; + sysfs_attr_init(attrs[0]); + sysfs_bin_attr_init(bin_attrs[0]); return sysfs_create_group(&adev->dev->kobj, &group); } @@ -774,9 +868,14 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev) &con->features_attr.attr, NULL }; + struct bin_attribute *bin_attrs[] = { + &con->badpages_attr, + NULL + }; struct attribute_group group = { .name = "ras", .attrs = attrs, + .bin_attrs = bin_attrs, }; sysfs_remove_group(&adev->dev->kobj, &group); @@ -1108,6 +1207,53 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev) /* ih end */ /* recovery begin */ + +/* return 0 on success. + * caller need free bps. + */ +static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, + struct ras_badpage **bps, unsigned int *count) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data; + int i = 0; + int ret = 0; + + if (!con || !con->eh_data || !bps || !count) + return -EINVAL; + + mutex_lock(&con->recovery_lock); + data = con->eh_data; + if (!data || data->count == 0) { + *bps = NULL; + goto out; + } + + *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL); + if (!*bps) { + ret = -ENOMEM; + goto out; + } + + for (; i < data->count; i++) { + (*bps)[i] = (struct ras_badpage){ + .bp = data->bps[i].bp, + .size = AMDGPU_GPU_PAGE_SIZE, + .flags = 0, + }; + + if (data->last_reserved <= i) + (*bps)[i].flags = 1; + else if (data->bps[i].bo == NULL) + (*bps)[i].flags = 2; + } + + *count = data->count; +out: + mutex_unlock(&con->recovery_lock); + return ret; +} + static void amdgpu_ras_do_recovery(struct work_struct *work) { struct amdgpu_ras *ras = diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 06ef325b61b8..59994ee00855 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -93,6 +93,7 @@ struct amdgpu_ras { struct dentry *ent; /* sysfs */ struct device_attribute features_attr; + struct bin_attribute badpages_attr; /* block array */ struct ras_manager *objs; -- cgit v1.2.3 From 511fdbc33aaa4758f7c445183ff840e251c0b427 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Thu, 9 May 2019 08:26:27 +0800 Subject: drm/amdgpu: ras support suspend/resume add ras suspend function. rename ras_post_init to amdgpu_ras_resume. Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Reviewed-by: James Zhu Tested-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 20 +++++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 4 +++- 3 files changed, 20 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 309461d0c275..da120fe330be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2745,7 +2745,7 @@ fence_driver_init: } /* must succeed. */ - amdgpu_ras_post_init(adev); + amdgpu_ras_resume(adev); r = device_create_file(adev->dev, &dev_attr_pcie_replay_count); if (r) { @@ -3503,7 +3503,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, goto out; /* must succeed. */ - amdgpu_ras_post_init(tmp_adev); + amdgpu_ras_resume(tmp_adev); /* Update PSP FW topology after reset */ if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 49c71cfc7fc6..da1dc40b9b14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1594,12 +1594,9 @@ recovery_out: } /* do some init work after IP late init as dependence. - * TODO - * gpu reset will re-enable ras, need fint out one way to run it again. - * for now, if a gpu reset happened, unless IP enable its ras, the ras state - * will be showed as disabled. + * and it runs in resume/gpu reset/booting up cases. */ -void amdgpu_ras_post_init(struct amdgpu_device *adev) +void amdgpu_ras_resume(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj, *tmp; @@ -1642,6 +1639,19 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev) } } +void amdgpu_ras_suspend(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!con) + return; + + amdgpu_ras_disable_all_features(adev, 0); + /* Make sure all ras objects are disabled. */ + if (con->features) + amdgpu_ras_disable_all_features(adev, 1); +} + /* do some fini work before IP fini as dependence */ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 59994ee00855..c6b34fbd695f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -179,6 +179,9 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, unsigned int block); +void amdgpu_ras_resume(struct amdgpu_device *adev); +void amdgpu_ras_suspend(struct amdgpu_device *adev); + int amdgpu_ras_query_error_count(struct amdgpu_device *adev, bool is_ce); @@ -256,7 +259,6 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) { /* called in ip_init and ip_fini */ int amdgpu_ras_init(struct amdgpu_device *adev); -void amdgpu_ras_post_init(struct amdgpu_device *adev); int amdgpu_ras_fini(struct amdgpu_device *adev); int amdgpu_ras_pre_fini(struct amdgpu_device *adev); -- cgit v1.2.3 From d6ee400e793f0ae6c9f5926bea9fbb362a950d96 Mon Sep 17 00:00:00 2001 From: Slava Abramov Date: Thu, 16 May 2019 16:17:53 -0400 Subject: drm/amdgpu: use div64_ul for 32-bit compatibility v1 v1: replace casting to unsigned long with div64_ul Acked-by: Alex Deucher Signed-off-by: Slava Abramov Tested-by: Slava Abramov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index da1dc40b9b14..d5719b0fb82c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -764,8 +764,8 @@ static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, struct amdgpu_device *adev = con->adev; const unsigned int element_size = sizeof("0xabcdabcd : 0x12345678 : R\n") - 1; - unsigned int start = (ppos + element_size - 1) / element_size; - unsigned int end = (ppos + count - 1) / element_size; + unsigned int start = div64_ul(ppos + element_size - 1, element_size); + unsigned int end = div64_ul(ppos + count - 1, element_size); ssize_t s = 0; struct ras_badpage *bps = NULL; unsigned int bps_count = 0; -- cgit v1.2.3 From 74abc2210e105f0fffe59c35d2329201f1b4310e Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Fri, 24 May 2019 09:21:54 -0400 Subject: drm/amd/doc: Add RAS documentation to guide Acked-by: Slava Abramov Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu.rst | 11 +++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ++-- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst index cacfcfad2356..86138798128f 100644 --- a/Documentation/gpu/amdgpu.rst +++ b/Documentation/gpu/amdgpu.rst @@ -79,6 +79,17 @@ AMDGPU XGMI Support .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c :internal: +AMDGPU RAS debugfs control interface +==================================== + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c + :doc: AMDGPU RAS debugfs control interface + + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c + :internal: + + GPU Power/Thermal Controls and Monitoring ========================================= diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index d5719b0fb82c..7c8a4aedf07c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -244,8 +244,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, return 0; } -/* - * DOC: ras debugfs control interface +/** + * DOC: AMDGPU RAS debugfs control interface * * It accepts struct ras_debug_if who has two members. * -- cgit v1.2.3 From efb426d581288c40263829dda43a7229f1125e06 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Tue, 28 May 2019 14:47:31 +0800 Subject: drm/amdgpu: ras injection use gpu address injection need a valid gpu address. Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 7c8a4aedf07c..011630f62f85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -128,6 +128,12 @@ const char *ras_block_string[] = { #define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS) +static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev, + uint64_t offset, uint64_t size, + struct amdgpu_bo **bo_ptr); +static int amdgpu_ras_release_vram(struct amdgpu_device *adev, + struct amdgpu_bo **bo_ptr); + static void amdgpu_ras_self_test(struct amdgpu_device *adev) { /* TODO */ @@ -307,6 +313,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * { struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; struct ras_debug_if data; + struct amdgpu_bo *bo; int ret = 0; ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data); @@ -324,7 +331,16 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * ret = amdgpu_ras_feature_enable(adev, &data.head, 1); break; case 2: + ret = amdgpu_ras_reserve_vram(adev, + data.inject.address, PAGE_SIZE, &bo); + /* This address might be used already on failure. In fact we can + * perform an injection in such case. + */ + if (ret) + break; + data.inject.address = amdgpu_bo_gpu_offset(bo); ret = amdgpu_ras_error_inject(adev, &data.inject); + amdgpu_ras_release_vram(adev, &bo); break; default: ret = -EINVAL; -- cgit v1.2.3 From f867723b41f871c88388462c007976bb9a4c72da Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Mon, 10 Jun 2019 00:07:51 +0200 Subject: drm/amd: drop use of drmP.h in amdgpu.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Delete the unused drmP.h from amdgpu.h. Fix fallout in various files. Signed-off-by: Sam Ravnborg Reviewed-by: Alex Deucher Cc: "Christian König" Cc: "David (ChunMing) Zhou" Cc: David Airlie Cc: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20190609220757.10862-5-sam@ravnborg.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 3 +++ drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 2 ++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c | 1 + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 4 +++- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 1 + drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 1 + drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c | 1 + drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c | 1 + drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c | 1 + drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c | 2 ++ drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c | 2 ++ drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c | 1 + drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c | 2 ++ 19 files changed, 38 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 14398f55f602..fbec83bfb4ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -44,9 +44,9 @@ #include #include -#include -#include #include +#include +#include #include #include diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 22bd21efe6b1..4fea7f835506 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -24,6 +24,8 @@ #include #include #include +#include + #include "amdgpu.h" #include "amdgpu_ras.h" #include "amdgpu_atomfirmware.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 639297250c21..c799691dfa84 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -23,8 +23,11 @@ */ #include +#include #include + #include + #include "amdgpu.h" #include "amdgpu_vm.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 7d484fad3909..01f88269a7f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -21,6 +21,10 @@ * */ +#include + +#include + #include "amdgpu.h" bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 4f761ebb655e..91f10995249b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -21,6 +21,8 @@ * */ #include +#include + #include "amdgpu.h" #include "amdgpu_ih.h" #include "amdgpu_gfx.h" diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3b7370d914a5..51bbf773b4f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -20,8 +20,12 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ + #include +#include + #include + #include "amdgpu.h" #include "gmc_v9_0.h" #include "amdgpu_atomfirmware.h" diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 77c2bc344dfc..ce1ea31feee0 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -24,6 +24,9 @@ */ #include +#include +#include + #include "amdgpu.h" #include "amdgpu_psp.h" #include "amdgpu_ucode.h" diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index b91df7bd1d98..b1e7aca72578 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -21,6 +21,8 @@ */ #include +#include + #include "amdgpu.h" #include "amdgpu_psp.h" #include "amdgpu_ucode.h" diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index a10e3a50d9ef..bc67e6502733 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -24,6 +24,7 @@ */ #include +#include #include "amdgpu.h" #include "amdgpu_dm.h" diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 1d5fc5ad3bee..e611b5376d8c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -23,7 +23,9 @@ * */ -#include +#include + +#include #include "dc.h" #include "amdgpu.h" diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 048757e8f494..8f81c25c523d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 384c37875cd0..8e05c14d2d0d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "hwmgr.h" diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c index b6767d74dc85..b12100de3084 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c @@ -21,6 +21,7 @@ * */ #include +#include #include #include diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index 669bd0c2a16c..46858b901d05 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -25,6 +25,7 @@ #include #include "linux/delay.h" #include +#include #include "smumgr.h" #include "pp_debug.h" diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c index 375ccf6ff5f2..0eee7b2f8b9a 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c @@ -25,6 +25,7 @@ #include "pp_debug.h" #include #include +#include #include #include diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index 2d4cfe14f72e..477c1c870591 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -21,6 +21,8 @@ * */ +#include + #include "pp_debug.h" #include "smumgr.h" #include "smu74.h" diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c index 6d11076a79ba..d409925d1f7d 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c @@ -21,6 +21,8 @@ * */ +#include + #include "smumgr.h" #include "smu10_inc.h" #include "soc15_common.h" diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c index 3ed6c5f1e5cf..8f6f2808094a 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c @@ -23,6 +23,7 @@ #include "pp_debug.h" #include #include +#include #include #include diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c index c81acc3192ad..672986e9eecb 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c @@ -21,6 +21,8 @@ * */ +#include + #include "smumgr.h" #include "vega10_inc.h" #include "soc15_common.h" -- cgit v1.2.3 From 450f30ea9c60228e87cc60647473e2c1eb55df0b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 13 Jun 2019 15:19:19 +0200 Subject: amdgpu: no need to check return value of debugfs_create functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Alex Deucher Cc: "Christian König" Cc: "David (ChunMing) Zhou" Cc: David Airlie Cc: Daniel Vetter Cc: xinhui pan Cc: Evan Quan Cc: Feifei Xu Cc: amd-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 51 ++++++++------------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 4 +-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 5 +--- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 5 +--- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 5 +--- 5 files changed, 17 insertions(+), 53 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 011630f62f85..7efc4e07665d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -967,40 +967,24 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev) /* sysfs end */ /* debugfs begin */ -static int amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) +static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct drm_minor *minor = adev->ddev->primary; - struct dentry *root = minor->debugfs_root, *dir; - struct dentry *ent; - - dir = debugfs_create_dir("ras", root); - if (IS_ERR(dir)) - return -EINVAL; - con->dir = dir; - - ent = debugfs_create_file("ras_ctrl", - S_IWUGO | S_IRUGO, con->dir, - adev, &amdgpu_ras_debugfs_ctrl_ops); - if (IS_ERR(ent)) { - debugfs_remove(con->dir); - return -EINVAL; - } - - con->ent = ent; - return 0; + con->dir = debugfs_create_dir("ras", minor->debugfs_root); + con->ent = debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir, + adev, &amdgpu_ras_debugfs_ctrl_ops); } -int amdgpu_ras_debugfs_create(struct amdgpu_device *adev, +void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, struct ras_fs_if *head) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head); - struct dentry *ent; if (!obj || obj->ent) - return -EINVAL; + return; get_obj(obj); @@ -1008,34 +992,25 @@ int amdgpu_ras_debugfs_create(struct amdgpu_device *adev, head->debugfs_name, sizeof(obj->fs_data.debugfs_name)); - ent = debugfs_create_file(obj->fs_data.debugfs_name, - S_IWUGO | S_IRUGO, con->dir, - obj, &amdgpu_ras_debugfs_ops); - - if (IS_ERR(ent)) - return -EINVAL; - - obj->ent = ent; - - return 0; + obj->ent = debugfs_create_file(obj->fs_data.debugfs_name, + S_IWUGO | S_IRUGO, con->dir, obj, + &amdgpu_ras_debugfs_ops); } -int amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, +void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, struct ras_common_if *head) { struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); if (!obj || !obj->ent) - return 0; + return; debugfs_remove(obj->ent); obj->ent = NULL; put_obj(obj); - - return 0; } -static int amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev) +static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj, *tmp; @@ -1048,8 +1023,6 @@ static int amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev) debugfs_remove(con->dir); con->dir = NULL; con->ent = NULL; - - return 0; } /* debugfs end */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 94c652f5265a..b2841195bd3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -276,10 +276,10 @@ int amdgpu_ras_sysfs_create(struct amdgpu_device *adev, int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, struct ras_common_if *head); -int amdgpu_ras_debugfs_create(struct amdgpu_device *adev, +void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, struct ras_fs_if *head); -int amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, +void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, struct ras_common_if *head); int amdgpu_ras_error_query(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ba36a28da2fa..a3f1490f7307 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3904,9 +3904,7 @@ static int gfx_v9_0_ecc_late_init(void *handle) if (r) goto interrupt; - r = amdgpu_ras_debugfs_create(adev, &fs_info); - if (r) - goto debugfs; + amdgpu_ras_debugfs_create(adev, &fs_info); r = amdgpu_ras_sysfs_create(adev, &fs_info); if (r) @@ -3921,7 +3919,6 @@ irq: amdgpu_ras_sysfs_remove(adev, *ras_if); sysfs: amdgpu_ras_debugfs_remove(adev, *ras_if); -debugfs: amdgpu_ras_interrupt_remove_handler(adev, &ih_info); interrupt: amdgpu_ras_feature_enable(adev, *ras_if, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 602593bab7a7..3c23de4b2e48 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -730,9 +730,7 @@ static int gmc_v9_0_ecc_late_init(void *handle) if (r) goto interrupt; - r = amdgpu_ras_debugfs_create(adev, &fs_info); - if (r) - goto debugfs; + amdgpu_ras_debugfs_create(adev, &fs_info); r = amdgpu_ras_sysfs_create(adev, &fs_info); if (r) @@ -747,7 +745,6 @@ irq: amdgpu_ras_sysfs_remove(adev, *ras_if); sysfs: amdgpu_ras_debugfs_remove(adev, *ras_if); -debugfs: amdgpu_ras_interrupt_remove_handler(adev, &ih_info); interrupt: amdgpu_ras_feature_enable(adev, *ras_if, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 7a259c5b6c62..c0b6011b4bd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1566,9 +1566,7 @@ static int sdma_v4_0_late_init(void *handle) if (r) goto interrupt; - r = amdgpu_ras_debugfs_create(adev, &fs_info); - if (r) - goto debugfs; + amdgpu_ras_debugfs_create(adev, &fs_info); r = amdgpu_ras_sysfs_create(adev, &fs_info); if (r) @@ -1589,7 +1587,6 @@ irq: amdgpu_ras_sysfs_remove(adev, *ras_if); sysfs: amdgpu_ras_debugfs_remove(adev, *ras_if); -debugfs: amdgpu_ras_interrupt_remove_handler(adev, &ih_info); interrupt: amdgpu_ras_feature_enable(adev, *ras_if, 0); -- cgit v1.2.3 From acb05f0a3f6261b330034e14e4d2c6b4bc896504 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Fri, 14 Jun 2019 16:06:10 +0800 Subject: drm/amdgpu: Do error injection even vram reserve fails As long as the address is mapped with vram, we can do an error injection. Signed-off-by: xinhui pan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 7efc4e07665d..614116c7036a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -333,12 +333,13 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * case 2: ret = amdgpu_ras_reserve_vram(adev, data.inject.address, PAGE_SIZE, &bo); - /* This address might be used already on failure. In fact we can - * perform an injection in such case. - */ - if (ret) - break; - data.inject.address = amdgpu_bo_gpu_offset(bo); + if (ret) { + /* address was offset, now it is absolute.*/ + data.inject.address += adev->gmc.vram_start; + if (data.inject.address > adev->gmc.vram_end) + break; + } else + data.inject.address = amdgpu_bo_gpu_offset(bo); ret = amdgpu_ras_error_inject(adev, &data.inject); amdgpu_ras_release_vram(adev, &bo); break; -- cgit v1.2.3 From 5f872b723a451a26ad0f1d29541df9de5d23529d Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 18 Jul 2019 12:49:15 +0800 Subject: drm/amdgpu: do not create ras debugfs/sysfs node for ASICs that don't have ras ability driver shouldn't init any ras debugfs/sysfs node for ASICs that don't have ras hardware ability Signed-off-by: Hawking Zhang Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 1a4412e47810..3a9ece450b31 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1557,6 +1557,12 @@ int amdgpu_ras_init(struct amdgpu_device *adev) amdgpu_ras_check_supported(adev, &con->hw_supported, &con->supported); + if (!con->hw_supported) { + amdgpu_ras_set_context(adev, NULL); + kfree(con); + return 0; + } + con->features = 0; INIT_LIST_HEAD(&con->head); /* Might need get this flag from vbios. */ -- cgit v1.2.3 From 29bd650809225f51ba475c556f43e53e392c44e3 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 18 Jul 2019 13:59:38 +0800 Subject: drm/amdgpu: only allow error injection to UMC IP block error injection to other IP blocks (except UMC) will be enabled until RAS feature stablize on those IP blocks Signed-off-by: Hawking Zhang Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 3a9ece450b31..fc346eb1aacd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -689,6 +689,12 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, if (!obj) return -EINVAL; + if (block_info.block_id != TA_RAS_BLOCK__UMC) { + DRM_INFO("%s error injection is not supported yet\n", + ras_block_str(info->head.block)); + return -EINVAL; + } + ret = psp_ras_trigger_error(&adev->psp, &block_info); if (ret) DRM_ERROR("RAS ERROR: inject %s error failed ret %d\n", -- cgit v1.2.3 From 578a4daa1cd61f9783b5d0f566d6ec0a2cb9f6a3 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 18 Jul 2019 16:03:46 +0800 Subject: drm/amdgpu: drop ras self test this function is not needed any more. error injection is the only way to validate ras but it can't be executed in amdgpu_ras_init, where gpu is even not initialized Signed-off-by: Hawking Zhang Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index fc346eb1aacd..fac7aa2c244f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -136,11 +136,6 @@ static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev, static int amdgpu_ras_release_vram(struct amdgpu_device *adev, struct amdgpu_bo **bo_ptr); -static void amdgpu_ras_self_test(struct amdgpu_device *adev) -{ - /* TODO */ -} - static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -1582,8 +1577,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev) if (amdgpu_ras_fs_init(adev)) goto fs_out; - amdgpu_ras_self_test(adev); - DRM_INFO("RAS INFO: ras initialized successfully, " "hardware ability[%x] ras_mask[%x]\n", con->hw_supported, con->supported); -- cgit v1.2.3