return err;
}
-static void kfd_cleanup_node(struct kfd_dev *kfd)
+static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes)
{
- struct kfd_node *knode = kfd->node;
-
- device_queue_manager_uninit(knode->dqm);
- kfd_interrupt_exit(knode);
- kfd_topology_remove_device(knode);
- if (knode->gws)
- amdgpu_amdkfd_free_gws(knode->adev, knode->gws);
- kfree(knode);
- kfd->node = NULL;
+ struct kfd_node *knode;
+ unsigned int i;
+
+ for (i = 0; i < num_nodes; i++) {
+ knode = kfd->nodes[i];
+ device_queue_manager_uninit(knode->dqm);
+ kfd_interrupt_exit(knode);
+ kfd_topology_remove_device(knode);
+ if (knode->gws)
+ amdgpu_amdkfd_free_gws(knode->adev, knode->gws);
+ kfree(knode);
+ kfd->nodes[i] = NULL;
+ }
}
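
For reference, the partial-cleanup idiom that kfd_cleanup_nodes() enables can be sketched in a few lines of standalone C: the error path passes the loop index, so only the entries that were fully set up are torn down. The names (init_all, cleanup_first) and the malloc-based setup are illustrative stand-ins, not driver code.

    #include <stdio.h>
    #include <stdlib.h>

    #define MAX_NODES 8

    static void *nodes[MAX_NODES];

    /* tear down only the first n entries, mirroring kfd_cleanup_nodes() */
    static void cleanup_first(unsigned int n)
    {
        unsigned int i;

        for (i = 0; i < n; i++) {
            free(nodes[i]);
            nodes[i] = NULL;
        }
    }

    static int init_all(unsigned int num_nodes)
    {
        unsigned int i;

        for (i = 0; i < num_nodes; i++) {
            nodes[i] = malloc(64); /* stand-in for per-node setup */
            if (!nodes[i]) {
                /* exactly i entries succeeded before this failure */
                cleanup_first(i);
                return -1;
            }
        }
        return 0;
    }

    int main(void)
    {
        if (init_all(4) == 0)
            cleanup_first(4);
        printf("done\n");
        return 0;
    }
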
bool kgd2kfd_device_init(struct kfd_dev *kfd,
const struct kgd2kfd_shared_resources *gpu_resources)
{
- unsigned int size, map_process_packet_size;
+ unsigned int size, map_process_packet_size, i;
struct kfd_node *node;
uint32_t first_vmid_kfd, last_vmid_kfd, vmid_num_kfd;
unsigned int max_proc_per_quantum;
KGD_ENGINE_SDMA1);
kfd->shared_resources = *gpu_resources;
- first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
- last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
- vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1;
+ if (kfd->adev->gfx.num_xcd == 0 || kfd->adev->gfx.num_xcd == 1 ||
+ kfd->adev->gfx.num_xcc_per_xcp == 0)
+ kfd->num_nodes = 1;
+ else
+ kfd->num_nodes =
+ kfd->adev->gfx.num_xcd / kfd->adev->gfx.num_xcc_per_xcp;
+ if (kfd->num_nodes == 0) {
+ dev_err(kfd_device,
+ "KFD num nodes cannot be 0, GC inst: %d, num_xcc_in_node: %d\n",
+ kfd->adev->gfx.num_xcd, kfd->adev->gfx.num_xcc_per_xcp);
+ goto out;
+ }
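
A quick standalone check of the node-count computation above, using assumed hardware values (8 XCDs grouped 2 XCCs per partition, which would yield 4 KFD nodes); the locals stand in for adev->gfx.num_xcd and adev->gfx.num_xcc_per_xcp:

    #include <stdio.h>

    int main(void)
    {
        unsigned int num_xcd = 8;         /* assumed XCD count */
        unsigned int num_xcc_per_xcp = 2; /* assumed XCCs per partition */
        unsigned int num_nodes;

        if (num_xcd == 0 || num_xcd == 1 || num_xcc_per_xcp == 0)
            num_nodes = 1;
        else
            num_nodes = num_xcd / num_xcc_per_xcp;

        printf("KFD nodes: %u\n", num_nodes); /* prints 4 */
        return 0;
    }
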
/* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
* 32 and 64-bit requests are possible and must be
return false;
}
+ first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
+ last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
+ vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1;
+
+ /* For GFX9.4.3, we need special handling for VMIDs depending on
+ * partition mode.
+ * In CPX mode, the VMID range needs to be shared between XCDs.
+ * Additionally, there are 13 VMIDs (3-15) available for KFD. To
+ * divide them equally, we change the starting VMID to 4 and do not
+ * use VMID 3.
+ * If the VMID range changes for GFX9.4.3, then this code MUST be
+ * revisited.
+ */
+ if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) &&
+ kfd->adev->gfx.partition_mode == AMDGPU_CPX_PARTITION_MODE &&
+ kfd->num_nodes != 1) {
+ vmid_num_kfd /= 2;
+ first_vmid_kfd = last_vmid_kfd + 1 - vmid_num_kfd * 2;
+ }
+
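
The VMID arithmetic can be verified in isolation. Assuming the 13-VMID range (3-15) described in the comment, i.e. a compute_vmid_bitmap of 0xfff8, the CPX adjustment leaves 6 VMIDs per XCD starting at VMID 4. fls() is emulated below since it is kernel-only:

    #include <stdio.h>
    #include <strings.h> /* ffs() */

    /* userspace stand-in for the kernel's fls() */
    static int fls32(unsigned int x)
    {
        return x ? 32 - __builtin_clz(x) : 0;
    }

    int main(void)
    {
        unsigned int bitmap = 0xfff8;          /* VMIDs 3..15 for KFD */
        unsigned int first = ffs(bitmap) - 1;  /* 3 */
        unsigned int last = fls32(bitmap) - 1; /* 15 */
        unsigned int num = last - first + 1;   /* 13 */

        /* CPX: split the range across two XCDs, skipping VMID 3 */
        num /= 2;                   /* 6 per XCD */
        first = last + 1 - num * 2; /* 15 + 1 - 12 = 4 */

        printf("per-XCD VMIDs: %u starting at %u\n", num, first);
        return 0;
    }
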
/* Verify module parameters regarding mapped process number */
if (hws_max_conc_proc >= 0)
max_proc_per_quantum = min((u32)hws_max_conc_proc, vmid_num_kfd);
kfd_cwsr_init(kfd);
+ /* TODO: Needs to be updated for memory partitioning */
svm_migrate_init(kfd->adev);
/* Allocate the KFD node */
node->max_proc_per_quantum = max_proc_per_quantum;
atomic_set(&node->sram_ecc_flag, 0);
- /* Initialize the KFD node */
- if (kfd_init_node(node)) {
- dev_err(kfd_device, "Error initializing KFD node\n");
- goto node_init_error;
+ dev_info(kfd_device, "Total number of KFD nodes to be created: %d\n",
+ kfd->num_nodes);
+ for (i = 0; i < kfd->num_nodes; i++) {
+ node = kzalloc(sizeof(struct kfd_node), GFP_KERNEL);
+ if (!node)
+ goto node_alloc_error;
+
+ node->adev = kfd->adev;
+ node->kfd = kfd;
+ node->kfd2kgd = kfd->kfd2kgd;
+ node->vm_info.vmid_num_kfd = vmid_num_kfd;
+ node->num_xcc_per_node = max(1U, kfd->adev->gfx.num_xcc_per_xcp);
+ node->start_xcc_id = node->num_xcc_per_node * i;
+
+ if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) &&
+ kfd->adev->gfx.partition_mode == AMDGPU_CPX_PARTITION_MODE &&
+ kfd->num_nodes != 1) {
+ /* For GFX9.4.3 in CPX mode, the first XCD gets VMID range
+ * 4-9 and the second XCD gets VMID range 10-15.
+ */
+
+ node->vm_info.first_vmid_kfd = (i % 2 == 0) ?
+ first_vmid_kfd :
+ first_vmid_kfd + vmid_num_kfd;
+ node->vm_info.last_vmid_kfd = (i % 2 == 0) ?
+ last_vmid_kfd - vmid_num_kfd :
+ last_vmid_kfd;
+ node->compute_vmid_bitmap =
+ ((0x1 << (node->vm_info.last_vmid_kfd + 1)) - 1) -
+ ((0x1 << (node->vm_info.first_vmid_kfd)) - 1);
+ } else {
+ node->vm_info.first_vmid_kfd = first_vmid_kfd;
+ node->vm_info.last_vmid_kfd = last_vmid_kfd;
+ node->compute_vmid_bitmap =
+ gpu_resources->compute_vmid_bitmap;
+ }
+ node->max_proc_per_quantum = max_proc_per_quantum;
+ atomic_set(&node->sram_ecc_flag, 0);
+ /* Initialize the KFD node */
+ if (kfd_init_node(node)) {
+ dev_err(kfd_device, "Error initializing KFD node\n");
+ goto node_init_error;
+ }
+ kfd->nodes[i] = node;
}
- kfd->node = node;
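
For the CPX case, the per-node bitmap expression in the loop above can be checked with a small standalone program; the inputs (VMIDs 4-9 for the first XCD, 10-15 for the second) follow the in-code comment:

    #include <stdio.h>

    int main(void)
    {
        unsigned int first_vmid_kfd = 4, last_vmid_kfd = 15;
        unsigned int vmid_num_kfd = 6;
        unsigned int i;

        for (i = 0; i < 2; i++) {
            unsigned int first = (i % 2 == 0) ? first_vmid_kfd :
                first_vmid_kfd + vmid_num_kfd;
            unsigned int last = (i % 2 == 0) ?
                last_vmid_kfd - vmid_num_kfd : last_vmid_kfd;
            unsigned int bitmap = ((1u << (last + 1)) - 1) -
                ((1u << first) - 1);

            /* node 0: VMIDs 4-9, 0x03f0; node 1: VMIDs 10-15, 0xfc00 */
            printf("node %u: VMIDs %u-%u, bitmap 0x%04x\n",
                   i, first, last, bitmap);
        }
        return 0;
    }
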
if (kfd_resume_iommu(kfd))
goto kfd_resume_iommu_error;
goto out;
kfd_resume_iommu_error:
- kfd_cleanup_node(kfd);
node_init_error:
node_alloc_error:
+ kfd_cleanup_nodes(kfd, i);
device_iommu_error:
kfd_doorbell_fini(kfd);
kfd_doorbell_error:
void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
if (kfd->init_complete) {
- kfd_cleanup_node(kfd);
+ /* Cleanup KFD nodes */
+ kfd_cleanup_nodes(kfd, kfd->num_nodes);
+ /* Cleanup common/shared resources */
kfd_doorbell_fini(kfd);
ida_destroy(&kfd->doorbell_ida);
kfd_gtt_sa_fini(kfd);
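
The exit path mirrors init in reverse: per-node resources go first, then the shared ones (doorbells, doorbell IDA, GTT sub-allocator). A minimal sketch of that layered ordering, with hypothetical shared_init()/shared_fini() and node_init()/node_fini() helpers:

    #include <stdio.h>

    static int shared_init(void)  { printf("shared init\n"); return 0; }
    static void shared_fini(void) { printf("shared fini\n"); }
    static int node_init(int i)   { printf("node %d init\n", i); return 0; }
    static void node_fini(int i)  { printf("node %d fini\n", i); }

    int main(void)
    {
        int i, num_nodes = 2;

        if (shared_init())
            return 1;
        for (i = 0; i < num_nodes; i++)
            node_init(i);

        /* teardown in reverse: all nodes first, shared state last */
        for (i = 0; i < num_nodes; i++)
            node_fini(i);
        shared_fini();
        return 0;
    }
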
int kgd2kfd_pre_reset(struct kfd_dev *kfd)
{
- struct kfd_node *node = kfd->node;
+ struct kfd_node *node;
+ int i;
if (!kfd->init_complete)
return 0;
- kfd_smi_event_update_gpu_reset(node, false);
-
- node->dqm->ops.pre_reset(node->dqm);
+ for (i = 0; i < kfd->num_nodes; i++) {
+ node = kfd->nodes[i];
+ kfd_smi_event_update_gpu_reset(node, false);
+ node->dqm->ops.pre_reset(node->dqm);
+ }
kgd2kfd_suspend(kfd, false);
- kfd_signal_reset_event(node);
+ for (i = 0; i < kfd->num_nodes; i++)
+ kfd_signal_reset_event(kfd->nodes[i]);
+
return 0;
}
int kgd2kfd_post_reset(struct kfd_dev *kfd)
{
int ret;
- struct kfd_node *node = kfd->node;
+ struct kfd_node *node;
+ int i;
if (!kfd->init_complete)
return 0;
- ret = kfd_resume(node);
- if (ret)
- return ret;
- atomic_dec(&kfd_locked);
+ for (i = 0; i < kfd->num_nodes; i++) {
+ ret = kfd_resume(kfd->nodes[i]);
+ if (ret)
+ return ret;
+ }
- atomic_set(&node->sram_ecc_flag, 0);
+ atomic_dec(&kfd_locked);
- kfd_smi_event_update_gpu_reset(node, true);
+ for (i = 0; i < kfd->num_nodes; i++) {
+ node = kfd->nodes[i];
+ atomic_set(&node->sram_ecc_flag, 0);
+ kfd_smi_event_update_gpu_reset(node, true);
+ }
return 0;
}
void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
- struct kfd_node *node = kfd->node;
+ struct kfd_node *node;
+ int i;
if (!kfd->init_complete)
return;
kfd_suspend_all_processes();
}
- node->dqm->ops.stop(node->dqm);
+ for (i = 0; i < kfd->num_nodes; i++) {
+ node = kfd->nodes[i];
+ node->dqm->ops.stop(node->dqm);
+ }
kfd_iommu_suspend(kfd);
}
int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
- int ret, count;
- struct kfd_node *node = kfd->node;
+ int ret, count, i;
if (!kfd->init_complete)
return 0;
- ret = kfd_resume(node);
- if (ret)
- return ret;
+ for (i = 0; i < kfd->num_nodes; i++) {
+ ret = kfd_resume(kfd->nodes[i]);
+ if (ret)
+ return ret;
+ }
/* for runtime resume, skip unlocking kfd */
if (!run_pm) {
/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
- uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
+ uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE], i;
bool is_patched = false;
unsigned long flags;
- struct kfd_node *node = kfd->node;
+ struct kfd_node *node;
if (!kfd->init_complete)
return;
return;
}
- spin_lock_irqsave(&node->interrupt_lock, flags);
-
- if (node->interrupts_active
- && interrupt_is_wanted(node, ih_ring_entry,
- patched_ihre, &is_patched)
- && enqueue_ih_ring_entry(node,
- is_patched ? patched_ihre : ih_ring_entry))
- kfd_queue_work(node->ih_wq, &node->interrupt_work);
+ for (i = 0; i < kfd->num_nodes; i++) {
+ node = kfd->nodes[i];
+ spin_lock_irqsave(&node->interrupt_lock, flags);
+
+ if (node->interrupts_active
+ && interrupt_is_wanted(node, ih_ring_entry,
+ patched_ihre, &is_patched)
+ && enqueue_ih_ring_entry(node,
+ is_patched ? patched_ihre : ih_ring_entry)) {
+ kfd_queue_work(node->ih_wq, &node->interrupt_work);
+ spin_unlock_irqrestore(&node->interrupt_lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&node->interrupt_lock, flags);
+ }
- spin_unlock_irqrestore(&node->interrupt_lock, flags);
}
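
The routing loop above is a first-claimant dispatch: each node is offered the IH entry under its own interrupt_lock, and the walk stops at the first node that both wants and successfully enqueues it. A compact userspace sketch of the same pattern, with a hypothetical node_claims() predicate standing in for the interrupt_is_wanted()/enqueue_ih_ring_entry() pair:

    #include <stdbool.h>
    #include <stdio.h>

    struct node {
        int id;
        bool active; /* stands in for node->interrupts_active */
    };

    /* hypothetical claim test: even entries belong to node 0, odd to node 1 */
    static bool node_claims(struct node *n, int entry)
    {
        return n->active && (entry % 2 == n->id % 2);
    }

    static void dispatch(struct node *nodes, unsigned int num, int entry)
    {
        unsigned int i;

        for (i = 0; i < num; i++) {
            /* the real code holds nodes[i].interrupt_lock around this test */
            if (node_claims(&nodes[i], entry)) {
                printf("entry %d -> node %d\n", entry, nodes[i].id);
                return; /* first claimant wins; stop the walk */
            }
        }
    }

    int main(void)
    {
        struct node nodes[2] = { { 0, true }, { 1, true } };

        dispatch(nodes, 2, 4); /* -> node 0 */
        dispatch(nodes, 2, 5); /* -> node 1 */
        return 0;
    }
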
int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger)
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
{
+ /*
+ * TODO: For now, update the SRAM ECC flag only on the first node.
+ * This needs to be revisited once we can identify which node the
+ * SRAM ECC error occurred on.
+ */
if (kfd)
- atomic_inc(&kfd->node->sram_ecc_flag);
+ atomic_inc(&kfd->nodes[0]->sram_ecc_flag);
}
void kfd_inc_compute_active(struct kfd_node *node)
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
{
+ /*
+ * TODO: For now, raise the throttling event only on the first node.
+ * This will need to change once we can determine which node the
+ * throttling event came from.
+ */
if (kfd && kfd->init_complete)
- kfd_smi_event_update_thermal_throttling(kfd->node, throttle_bitmask);
+ kfd_smi_event_update_thermal_throttling(kfd->nodes[0],
+ throttle_bitmask);
}
/* kfd_get_num_sdma_engines returns the number of PCIe optimized SDMA and
kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
- 0, adev->kfd.dev->node->id, prange->prefetch_loc,
+ 0, adev->kfd.dev->nodes[0]->id, prange->prefetch_loc,
prange->preferred_loc, trigger);
r = migrate_vma_setup(&migrate);
kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
- 0, adev->kfd.dev->node->id, trigger);
+ 0, adev->kfd.dev->nodes[0]->id, trigger);
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
svm_range_free_dma_mappings(prange);
kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
- adev->kfd.dev->node->id, 0, prange->prefetch_loc,
+ adev->kfd.dev->nodes[0]->id, 0, prange->prefetch_loc,
prange->preferred_loc, trigger);
r = migrate_vma_setup(&migrate);
kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
- adev->kfd.dev->node->id, 0, trigger);
+ adev->kfd.dev->nodes[0]->id, 0, trigger);
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
unsigned long address, bool write_fault,
ktime_t ts)
{
- kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_PAGE_FAULT_START,
+ kfd_smi_event_add(pid, dev->nodes[0], KFD_SMI_EVENT_PAGE_FAULT_START,
"%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid,
- address, dev->node->id, write_fault ? 'W' : 'R');
+ address, dev->nodes[0]->id, write_fault ? 'W' : 'R');
}
void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
unsigned long address, bool migration)
{
- kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_PAGE_FAULT_END,
+ kfd_smi_event_add(pid, dev->nodes[0], KFD_SMI_EVENT_PAGE_FAULT_END,
"%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(),
- pid, address, dev->node->id, migration ? 'M' : 'U');
+ pid, address, dev->nodes[0]->id, migration ? 'M' : 'U');
}
void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid,
uint32_t prefetch_loc, uint32_t preferred_loc,
uint32_t trigger)
{
- kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_MIGRATE_START,
+ kfd_smi_event_add(pid, dev->nodes[0], KFD_SMI_EVENT_MIGRATE_START,
"%lld -%d @%lx(%lx) %x->%x %x:%x %d\n",
ktime_get_boottime_ns(), pid, start, end - start,
from, to, prefetch_loc, preferred_loc, trigger);
unsigned long start, unsigned long end,
uint32_t from, uint32_t to, uint32_t trigger)
{
- kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_MIGRATE_END,
+ kfd_smi_event_add(pid, dev->nodes[0], KFD_SMI_EVENT_MIGRATE_END,
"%lld -%d @%lx(%lx) %x->%x %d\n",
ktime_get_boottime_ns(), pid, start, end - start,
from, to, trigger);
void kfd_smi_event_queue_eviction(struct kfd_dev *dev, pid_t pid,
uint32_t trigger)
{
- kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_QUEUE_EVICTION,
+ kfd_smi_event_add(pid, dev->nodes[0], KFD_SMI_EVENT_QUEUE_EVICTION,
"%lld -%d %x %d\n", ktime_get_boottime_ns(), pid,
- dev->node->id, trigger);
+ dev->nodes[0]->id, trigger);
}
void kfd_smi_event_queue_restore(struct kfd_dev *dev, pid_t pid)
{
- kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_QUEUE_RESTORE,
+ kfd_smi_event_add(pid, dev->nodes[0], KFD_SMI_EVENT_QUEUE_RESTORE,
"%lld -%d %x\n", ktime_get_boottime_ns(), pid,
- dev->node->id);
+ dev->nodes[0]->id);
}
void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
unsigned long address, unsigned long last,
uint32_t trigger)
{
- kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_UNMAP_FROM_GPU,
+ kfd_smi_event_add(pid, dev->nodes[0], KFD_SMI_EVENT_UNMAP_FROM_GPU,
"%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(),
- pid, address, last - address + 1, dev->node->id, trigger);
+ pid, address, last - address + 1, dev->nodes[0]->id, trigger);
}
int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd)