11a59d1b8SThomas Gleixner /* SPDX-License-Identifier: GPL-2.0-or-later */ 213d19498SJack Steiner /* 313d19498SJack Steiner * SN Platform GRU Driver 413d19498SJack Steiner * 513d19498SJack Steiner * GRU DRIVER TABLES, MACROS, externs, etc 613d19498SJack Steiner * 713d19498SJack Steiner * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. 813d19498SJack Steiner */ 913d19498SJack Steiner 1013d19498SJack Steiner #ifndef __GRUTABLES_H__ 1113d19498SJack Steiner #define __GRUTABLES_H__ 1213d19498SJack Steiner 1313d19498SJack Steiner /* 149ca8e40cSJack Steiner * GRU Chiplet: 159ca8e40cSJack Steiner * The GRU is a user addressible memory accelerator. It provides 169ca8e40cSJack Steiner * several forms of load, store, memset, bcopy instructions. In addition, it 179ca8e40cSJack Steiner * contains special instructions for AMOs, sending messages to message 189ca8e40cSJack Steiner * queues, etc. 199ca8e40cSJack Steiner * 209ca8e40cSJack Steiner * The GRU is an integral part of the node controller. It connects 219ca8e40cSJack Steiner * directly to the cpu socket. In its current implementation, there are 2 229ca8e40cSJack Steiner * GRU chiplets in the node controller on each blade (~node). 239ca8e40cSJack Steiner * 249ca8e40cSJack Steiner * The entire GRU memory space is fully coherent and cacheable by the cpus. 
259ca8e40cSJack Steiner * 269ca8e40cSJack Steiner * Each GRU chiplet has a physical memory map that looks like the following: 279ca8e40cSJack Steiner * 289ca8e40cSJack Steiner * +-----------------+ 299ca8e40cSJack Steiner * |/////////////////| 309ca8e40cSJack Steiner * |/////////////////| 319ca8e40cSJack Steiner * |/////////////////| 329ca8e40cSJack Steiner * |/////////////////| 339ca8e40cSJack Steiner * |/////////////////| 349ca8e40cSJack Steiner * |/////////////////| 359ca8e40cSJack Steiner * |/////////////////| 369ca8e40cSJack Steiner * |/////////////////| 379ca8e40cSJack Steiner * +-----------------+ 389ca8e40cSJack Steiner * | system control | 399ca8e40cSJack Steiner * +-----------------+ _______ +-------------+ 409ca8e40cSJack Steiner * |/////////////////| / | | 419ca8e40cSJack Steiner * |/////////////////| / | | 429ca8e40cSJack Steiner * |/////////////////| / | instructions| 439ca8e40cSJack Steiner * |/////////////////| / | | 449ca8e40cSJack Steiner * |/////////////////| / | | 459ca8e40cSJack Steiner * |/////////////////| / |-------------| 469ca8e40cSJack Steiner * |/////////////////| / | | 479ca8e40cSJack Steiner * +-----------------+ | | 489ca8e40cSJack Steiner * | context 15 | | data | 499ca8e40cSJack Steiner * +-----------------+ | | 509ca8e40cSJack Steiner * | ...... | \ | | 519ca8e40cSJack Steiner * +-----------------+ \____________ +-------------+ 529ca8e40cSJack Steiner * | context 1 | 539ca8e40cSJack Steiner * +-----------------+ 549ca8e40cSJack Steiner * | context 0 | 559ca8e40cSJack Steiner * +-----------------+ 569ca8e40cSJack Steiner * 579ca8e40cSJack Steiner * Each of the "contexts" is a chunk of memory that can be mmaped into user 589ca8e40cSJack Steiner * space. The context consists of 2 parts: 599ca8e40cSJack Steiner * 609ca8e40cSJack Steiner * - an instruction space that can be directly accessed by the user 619ca8e40cSJack Steiner * to issue GRU instructions and to check instruction status. 
629ca8e40cSJack Steiner * 639ca8e40cSJack Steiner * - a data area that acts as normal RAM. 649ca8e40cSJack Steiner * 659ca8e40cSJack Steiner * User instructions contain virtual addresses of data to be accessed by the 669ca8e40cSJack Steiner * GRU. The GRU contains a TLB that is used to convert these user virtual 679ca8e40cSJack Steiner * addresses to physical addresses. 689ca8e40cSJack Steiner * 699ca8e40cSJack Steiner * The "system control" area of the GRU chiplet is used by the kernel driver 709ca8e40cSJack Steiner * to manage user contexts and to perform functions such as TLB dropin and 719ca8e40cSJack Steiner * purging. 729ca8e40cSJack Steiner * 739ca8e40cSJack Steiner * One context may be reserved for the kernel and used for cross-partition 749ca8e40cSJack Steiner * communication. The GRU will also be used to asynchronously zero out 759ca8e40cSJack Steiner * large blocks of memory (not currently implemented). 769ca8e40cSJack Steiner * 779ca8e40cSJack Steiner * 7813d19498SJack Steiner * Tables: 7913d19498SJack Steiner * 8013d19498SJack Steiner * VDATA-VMA Data - Holds a few parameters. Head of linked list of 8113d19498SJack Steiner * GTS tables for threads using the GSEG 8213d19498SJack Steiner * GTS - Gru Thread State - contains info for managing a GSEG context. A 8313d19498SJack Steiner * GTS is allocated for each thread accessing a 8413d19498SJack Steiner * GSEG. 8513d19498SJack Steiner * GTD - GRU Thread Data - contains shadow copy of GRU data when GSEG is 8613d19498SJack Steiner * not loaded into a GRU 8713d19498SJack Steiner * GMS - GRU Memory Struct - Used to manage TLB shootdowns. Tracks GRUs 8813d19498SJack Steiner * where a GSEG has been loaded. Similar to 8913d19498SJack Steiner * an mm_struct but for GRU. 
9013d19498SJack Steiner * 9113d19498SJack Steiner * GS - GRU State - Used to manage the state of a GRU chiplet 9213d19498SJack Steiner * BS - Blade State - Used to manage state of all GRU chiplets 9313d19498SJack Steiner * on a blade 9413d19498SJack Steiner * 9513d19498SJack Steiner * 9613d19498SJack Steiner * Normal task tables for task using GRU. 9713d19498SJack Steiner * - 2 threads in process 9813d19498SJack Steiner * - 2 GSEGs open in process 9913d19498SJack Steiner * - GSEG1 is being used by both threads 10013d19498SJack Steiner * - GSEG2 is used only by thread 2 10113d19498SJack Steiner * 10213d19498SJack Steiner * task -->| 10313d19498SJack Steiner * task ---+---> mm ->------ (notifier) -------+-> gms 10413d19498SJack Steiner * | | 10513d19498SJack Steiner * |--> vma -> vdata ---> gts--->| GSEG1 (thread1) 10613d19498SJack Steiner * | | | 10713d19498SJack Steiner * | +-> gts--->| GSEG1 (thread2) 10813d19498SJack Steiner * | | 10913d19498SJack Steiner * |--> vma -> vdata ---> gts--->| GSEG2 (thread2) 11013d19498SJack Steiner * . 11113d19498SJack Steiner * . 
11213d19498SJack Steiner * 11313d19498SJack Steiner * GSEGs are marked DONTCOPY on fork 11413d19498SJack Steiner * 11513d19498SJack Steiner * At open 11613d19498SJack Steiner * file.private_data -> NULL 11713d19498SJack Steiner * 11813d19498SJack Steiner * At mmap, 11913d19498SJack Steiner * vma -> vdata 12013d19498SJack Steiner * 12113d19498SJack Steiner * After gseg reference 12213d19498SJack Steiner * vma -> vdata ->gts 12313d19498SJack Steiner * 12413d19498SJack Steiner * After fork 12513d19498SJack Steiner * parent 12613d19498SJack Steiner * vma -> vdata -> gts 12713d19498SJack Steiner * child 12813d19498SJack Steiner * (vma is not copied) 12913d19498SJack Steiner * 13013d19498SJack Steiner */ 13113d19498SJack Steiner 132*03acb0c5SXiyu Yang #include <linux/refcount.h> 13313d19498SJack Steiner #include <linux/rmap.h> 13413d19498SJack Steiner #include <linux/interrupt.h> 13513d19498SJack Steiner #include <linux/mutex.h> 13613d19498SJack Steiner #include <linux/wait.h> 13713d19498SJack Steiner #include <linux/mmu_notifier.h> 1381770a80fSSouptick Joarder #include <linux/mm_types.h> 13913d19498SJack Steiner #include "gru.h" 1407e796a72SJack Steiner #include "grulib.h" 14113d19498SJack Steiner #include "gruhandles.h" 14213d19498SJack Steiner 14313d19498SJack Steiner extern struct gru_stats_s gru_stats; 14413d19498SJack Steiner extern struct gru_blade_state *gru_base[]; 14513d19498SJack Steiner extern unsigned long gru_start_paddr, gru_end_paddr; 1461a2c09e3SJack Steiner extern void *gru_start_vaddr; 147e1c3219dSJack Steiner extern unsigned int gru_max_gids; 14813d19498SJack Steiner 14913d19498SJack Steiner #define GRU_MAX_BLADES MAX_NUMNODES 15013d19498SJack Steiner #define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE) 15113d19498SJack Steiner 15213d19498SJack Steiner #define GRU_DRIVER_ID_STR "SGI GRU Device Driver" 153e5ae6e82SJack Steiner #define GRU_DRIVER_VERSION_STR "0.85" 15413d19498SJack Steiner 15513d19498SJack Steiner /* 15613d19498SJack Steiner * 
GRU statistics. 15713d19498SJack Steiner */ 15813d19498SJack Steiner struct gru_stats_s { 15913d19498SJack Steiner atomic_long_t vdata_alloc; 16013d19498SJack Steiner atomic_long_t vdata_free; 16113d19498SJack Steiner atomic_long_t gts_alloc; 16213d19498SJack Steiner atomic_long_t gts_free; 163563447d7SJack Steiner atomic_long_t gms_alloc; 164563447d7SJack Steiner atomic_long_t gms_free; 16513d19498SJack Steiner atomic_long_t gts_double_allocate; 16613d19498SJack Steiner atomic_long_t assign_context; 16713d19498SJack Steiner atomic_long_t assign_context_failed; 16813d19498SJack Steiner atomic_long_t free_context; 169836ce679SJack Steiner atomic_long_t load_user_context; 170836ce679SJack Steiner atomic_long_t load_kernel_context; 171836ce679SJack Steiner atomic_long_t lock_kernel_context; 172836ce679SJack Steiner atomic_long_t unlock_kernel_context; 173836ce679SJack Steiner atomic_long_t steal_user_context; 174836ce679SJack Steiner atomic_long_t steal_kernel_context; 17513d19498SJack Steiner atomic_long_t steal_context_failed; 17613d19498SJack Steiner atomic_long_t nopfn; 17713d19498SJack Steiner atomic_long_t asid_new; 17813d19498SJack Steiner atomic_long_t asid_next; 17913d19498SJack Steiner atomic_long_t asid_wrap; 18013d19498SJack Steiner atomic_long_t asid_reuse; 18113d19498SJack Steiner atomic_long_t intr; 182563447d7SJack Steiner atomic_long_t intr_cbr; 183563447d7SJack Steiner atomic_long_t intr_tfh; 1842ce4d4c9SJack Steiner atomic_long_t intr_spurious; 18543884604SJack Steiner atomic_long_t intr_mm_lock_failed; 18613d19498SJack Steiner atomic_long_t call_os; 18713d19498SJack Steiner atomic_long_t call_os_wait_queue; 18813d19498SJack Steiner atomic_long_t user_flush_tlb; 18913d19498SJack Steiner atomic_long_t user_unload_context; 19013d19498SJack Steiner atomic_long_t user_exception; 19192b39388SJack Steiner atomic_long_t set_context_option; 19255484c45SJack Steiner atomic_long_t check_context_retarget_intr; 19355484c45SJack Steiner atomic_long_t 
check_context_unload; 19413d19498SJack Steiner atomic_long_t tlb_dropin; 195c550222fSJack Steiner atomic_long_t tlb_preload_page; 19613d19498SJack Steiner atomic_long_t tlb_dropin_fail_no_asid; 19713d19498SJack Steiner atomic_long_t tlb_dropin_fail_upm; 19813d19498SJack Steiner atomic_long_t tlb_dropin_fail_invalid; 19913d19498SJack Steiner atomic_long_t tlb_dropin_fail_range_active; 20013d19498SJack Steiner atomic_long_t tlb_dropin_fail_idle; 20113d19498SJack Steiner atomic_long_t tlb_dropin_fail_fmm; 202cd1334f0SJack Steiner atomic_long_t tlb_dropin_fail_no_exception; 203270952a9SJack Steiner atomic_long_t tfh_stale_on_fault; 20413d19498SJack Steiner atomic_long_t mmu_invalidate_range; 20513d19498SJack Steiner atomic_long_t mmu_invalidate_page; 20613d19498SJack Steiner atomic_long_t flush_tlb; 20713d19498SJack Steiner atomic_long_t flush_tlb_gru; 20813d19498SJack Steiner atomic_long_t flush_tlb_gru_tgh; 20913d19498SJack Steiner atomic_long_t flush_tlb_gru_zero_asid; 21013d19498SJack Steiner 21113d19498SJack Steiner atomic_long_t copy_gpa; 212289750d1SRobin Holt atomic_long_t read_gpa; 21313d19498SJack Steiner 21413d19498SJack Steiner atomic_long_t mesq_receive; 21513d19498SJack Steiner atomic_long_t mesq_receive_none; 21613d19498SJack Steiner atomic_long_t mesq_send; 21713d19498SJack Steiner atomic_long_t mesq_send_failed; 21813d19498SJack Steiner atomic_long_t mesq_noop; 21913d19498SJack Steiner atomic_long_t mesq_send_unexpected_error; 22013d19498SJack Steiner atomic_long_t mesq_send_lb_overflow; 22113d19498SJack Steiner atomic_long_t mesq_send_qlimit_reached; 22213d19498SJack Steiner atomic_long_t mesq_send_amo_nacked; 22313d19498SJack Steiner atomic_long_t mesq_send_put_nacked; 224563447d7SJack Steiner atomic_long_t mesq_page_overflow; 22513d19498SJack Steiner atomic_long_t mesq_qf_locked; 22613d19498SJack Steiner atomic_long_t mesq_qf_noop_not_full; 22713d19498SJack Steiner atomic_long_t mesq_qf_switch_head_failed; 22813d19498SJack Steiner atomic_long_t 
mesq_qf_unexpected_error; 22913d19498SJack Steiner atomic_long_t mesq_noop_unexpected_error; 23013d19498SJack Steiner atomic_long_t mesq_noop_lb_overflow; 23113d19498SJack Steiner atomic_long_t mesq_noop_qlimit_reached; 23213d19498SJack Steiner atomic_long_t mesq_noop_amo_nacked; 23313d19498SJack Steiner atomic_long_t mesq_noop_put_nacked; 234563447d7SJack Steiner atomic_long_t mesq_noop_page_overflow; 23513d19498SJack Steiner 23613d19498SJack Steiner }; 23713d19498SJack Steiner 238a24e5e1cSJack Steiner enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync, 239c550222fSJack Steiner cchop_deallocate, tfhop_write_only, tfhop_write_restart, 240c550222fSJack Steiner tghop_invalidate, mcsop_last}; 241a24e5e1cSJack Steiner 242e56484daSJack Steiner struct mcs_op_statistic { 243e56484daSJack Steiner atomic_long_t count; 244e56484daSJack Steiner atomic_long_t total; 245e56484daSJack Steiner unsigned long max; 246e56484daSJack Steiner }; 247e56484daSJack Steiner 248e56484daSJack Steiner extern struct mcs_op_statistic mcs_op_statistics[mcsop_last]; 249e56484daSJack Steiner 25013d19498SJack Steiner #define OPT_DPRINT 1 25113d19498SJack Steiner #define OPT_STATS 2 25213d19498SJack Steiner 25313d19498SJack Steiner 25413d19498SJack Steiner #define IRQ_GRU 110 /* Starting IRQ number for interrupts */ 25513d19498SJack Steiner 25613d19498SJack Steiner /* Delay in jiffies between attempts to assign a GRU context */ 25713d19498SJack Steiner #define GRU_ASSIGN_DELAY ((HZ * 20) / 1000) 25813d19498SJack Steiner 25913d19498SJack Steiner /* 26013d19498SJack Steiner * If a process has it's context stolen, min delay in jiffies before trying to 26113d19498SJack Steiner * steal a context from another process. 
26213d19498SJack Steiner */ 26313d19498SJack Steiner #define GRU_STEAL_DELAY ((HZ * 200) / 1000) 26413d19498SJack Steiner 26513d19498SJack Steiner #define STAT(id) do { \ 2669ca8e40cSJack Steiner if (gru_options & OPT_STATS) \ 26713d19498SJack Steiner atomic_long_inc(&gru_stats.id); \ 26813d19498SJack Steiner } while (0) 26913d19498SJack Steiner 27013d19498SJack Steiner #ifdef CONFIG_SGI_GRU_DEBUG 27113d19498SJack Steiner #define gru_dbg(dev, fmt, x...) \ 27213d19498SJack Steiner do { \ 2739ca8e40cSJack Steiner if (gru_options & OPT_DPRINT) \ 274563447d7SJack Steiner printk(KERN_DEBUG "GRU:%d %s: " fmt, smp_processor_id(), __func__, x);\ 27513d19498SJack Steiner } while (0) 27613d19498SJack Steiner #else 27713d19498SJack Steiner #define gru_dbg(x...) 27813d19498SJack Steiner #endif 27913d19498SJack Steiner 28013d19498SJack Steiner /*----------------------------------------------------------------------------- 28113d19498SJack Steiner * ASID management 28213d19498SJack Steiner */ 28313d19498SJack Steiner #define MAX_ASID 0xfffff0 28413d19498SJack Steiner #define MIN_ASID 8 28513d19498SJack Steiner #define ASID_INC 8 /* number of regions */ 28613d19498SJack Steiner 28713d19498SJack Steiner /* Generate a GRU asid value from a GRU base asid & a virtual address. */ 28813d19498SJack Steiner #define VADDR_HI_BIT 64 289fe5bb6b0SJack Steiner #define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3) 29013d19498SJack Steiner #define GRUASID(asid, addr) ((asid) + GRUREGION(addr)) 29113d19498SJack Steiner 29213d19498SJack Steiner /*------------------------------------------------------------------------------ 29313d19498SJack Steiner * File & VMS Tables 29413d19498SJack Steiner */ 29513d19498SJack Steiner 29613d19498SJack Steiner struct gru_state; 29713d19498SJack Steiner 29813d19498SJack Steiner /* 29913d19498SJack Steiner * This structure is pointed to from the mmstruct via the notifier pointer. 30013d19498SJack Steiner * There is one of these per address space. 
30113d19498SJack Steiner */ 302fe5bb6b0SJack Steiner struct gru_mm_tracker { /* pack to reduce size */ 303fe5bb6b0SJack Steiner unsigned int mt_asid_gen:24; /* ASID wrap count */ 304fe5bb6b0SJack Steiner unsigned int mt_asid:24; /* current base ASID for gru */ 305fe5bb6b0SJack Steiner unsigned short mt_ctxbitmap:16;/* bitmap of contexts using 30613d19498SJack Steiner asid */ 307fe5bb6b0SJack Steiner } __attribute__ ((packed)); 30813d19498SJack Steiner 30913d19498SJack Steiner struct gru_mm_struct { 31013d19498SJack Steiner struct mmu_notifier ms_notifier; 31113d19498SJack Steiner spinlock_t ms_asid_lock; /* protects ASID assignment */ 31213d19498SJack Steiner atomic_t ms_range_active;/* num range_invals active */ 31313d19498SJack Steiner wait_queue_head_t ms_wait_queue; 31413d19498SJack Steiner DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS); 31513d19498SJack Steiner struct gru_mm_tracker ms_asids[GRU_MAX_GRUS]; 31613d19498SJack Steiner }; 31713d19498SJack Steiner 31813d19498SJack Steiner /* 31913d19498SJack Steiner * One of these structures is allocated when a GSEG is mmaped. The 32013d19498SJack Steiner * structure is pointed to by the vma->vm_private_data field in the vma struct. 32113d19498SJack Steiner */ 32213d19498SJack Steiner struct gru_vma_data { 32313d19498SJack Steiner spinlock_t vd_lock; /* Serialize access to vma */ 32413d19498SJack Steiner struct list_head vd_head; /* head of linked list of gts */ 32513d19498SJack Steiner long vd_user_options;/* misc user option flags */ 32613d19498SJack Steiner int vd_cbr_au_count; 32713d19498SJack Steiner int vd_dsr_au_count; 328c550222fSJack Steiner unsigned char vd_tlb_preload_count; 32913d19498SJack Steiner }; 33013d19498SJack Steiner 33113d19498SJack Steiner /* 33213d19498SJack Steiner * One of these is allocated for each thread accessing a mmaped GRU. A linked 33313d19498SJack Steiner * list of these structure is hung off the struct gru_vma_data in the mm_struct. 
33413d19498SJack Steiner */ 33513d19498SJack Steiner struct gru_thread_state { 33613d19498SJack Steiner struct list_head ts_next; /* list - head at vma-private */ 33713d19498SJack Steiner struct mutex ts_ctxlock; /* load/unload CTX lock */ 33813d19498SJack Steiner struct mm_struct *ts_mm; /* mm currently mapped to 33913d19498SJack Steiner context */ 34013d19498SJack Steiner struct vm_area_struct *ts_vma; /* vma of GRU context */ 34113d19498SJack Steiner struct gru_state *ts_gru; /* GRU where the context is 34213d19498SJack Steiner loaded */ 34313d19498SJack Steiner struct gru_mm_struct *ts_gms; /* asid & ioproc struct */ 344c550222fSJack Steiner unsigned char ts_tlb_preload_count; /* TLB preload pages */ 34513d19498SJack Steiner unsigned long ts_cbr_map; /* map of allocated CBRs */ 34613d19498SJack Steiner unsigned long ts_dsr_map; /* map of allocated DATA 34713d19498SJack Steiner resources */ 34813d19498SJack Steiner unsigned long ts_steal_jiffies;/* jiffies when context last 34913d19498SJack Steiner stolen */ 35013d19498SJack Steiner long ts_user_options;/* misc user option flags */ 35113d19498SJack Steiner pid_t ts_tgid_owner; /* task that is using the 35213d19498SJack Steiner context - for migration */ 353518e5cd4SJack Steiner short ts_user_blade_id;/* user selected blade */ 354518e5cd4SJack Steiner char ts_user_chiplet_id;/* user selected chiplet */ 3557b8274e9SJack Steiner unsigned short ts_sizeavail; /* Pagesizes in use */ 35613d19498SJack Steiner int ts_tsid; /* thread that owns the 35713d19498SJack Steiner structure */ 35813d19498SJack Steiner int ts_tlb_int_select;/* target cpu if interrupts 35913d19498SJack Steiner enabled */ 36013d19498SJack Steiner int ts_ctxnum; /* context number where the 36113d19498SJack Steiner context is loaded */ 362*03acb0c5SXiyu Yang refcount_t ts_refcnt; /* reference count GTS */ 36313d19498SJack Steiner unsigned char ts_dsr_au_count;/* Number of DSR resources 36413d19498SJack Steiner required for contest */ 36513d19498SJack 
Steiner unsigned char ts_cbr_au_count;/* Number of CBR resources 36613d19498SJack Steiner required for contest */ 367b1b19fcfSJack Steiner char ts_cch_req_slice;/* CCH packet slice */ 368fe5bb6b0SJack Steiner char ts_blade; /* If >= 0, migrate context if 36925985edcSLucas De Marchi ref from different blade */ 3707b8274e9SJack Steiner char ts_force_cch_reload; 37113d19498SJack Steiner char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each 37213d19498SJack Steiner allocated CB */ 373940229b9SJack Steiner int ts_data_valid; /* Indicates if ts_gdata has 374940229b9SJack Steiner valid data */ 3755958ab88SJack Steiner struct gru_gseg_statistics ustats; /* User statistics */ 376f490e8aeSGustavo A. R. Silva unsigned long ts_gdata[]; /* save area for GRU data (CB, 37713d19498SJack Steiner DS, CBE) */ 37813d19498SJack Steiner }; 37913d19498SJack Steiner 38013d19498SJack Steiner /* 38113d19498SJack Steiner * Threaded programs actually allocate an array of GSEGs when a context is 38213d19498SJack Steiner * created. Each thread uses a separate GSEG. TSID is the index into the GSEG 38313d19498SJack Steiner * array. 38413d19498SJack Steiner */ 38513d19498SJack Steiner #define TSID(a, v) (((a) - (v)->vm_start) / GRU_GSEG_PAGESIZE) 38613d19498SJack Steiner #define UGRUADDR(gts) ((gts)->ts_vma->vm_start + \ 38713d19498SJack Steiner (gts)->ts_tsid * GRU_GSEG_PAGESIZE) 38813d19498SJack Steiner 38913d19498SJack Steiner #define NULLCTX (-1) /* if context not loaded into GRU */ 39013d19498SJack Steiner 39113d19498SJack Steiner /*----------------------------------------------------------------------------- 39213d19498SJack Steiner * GRU State Tables 39313d19498SJack Steiner */ 39413d19498SJack Steiner 39513d19498SJack Steiner /* 39613d19498SJack Steiner * One of these exists for each GRU chiplet. 
39713d19498SJack Steiner */ 39813d19498SJack Steiner struct gru_state { 39913d19498SJack Steiner struct gru_blade_state *gs_blade; /* GRU state for entire 40013d19498SJack Steiner blade */ 40113d19498SJack Steiner unsigned long gs_gru_base_paddr; /* Physical address of 40213d19498SJack Steiner gru segments (64) */ 40313d19498SJack Steiner void *gs_gru_base_vaddr; /* Virtual address of 40413d19498SJack Steiner gru segments (64) */ 405e1c3219dSJack Steiner unsigned short gs_gid; /* unique GRU number */ 406e1c3219dSJack Steiner unsigned short gs_blade_id; /* blade of GRU */ 40755484c45SJack Steiner unsigned char gs_chiplet_id; /* blade chiplet of GRU */ 40813d19498SJack Steiner unsigned char gs_tgh_local_shift; /* used to pick TGH for 40913d19498SJack Steiner local flush */ 41013d19498SJack Steiner unsigned char gs_tgh_first_remote; /* starting TGH# for 41113d19498SJack Steiner remote flush */ 41213d19498SJack Steiner spinlock_t gs_asid_lock; /* lock used for 41313d19498SJack Steiner assigning asids */ 41413d19498SJack Steiner spinlock_t gs_lock; /* lock used for 41513d19498SJack Steiner assigning contexts */ 41613d19498SJack Steiner 41713d19498SJack Steiner /* -- the following are protected by the gs_asid_lock spinlock ---- */ 41813d19498SJack Steiner unsigned int gs_asid; /* Next availe ASID */ 41913d19498SJack Steiner unsigned int gs_asid_limit; /* Limit of available 42013d19498SJack Steiner ASIDs */ 42113d19498SJack Steiner unsigned int gs_asid_gen; /* asid generation. 
42213d19498SJack Steiner Inc on wrap */ 42313d19498SJack Steiner 42413d19498SJack Steiner /* --- the following fields are protected by the gs_lock spinlock --- */ 42513d19498SJack Steiner unsigned long gs_context_map; /* bitmap to manage 42613d19498SJack Steiner contexts in use */ 42713d19498SJack Steiner unsigned long gs_cbr_map; /* bitmap to manage CB 42813d19498SJack Steiner resources */ 42913d19498SJack Steiner unsigned long gs_dsr_map; /* bitmap used to manage 43013d19498SJack Steiner DATA resources */ 43113d19498SJack Steiner unsigned int gs_reserved_cbrs; /* Number of kernel- 43213d19498SJack Steiner reserved cbrs */ 43313d19498SJack Steiner unsigned int gs_reserved_dsr_bytes; /* Bytes of kernel- 43413d19498SJack Steiner reserved dsrs */ 43513d19498SJack Steiner unsigned short gs_active_contexts; /* number of contexts 43613d19498SJack Steiner in use */ 43713d19498SJack Steiner struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using 43813d19498SJack Steiner the context */ 4394107e1d3SJack Steiner int gs_irq[GRU_NUM_TFM]; /* Interrupt irqs */ 44013d19498SJack Steiner }; 44113d19498SJack Steiner 44213d19498SJack Steiner /* 44313d19498SJack Steiner * This structure contains the GRU state for all the GRUs on a blade. 
44413d19498SJack Steiner */ 44513d19498SJack Steiner struct gru_blade_state { 44613d19498SJack Steiner void *kernel_cb; /* First kernel 44713d19498SJack Steiner reserved cb */ 44813d19498SJack Steiner void *kernel_dsr; /* First kernel 44913d19498SJack Steiner reserved DSR */ 450836ce679SJack Steiner struct rw_semaphore bs_kgts_sema; /* lock for kgts */ 451836ce679SJack Steiner struct gru_thread_state *bs_kgts; /* GTS for kernel use */ 452836ce679SJack Steiner 4534a7a17c1SJack Steiner /* ---- the following are used for managing kernel async GRU CBRs --- */ 4544a7a17c1SJack Steiner int bs_async_dsr_bytes; /* DSRs for async */ 4554a7a17c1SJack Steiner int bs_async_cbrs; /* CBRs AU for async */ 4564a7a17c1SJack Steiner struct completion *bs_async_wq; 4574a7a17c1SJack Steiner 45813d19498SJack Steiner /* ---- the following are protected by the bs_lock spinlock ---- */ 45913d19498SJack Steiner spinlock_t bs_lock; /* lock used for 46013d19498SJack Steiner stealing contexts */ 46113d19498SJack Steiner int bs_lru_ctxnum; /* STEAL - last context 46213d19498SJack Steiner stolen */ 46313d19498SJack Steiner struct gru_state *bs_lru_gru; /* STEAL - last gru 46413d19498SJack Steiner stolen */ 46513d19498SJack Steiner 46613d19498SJack Steiner struct gru_state bs_grus[GRU_CHIPLETS_PER_BLADE]; 46713d19498SJack Steiner }; 46813d19498SJack Steiner 46913d19498SJack Steiner /*----------------------------------------------------------------------------- 47013d19498SJack Steiner * Address Primitives 47113d19498SJack Steiner */ 47213d19498SJack Steiner #define get_tfm_for_cpu(g, c) \ 47313d19498SJack Steiner ((struct gru_tlb_fault_map *)get_tfm((g)->gs_gru_base_vaddr, (c))) 47413d19498SJack Steiner #define get_tfh_by_index(g, i) \ 47513d19498SJack Steiner ((struct gru_tlb_fault_handle *)get_tfh((g)->gs_gru_base_vaddr, (i))) 47613d19498SJack Steiner #define get_tgh_by_index(g, i) \ 47713d19498SJack Steiner ((struct gru_tlb_global_handle *)get_tgh((g)->gs_gru_base_vaddr, (i))) 
#define get_cbe_by_index(g, i)						\
	((struct gru_control_block_extended *)get_cbe((g)->gs_gru_base_vaddr,\
			(i)))

/*-----------------------------------------------------------------------------
 * Useful Macros
 */

/* Given a blade# & chiplet#, get a pointer to the GRU */
#define get_gru(b, c)		(&gru_base[b]->bs_grus[c])

/* Number of bytes to save/restore when unloading/loading GRU contexts */
#define DSR_BYTES(dsr)		((dsr) * GRU_DSR_AU_BYTES)
#define CBR_BYTES(cbr)		((cbr) * GRU_HANDLE_BYTES * GRU_CBR_AU_SIZE * 2)

/* Convert a user CB number to the actual CBRNUM */
#define thread_cbr_number(gts, n) ((gts)->ts_cbr_idx[(n) / GRU_CBR_AU_SIZE] \
				  * GRU_CBR_AU_SIZE + (n) % GRU_CBR_AU_SIZE)

/*
 * Convert a gid to a pointer to the GRU.
 * Evaluates to NULL when the gru_base[] slot for the gid's blade is NULL.
 */
#define GID_TO_GRU(gid)							\
	(gru_base[(gid) / GRU_CHIPLETS_PER_BLADE] ?			\
		(&gru_base[(gid) / GRU_CHIPLETS_PER_BLADE]->		\
			bs_grus[(gid) % GRU_CHIPLETS_PER_BLADE]) :	\
	 NULL)

/* Scan all active GRUs in a GRU bitmap */
#define for_each_gru_in_bitmap(gid, map)				\
	for_each_set_bit((gid), (map), GRU_MAX_GRUS)

/* Scan all active GRUs on a specific blade */
#define for_each_gru_on_blade(gru, nid, i)				\
	for ((gru) = gru_base[nid]->bs_grus, (i) = 0;			\
			(i) < GRU_CHIPLETS_PER_BLADE;			\
			(i)++, (gru)++)

/* Scan all GRUs */
#define foreach_gid(gid)						\
	for ((gid) = 0; (gid) < gru_max_gids; (gid)++)

/*
 * Scan all active GTSs on a gru. Note: must hold gs_lock to use this macro
 * (gs_gts[] is listed among the gs_lock-protected fields of struct gru_state).
 *
 * Written as "if (!...) {} else" so that an 'else' following the loop body at
 * the call site cannot bind to the macro's hidden 'if'.
 */
#define for_each_gts_on_gru(gts, gru, ctxnum)				\
	for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++)		\
		if (!((gts) = (gru)->gs_gts[ctxnum])) {} else

/* Scan each CBR whose bit is set in a TFM (or copy of) */
#define for_each_cbr_in_tfm(i, map)					\
	for_each_set_bit((i), (map), GRU_NUM_CBE)

/* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */
#define for_each_cbr_in_allocation_map(i, map, k)			\
	for_each_set_bit((k), (map), GRU_CBR_AU)			\
		for ((i) = (k)*GRU_CBR_AU_SIZE;				\
				(i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++)

/* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */
#define for_each_dsr_in_allocation_map(i, map, k)			\
	for_each_set_bit((k), (const unsigned long *)(map), GRU_DSR_AU)	\
		for ((i) = (k) * GRU_DSR_AU_CL;				\
				(i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++)

/*
 * Physical / virtual address of the GSEG for context 'ctxnum' on 'gru'.
 * 'ctxnum' is parenthesized so expression arguments (e.g. "n + 1") are
 * multiplied by the stride as a whole.
 */
#define gseg_physical_address(gru, ctxnum)				\
		((gru)->gs_gru_base_paddr + (ctxnum) * GRU_GSEG_STRIDE)
#define gseg_virtual_address(gru, ctxnum)				\
		((gru)->gs_gru_base_vaddr + (ctxnum) * GRU_GSEG_STRIDE)

/*-----------------------------------------------------------------------------
 * Lock / Unlock GRU handles
 * 	Use the "delresp" bit in the handle as a "lock" bit.
 */

/* Lock hierarchy checking enabled only in emulator */

/*
 * Try once to take the handle lock (bit 1 of the word at 'h').
 * Returns: 0 = lock failed (bit was already set), 1 = locked.
 */
static inline int __trylock_handle(void *h)
{
	return !test_and_set_bit(1, h);
}

/* Spin, calling cpu_relax() between attempts, until bit 1 of 'h' is acquired. */
static inline void __lock_handle(void *h)
{
	while (test_and_set_bit(1, h))
		cpu_relax();
}

/*
 * Release the handle lock by clearing bit 1 of 'h'.
 * NOTE(review): clear_bit() is not an ordered operation; confirm whether
 * clear_bit_unlock() (release semantics) is required for these handles.
 */
static inline void __unlock_handle(void *h)
{
	clear_bit(1, h);
}

/* Typed wrappers around the generic handle lock helpers above. */
static inline int trylock_cch_handle(struct gru_context_configuration_handle *cch)
{
	return __trylock_handle(cch);
}

static inline void lock_cch_handle(struct gru_context_configuration_handle *cch)
{
	__lock_handle(cch);
}

static inline void unlock_cch_handle(struct gru_context_configuration_handle
				     *cch)
{
	__unlock_handle(cch);
}

static inline void lock_tgh_handle(struct gru_tlb_global_handle *tgh)
{
	__lock_handle(tgh);
}

static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
{
	__unlock_handle(tgh);
}
59313d19498SJack Steiner 594836ce679SJack Steiner static inline int is_kernel_context(struct gru_thread_state *gts) 595836ce679SJack Steiner { 596836ce679SJack Steiner return !gts->ts_mm; 597836ce679SJack Steiner } 598836ce679SJack Steiner 5994107e1d3SJack Steiner /* 6004107e1d3SJack Steiner * The following are for Nehelem-EX. A more general scheme is needed for 6014107e1d3SJack Steiner * future processors. 6024107e1d3SJack Steiner */ 6034107e1d3SJack Steiner #define UV_MAX_INT_CORES 8 6044107e1d3SJack Steiner #define uv_cpu_socket_number(p) ((cpu_physical_id(p) >> 5) & 1) 6054107e1d3SJack Steiner #define uv_cpu_ht_number(p) (cpu_physical_id(p) & 1) 6064107e1d3SJack Steiner #define uv_cpu_core_number(p) (((cpu_physical_id(p) >> 2) & 4) | \ 6074107e1d3SJack Steiner ((cpu_physical_id(p) >> 1) & 3)) 60813d19498SJack Steiner /*----------------------------------------------------------------------------- 60913d19498SJack Steiner * Function prototypes & externs 61013d19498SJack Steiner */ 61113d19498SJack Steiner struct gru_unload_context_req; 61213d19498SJack Steiner 613f0f37e2fSAlexey Dobriyan extern const struct vm_operations_struct gru_vm_ops; 61413d19498SJack Steiner extern struct device *grudev; 61513d19498SJack Steiner 61613d19498SJack Steiner extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, 61713d19498SJack Steiner int tsid); 61813d19498SJack Steiner extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct 61913d19498SJack Steiner *vma, int tsid); 62013d19498SJack Steiner extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct 62113d19498SJack Steiner *vma, int tsid); 62255484c45SJack Steiner extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts); 623d57c82b1SJack Steiner extern void gru_load_context(struct gru_thread_state *gts); 62455484c45SJack Steiner extern void gru_steal_context(struct gru_thread_state *gts); 62513d19498SJack Steiner extern void 
gru_unload_context(struct gru_thread_state *gts, int savestate); 62699f7c229SJack Steiner extern int gru_update_cch(struct gru_thread_state *gts); 62713d19498SJack Steiner extern void gts_drop(struct gru_thread_state *gts); 62813d19498SJack Steiner extern void gru_tgh_flush_init(struct gru_state *gru); 629d5826dd6SJack Steiner extern int gru_kservices_init(void); 630d5826dd6SJack Steiner extern void gru_kservices_exit(void); 6314107e1d3SJack Steiner extern irqreturn_t gru0_intr(int irq, void *dev_id); 6324107e1d3SJack Steiner extern irqreturn_t gru1_intr(int irq, void *dev_id); 6334107e1d3SJack Steiner extern irqreturn_t gru_intr_mblade(int irq, void *dev_id); 6349cc9b056SJack Steiner extern int gru_dump_chiplet_request(unsigned long arg); 6357e796a72SJack Steiner extern long gru_get_gseg_statistics(unsigned long arg); 63613d19498SJack Steiner extern int gru_handle_user_call_os(unsigned long address); 63713d19498SJack Steiner extern int gru_user_flush_tlb(unsigned long arg); 63813d19498SJack Steiner extern int gru_user_unload_context(unsigned long arg); 63913d19498SJack Steiner extern int gru_get_exception_detail(unsigned long arg); 64092b39388SJack Steiner extern int gru_set_context_option(unsigned long address); 64155484c45SJack Steiner extern void gru_check_context_placement(struct gru_thread_state *gts); 64213d19498SJack Steiner extern int gru_cpu_fault_map_id(void); 64313d19498SJack Steiner extern struct vm_area_struct *gru_find_vma(unsigned long vaddr); 64413d19498SJack Steiner extern void gru_flush_all_tlb(struct gru_state *gru); 64513d19498SJack Steiner extern int gru_proc_init(void); 64613d19498SJack Steiner extern void gru_proc_exit(void); 64713d19498SJack Steiner 648364b76dfSJack Steiner extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, 649c550222fSJack Steiner int cbr_au_count, int dsr_au_count, 650c550222fSJack Steiner unsigned char tlb_preload_count, int options, int tsid); 6519ca8e40cSJack Steiner extern unsigned long 
gru_reserve_cb_resources(struct gru_state *gru, 65213d19498SJack Steiner int cbr_au_count, char *cbmap); 6539ca8e40cSJack Steiner extern unsigned long gru_reserve_ds_resources(struct gru_state *gru, 65413d19498SJack Steiner int dsr_au_count, char *dsmap); 6551770a80fSSouptick Joarder extern vm_fault_t gru_fault(struct vm_fault *vmf); 65613d19498SJack Steiner extern struct gru_mm_struct *gru_register_mmu_notifier(void); 65713d19498SJack Steiner extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms); 65813d19498SJack Steiner 659eb5bd5e5SJack Steiner extern int gru_ktest(unsigned long arg); 66013d19498SJack Steiner extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, 66113d19498SJack Steiner unsigned long len); 66213d19498SJack Steiner 6639ca8e40cSJack Steiner extern unsigned long gru_options; 66413d19498SJack Steiner 66513d19498SJack Steiner #endif /* __GRUTABLES_H__ */ 666