当前位置:网站首页>Buddy: initialize memory domain
Buddy: initialize memory domain
2022-07-19 04:03:00 【Kun Yu】
Due to hardware limitations, the kernel cannot treat all page frames identically: some pages sit at specific physical addresses in memory and therefore cannot be used for certain tasks. Because of this restriction, the kernel divides pages into zones; each zone groups pages with similar characteristics.
build_all_zonelists
The pglist_data structure contains a node_zonelists array member; each zoneref object in a zonelist points to one zone object.
build_all_zonelists initializes the node_zonelists array of every pglist_data object (on UMA systems there is only a single pglist_data object).
/*
 * Build (or rebuild) the zonelists for the memory nodes.
 *
 * During boot (system_state == SYSTEM_BOOTING) the one-time init path is
 * taken, which builds the zonelists for every node, initializes the
 * boot-time per-cpu pagesets and sets the current task's mems_allowed;
 * after boot only the zonelists are rebuilt for @pgdat.
 *
 * Afterwards, grouping pages by mobility is enabled only when the pages
 * free above the high watermark suffice to back every MIGRATE_* type
 * with at least one pageblock.
 */
void __ref build_all_zonelists(pg_data_t *pgdat)
{
	unsigned long vm_total_pages;

	if (system_state == SYSTEM_BOOTING) {
		/* Boot path: zonelists for all nodes + per-cpu/cpuset init. */
		build_all_zonelists_init();
	} else {
		__build_all_zonelists(pgdat);
		/* cpuset refresh routine should be here */
	}

	/* Total pages free beyond the high watermark, over all usable zones. */
	vm_total_pages = nr_free_zone_pages(gfp_zone(GFP_HIGHUSER_MOVABLE));

	/*
	 * Too little memory to give each migrate type its own pageblock:
	 * disable mobility grouping.
	 */
	page_group_by_mobility_disabled =
		vm_total_pages < (pageblock_nr_pages * MIGRATE_TYPES) ? 1 : 0;

	pr_info("Built %u zonelists, mobility grouping %s. Total pages: %ld\n",
		nr_online_nodes,
		page_group_by_mobility_disabled ? "off" : "on",
		vm_total_pages);
#ifdef CONFIG_NUMA
	pr_info("Policy zone: %s\n", zone_names[policy_zone]);
#endif
}
build_all_zonelists_init
static noinline void __init
build_all_zonelists_init(void)
{
int cpu;
__build_all_zonelists(NULL);
||
\/
static void __build_all_zonelists(void *data)
{
...
#ifdef CONFIG_NUMA
memset(node_load, 0, sizeof(node_load)); // initialization node_load Array , Maximum 10 Load tags
/* static int node_load[MAX_NUMNODES]; */
/* #define MAX_NUMNODES (1 << NODES_SHIFT) -> 10 */
#endif
if (self && !node_online(self->node_id)) { // This node has been hot added , And there is no memory ( Not working )
build_zonelists(self); // Create a memory domain list for all memory nodes
||
\/
/*
 * Build the zonelist fallback order for one NUMA node.
 *
 * Other nodes are visited in the order chosen by find_next_best_node()
 * (distance-based — see the node_distance() comparisons below), and the
 * resulting node order is then turned into this node's zonelists.
 */
static void build_zonelists(pg_data_t *pgdat)
{
...
local_node = pgdat->node_id; /* the node we are building the fallback list for */
prev_node = local_node;
memset(node_order, 0, sizeof(node_order));
while ((node = find_next_best_node(local_node, &used_mask)) >= 0) { /* next best unused node */
/*
 * Bump node_load only when the distance changes, biasing future
 * ordering so equidistant nodes are not always picked in the
 * same order.
 */
if (node_distance(local_node, node) !=
node_distance(local_node, prev_node))
node_load[node] += 1;
node_order[nr_nodes++] = node;
prev_node = node;
}
build_zonelists_in_node_order(pgdat, node_order, nr_nodes); /* build the per-node/per-zone fallback zonelist */
build_thisnode_zonelists(pgdat); /* build the zonelist that references this node's zones only */
pr_info("Fallback order for Node %d: ", local_node);
for (node = 0; node < nr_nodes; node++)
pr_cont("%d ", node_order[node]);
pr_cont("\n");
}
On NUMA systems, each memory node (associated with a processor and represented by a pg_data_t structure) contains several zones, referenced through the node_zonelists member of pg_data_t. When allocating memory, the kernel first searches the preferred zone; if that fails, it falls back to the backup zones listed in the zonelist:
} else {
/*
* All possible nodes have pgdat preallocated
* in free_area_init
*/
for_each_node(nid) {
pg_data_t *pgdat = NODE_DATA(nid); // Pre assign known nodes
build_zonelists(pgdat);
}
}
spin_unlock(&lock);
}
||
\/
for_each_possible_cpu(cpu)
per_cpu_pages_init(&per_cpu(boot_pageset, cpu), &per_cpu(boot_zonestats, cpu));
// Initialize the boot_pageset. The real per-zone pagesets will be
// allocated later, once the per-cpu allocator is available. If the
// system is already up, boot_pageset is also used to bring up offline
// CPUs, because initializing the allocator on a given CPU itself needs
// these pagesets: the percpu allocator needs the page allocator, and
// the page allocator needs the percpu allocator to allocate its
// pagesets (a chicken-and-egg problem).
mminit_verify_zonelist(); // Verify memory area and node list
cpuset_init_current_mems_allowed(); // Modify the current cpu Of task Structural mems_allowed sign
}
page_alloc_init
/*
* CPU-up CPU-down
*
* BP AP BP AP
*
* OFFLINE OFFLINE
* | ^
* v |
* BRINGUP_CPU->AP_OFFLINE BRINGUP_CPU <- AP_IDLE_DEAD (idle thread/play_dead)
* | AP_OFFLINE
* v (IRQ-off) ,---------------^
AP_ONLINE | (stop_machine)
* | TEARDOWN_CPU <- AP_ONLINE_IDLE
* | ^
* v |
* AP_ACTIVE AP_ACTIVE
*/
The page_alloc_init function registers the page allocator's CPU-state callbacks (page_alloc_cpu_online, page_alloc_cpu_dead) with the hotplug framework. Each CPU has its own hotplug thread (kernel thread named cpuhp) that detects state transitions and invokes the callback registered for each state.
/*
 * Register the page allocator's CPU hotplug callbacks.
 *
 * page_alloc_cpu_online / page_alloc_cpu_dead are installed for the
 * CPUHP_PAGE_ALLOC state; each CPU's hotplug thread invokes them when
 * that CPU comes online or after it has died.
 */
void __init page_alloc_init(void)
{
	int err;

#ifdef CONFIG_NUMA
	/* Only one node holds memory: spreading hashes over nodes buys nothing. */
	if (num_node_state(N_MEMORY) == 1)
		hashdist = 0;
#endif
	/* Install the callbacks without invoking them on already-online CPUs. */
	err = cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC,
					"mm/page_alloc:pcp",
					page_alloc_cpu_online,
					page_alloc_cpu_dead);
	WARN_ON(err < 0);
}
// enum cpuhp_state {
// CPUHP_INVALID = -1,
//
// /* PREPARE section invoked on a control CPU */
// CPUHP_OFFLINE = 0,
// CPUHP_CREATE_THREADS,
// ...
// CPUHP_PAGE_ALLOC
// ...
// }
CPU hotplug state enumerator:
During a CPU online operation, the state machine invokes the installed
startup callbacks from CPUHP_OFFLINE + 1 up to CPUHP_ONLINE. During a CPU
offline operation, the installed teardown callbacks are invoked in the
reverse order, from CPUHP_ONLINE - 1 down to CPUHP_OFFLINE.
The state space has three sections: prepare, starting and online.
Prepare: the callbacks are invoked on a control CPU before the hotplugged CPU is started up, or after the hotplugged CPU has died.
Starting: the callbacks are invoked on the hotplugged CPU from the low-level hotplug startup/teardown code, with interrupts disabled.
Online: the callbacks are invoked on the hotplugged CPU from its per-CPU hotplug thread, with interrupts and preemption enabled.
Adding an explicit state to this enum is only necessary when:
1) the state is within the starting section, or
2) the state has an ordering constraint against another state in the same section.
If neither 1) nor 2) applies, use the dynamic state space instead:
CPUHP_BP_PREPARE_DYN or CPUHP_AP_ONLINE_DYN for the state argument of the setup function.
||
\/
return __cpuhp_setup_state(state, name, false, startup, teardown,
false);
||
\/
cpus_read_lock();
ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
teardown, multi_instance);
cpus_read_unlock();
||
\/
/*
 * Install the startup/teardown callbacks for a CPU hotplug state; the
 * caller must already hold the CPU read-lock.
 *
 * The callbacks are recorded for @state, then the startup callback is
 * issued on every present CPU that has already reached @state.  If any
 * invocation fails, the installation is rolled back on the CPUs already
 * handled and the stored callbacks are removed again.
 * (Parts elided here with "..." — see the full kernel source.)
 */
int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
const char *name, bool invoke,
int (*startup)(unsigned int cpu),
int (*teardown)(unsigned int cpu),
bool multi_instance)
{
int cpu, ret = 0;
bool dynstate;
lockdep_assert_cpus_held(); /* lockdep is not enabled before userspace runs */
if (cpuhp_cb_check(state) || !name) /* @state must lie between CPUHP_OFFLINE and CPUHP_ONLINE */
return -EINVAL;
mutex_lock(&cpuhp_state_mutex);
ret = cpuhp_store_callbacks(state, name, startup, teardown,
multi_instance); /* record name, callbacks and instance list for later traversal */
...
for_each_present_cpu(cpu) { /* walk every present CPU */
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
int cpustate = st->state;
/* only CPUs that have already passed @state need the startup call now */
if (cpustate < state)
continue;
ret = cpuhp_issue_call(cpu, state, true, NULL); /* run the startup callback on/for this CPU */
if (ret) {
if (teardown) /* undo the install on CPUs handled so far */
cpuhp_rollback_install(cpu, state, NULL);
cpuhp_store_callbacks(state, NULL, NULL, NULL, false); /* drop the registration */
goto out;
}
}
...
mutex_unlock(&cpuhp_state_mutex);
...
return ret;
}
Now analyze the callback function page_alloc_cpu_online:
static int page_alloc_cpu_online(unsigned int cpu)
{
struct zone *zone;
for_each_populated_zone(zone)
zone_pcp_update(zone, 1);
return 0;
}
||
\/
mutex_lock(&pcp_batch_high_lock);
zone_set_pageset_high_and_batch(zone, cpu_online); // According to the area size , Calculate and calculate for each of the areas cpu Set new high and batch values for page sets
mutex_unlock(&pcp_batch_high_lock);
||
\/
/*
 * Recalculate the per-cpu pageset "high" and "batch" limits for @zone,
 * sized from the zone itself (zone_batchsize/zone_highsize, the latter
 * also considering @cpu_online), and push them to the pagesets.
 * Nothing is written if both values are already current.
 */
static void zone_set_pageset_high_and_batch(struct zone *zone, int cpu_online)
{
	int batch = max(1, zone_batchsize(zone));
	int high = zone_highsize(zone, batch, cpu_online);

	/* Skip the update entirely when neither limit has changed. */
	if (zone->pageset_high == high && zone->pageset_batch == batch)
		return;

	zone->pageset_high = high;
	zone->pageset_batch = batch;

	__zone_set_pageset_high_and_batch(zone, high, batch);
}
Now analyze the callback function page_alloc_cpu_dead:
/*
 * CPU-dead hotplug callback for the page allocator: drain the dead
 * CPU's cached pages and fold its counters back into the global state.
 */
static int page_alloc_cpu_dead(unsigned int cpu)
{
struct zone *zone;
/*
 * Flush @cpu's pending LRU pagevecs onto the LRU lists.  Valid either
 * when @cpu is the current CPU with preemption disabled, or when @cpu
 * was hot-unplugged and is already dead (the case here).
 */
lru_add_drain_cpu(cpu);
/*
 * Drain @cpu's deferred mlocked pages.  Mlocked pages are marked with
 * PageMlocked() so vmscan can test them cheaply and keep
 * semi-accurate statistics; such pages sit on the LRU "unevictable"
 * list, with PageUnevictable set to indicate that state.
 */
mlock_page_drain_remote(cpu);
/*
 * Free the pages on @cpu's per-cpu free lists (pcplists) for all
 * zones.  The caller must be pinned to the CPU in question — or, as
 * here, the CPU must be offline.
 */
drain_pages(cpu);
/*
 * Fold the dead CPU's VM event counters into this CPU's
 * (this_cpu_add), leaving the global totals unchanged.  These are a
 * lightweight increment-only per-cpu counter scheme; no critical
 * kernel component should rely on their exact values.  On many
 * platforms the generated code is just an increment of a global
 * address.
 */
vm_events_fold_cpu(cpu);
/*
 * Fold the offline CPU's differential VM stats into the global
 * arrays and zero them, keeping the statistics consistent.  The
 * offline CPU no longer accesses its counters, which keeps the
 * synchronization trivial.
 */
cpu_vm_stats_fold(cpu);
for_each_populated_zone(zone) /* zones with present_pages != 0 */
zone_pcp_update(zone, 0); /* recompute pageset high/batch now that one fewer CPU shares them */
return 0;
}
边栏推荐
- Redis数据迁移:方法二AOF
- 通过Dao投票STI的销毁,SeekTiger真正做到由社区驱动
- Pinhole minimally invasive gingival surgery (pinhole gum rejuvenation)
- Acwing: Game 60 of the week
- donet framework4.X==windows窗体应用新建项目,通过System.Data.SqlClient连接sqlserver进行查询
- windows10:vscode下go语言的适配
- Group convolution
- Klakndi synchronization screen is simple to use
- Mathematical modeling learning (67): detailed introduction to xgboost classification model case tutorial
- 7.16模拟赛总结
猜你喜欢
随机推荐
Nature Communications
HCIP第七天笔记
C语言详解系列——循环语句的练习与巩固,二分查找的讲解
Go environment installation
The biggest bug I've ever written in my career as a programmer! Netizen: high and low is a P8 level!
Application of MATLAB in linear algebra
go环境安装
HCIP第五天笔记
How to use mitmproxy to get data return in automated testing?
关于数据库的问题,唯一和非重复的概念
缩短饿了么tabs 组件线条宽度
priority_queue的介绍及其使用
Large file upload
[untitled]
程序分析与优化 - 11 多分支分析
sql界面切换不能获取焦点
Container adapter - stack, queue, priority queue
XDC 2022 Intel 技术专场:英特尔软硬件技术构筑云计算架构基石
Raspberry pie configuration
Multivariate statistical analysis principal component analysis - 01







