From e4abe4aa44f4441bcf8d3a604369922a9d61efc4 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 09:24:13 +0900 Subject: [PATCH 01/17] vm: align specialization guards with CPython patterns --- crates/vm/src/frame.rs | 153 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 140 insertions(+), 13 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index bf725d8bcd7..a9eee109516 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -2030,7 +2030,7 @@ impl ExecutingFrame<'_> { Instruction::ForIter { .. } => { // Relative forward jump: target = lasti + caches + delta let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); - self.adaptive(|s, ii, cb| s.specialize_for_iter(vm, ii, cb)); + self.adaptive(|s, ii, cb| s.specialize_for_iter(vm, u32::from(arg), ii, cb)); self.execute_for_iter(vm, target)?; Ok(None) } @@ -3150,12 +3150,24 @@ impl ExecutingFrame<'_> { } Instruction::Send { .. } => { // (receiver, v -- receiver, retval) - self.adaptive(|s, ii, cb| s.specialize_send(ii, cb)); + self.adaptive(|s, ii, cb| s.specialize_send(vm, ii, cb)); let exit_label = bytecode::Label(self.lasti() + 1 + u32::from(arg)); + let receiver = self.nth_value(1); + let can_fast_send = !self.specialization_eval_frame_active(vm) + && (receiver.downcast_ref_if_exact::(vm).is_some() + || receiver.downcast_ref_if_exact::(vm).is_some()) + && self + .builtin_coro(receiver) + .is_some_and(|coro| !coro.running()); let val = self.pop_value(); let receiver = self.top_value(); - - match self._send(receiver, val, vm)? { + let ret = if can_fast_send { + let coro = self.builtin_coro(receiver).unwrap(); + coro.send(receiver, val, vm)? + } else { + self._send(receiver, val, vm)? + }; + match ret { PyIterReturn::Return(value) => { self.push_value(value); Ok(None) @@ -3176,11 +3188,16 @@ impl ExecutingFrame<'_> { let exit_label = bytecode::Label(self.lasti() + 1 + u32::from(arg)); // Stack: [receiver, val] — peek receiver before popping let receiver = self.nth_value(1); - let is_coro = self.builtin_coro(receiver).is_some(); + let can_fast_send = !self.specialization_eval_frame_active(vm) + && (receiver.downcast_ref_if_exact::(vm).is_some() + || receiver.downcast_ref_if_exact::(vm).is_some()) + && self + .builtin_coro(receiver) + .is_some_and(|coro| !coro.running()); let val = self.pop_value(); - let receiver = self.top_value(); - if is_coro { + if can_fast_send { + let receiver = self.top_value(); let coro = self.builtin_coro(receiver).unwrap(); match coro.send(receiver, val, vm)? { PyIterReturn::Return(value) => { @@ -3199,6 +3216,10 @@ impl ExecutingFrame<'_> { } } } + self.deoptimize(Instruction::Send { + delta: Arg::marker(), + }); + let receiver = self.top_value(); match self._send(receiver, val, vm)? { PyIterReturn::Return(value) => { self.push_value(value); @@ -5242,6 +5263,9 @@ impl ExecutingFrame<'_> { } Ok(None) } else { + self.deoptimize(Instruction::ForIter { + delta: Arg::marker(), + }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -5257,6 +5281,9 @@ impl ExecutingFrame<'_> { } Ok(None) } else { + self.deoptimize(Instruction::ForIter { + delta: Arg::marker(), + }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -5272,6 +5299,9 @@ impl ExecutingFrame<'_> { } Ok(None) } else { + self.deoptimize(Instruction::ForIter { + delta: Arg::marker(), + }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -5279,7 +5309,21 @@ impl ExecutingFrame<'_> { Instruction::ForIterGen => { let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); let iter = self.top_value(); + if self.specialization_eval_frame_active(vm) { + self.deoptimize(Instruction::ForIter { + delta: Arg::marker(), + }); + self.execute_for_iter(vm, target)?; + return Ok(None); + } if let Some(generator) = iter.downcast_ref_if_exact::(vm) { + if generator.as_coro().running() { + self.deoptimize(Instruction::ForIter { + delta: Arg::marker(), + }); + self.execute_for_iter(vm, target)?; + return Ok(None); + } match generator.as_coro().send(iter, vm.ctx.none(), vm) { Ok(PyIterReturn::Return(value)) => { self.push_value(value); @@ -5295,6 +5339,9 @@ impl ExecutingFrame<'_> { } Ok(None) } else { + self.deoptimize(Instruction::ForIter { + delta: Arg::marker(), + }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -7616,6 +7663,17 @@ impl ExecutingFrame<'_> { let callable = self.nth_value(nargs + 1); if let Some(func) = callable.downcast_ref::() { + if self.specialization_eval_frame_active(vm) { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } let version = func.get_version_for_current_state(); if version == 0 { unsafe { @@ -7654,6 +7712,17 @@ impl ExecutingFrame<'_> { && let Some(bound_method) = callable.downcast_ref::() && let Some(func) = bound_method.function_obj().downcast_ref::() { + if self.specialization_eval_frame_active(vm) { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } let version = func.get_version_for_current_state(); if version == 0 { unsafe { @@ -7702,7 +7771,7 @@ impl ExecutingFrame<'_> { match nargs { 0 => Instruction::CallMethodDescriptorNoargs, 1 => Instruction::CallMethodDescriptorO, - _ => Instruction::CallMethodDescriptorFast, + _ => Instruction::CallMethodDescriptorFastWithKeywords, } }; self.specialize_at(instr_idx, cache_base, new_op); @@ -7726,6 +7795,8 @@ impl ExecutingFrame<'_> { Instruction::CallIsinstance } else if effective_nargs == 1 { Instruction::CallBuiltinO + } else if effective_nargs > 1 { + Instruction::CallBuiltinFastWithKeywords } else { Instruction::CallBuiltinFast }; @@ -7801,7 +7872,7 @@ impl ExecutingFrame<'_> { fn specialize_call_kw( &mut self, - _vm: &VirtualMachine, + vm: &VirtualMachine, nargs: u32, instr_idx: usize, cache_base: usize, @@ -7822,6 +7893,17 @@ impl ExecutingFrame<'_> { let callable = self.nth_value(nargs + 2); if let Some(func) = callable.downcast_ref::() { + if self.specialization_eval_frame_active(vm) { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } let version = func.get_version_for_current_state(); if version == 0 { unsafe { @@ -7848,6 +7930,17 @@ impl ExecutingFrame<'_> { && let Some(bound_method) = callable.downcast_ref::() && let Some(func) = bound_method.function_obj().downcast_ref::() { + if self.specialization_eval_frame_active(vm) { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } let version = func.get_version_for_current_state(); if version == 0 { unsafe { @@ -7873,7 +7966,7 @@ impl ExecutingFrame<'_> { self.specialize_at(instr_idx, cache_base, Instruction::CallKwNonPy); } - fn specialize_send(&mut self, instr_idx: usize, cache_base: usize) { + fn specialize_send(&mut self, vm: &VirtualMachine, instr_idx: usize, cache_base: usize) { if !matches!( self.code.instructions.read_op(instr_idx), Instruction::Send { .. } @@ -7882,7 +7975,9 @@ impl ExecutingFrame<'_> { } // Stack: [receiver, val] — receiver is at position 1 let receiver = self.nth_value(1); - if self.builtin_coro(receiver).is_some() { + let is_exact_gen_or_coro = receiver.downcast_ref_if_exact::(vm).is_some() + || receiver.downcast_ref_if_exact::(vm).is_some(); + if is_exact_gen_or_coro && !self.specialization_eval_frame_active(vm) { self.specialize_at(instr_idx, cache_base, Instruction::SendGen); } else { unsafe { @@ -8032,7 +8127,13 @@ impl ExecutingFrame<'_> { self.commit_specialization(instr_idx, cache_base, new_op); } - fn specialize_for_iter(&mut self, vm: &VirtualMachine, instr_idx: usize, cache_base: usize) { + fn specialize_for_iter( + &mut self, + vm: &VirtualMachine, + jump_delta: u32, + instr_idx: usize, + cache_base: usize, + ) { if !matches!( self.code.instructions.read_op(instr_idx), Instruction::ForIter { .. } @@ -8047,7 +8148,11 @@ impl ExecutingFrame<'_> { Some(Instruction::ForIterList) } else if iter.downcast_ref_if_exact::(vm).is_some() { Some(Instruction::ForIterTuple) - } else if iter.downcast_ref_if_exact::(vm).is_some() { + } else if iter.downcast_ref_if_exact::(vm).is_some() + && jump_delta <= i16::MAX as u32 + && self.for_iter_has_end_for_shape(instr_idx, jump_delta) + && !self.specialization_eval_frame_active(vm) + { Some(Instruction::ForIterGen) } else { None @@ -8056,6 +8161,28 @@ impl ExecutingFrame<'_> { self.commit_specialization(instr_idx, cache_base, new_op); } + #[inline] + fn specialization_eval_frame_active(&self, _vm: &VirtualMachine) -> bool { + false + } + + #[inline] + fn for_iter_has_end_for_shape(&self, instr_idx: usize, jump_delta: u32) -> bool { + let target_idx = instr_idx + + 1 + + Instruction::ForIter { + delta: Arg::marker(), + } + .cache_entries() + + jump_delta as usize; + self.code.instructions.get(target_idx).is_some_and(|unit| { + matches!( + unit.op, + Instruction::EndFor | Instruction::InstrumentedEndFor + ) + }) + } + /// Handle iterator exhaustion in specialized FOR_ITER handlers. /// Skips END_FOR if present at target and jumps. fn for_iter_jump_on_exhausted(&mut self, target: bytecode::Label) { From 1d69c5b1b2a99e78f102bd9592397f7562849d6f Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 09:26:06 +0900 Subject: [PATCH 02/17] vm: tighten call specialization runtime guards --- crates/vm/src/frame.rs | 68 +++++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 24 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index a9eee109516..015c5655149 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -4377,11 +4377,11 @@ impl ExecutingFrame<'_> { let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self.localsplus.stack_index(stack_len - 2).is_some(); let callable = self.nth_value(2); - let self_is_exact_list = self + let self_is_list = self .localsplus .stack_index(stack_len - 2) .as_ref() - .is_some_and(|obj| obj.class().is(vm.ctx.types.list_type)); + .is_some_and(|obj| obj.downcast_ref::().is_some()); let is_list_append = callable .downcast_ref::() @@ -4389,12 +4389,12 @@ impl ExecutingFrame<'_> { descr.method.name == "append" && descr.objclass.is(vm.ctx.types.list_type) }); - if is_list_append && self_or_null_is_some && self_is_exact_list { + if is_list_append && self_or_null_is_some && self_is_list { let item = self.pop_value(); let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); if let Some(list_obj) = self_or_null.as_ref() - && let Some(list) = list_obj.downcast_ref_if_exact::(vm) + && let Some(list) = list_obj.downcast_ref::() { list.append(item); // CALL_LIST_APPEND fuses the following POP_TOP. @@ -4422,14 +4422,19 @@ impl ExecutingFrame<'_> { let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self.localsplus.stack_index(stack_len - 1).is_some(); let callable = self.nth_value(1); - let func = if self_or_null_is_some { - callable - .downcast_ref::() - .map(|d| d.method.func) + let descr = if self_or_null_is_some { + callable.downcast_ref::() } else { None }; - if let Some(func) = func { + if let Some(descr) = descr + && self + .localsplus + .stack_index(stack_len - 1) + .as_ref() + .is_some_and(|self_obj| self_obj.class().is(descr.objclass)) + { + let func = descr.method.func; let self_val = self.pop_value_opt().unwrap(); self.pop_value(); // callable let args = FuncArgs { @@ -4453,14 +4458,19 @@ impl ExecutingFrame<'_> { let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self.localsplus.stack_index(stack_len - 2).is_some(); let callable = self.nth_value(2); - let func = if self_or_null_is_some { - callable - .downcast_ref::() - .map(|d| d.method.func) + let descr = if self_or_null_is_some { + callable.downcast_ref::() } else { None }; - if let Some(func) = func { + if let Some(descr) = descr + && self + .localsplus + .stack_index(stack_len - 2) + .as_ref() + .is_some_and(|self_obj| self_obj.class().is(descr.objclass)) + { + let func = descr.method.func; let obj = self.pop_value(); let self_val = self.pop_value_opt().unwrap(); self.pop_value(); // callable @@ -4486,14 +4496,19 @@ impl ExecutingFrame<'_> { .localsplus .stack_index(stack_len - nargs as usize - 1) .is_some(); - let func = if self_or_null_is_some { - callable - .downcast_ref::() - .map(|d| d.method.func) + let descr = if self_or_null_is_some { + callable.downcast_ref::() } else { None }; - if let Some(func) = func { + if let Some(descr) = descr + && self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .as_ref() + .is_some_and(|self_obj| self_obj.class().is(descr.objclass)) + { + let func = descr.method.func; let positional_args: Vec = self.pop_multiple(nargs as usize).collect(); let self_val = self.pop_value_opt().unwrap(); @@ -4613,14 +4628,19 @@ impl ExecutingFrame<'_> { .localsplus .stack_index(stack_len - nargs as usize - 1) .is_some(); - let func = if self_or_null_is_some { - callable - .downcast_ref::() - .map(|d| d.method.func) + let descr = if self_or_null_is_some { + callable.downcast_ref::() } else { None }; - if let Some(func) = func { + if let Some(descr) = descr + && self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .as_ref() + .is_some_and(|self_obj| self_obj.class().is(descr.objclass)) + { + let func = descr.method.func; let positional_args: Vec = self.pop_multiple(nargs as usize).collect(); let self_val = self.pop_value_opt().unwrap(); From f0a235ee59794389497877d3fb93ea3757e692ac Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 20:11:10 +0900 Subject: [PATCH 03/17] vm: add send_none fastpath for generator specialization --- crates/vm/src/coroutine.rs | 57 ++++++++++++++++++++++++++------------ crates/vm/src/frame.rs | 8 ++++-- 2 files changed, 46 insertions(+), 19 deletions(-) diff --git a/crates/vm/src/coroutine.rs b/crates/vm/src/coroutine.rs index ac7aeba5443..c4c2df6c102 100644 --- a/crates/vm/src/coroutine.rs +++ b/crates/vm/src/coroutine.rs @@ -115,27 +115,12 @@ impl Coro { result } - pub fn send( + fn finalize_send_result( &self, jen: &PyObject, - value: PyObjectRef, + result: PyResult, vm: &VirtualMachine, ) -> PyResult { - if self.closed.load() { - return Ok(PyIterReturn::StopIteration(None)); - } - self.frame.locals_to_fast(vm)?; - let value = if self.frame.lasti() > 0 { - Some(value) - } else if !vm.is_none(&value) { - return Err(vm.new_type_error(format!( - "can't send non-None value to a just-started {}", - gen_name(jen, vm), - ))); - } else { - None - }; - let result = self.run_with_context(jen, vm, |f| f.resume(value, vm)); self.maybe_close(&result); match result { Ok(exec_res) => Ok(exec_res.into_iter_return(vm)), @@ -158,6 +143,44 @@ impl Coro { } } + pub(crate) fn send_none(&self, jen: &PyObject, vm: &VirtualMachine) -> PyResult { + if self.closed.load() { + return Ok(PyIterReturn::StopIteration(None)); + } + self.frame.locals_to_fast(vm)?; + let value = if self.frame.lasti() > 0 { + Some(vm.ctx.none()) + } else { + None + }; + let result = self.run_with_context(jen, vm, |f| f.resume(value, vm)); + self.finalize_send_result(jen, result, vm) + } + + pub fn send( + &self, + jen: &PyObject, + value: PyObjectRef, + vm: &VirtualMachine, + ) -> PyResult { + if self.closed.load() { + return Ok(PyIterReturn::StopIteration(None)); + } + self.frame.locals_to_fast(vm)?; + let value = if self.frame.lasti() > 0 { + Some(value) + } else if !vm.is_none(&value) { + return Err(vm.new_type_error(format!( + "can't send non-None value to a just-started {}", + gen_name(jen, vm), + ))); + } else { + None + }; + let result = self.run_with_context(jen, vm, |f| f.resume(value, vm)); + self.finalize_send_result(jen, result, vm) + } + pub fn throw( &self, jen: &PyObject, diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 015c5655149..3de0148db8c 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3163,7 +3163,11 @@ impl ExecutingFrame<'_> { let receiver = self.top_value(); let ret = if can_fast_send { let coro = self.builtin_coro(receiver).unwrap(); - coro.send(receiver, val, vm)? + if vm.is_none(&val) { + coro.send_none(receiver, vm)? + } else { + coro.send(receiver, val, vm)? + } } else { self._send(receiver, val, vm)? }; @@ -5344,7 +5348,7 @@ impl ExecutingFrame<'_> { self.execute_for_iter(vm, target)?; return Ok(None); } - match generator.as_coro().send(iter, vm.ctx.none(), vm) { + match generator.as_coro().send_none(iter, vm) { Ok(PyIterReturn::Return(value)) => { self.push_value(value); } From e518287ea340ee3725fb2609b113b04b04b0d3ea Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 20:42:35 +0900 Subject: [PATCH 04/17] vm: restrict method-descriptor specialization to methods --- crates/vm/src/frame.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 3de0148db8c..efd134611cb 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -23,7 +23,7 @@ use crate::{ convert::{ToPyObject, ToPyResult}, coroutine::Coro, exceptions::ExceptionCtor, - function::{ArgMapping, Either, FuncArgs}, + function::{ArgMapping, Either, FuncArgs, PyMethodFlags}, object::PyAtomicBorrow, object::{Traverse, TraverseFn}, protocol::{PyIter, PyIterReturn}, @@ -4390,7 +4390,8 @@ impl ExecutingFrame<'_> { callable .downcast_ref::() .is_some_and(|descr| { - descr.method.name == "append" + descr.method.flags.contains(PyMethodFlags::METHOD) + && descr.method.name == "append" && descr.objclass.is(vm.ctx.types.list_type) }); if is_list_append && self_or_null_is_some && self_is_list { @@ -4432,6 +4433,7 @@ impl ExecutingFrame<'_> { None }; if let Some(descr) = descr + && descr.method.flags.contains(PyMethodFlags::METHOD) && self .localsplus .stack_index(stack_len - 1) @@ -4468,6 +4470,7 @@ impl ExecutingFrame<'_> { None }; if let Some(descr) = descr + && descr.method.flags.contains(PyMethodFlags::METHOD) && self .localsplus .stack_index(stack_len - 2) @@ -4506,6 +4509,7 @@ impl ExecutingFrame<'_> { None }; if let Some(descr) = descr + && descr.method.flags.contains(PyMethodFlags::METHOD) && self .localsplus .stack_index(stack_len - nargs as usize - 1) @@ -4638,6 +4642,7 @@ impl ExecutingFrame<'_> { None }; if let Some(descr) = descr + && descr.method.flags.contains(PyMethodFlags::METHOD) && self .localsplus .stack_index(stack_len - nargs as usize - 1) @@ -7775,7 +7780,10 @@ impl ExecutingFrame<'_> { } // Try to specialize method descriptor calls - if self_or_null_is_some && let Some(descr) = callable.downcast_ref::() { + if self_or_null_is_some + && let Some(descr) = callable.downcast_ref::() + && descr.method.flags.contains(PyMethodFlags::METHOD) + { let call_cache_entries = Instruction::CallListAppend.cache_entries(); let next_idx = cache_base + call_cache_entries; let next_is_pop_top = if next_idx < self.code.instructions.len() { From 01cdfb5253f2d386973fd692a4a461b998c29598 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 20:51:51 +0900 Subject: [PATCH 05/17] vm: deopt call specializations on guard misses --- crates/vm/src/frame.rs | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index efd134611cb..cc7953fe883 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -4043,6 +4043,9 @@ impl ExecutingFrame<'_> { self.push_value(result); Ok(None) } else { + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -4079,12 +4082,12 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) - } else { - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) } + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } Instruction::CallLen => { let instr_idx = self.lasti() as usize - 1; @@ -4364,15 +4367,12 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) - } else { - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) } + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } Instruction::CallListAppend => { let nargs: u32 = arg.into(); @@ -4562,6 +4562,9 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); self.execute_call_vectorcall(nargs, vm) } Instruction::CallAllocAndEnterInit => { @@ -4714,6 +4717,9 @@ impl ExecutingFrame<'_> { if callable.downcast_ref::().is_some() || callable.downcast_ref::().is_some() { + self.deoptimize(Instruction::Call { + argc: Arg::marker(), + }); let args = self.collect_positional_args(nargs); return self.execute_call(args, vm); } @@ -4845,6 +4851,9 @@ impl ExecutingFrame<'_> { if callable.downcast_ref::().is_some() || callable.downcast_ref::().is_some() { + self.deoptimize(Instruction::CallKw { + argc: Arg::marker(), + }); let args = self.collect_keyword_args(nargs); return self.execute_call(args, vm); } From 690921a2196d7c12ed70e41470d4b44700ffac6d Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 20:52:56 +0900 Subject: [PATCH 06/17] vm: match CPython send/for-iter closed-frame guards --- crates/vm/src/frame.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index cc7953fe883..d9a76c07afe 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3158,7 +3158,7 @@ impl ExecutingFrame<'_> { || receiver.downcast_ref_if_exact::(vm).is_some()) && self .builtin_coro(receiver) - .is_some_and(|coro| !coro.running()); + .is_some_and(|coro| !coro.running() && !coro.closed()); let val = self.pop_value(); let receiver = self.top_value(); let ret = if can_fast_send { @@ -3197,7 +3197,7 @@ impl ExecutingFrame<'_> { || receiver.downcast_ref_if_exact::(vm).is_some()) && self .builtin_coro(receiver) - .is_some_and(|coro| !coro.running()); + .is_some_and(|coro| !coro.running() && !coro.closed()); let val = self.pop_value(); if can_fast_send { @@ -5355,7 +5355,7 @@ impl ExecutingFrame<'_> { return Ok(None); } if let Some(generator) = iter.downcast_ref_if_exact::(vm) { - if generator.as_coro().running() { + if generator.as_coro().running() || generator.as_coro().closed() { self.deoptimize(Instruction::ForIter { delta: Arg::marker(), }); From ed4b7fae11f5361ea28fab64b9083799f53ee380 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 20:59:09 +0900 Subject: [PATCH 07/17] vm: restrict len/isinstance specialization to builtins --- crates/vm/src/frame.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index d9a76c07afe..2885b4bd255 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -4102,7 +4102,11 @@ impl ExecutingFrame<'_> { let callable_tag = &*callable as *const PyObject as u32; let is_len_callable = callable .downcast_ref_if_exact::(vm) - .is_some_and(|native| native.zelf.is_none() && native.value.name == "len"); + .is_some_and(|native| { + native.zelf.is_none() + && native.value.name == "len" + && native.module.is_some_and(|m| m.as_str() == "builtins") + }); if null.is_none() && cached_tag == callable_tag && is_len_callable { let len = obj.length(vm)?; self.push_value(vm.ctx.new_int(len).into()); @@ -4136,7 +4140,9 @@ impl ExecutingFrame<'_> { let is_isinstance_callable = callable .downcast_ref_if_exact::(vm) .is_some_and(|native| { - native.zelf.is_none() && native.value.name == "isinstance" + native.zelf.is_none() + && native.value.name == "isinstance" + && native.module.is_some_and(|m| m.as_str() == "builtins") }); if cached_tag == callable_tag && is_isinstance_callable { let nargs_usize = nargs as usize; @@ -7825,12 +7831,14 @@ impl ExecutingFrame<'_> { let callable_tag = callable as *const PyObject as u32; let new_op = if native.zelf.is_none() && native.value.name == "len" + && native.module.is_some_and(|m| m.as_str() == "builtins") && nargs == 1 && effective_nargs == 1 { Instruction::CallLen } else if native.zelf.is_none() && native.value.name == "isinstance" + && native.module.is_some_and(|m| m.as_str() == "builtins") && effective_nargs == 2 { Instruction::CallIsinstance From 78dbbb0152f39139d75e4a74392ac943b15aee45 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 21:25:54 +0900 Subject: [PATCH 08/17] vm: use exact-type guards for call specializations --- crates/vm/src/frame.rs | 81 +++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 2885b4bd255..ea07535d28d 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -4023,14 +4023,14 @@ impl ExecutingFrame<'_> { let nargs: u32 = arg.into(); // Stack: [callable, self_or_null, arg1, ..., argN] let callable = self.nth_value(nargs + 1); - if let Some(func) = callable.downcast_ref::() + if let Some(func) = callable.downcast_ref_if_exact::(vm) && func.func_version() == cached_version && cached_version != 0 { let pos_args: Vec = self.pop_multiple(nargs as usize).collect(); let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); - let func = callable.downcast_ref::().unwrap(); + let func = callable.downcast_ref_if_exact::(vm).unwrap(); let args = if let Some(self_val) = self_or_null { let mut all_args = Vec::with_capacity(pos_args.len() + 1); all_args.push(self_val); @@ -4063,11 +4063,11 @@ impl ExecutingFrame<'_> { .is_some(); let callable = self.nth_value(nargs + 1); if !self_or_null_is_some - && let Some(bound_method) = callable.downcast_ref::() + && let Some(bound_method) = callable.downcast_ref_if_exact::(vm) { let bound_function = bound_method.function_obj().clone(); let bound_self = bound_method.self_obj().clone(); - if let Some(func) = bound_function.downcast_ref::() + if let Some(func) = bound_function.downcast_ref_if_exact::(vm) && func.func_version() == cached_version && cached_version != 0 { @@ -4308,7 +4308,7 @@ impl ExecutingFrame<'_> { let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); - if let Some(func) = callable.downcast_ref::() + if let Some(func) = callable.downcast_ref_if_exact::(vm) && func.func_version() == cached_version && cached_version != 0 { @@ -4348,11 +4348,11 @@ impl ExecutingFrame<'_> { .is_some(); let callable = self.nth_value(nargs + 1); if !self_or_null_is_some - && let Some(bound_method) = callable.downcast_ref::() + && let Some(bound_method) = callable.downcast_ref_if_exact::(vm) { let bound_function = bound_method.function_obj().clone(); let bound_self = bound_method.self_obj().clone(); - if let Some(func) = bound_function.downcast_ref::() + if let Some(func) = bound_function.downcast_ref_if_exact::(vm) && func.func_version() == cached_version && cached_version != 0 { @@ -4392,14 +4392,13 @@ impl ExecutingFrame<'_> { .stack_index(stack_len - 2) .as_ref() .is_some_and(|obj| obj.downcast_ref::().is_some()); - let is_list_append = - callable - .downcast_ref::() - .is_some_and(|descr| { - descr.method.flags.contains(PyMethodFlags::METHOD) - && descr.method.name == "append" - && descr.objclass.is(vm.ctx.types.list_type) - }); + let is_list_append = callable + .downcast_ref_if_exact::(vm) + .is_some_and(|descr| { + descr.method.flags.contains(PyMethodFlags::METHOD) + && descr.method.name == "append" + && descr.objclass.is(vm.ctx.types.list_type) + }); if is_list_append && self_or_null_is_some && self_is_list { let item = self.pop_value(); let self_or_null = self.pop_value_opt(); @@ -4434,7 +4433,7 @@ impl ExecutingFrame<'_> { let self_or_null_is_some = self.localsplus.stack_index(stack_len - 1).is_some(); let callable = self.nth_value(1); let descr = if self_or_null_is_some { - callable.downcast_ref::() + callable.downcast_ref_if_exact::(vm) } else { None }; @@ -4471,7 +4470,7 @@ impl ExecutingFrame<'_> { let self_or_null_is_some = self.localsplus.stack_index(stack_len - 2).is_some(); let callable = self.nth_value(2); let descr = if self_or_null_is_some { - callable.downcast_ref::() + callable.downcast_ref_if_exact::(vm) } else { None }; @@ -4510,7 +4509,7 @@ impl ExecutingFrame<'_> { .stack_index(stack_len - nargs as usize - 1) .is_some(); let descr = if self_or_null_is_some { - callable.downcast_ref::() + callable.downcast_ref_if_exact::(vm) } else { None }; @@ -4591,7 +4590,7 @@ impl ExecutingFrame<'_> { { // Look up __init__ (guarded by type_version) if let Some(init) = cls.get_attr(identifier!(vm, __init__)) - && let Some(init_func) = init.downcast_ref::() + && let Some(init_func) = init.downcast_ref_if_exact::(vm) && init_func.can_specialize_call(nargs + 1) { // Allocate object directly (tp_new == object.__new__) @@ -4646,7 +4645,7 @@ impl ExecutingFrame<'_> { .stack_index(stack_len - nargs as usize - 1) .is_some(); let descr = if self_or_null_is_some { - callable.downcast_ref::() + callable.downcast_ref_if_exact::(vm) } else { None }; @@ -4720,8 +4719,10 @@ impl ExecutingFrame<'_> { .stack_index(stack_len - nargs as usize - 1) .is_some(); let callable = self.nth_value(nargs + 1); - if callable.downcast_ref::().is_some() - || callable.downcast_ref::().is_some() + if callable.downcast_ref_if_exact::(vm).is_some() + || callable + .downcast_ref_if_exact::(vm) + .is_some() { self.deoptimize(Instruction::Call { argc: Arg::marker(), @@ -4755,7 +4756,7 @@ impl ExecutingFrame<'_> { let nargs: u32 = arg.into(); // Stack: [callable, self_or_null, arg1, ..., argN, kwarg_names] let callable = self.nth_value(nargs + 2); - if let Some(func) = callable.downcast_ref::() + if let Some(func) = callable.downcast_ref_if_exact::(vm) && func.func_version() == cached_version && cached_version != 0 { @@ -4807,11 +4808,11 @@ impl ExecutingFrame<'_> { .is_some(); let callable = self.nth_value(nargs + 2); if !self_or_null_is_some - && let Some(bound_method) = callable.downcast_ref::() + && let Some(bound_method) = callable.downcast_ref_if_exact::(vm) { let bound_function = bound_method.function_obj().clone(); let bound_self = bound_method.self_obj().clone(); - if let Some(func) = bound_function.downcast_ref::() + if let Some(func) = bound_function.downcast_ref_if_exact::(vm) && func.func_version() == cached_version && cached_version != 0 { @@ -4854,8 +4855,10 @@ impl ExecutingFrame<'_> { .stack_index(stack_len - nargs as usize - 2) .is_some(); let callable = self.nth_value(nargs + 2); - if callable.downcast_ref::().is_some() - || callable.downcast_ref::().is_some() + if callable.downcast_ref_if_exact::(vm).is_some() + || callable + .downcast_ref_if_exact::(vm) + .is_some() { self.deoptimize(Instruction::CallKw { argc: Arg::marker(), @@ -6425,7 +6428,7 @@ impl ExecutingFrame<'_> { args }; - let is_python_call = callable.downcast_ref::().is_some(); + let is_python_call = callable.downcast_ref_if_exact::(vm).is_some(); // Fire CALL event let call_arg0 = if self.monitoring_mask & monitoring::EVENT_CALL != 0 { @@ -6728,7 +6731,7 @@ impl ExecutingFrame<'_> { let func = self.top_value(); // Get the function reference and call the new method let func_ref = func - .downcast_ref::() + .downcast_ref_if_exact::(vm) .expect("SET_FUNCTION_ATTRIBUTE expects function on stack"); let payload: &PyFunction = func_ref.payload(); @@ -7706,7 +7709,7 @@ impl ExecutingFrame<'_> { .is_some(); let callable = self.nth_value(nargs + 1); - if let Some(func) = callable.downcast_ref::() { + if let Some(func) = callable.downcast_ref_if_exact::(vm) { if self.specialization_eval_frame_active(vm) { unsafe { self.code.instructions.write_adaptive_counter( @@ -7753,8 +7756,10 @@ impl ExecutingFrame<'_> { // Bound Python method object (`method`) specialization. if !self_or_null_is_some - && let Some(bound_method) = callable.downcast_ref::() - && let Some(func) = bound_method.function_obj().downcast_ref::() + && let Some(bound_method) = callable.downcast_ref_if_exact::(vm) + && let Some(func) = bound_method + .function_obj() + .downcast_ref_if_exact::(vm) { if self.specialization_eval_frame_active(vm) { unsafe { @@ -7796,7 +7801,7 @@ impl ExecutingFrame<'_> { // Try to specialize method descriptor calls if self_or_null_is_some - && let Some(descr) = callable.downcast_ref::() + && let Some(descr) = callable.downcast_ref_if_exact::(vm) && descr.method.flags.contains(PyMethodFlags::METHOD) { let call_cache_entries = Instruction::CallListAppend.cache_entries(); @@ -7892,7 +7897,7 @@ impl ExecutingFrame<'_> { if let (Some(cls_new_fn), Some(obj_new_fn)) = (cls_new, object_new) && cls_new_fn as usize == obj_new_fn as usize && let Some(init) = cls.get_attr(identifier!(vm, __init__)) - && let Some(init_func) = init.downcast_ref::() + && let Some(init_func) = init.downcast_ref_if_exact::(vm) && init_func.can_specialize_call(nargs + 1) { let version = cls.tp_version_tag.load(Acquire); @@ -7941,7 +7946,7 @@ impl ExecutingFrame<'_> { .is_some(); let callable = self.nth_value(nargs + 2); - if let Some(func) = callable.downcast_ref::() { + if let Some(func) = callable.downcast_ref_if_exact::(vm) { if self.specialization_eval_frame_active(vm) { unsafe { self.code.instructions.write_adaptive_counter( @@ -7976,8 +7981,10 @@ impl ExecutingFrame<'_> { } if !self_or_null_is_some - && let Some(bound_method) = callable.downcast_ref::() - && let Some(func) = bound_method.function_obj().downcast_ref::() + && let Some(bound_method) = callable.downcast_ref_if_exact::(vm) + && let Some(func) = bound_method + .function_obj() + .downcast_ref_if_exact::(vm) { if self.specialization_eval_frame_active(vm) { unsafe { From 01a4c27014ab5b94200388fbb53b96d9bcec4003 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 21:28:42 +0900 Subject: [PATCH 09/17] vm: align class-call specialization flow with CPython --- crates/vm/src/frame.rs | 46 +++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index ea07535d28d..3910277fe3c 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -7866,30 +7866,38 @@ impl ExecutingFrame<'_> { } // type/str/tuple(x) and class-call specializations - if callable.class().is(vm.ctx.types.type_type) - && let Some(cls) = callable.downcast_ref::() - { - if !self_or_null_is_some && nargs == 1 { - let new_op = if callable.is(&vm.ctx.types.type_type.as_object()) { - Some(Instruction::CallType1) - } else if callable.is(&vm.ctx.types.str_type.as_object()) { - Some(Instruction::CallStr1) - } else if callable.is(&vm.ctx.types.tuple_type.as_object()) { - Some(Instruction::CallTuple1) - } else { - None - }; - if let Some(new_op) = new_op { - self.specialize_at(instr_idx, cache_base, new_op); + if let Some(cls) = callable.downcast_ref::() { + if cls.slots.flags.has_feature(PyTypeFlags::IMMUTABLETYPE) { + if !self_or_null_is_some && nargs == 1 { + let new_op = if callable.is(&vm.ctx.types.type_type.as_object()) { + Some(Instruction::CallType1) + } else if callable.is(&vm.ctx.types.str_type.as_object()) { + Some(Instruction::CallStr1) + } else if callable.is(&vm.ctx.types.tuple_type.as_object()) { + Some(Instruction::CallTuple1) + } else { + None + }; + if let Some(new_op) = new_op { + self.specialize_at(instr_idx, cache_base, new_op); + return; + } + } + if cls.slots.vectorcall.load().is_some() { + self.specialize_at(instr_idx, cache_base, Instruction::CallBuiltinClass); return; } + self.specialize_at(instr_idx, cache_base, Instruction::CallNonPyGeneral); + return; } - if cls.slots.flags.has_feature(PyTypeFlags::IMMUTABLETYPE) - && cls.slots.vectorcall.load().is_some() - { - self.specialize_at(instr_idx, cache_base, Instruction::CallBuiltinClass); + + // CPython only considers CALL_ALLOC_AND_ENTER_INIT for types whose + // metaclass is exactly `type`. + if !callable.class().is(vm.ctx.types.type_type) { + self.specialize_at(instr_idx, cache_base, Instruction::CallNonPyGeneral); return; } + // CallAllocAndEnterInit: heap type with default __new__ if !self_or_null_is_some && cls.slots.flags.has_feature(PyTypeFlags::HEAPTYPE) { let object_new = vm.ctx.types.object_type.slots.new.load(); From 397143b08f3ac6e40df1754bdf2bc01c24cdd26c Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 21:41:25 +0900 Subject: [PATCH 10/17] vm: prefer FAST call opcodes for positional builtin calls --- crates/vm/src/frame.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 3910277fe3c..62757ce30c8 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -7823,7 +7823,7 @@ impl ExecutingFrame<'_> { match nargs { 0 => Instruction::CallMethodDescriptorNoargs, 1 => Instruction::CallMethodDescriptorO, - _ => Instruction::CallMethodDescriptorFastWithKeywords, + _ => Instruction::CallMethodDescriptorFast, } }; self.specialize_at(instr_idx, cache_base, new_op); @@ -7849,8 +7849,6 @@ impl ExecutingFrame<'_> { Instruction::CallIsinstance } else if effective_nargs == 1 { Instruction::CallBuiltinO - } else if effective_nargs > 1 { - Instruction::CallBuiltinFastWithKeywords } else { Instruction::CallBuiltinFast }; From 41e58d372ea71932ef1debb104f400d01b4ec6ea Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 21:45:30 +0900 Subject: [PATCH 11/17] vm: add callable identity guard to CALL_LIST_APPEND --- crates/vm/src/frame.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 62757ce30c8..2abea6b2595 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -4381,12 +4381,16 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallListAppend => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 1 { // Stack: [callable, self_or_null, item] let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self.localsplus.stack_index(stack_len - 2).is_some(); let callable = self.nth_value(2); + let callable_tag = callable as *const PyObject as u32; let self_is_list = self .localsplus .stack_index(stack_len - 2) @@ -4399,7 +4403,11 @@ impl ExecutingFrame<'_> { && descr.method.name == "append" && descr.objclass.is(vm.ctx.types.list_type) }); - if is_list_append && self_or_null_is_some && self_is_list { + if cached_tag == callable_tag + && is_list_append + && self_or_null_is_some + && self_is_list + { let item = self.pop_value(); let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); @@ -7818,6 +7826,12 @@ impl ExecutingFrame<'_> { && descr.objclass.is(vm.ctx.types.list_type) && next_is_pop_top { + let callable_tag = callable as *const PyObject as u32; + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, callable_tag); + } Instruction::CallListAppend } else { match nargs { From c694fe11f292fe04e7c28bc71233e5b94b710621 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 21:48:52 +0900 Subject: [PATCH 12/17] vm: make CALL_LIST_APPEND runtime guard pointer-based --- crates/vm/src/frame.rs | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 2abea6b2595..962fe558304 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -4396,18 +4396,7 @@ impl ExecutingFrame<'_> { .stack_index(stack_len - 2) .as_ref() .is_some_and(|obj| obj.downcast_ref::().is_some()); - let is_list_append = callable - .downcast_ref_if_exact::(vm) - .is_some_and(|descr| { - descr.method.flags.contains(PyMethodFlags::METHOD) - && descr.method.name == "append" - && descr.objclass.is(vm.ctx.types.list_type) - }); - if cached_tag == callable_tag - && is_list_append - && self_or_null_is_some - && self_is_list - { + if cached_tag == callable_tag && self_or_null_is_some && self_is_list { let item = self.pop_value(); let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); From ab82f5bffd143b8dfa6224c8c7a415899a1b0220 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 22:08:37 +0900 Subject: [PATCH 13/17] vm: align call guard cache and fallback behavior with CPython --- crates/vm/src/frame.rs | 165 +++++++++++++++++++++-------------------- 1 file changed, 85 insertions(+), 80 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 962fe558304..515f644c9c6 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -4092,14 +4092,14 @@ impl ExecutingFrame<'_> { Instruction::CallLen => { let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let cached_ptr = self.code.instructions.read_cache_ptr(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 1 { // Stack: [callable, null, arg] let obj = self.pop_value(); // arg let null = self.pop_value_opt(); let callable = self.pop_value(); - let callable_tag = &*callable as *const PyObject as u32; + let callable_ptr = &*callable as *const PyObject as usize; let is_len_callable = callable .downcast_ref_if_exact::(vm) .is_some_and(|native| { @@ -4107,7 +4107,7 @@ impl ExecutingFrame<'_> { && native.value.name == "len" && native.module.is_some_and(|m| m.as_str() == "builtins") }); - if null.is_none() && cached_tag == callable_tag && is_len_callable { + if null.is_none() && cached_ptr == callable_ptr && is_len_callable { let len = obj.length(vm)?; self.push_value(vm.ctx.new_int(len).into()); return Ok(None); @@ -4126,7 +4126,7 @@ impl ExecutingFrame<'_> { Instruction::CallIsinstance => { let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let cached_ptr = self.code.instructions.read_cache_ptr(cache_base + 1); let nargs: u32 = arg.into(); let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self @@ -4136,7 +4136,7 @@ impl ExecutingFrame<'_> { let effective_nargs = nargs + u32::from(self_or_null_is_some); if effective_nargs == 2 { let callable = self.nth_value(nargs + 1); - let callable_tag = callable as *const PyObject as u32; + let callable_ptr = callable as *const PyObject as usize; let is_isinstance_callable = callable .downcast_ref_if_exact::(vm) .is_some_and(|native| { @@ -4144,7 +4144,7 @@ impl ExecutingFrame<'_> { && native.value.name == "isinstance" && native.module.is_some_and(|m| m.as_str() == "builtins") }); - if cached_tag == callable_tag && is_isinstance_callable { + if cached_ptr == callable_ptr && is_isinstance_callable { let nargs_usize = nargs as usize; let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); let self_or_null = self.pop_value_opt(); @@ -4374,29 +4374,26 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } Instruction::CallListAppend => { let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); + let cached_ptr = self.code.instructions.read_cache_ptr(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 1 { // Stack: [callable, self_or_null, item] let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self.localsplus.stack_index(stack_len - 2).is_some(); let callable = self.nth_value(2); - let callable_tag = callable as *const PyObject as u32; + let callable_ptr = callable as *const PyObject as usize; let self_is_list = self .localsplus .stack_index(stack_len - 2) .as_ref() .is_some_and(|obj| obj.downcast_ref::().is_some()); - if cached_tag == callable_tag && self_or_null_is_some && self_is_list { + if cached_ptr == callable_ptr && self_or_null_is_some && self_is_list { let item = self.pop_value(); let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); @@ -4454,9 +4451,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); self.execute_call_vectorcall(nargs, vm) } Instruction::CallMethodDescriptorO => { @@ -4492,9 +4486,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); self.execute_call_vectorcall(nargs, vm) } Instruction::CallMethodDescriptorFast => { @@ -4534,9 +4525,6 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); self.execute_call_vectorcall(nargs, vm) } Instruction::CallBuiltinClass => { @@ -4670,9 +4658,6 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); self.execute_call_vectorcall(nargs, vm) } Instruction::CallBuiltinFastWithKeywords => { @@ -4721,9 +4706,6 @@ impl ExecutingFrame<'_> { .downcast_ref_if_exact::(vm) .is_some() { - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); let args = self.collect_positional_args(nargs); return self.execute_call(args, vm); } @@ -4857,9 +4839,6 @@ impl ExecutingFrame<'_> { .downcast_ref_if_exact::(vm) .is_some() { - self.deoptimize(Instruction::CallKw { - argc: Arg::marker(), - }); let args = self.collect_keyword_args(nargs); return self.execute_call(args, vm); } @@ -7754,23 +7733,49 @@ impl ExecutingFrame<'_> { // Bound Python method object (`method`) specialization. if !self_or_null_is_some && let Some(bound_method) = callable.downcast_ref_if_exact::(vm) - && let Some(func) = bound_method + { + if let Some(func) = bound_method .function_obj() .downcast_ref_if_exact::(vm) - { - if self.specialization_eval_frame_active(vm) { + { + if self.specialization_eval_frame_active(vm) { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } + let version = func.get_version_for_current_state(); + if version == 0 { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } + + let new_op = if func.can_specialize_call(nargs + 1) { + Instruction::CallBoundMethodExactArgs + } else { + Instruction::CallBoundMethodGeneral + }; unsafe { - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); + self.code + .instructions + .write_cache_u32(cache_base + 1, version); } - return; - } - let version = func.get_version_for_current_state(); - if version == 0 { + self.specialize_at(instr_idx, cache_base, new_op); + } else { + // Match CPython: bound methods wrapping non-Python callables + // are not specialized as CALL_NON_PY_GENERAL. unsafe { self.code.instructions.write_adaptive_counter( cache_base, @@ -7779,20 +7784,7 @@ impl ExecutingFrame<'_> { ), ); } - return; } - - let new_op = if func.can_specialize_call(nargs + 1) { - Instruction::CallBoundMethodExactArgs - } else { - Instruction::CallBoundMethodGeneral - }; - unsafe { - self.code - .instructions - .write_cache_u32(cache_base + 1, version); - } - self.specialize_at(instr_idx, cache_base, new_op); return; } @@ -7815,11 +7807,11 @@ impl ExecutingFrame<'_> { && descr.objclass.is(vm.ctx.types.list_type) && next_is_pop_top { - let callable_tag = callable as *const PyObject as u32; + let callable_ptr = callable as *const PyObject as usize; unsafe { self.code .instructions - .write_cache_u32(cache_base + 1, callable_tag); + .write_cache_ptr(cache_base + 1, callable_ptr); } Instruction::CallListAppend } else { @@ -7836,7 +7828,7 @@ impl ExecutingFrame<'_> { // Try to specialize builtin calls if let Some(native) = callable.downcast_ref_if_exact::(vm) { let effective_nargs = nargs + u32::from(self_or_null_is_some); - let callable_tag = callable as *const PyObject as u32; + let callable_ptr = callable as *const PyObject as usize; let new_op = if native.zelf.is_none() && native.value.name == "len" && native.module.is_some_and(|m| m.as_str() == "builtins") @@ -7859,7 +7851,7 @@ impl ExecutingFrame<'_> { unsafe { self.code .instructions - .write_cache_u32(cache_base + 1, callable_tag); + .write_cache_ptr(cache_base + 1, callable_ptr); } } self.specialize_at(instr_idx, cache_base, new_op); @@ -7991,23 +7983,43 @@ impl ExecutingFrame<'_> { if !self_or_null_is_some && let Some(bound_method) = callable.downcast_ref_if_exact::(vm) - && let Some(func) = bound_method + { + if let Some(func) = bound_method .function_obj() .downcast_ref_if_exact::(vm) - { - if self.specialization_eval_frame_active(vm) { + { + if self.specialization_eval_frame_active(vm) { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } + let version = func.get_version_for_current_state(); + if version == 0 { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } unsafe { - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); + self.code + .instructions + .write_cache_u32(cache_base + 1, version); } - return; - } - let version = func.get_version_for_current_state(); - if version == 0 { + self.specialize_at(instr_idx, cache_base, Instruction::CallKwBoundMethod); + } else { + // Match CPython: bound methods wrapping non-Python callables + // are not specialized as CALL_KW_NON_PY. unsafe { self.code.instructions.write_adaptive_counter( cache_base, @@ -8016,14 +8028,7 @@ impl ExecutingFrame<'_> { ), ); } - return; - } - unsafe { - self.code - .instructions - .write_cache_u32(cache_base + 1, version); } - self.specialize_at(instr_idx, cache_base, Instruction::CallKwBoundMethod); return; } From f630d01d96c72843ee14dc391ef5b5ee92f41dc7 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 22:15:02 +0900 Subject: [PATCH 14/17] vm: use base vectorcall fallback for EXIT-style call misses --- crates/vm/src/frame.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 515f644c9c6..8b05d6284af 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -4374,8 +4374,7 @@ impl ExecutingFrame<'_> { return Ok(None); } } - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.execute_call_vectorcall(nargs, vm) } Instruction::CallListAppend => { let instr_idx = self.lasti() as usize - 1; @@ -4706,8 +4705,7 @@ impl ExecutingFrame<'_> { .downcast_ref_if_exact::(vm) .is_some() { - let args = self.collect_positional_args(nargs); - return self.execute_call(args, vm); + return self.execute_call_vectorcall(nargs, vm); } let nargs_usize = nargs as usize; let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); @@ -4839,8 +4837,7 @@ impl ExecutingFrame<'_> { .downcast_ref_if_exact::(vm) .is_some() { - let args = self.collect_keyword_args(nargs); - return self.execute_call(args, vm); + return self.execute_call_kw_vectorcall(nargs, vm); } let nargs_usize = nargs as usize; let kwarg_names_obj = self.pop_value(); From eeff41296580f202e33bc202f0a179b1209805da Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 22:18:42 +0900 Subject: [PATCH 15/17] vm: simplify CALL_LEN/CALL_ISINSTANCE runtime guards --- crates/vm/src/frame.rs | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 8b05d6284af..be0bd179364 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -4100,14 +4100,7 @@ impl ExecutingFrame<'_> { let null = self.pop_value_opt(); let callable = self.pop_value(); let callable_ptr = &*callable as *const PyObject as usize; - let is_len_callable = callable - .downcast_ref_if_exact::(vm) - .is_some_and(|native| { - native.zelf.is_none() - && native.value.name == "len" - && native.module.is_some_and(|m| m.as_str() == "builtins") - }); - if null.is_none() && cached_ptr == callable_ptr && is_len_callable { + if null.is_none() && cached_ptr == callable_ptr { let len = obj.length(vm)?; self.push_value(vm.ctx.new_int(len).into()); return Ok(None); @@ -4137,14 +4130,7 @@ impl ExecutingFrame<'_> { if effective_nargs == 2 { let callable = self.nth_value(nargs + 1); let callable_ptr = callable as *const PyObject as usize; - let is_isinstance_callable = callable - .downcast_ref_if_exact::(vm) - .is_some_and(|native| { - native.zelf.is_none() - && native.value.name == "isinstance" - && native.module.is_some_and(|m| m.as_str() == "builtins") - }); - if cached_ptr == callable_ptr && is_isinstance_callable { + if cached_ptr == callable_ptr { let nargs_usize = nargs as usize; let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); let self_or_null = self.pop_value_opt(); From 2a23d3badf6c27dfb7eb31add757e699711f7e65 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 22:31:54 +0900 Subject: [PATCH 16/17] vm: infer call-convention flags for CPython-style CALL specialization --- crates/derive-impl/src/pyclass.rs | 24 ++- crates/derive-impl/src/pymodule.rs | 9 +- crates/derive-impl/src/util.rs | 71 ++++++++ crates/vm/src/frame.rs | 271 +++++++++++++++++++---------- crates/vm/src/function/method.rs | 10 +- 5 files changed, 286 insertions(+), 99 deletions(-) diff --git a/crates/derive-impl/src/pyclass.rs b/crates/derive-impl/src/pyclass.rs index dfb02a3eda8..a65320cdb52 100644 --- a/crates/derive-impl/src/pyclass.rs +++ b/crates/derive-impl/src/pyclass.rs @@ -1,8 +1,8 @@ use super::Diagnostic; use crate::util::{ ALL_ALLOWED_NAMES, ClassItemMeta, ContentItem, ContentItemInner, ErrorVec, ExceptionItemMeta, - ItemMeta, ItemMetaInner, ItemNursery, SimpleItemMeta, format_doc, pyclass_ident_and_attrs, - pyexception_ident_and_attrs, text_signature, + ItemMeta, ItemMetaInner, ItemNursery, SimpleItemMeta, format_doc, infer_native_call_flags, + pyclass_ident_and_attrs, pyexception_ident_and_attrs, text_signature, }; use core::str::FromStr; use proc_macro2::{Delimiter, Group, Span, TokenStream, TokenTree}; @@ -1015,6 +1015,16 @@ where let raw = item_meta.raw()?; let sig_doc = text_signature(func.sig(), &py_name); + let has_receiver = func + .sig() + .inputs + .iter() + .any(|arg| matches!(arg, syn::FnArg::Receiver(_))); + let drop_first_typed = match self.inner.attr_name { + AttrName::Method | AttrName::ClassMethod if !has_receiver => 1, + _ => 0, + }; + let call_flags = infer_native_call_flags(func.sig(), drop_first_typed); // Add #[allow(non_snake_case)] for setter methods like set___name__ let method_name = ident.to_string(); @@ -1031,6 +1041,7 @@ where doc, raw, attr_name: self.inner.attr_name, + call_flags, }); Ok(()) } @@ -1248,6 +1259,7 @@ struct MethodNurseryItem { raw: bool, doc: Option, attr_name: AttrName, + call_flags: TokenStream, } impl MethodNursery { @@ -1278,7 +1290,7 @@ impl ToTokens for MethodNursery { } else { quote! { None } }; - let flags = match &item.attr_name { + let binding_flags = match &item.attr_name { AttrName::Method => { quote! { rustpython_vm::function::PyMethodFlags::METHOD } } @@ -1290,6 +1302,12 @@ impl ToTokens for MethodNursery { } _ => unreachable!(), }; + let call_flags = &item.call_flags; + let flags = quote! { + rustpython_vm::function::PyMethodFlags::from_bits_retain( + (#binding_flags).bits() | (#call_flags).bits() + ) + }; // TODO: intern // let py_name = if py_name.starts_with("__") && py_name.ends_with("__") { // let name_ident = Ident::new(&py_name, ident.span()); diff --git a/crates/derive-impl/src/pymodule.rs b/crates/derive-impl/src/pymodule.rs index 775e6858520..b4b5535200c 100644 --- a/crates/derive-impl/src/pymodule.rs +++ b/crates/derive-impl/src/pymodule.rs @@ -2,8 +2,8 @@ use crate::error::Diagnostic; use crate::pystructseq::PyStructSequenceMeta; use crate::util::{ ALL_ALLOWED_NAMES, AttrItemMeta, AttributeExt, ClassItemMeta, ContentItem, ContentItemInner, - ErrorVec, ItemMeta, ItemNursery, ModuleItemMeta, SimpleItemMeta, format_doc, iter_use_idents, - pyclass_ident_and_attrs, text_signature, + ErrorVec, ItemMeta, ItemNursery, ModuleItemMeta, SimpleItemMeta, format_doc, + infer_native_call_flags, iter_use_idents, pyclass_ident_and_attrs, text_signature, }; use core::str::FromStr; use proc_macro2::{Delimiter, Group, TokenStream, TokenTree}; @@ -525,6 +525,7 @@ struct FunctionNurseryItem { cfgs: Vec, ident: Ident, doc: String, + call_flags: TokenStream, } impl FunctionNursery { @@ -550,7 +551,6 @@ struct ValidatedFunctionNursery(FunctionNursery); impl ToTokens for ValidatedFunctionNursery { fn to_tokens(&self, tokens: &mut TokenStream) { let mut inner_tokens = TokenStream::new(); - let flags = quote! { rustpython_vm::function::PyMethodFlags::empty() }; for item in &self.0.items { let ident = &item.ident; let cfgs = &item.cfgs; @@ -558,6 +558,7 @@ impl ToTokens for ValidatedFunctionNursery { let py_names = &item.py_names; let doc = &item.doc; let doc = quote!(Some(#doc)); + let flags = &item.call_flags; inner_tokens.extend(quote![ #( @@ -706,12 +707,14 @@ impl ModuleItem for FunctionItem { py_names } }; + let call_flags = infer_native_call_flags(func.sig(), 0); args.context.function_items.add_item(FunctionNurseryItem { ident: ident.to_owned(), py_names, cfgs: args.cfgs.to_vec(), doc, + call_flags, }); Ok(()) } diff --git a/crates/derive-impl/src/util.rs b/crates/derive-impl/src/util.rs index a4bf7e6a8fe..068bde9bccd 100644 --- a/crates/derive-impl/src/util.rs +++ b/crates/derive-impl/src/util.rs @@ -732,6 +732,77 @@ pub(crate) fn text_signature(sig: &Signature, name: &str) -> String { } } +pub(crate) fn infer_native_call_flags(sig: &Signature, drop_first_typed: usize) -> TokenStream { + // Best-effort mapping of Rust function signatures to CPython-style + // METH_* calling convention flags used by CALL specialization. + let mut typed_args = Vec::new(); + for arg in &sig.inputs { + let syn::FnArg::Typed(typed) = arg else { + continue; + }; + let ty_tokens = &typed.ty; + let ty = quote!(#ty_tokens).to_string().replace(' ', ""); + // `vm: &VirtualMachine` is not a Python-level argument. + if ty.starts_with('&') && ty.ends_with("VirtualMachine") { + continue; + } + typed_args.push(ty); + } + + let mut user_args = typed_args.into_iter(); + for _ in 0..drop_first_typed { + if user_args.next().is_none() { + break; + } + } + + let mut has_keywords = false; + let mut variable_arity = false; + let mut fixed_positional = 0usize; + + for ty in user_args { + let is_named = |name: &str| { + ty == name + || ty.starts_with(&format!("{name}<")) + || ty.contains(&format!("::{name}<")) + || ty.ends_with(&format!("::{name}")) + }; + + if is_named("FuncArgs") { + has_keywords = true; + variable_arity = true; + continue; + } + if is_named("KwArgs") { + has_keywords = true; + variable_arity = true; + continue; + } + if is_named("PosArgs") || is_named("OptionalArg") || is_named("OptionalOption") { + variable_arity = true; + continue; + } + fixed_positional += 1; + } + + if has_keywords { + quote! { + rustpython_vm::function::PyMethodFlags::from_bits_retain( + rustpython_vm::function::PyMethodFlags::FASTCALL.bits() + | rustpython_vm::function::PyMethodFlags::KEYWORDS.bits() + ) + } + } else if variable_arity { + quote! { rustpython_vm::function::PyMethodFlags::FASTCALL } + } else { + match fixed_positional { + 0 => quote! { rustpython_vm::function::PyMethodFlags::NOARGS }, + 1 => quote! { rustpython_vm::function::PyMethodFlags::O }, + _ => quote! { rustpython_vm::function::PyMethodFlags::FASTCALL }, + } + } +} + fn func_sig(sig: &Signature) -> String { sig.inputs .iter() diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index be0bd179364..8e8ac8a4975 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -4230,30 +4230,30 @@ impl ExecutingFrame<'_> { .is_some(); let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable = self.nth_value(nargs + 1); - if callable - .downcast_ref_if_exact::(vm) - .is_some() - && effective_nargs == 1 - { - let nargs_usize = nargs as usize; - let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); - let self_or_null = self.pop_value_opt(); - let callable = self.pop_value(); - let mut args_vec = Vec::with_capacity(effective_nargs as usize); - if let Some(self_val) = self_or_null { - args_vec.push(self_val); + if let Some(native) = callable.downcast_ref_if_exact::(vm) { + let call_conv = native.value.flags + & (PyMethodFlags::VARARGS + | PyMethodFlags::FASTCALL + | PyMethodFlags::NOARGS + | PyMethodFlags::O + | PyMethodFlags::KEYWORDS); + if call_conv == PyMethodFlags::O && effective_nargs == 1 { + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let mut args_vec = Vec::with_capacity(effective_nargs as usize); + if let Some(self_val) = self_or_null { + args_vec.push(self_val); + } + args_vec.extend(pos_args); + let result = + callable.vectorcall(args_vec, effective_nargs as usize, None, vm)?; + self.push_value(result); + return Ok(None); } - args_vec.extend(pos_args); - let result = - callable.vectorcall(args_vec, effective_nargs as usize, None, vm)?; - self.push_value(result); - return Ok(None); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.execute_call_vectorcall(nargs, vm) } Instruction::CallBuiltinFast => { let nargs: u32 = arg.into(); @@ -4264,29 +4264,33 @@ impl ExecutingFrame<'_> { .is_some(); let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable = self.nth_value(nargs + 1); - if callable - .downcast_ref_if_exact::(vm) - .is_some() - { - let nargs_usize = nargs as usize; - let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); - let self_or_null = self.pop_value_opt(); - let callable = self.pop_value(); - let mut args_vec = Vec::with_capacity(effective_nargs as usize); - if let Some(self_val) = self_or_null { - args_vec.push(self_val); + if let Some(native) = callable.downcast_ref_if_exact::(vm) { + let call_conv = native.value.flags + & (PyMethodFlags::VARARGS + | PyMethodFlags::FASTCALL + | PyMethodFlags::NOARGS + | PyMethodFlags::O + | PyMethodFlags::KEYWORDS); + if call_conv == PyMethodFlags::FASTCALL { + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let mut args_vec = Vec::with_capacity(effective_nargs as usize); + if let Some(self_val) = self_or_null { + args_vec.push(self_val); + } + args_vec.extend(pos_args); + let result = + callable.vectorcall(args_vec, effective_nargs as usize, None, vm)?; + self.push_value(result); + return Ok(None); } - args_vec.extend(pos_args); - let result = - callable.vectorcall(args_vec, effective_nargs as usize, None, vm)?; - self.push_value(result); - return Ok(None); } self.deoptimize(Instruction::Call { argc: Arg::marker(), }); - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.execute_call_vectorcall(nargs, vm) } Instruction::CallPyGeneral => { let instr_idx = self.lasti() as usize - 1; @@ -4418,6 +4422,13 @@ impl ExecutingFrame<'_> { }; if let Some(descr) = descr && descr.method.flags.contains(PyMethodFlags::METHOD) + && (descr.method.flags + & (PyMethodFlags::VARARGS + | PyMethodFlags::FASTCALL + | PyMethodFlags::NOARGS + | PyMethodFlags::O + | PyMethodFlags::KEYWORDS)) + == PyMethodFlags::NOARGS && self .localsplus .stack_index(stack_len - 1) @@ -4452,6 +4463,13 @@ impl ExecutingFrame<'_> { }; if let Some(descr) = descr && descr.method.flags.contains(PyMethodFlags::METHOD) + && (descr.method.flags + & (PyMethodFlags::VARARGS + | PyMethodFlags::FASTCALL + | PyMethodFlags::NOARGS + | PyMethodFlags::O + | PyMethodFlags::KEYWORDS)) + == PyMethodFlags::O && self .localsplus .stack_index(stack_len - 2) @@ -4488,6 +4506,13 @@ impl ExecutingFrame<'_> { }; if let Some(descr) = descr && descr.method.flags.contains(PyMethodFlags::METHOD) + && (descr.method.flags + & (PyMethodFlags::VARARGS + | PyMethodFlags::FASTCALL + | PyMethodFlags::NOARGS + | PyMethodFlags::O + | PyMethodFlags::KEYWORDS)) + == PyMethodFlags::FASTCALL && self .localsplus .stack_index(stack_len - nargs as usize - 1) @@ -4621,6 +4646,13 @@ impl ExecutingFrame<'_> { }; if let Some(descr) = descr && descr.method.flags.contains(PyMethodFlags::METHOD) + && (descr.method.flags + & (PyMethodFlags::VARARGS + | PyMethodFlags::FASTCALL + | PyMethodFlags::NOARGS + | PyMethodFlags::O + | PyMethodFlags::KEYWORDS)) + == (PyMethodFlags::FASTCALL | PyMethodFlags::KEYWORDS) && self .localsplus .stack_index(stack_len - nargs as usize - 1) @@ -4655,23 +4687,28 @@ impl ExecutingFrame<'_> { .is_some(); let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable = self.nth_value(nargs + 1); - if callable - .downcast_ref_if_exact::(vm) - .is_some() - { - let nargs_usize = nargs as usize; - let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); - let self_or_null = self.pop_value_opt(); - let callable = self.pop_value(); - let mut args_vec = Vec::with_capacity(effective_nargs as usize); - if let Some(self_val) = self_or_null { - args_vec.push(self_val); + if let Some(native) = callable.downcast_ref_if_exact::(vm) { + let call_conv = native.value.flags + & (PyMethodFlags::VARARGS + | PyMethodFlags::FASTCALL + | PyMethodFlags::NOARGS + | PyMethodFlags::O + | PyMethodFlags::KEYWORDS); + if call_conv == (PyMethodFlags::FASTCALL | PyMethodFlags::KEYWORDS) { + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let mut args_vec = Vec::with_capacity(effective_nargs as usize); + if let Some(self_val) = self_or_null { + args_vec.push(self_val); + } + args_vec.extend(pos_args); + let result = + callable.vectorcall(args_vec, effective_nargs as usize, None, vm)?; + self.push_value(result); + return Ok(None); } - args_vec.extend(pos_args); - let result = - callable.vectorcall(args_vec, effective_nargs as usize, None, vm)?; - self.push_value(result); - return Ok(None); } self.deoptimize(Instruction::Call { argc: Arg::marker(), @@ -7785,24 +7822,58 @@ impl ExecutingFrame<'_> { false }; - let new_op = if nargs == 1 - && descr.method.name == "append" - && descr.objclass.is(vm.ctx.types.list_type) - && next_is_pop_top - { - let callable_ptr = callable as *const PyObject as usize; - unsafe { - self.code - .instructions - .write_cache_ptr(cache_base + 1, callable_ptr); + let call_conv = descr.method.flags + & (PyMethodFlags::VARARGS + | PyMethodFlags::FASTCALL + | PyMethodFlags::NOARGS + | PyMethodFlags::O + | PyMethodFlags::KEYWORDS); + + let new_op = if call_conv == PyMethodFlags::NOARGS { + if nargs != 0 { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; } - Instruction::CallListAppend - } else { - match nargs { - 0 => Instruction::CallMethodDescriptorNoargs, - 1 => Instruction::CallMethodDescriptorO, - _ => Instruction::CallMethodDescriptorFast, + Instruction::CallMethodDescriptorNoargs + } else if call_conv == PyMethodFlags::O { + if nargs != 1 { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } + if descr.method.name == "append" + && descr.objclass.is(vm.ctx.types.list_type) + && next_is_pop_top + { + let callable_ptr = callable as *const PyObject as usize; + unsafe { + self.code + .instructions + .write_cache_ptr(cache_base + 1, callable_ptr); + } + Instruction::CallListAppend + } else { + Instruction::CallMethodDescriptorO } + } else if call_conv == PyMethodFlags::FASTCALL { + Instruction::CallMethodDescriptorFast + } else if call_conv == (PyMethodFlags::FASTCALL | PyMethodFlags::KEYWORDS) { + Instruction::CallMethodDescriptorFastWithKeywords + } else { + Instruction::CallNonPyGeneral }; self.specialize_at(instr_idx, cache_base, new_op); return; @@ -7812,23 +7883,47 @@ impl ExecutingFrame<'_> { if let Some(native) = callable.downcast_ref_if_exact::(vm) { let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable_ptr = callable as *const PyObject as usize; - let new_op = if native.zelf.is_none() - && native.value.name == "len" - && native.module.is_some_and(|m| m.as_str() == "builtins") - && nargs == 1 - && effective_nargs == 1 - { - Instruction::CallLen - } else if native.zelf.is_none() - && native.value.name == "isinstance" - && native.module.is_some_and(|m| m.as_str() == "builtins") - && effective_nargs == 2 - { - Instruction::CallIsinstance - } else if effective_nargs == 1 { - Instruction::CallBuiltinO + let call_conv = native.value.flags + & (PyMethodFlags::VARARGS + | PyMethodFlags::FASTCALL + | PyMethodFlags::NOARGS + | PyMethodFlags::O + | PyMethodFlags::KEYWORDS); + let new_op = if call_conv == PyMethodFlags::O { + if effective_nargs != 1 { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } + if native.zelf.is_none() + && native.value.name == "len" + && native.module.is_some_and(|m| m.as_str() == "builtins") + && nargs == 1 + { + Instruction::CallLen + } else { + Instruction::CallBuiltinO + } + } else if call_conv == PyMethodFlags::FASTCALL { + if native.zelf.is_none() + && native.value.name == "isinstance" + && native.module.is_some_and(|m| m.as_str() == "builtins") + && effective_nargs == 2 + { + Instruction::CallIsinstance + } else { + Instruction::CallBuiltinFast + } + } else if call_conv == (PyMethodFlags::FASTCALL | PyMethodFlags::KEYWORDS) { + Instruction::CallBuiltinFastWithKeywords } else { - Instruction::CallBuiltinFast + Instruction::CallNonPyGeneral }; if matches!(new_op, Instruction::CallLen | Instruction::CallIsinstance) { unsafe { diff --git a/crates/vm/src/function/method.rs b/crates/vm/src/function/method.rs index 52624cbbf86..211f7e3adc5 100644 --- a/crates/vm/src/function/method.rs +++ b/crates/vm/src/function/method.rs @@ -12,11 +12,11 @@ bitflags::bitflags! { // METH_XXX flags in CPython #[derive(Copy, Clone, Debug, PartialEq)] pub struct PyMethodFlags: u32 { - // const VARARGS = 0x0001; - // const KEYWORDS = 0x0002; + const VARARGS = 0x0001; + const KEYWORDS = 0x0002; // METH_NOARGS and METH_O must not be combined with the flags above. - // const NOARGS = 0x0004; - // const O = 0x0008; + const NOARGS = 0x0004; + const O = 0x0008; // METH_CLASS and METH_STATIC are a little different; these control // the construction of methods for a class. These cannot be used for @@ -31,7 +31,7 @@ bitflags::bitflags! { // const COEXIST = 0x0040; // if not Py_LIMITED_API - // const FASTCALL = 0x0080; + const FASTCALL = 0x0080; // This bit is preserved for Stackless Python // const STACKLESS = 0x0100; From c7d095a3ac8fdcbfcf6a40d694ab52392f31416c Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 23:06:19 +0900 Subject: [PATCH 17/17] vm: check use_tracing in eval_frame_active, add SendGen send_none - Implement specialization_eval_frame_active to check vm.use_tracing so specializations are skipped when tracing/profiling is active - Add send_none fastpath in SendGen handler for the common None case --- crates/vm/src/frame.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 8e8ac8a4975..47d583b578c 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3203,7 +3203,12 @@ impl ExecutingFrame<'_> { if can_fast_send { let receiver = self.top_value(); let coro = self.builtin_coro(receiver).unwrap(); - match coro.send(receiver, val, vm)? { + let ret = if vm.is_none(&val) { + coro.send_none(receiver, vm)? + } else { + coro.send(receiver, val, vm)? + }; + match ret { PyIterReturn::Return(value) => { self.push_value(value); return Ok(None); @@ -8310,8 +8315,8 @@ impl ExecutingFrame<'_> { } #[inline] - fn specialization_eval_frame_active(&self, _vm: &VirtualMachine) -> bool { - false + fn specialization_eval_frame_active(&self, vm: &VirtualMachine) -> bool { + vm.use_tracing.get() } #[inline]