From a1daf3d9d2328891b5a362e6efc0781d670585b8 Mon Sep 17 00:00:00 2001 From: PurHur Date: Mon, 18 May 2026 19:30:53 +0000 Subject: [PATCH] Implement string offset read/write for VM and JIT (#198) Add writable string byte offsets in the VM, LLVM JIT helpers for dim fetch/assign, and compliance PHPT cases for ASCII $str[$i] access. Co-authored-by: Cursor --- lib/JIT.php | 29 +++++++ lib/JIT/StringOffsetHelper.php | 52 +++++++++++++ lib/JIT/Variable.php | 7 +- lib/VM.php | 4 +- lib/VM/Variable.php | 75 +++++++++++++++++++ .../cases/language/string_offset.phpt | 14 ++++ .../cases/language/string_offset_jit.phpt | 11 +++ 7 files changed, 190 insertions(+), 2 deletions(-) create mode 100644 lib/JIT/StringOffsetHelper.php create mode 100644 test/compliance/cases/language/string_offset.phpt create mode 100644 test/compliance/cases/language/string_offset_jit.phpt diff --git a/lib/JIT.php b/lib/JIT.php index c2b2ab78..f37d1097 100644 --- a/lib/JIT.php +++ b/lib/JIT.php @@ -191,6 +191,15 @@ private function compileBlockInternal( $dimOp = $block->getOperand($op->arg3); $dim = $this->context->getVariableFromOp($dimOp); $resultOp = $block->getOperand($op->arg1); + if ($value->type === Variable::TYPE_STRING) { + $charPtr = StringOffsetHelper::dimFetch( + $this->context, + $value->value, + $dim + ); + $this->context->makeVariableFromValueOp($charPtr, $resultOp); + break; + } if ($value->type === Variable::TYPE_HASHTABLE) { $this->assignOperand( $resultOp, @@ -314,6 +323,21 @@ private function compileBlockInternal( JIT\ValueEchoHelper::echo($this->context, $arg->value); break; case Variable::TYPE_STRING: + if ($arg->kind === Variable::KIND_VALUE + && 'i8*' === $this->context->getStringFromType($arg->value->typeOf()) + ) { + $byte = $this->context->builder->load($arg->value); + $fmt = $this->context->builder->pointerCast( + $this->context->constantFromString('%c'), + $this->context->getTypeFromString('char*') + ); + $this->context->builder->call( + $this->context->lookupFunction('printf'), + $fmt, + $byte + ); + break; + } $argValue = $this->context->helper->loadValue($arg); $fmt = $this->context->builder->pointerCast( $this->context->constantFromString("%.*s"), @@ -565,6 +589,11 @@ private function assignOperand(Operand $result, Variable $value): void { return; } $result = $this->context->getVariableFromOp($result); + if ($result->kind === Variable::KIND_VALUE && $result->type === Variable::TYPE_STRING) { + StringOffsetHelper::dimAssign($this->context, $result->value, $value); + + return; + } if ($result->kind !== Variable::KIND_VARIABLE) { throw new \LogicException("Cannot assign to a value"); } diff --git a/lib/JIT/StringOffsetHelper.php b/lib/JIT/StringOffsetHelper.php new file mode 100644 index 00000000..0575d278 --- /dev/null +++ b/lib/JIT/StringOffsetHelper.php @@ -0,0 +1,52 @@ +builder->load($strSlot); + $map = $context->structFieldMap['__string__']; + $chars = $context->builder->structGep($str, $map['value']); + $offset = $context->builder->truncOrBitCast( + $context->helper->loadValue($dim), + $context->getTypeFromString('size_t') + ); + + return $context->builder->gep($chars, $offset); + } + + public static function dimAssign(Context $context, PHPLLVM\Value $charPtr, Variable $value): void + { + $byte = self::assignByte($context, $value); + $context->builder->store($byte, $charPtr); + } + + private static function assignByte(Context $context, Variable $value): PHPLLVM\Value + { + $i8 = $context->getTypeFromString('int8'); + switch ($value->type) { + case Variable::TYPE_NATIVE_LONG: + $long = $context->helper->loadValue($value); + $trunc = $context->builder->truncOrBitCast($long, $i8); + + return $trunc; + case Variable::TYPE_STRING: + $str = $context->helper->loadValue($value); + $map = $context->structFieldMap['__string__']; + $chars = $context->builder->structGep($str, $map['value']); + + return $context->builder->load($chars); + default: + throw new \LogicException( + 'String offset assignment supports int or string RHS in JIT (got type ' . $value->type . ')' + ); + } + } +} diff --git a/lib/JIT/Variable.php b/lib/JIT/Variable.php index 95047be1..e3909418 100755 --- a/lib/JIT/Variable.php +++ b/lib/JIT/Variable.php @@ -365,7 +365,12 @@ public function toString(\gcc_jit_block_ptr $block): Variable { public function dimFetch(self $dim, ?Type $expectedType = null): Variable { switch ($this->type) { case self::TYPE_STRING: - $ptr = $this->context->type->string->dimFetch($this->value, $dim->value); + $ptr = StringOffsetHelper::dimFetch( + $this->context, + $this->value, + $dim + ); + return new Variable( $this->context, self::TYPE_STRING, diff --git a/lib/VM.php b/lib/VM.php index 76d65c35..dbd72a0a 100755 --- a/lib/VM.php +++ b/lib/VM.php @@ -66,7 +66,9 @@ public function run(Block $block): int { } $arg3 = $frame->scope[$op->arg3]; if ($container->type === Variable::TYPE_STRING) { - $arg1->string($container->toString()[$arg3->toInt()]); + $offset = new Variable(Variable::TYPE_STRING_OFFSET); + $offset->stringOffset($container, $arg3->toInt()); + $arg1->indirect($offset); } elseif ($container->type === Variable::TYPE_ARRAY) { $arg1->indirect($container->toArray()->findVariable($arg3, false)); } else { diff --git a/lib/VM/Variable.php b/lib/VM/Variable.php index 3f454147..ab796f06 100755 --- a/lib/VM/Variable.php +++ b/lib/VM/Variable.php @@ -22,6 +22,8 @@ final class Variable { const TYPE_OBJECT = 5; const TYPE_ARRAY = 6; const TYPE_INDIRECT = 7; + /** Writable single-byte view of a parent string (Zend-style $str[$i]). */ + const TYPE_STRING_OFFSET = 8; const NUMERIC = self::TYPE_INTEGER | self::TYPE_FLOAT; @@ -35,6 +37,8 @@ final class Variable { private ObjectEntry $object; private Variable $indirect; private HashTable $array; + private Variable $stringOffsetParent; + private int $stringOffsetIndex; public int $next = -1; @@ -231,6 +235,8 @@ public function toString(): string { return $this->bool ? '1' : ''; case self::TYPE_INDIRECT: return $this->indirect->toString(); + case self::TYPE_STRING_OFFSET: + return $this->readStringOffset(); case self::TYPE_ARRAY: // todo: raise notice return 'Array'; @@ -267,6 +273,16 @@ public function reset(): void { unset($this->bool); unset($this->object); unset($this->indirect); + unset($this->stringOffsetParent); + unset($this->stringOffsetIndex); + } + + public function stringOffset(Variable $parent, int $index): void + { + $this->reset(); + $this->type = self::TYPE_STRING_OFFSET; + $this->stringOffsetParent = $parent; + $this->stringOffsetIndex = $index; } public function castFrom(int $type, self $var) { @@ -307,6 +323,11 @@ public function copyFrom(self $var): void { // destroy the indirection $var = $var->indirect; } + if ($this->type === self::TYPE_STRING_OFFSET) { + $this->writeStringOffset($var); + + return; + } switch ($var->type) { case self::TYPE_NULL: $this->null(); @@ -643,6 +664,60 @@ public function unaryOp(int $opCode, Variable $expr): void { } throw new \LogicException("UnaryOp $opCode not implemented for type $expr->type"); } + + private function readStringOffset(): string + { + $parent = $this->stringOffsetParent->resolveIndirect(); + if ($parent->type !== self::TYPE_STRING) { + throw new \LogicException('String offset parent is not a string'); + } + $str = $parent->string; + $index = $this->stringOffsetIndex; + if ($index < 0 || $index >= strlen($str)) { + return ''; + } + + return $str[$index]; + } + + private function writeStringOffset(self $value): void + { + $parent = $this->stringOffsetParent->resolveIndirect(); + if ($parent->type !== self::TYPE_STRING) { + throw new \LogicException('String offset parent is not a string'); + } + $str = $parent->string; + $index = $this->stringOffsetIndex; + if ($index < 0) { + throw new \LogicException('Illegal string offset'); + } + $byte = self::byteFromAssignValue($value); + $len = strlen($str); + if ($index >= $len) { + $str .= str_repeat("\0", $index - $len + 1); + } + $str[$index] = $byte; + $parent->string($str); + } + + private static function byteFromAssignValue(self $value): string + { + $value = $value->resolveIndirect(); + switch ($value->type) { + case self::TYPE_STRING: + $s = $value->string; + + return '' === $s ? '' : $s[0]; + case self::TYPE_INTEGER: + return chr($value->integer & 0xff); + case self::TYPE_NULL: + return "\0"; + default: + $s = $value->toString(); + + return '' === $s ? '' : $s[0]; + } + } } const TYPE_PAIR_INTEGER_INTEGER = (Variable::TYPE_INTEGER << 8) | Variable::TYPE_INTEGER; diff --git a/test/compliance/cases/language/string_offset.phpt b/test/compliance/cases/language/string_offset.phpt new file mode 100644 index 00000000..4a78a183 --- /dev/null +++ b/test/compliance/cases/language/string_offset.phpt @@ -0,0 +1,14 @@ +--TEST-- +String offset read and assignment (ASCII bytes) +--FILE-- +