PHP的zend_string结构:
struct _zend_string { zend_refcounted_h gc; zend_ulong h; /* hash value 字符串对应的hash值*/ size_t len; char val[1]; };可以看到结构体中有一个zend_ulong h字段, 用来保存字符串的hash值。为什么要增加这个hash值呢?它的作用是什么? 这是因为PHP中大量的数据结构都是基于HashTable实现的, 对HashTable的增删改查占据了大量的CPU时间, 而以字符串作为key进行查找时, 首先要计算它的hash值。因此, 可以在第一次计算出字符串的hash值之后就把它保存在h中, 以后直接复用, 避免重复计算(h为0表示hash值尚未计算)。
可以看出, PHP为了提高性能, 在尽最大的努力做优化。
len和val[1]合起来就可以表示一个字符串: 字符串内容保存在val中, 长度用len表示(val声明为char val[1]只是变长数据的占位写法, 实际内容紧跟在结构体之后连续存放)。在获取字符串的时候, 从val开始取len个字节即可。 C语言的字符串以\0作为结尾标志, 内容中一旦包含\0就会被截断, 因此不是二进制安全的。 PHP的字符串是二进制安全的: 它按长度取字符串, 不关心内容中是否包含\0。
gc用于垃圾回收, 其中保存了引用计数, 结构如下:
typedef struct _zend_refcounted_h { uint32_t refcount; /* reference counter 32-bit */ union { uint32_t type_info; } u; } zend_refcounted_h;准备一个string.php文件:
<?php $a = 'hello world!'; echo $a; $b = time(). '-time'; echo $b; $c = $b; echo $b; echo $c; $c = 'change'; echo $b; echo $c;然后调试看看PHP内部执行过程:
gdb php (gdb) b ZEND_ECHO_SPEC_CV_HANDLER #打个断点 (gdb) r string.php #运行php文件 (gdb) n 33117 z = _get_zval_ptr_cv_undef(opline->op1.var EXECUTE_DATA_CC); (gdb) n 33119 if (Z_TYPE_P(z) == IS_STRING) { #第一个字符串 $a = 'hello world!'; (gdb) p z $2 = (zval *) 0x7ffff601e080 (gdb) p *z $3 = {value = {lval = 140737321016448, dval = 6.9533475401958222e-310, counted = 0x7ffff6069c80, str = 0x7ffff6069c80, arr = 0x7ffff6069c80, obj = 0x7ffff6069c80, res = 0x7ffff6069c80, ref = 0x7ffff6069c80, ast = 0x7ffff6069c80, zv = 0x7ffff6069c80, ptr = 0x7ffff6069c80, ce = 0x7ffff6069c80, func = 0x7ffff6069c80, ww = {w1 = 4127628416, w2 = 32767}}, u1 = {v = {type = 6 '\006', type_flags = 0 '\000', const_flags = 0 '\000', reserved = 0 '\000'}, type_info = 6}, u2 = {next = 0, cache_slot = 0, lineno = 0, num_args = 0, fe_pos = 0, fe_iter_idx = 0, access_flags = 0, property_guard = 0, extra = 0}} (gdb) p $2.value.str $4 = (zend_string *) 0x7ffff6069c80 (gdb) p *$2.value.str #打印string内容 可以看到refcount = 0, 常量字符串的refcount等于0, 用flags来区分 $5 = {gc = {refcount = 0, u = {v = {type = 6 '\006', flags = 2 '\002', gc_info = 0}, type_info = 518}}, h = 15212097803322581250, len = 12, val = "h"} (gdb) p *$2.value.str.val@12 #打印val值 $6 = "hello world!" #接着看下一个变量 $b = time(). '-time'; (gdb) c Continuing. hello world! 
Breakpoint 1, ZEND_ECHO_SPEC_CV_HANDLER (execute_data=0x7ffff601e030) at /root/php-7.2.31/Zend/zend_vm_execute.h:33112 33112 USE_OPLINE (gdb) n 33117 z = _get_zval_ptr_cv_undef(opline->op1.var EXECUTE_DATA_CC); (gdb) n 33119 if (Z_TYPE_P(z) == IS_STRING) { (gdb) p z $11 = (zval *) 0x7ffff601e090 (gdb) p *z $12 = {value = {lval = 140737321016608, dval = 6.9533475402037272e-310, counted = 0x7ffff6069d20, str = 0x7ffff6069d20, arr = 0x7ffff6069d20, obj = 0x7ffff6069d20, res = 0x7ffff6069d20, ref = 0x7ffff6069d20, ast = 0x7ffff6069d20, zv = 0x7ffff6069d20, ptr = 0x7ffff6069d20, ce = 0x7ffff6069d20, func = 0x7ffff6069d20, ww = {w1 = 4127628576, w2 = 32767}}, u1 = {v = {type = 6 '\006', type_flags = 20 '\024', const_flags = 0 '\000', reserved = 0 '\000'}, type_info = 5126}, u2 = {next = 0, cache_slot = 0, lineno = 0, num_args = 0, fe_pos = 0, fe_iter_idx = 0, access_flags = 0, property_guard = 0, extra = 0}} (gdb) p *z.value.str #变量字符串refcount = 1, $13 = {gc = {refcount = 1, u = {v = {type = 6 '\006', flags = 0 '\000', gc_info = 0}, type_info = 6}}, h = 0, len = 15, val = "1"} (gdb) p *z.value.str.val@15 $14 = "1584764535-time" #接着看 $c = $b; (gdb) c Continuing. 
1584764535-time Breakpoint 1, ZEND_ECHO_SPEC_CV_HANDLER (execute_data=0x7ffff601e030) at /root/php-7.2.31/Zend/zend_vm_execute.h:33112 33112 USE_OPLINE (gdb) n 33117 z = _get_zval_ptr_cv_undef(opline->op1.var EXECUTE_DATA_CC); (gdb) n 33119 if (Z_TYPE_P(z) == IS_STRING) { (gdb) p z $15 = (zval *) 0x7ffff601e090 #地址跟$b的一致 (gdb) p *z $16 = {value = {lval = 140737321016608, dval = 6.9533475402037272e-310, counted = 0x7ffff6069d20, str = 0x7ffff6069d20, arr = 0x7ffff6069d20, obj = 0x7ffff6069d20, res = 0x7ffff6069d20, ref = 0x7ffff6069d20, ast = 0x7ffff6069d20, zv = 0x7ffff6069d20, ptr = 0x7ffff6069d20, ce = 0x7ffff6069d20, func = 0x7ffff6069d20, ww = {w1 = 4127628576, w2 = 32767}}, u1 = {v = {type = 6 '\006', type_flags = 20 '\024', const_flags = 0 '\000', reserved = 0 '\000'}, type_info = 5126}, u2 = {next = 0, cache_slot = 0, lineno = 0, num_args = 0, fe_pos = 0, fe_iter_idx = 0, access_flags = 0, property_guard = 0, extra = 0}} (gdb) p *z.value.str #这里可以看到 refcount = 2 $17 = {gc = {refcount = 2, u = {v = {type = 6 '\006', flags = 0 '\000', gc_info = 0}, type_info = 6}}, h = 0, len = 15, val = "1"} # 当$c重新赋值后 $c = 'change'; 再看看$b $c的情况: (gdb) c Continuing. 1584764535-time Breakpoint 1, ZEND_ECHO_SPEC_CV_HANDLER (execute_data=0x7ffff601e030) at /root/php-7.2.31/Zend/zend_vm_execute.h:33112 33112 USE_OPLINE (gdb) n 33117 z = _get_zval_ptr_cv_undef(opline->op1.var EXECUTE_DATA_CC); (gdb) n 33119 if (Z_TYPE_P(z) == IS_STRING) { (gdb) p *z.value.str #refcount减了1 $23 = {gc = {refcount = 1, u = {v = {type = 6 '\006', flags = 0 '\000', gc_info = 0}, type_info = 6}}, h = 0, len = 15, val = "1"} (gdb) p *z.value.str.val@15 $24 = "1584764535-time" (gdb) c Continuing. 
1584764535-time Breakpoint 1, ZEND_ECHO_SPEC_CV_HANDLER (execute_data=0x7ffff601e030) at /root/php-7.2.31/Zend/zend_vm_execute.h:33112 33112 USE_OPLINE (gdb) n 33117 z = _get_zval_ptr_cv_undef(opline->op1.var EXECUTE_DATA_CC); (gdb) n 33119 if (Z_TYPE_P(z) == IS_STRING) { (gdb) p z # $c地址改变了 写时复制 $25 = (zval *) 0x7ffff601e0a0 (gdb) p *z $26 = {value = {lval = 140737320987200, dval = 6.953347538750779e-310, counted = 0x7ffff6062a40, str = 0x7ffff6062a40, arr = 0x7ffff6062a40, obj = 0x7ffff6062a40, res = 0x7ffff6062a40, ref = 0x7ffff6062a40, ast = 0x7ffff6062a40, zv = 0x7ffff6062a40, ptr = 0x7ffff6062a40, ce = 0x7ffff6062a40, func = 0x7ffff6062a40, ww = {w1 = 4127599168, w2 = 32767}}, u1 = {v = {type = 6 '\006', type_flags = 0 '\000', const_flags = 0 '\000', reserved = 0 '\000'}, type_info = 6}, u2 = {next = 0, cache_slot = 0, lineno = 0, num_args = 0, fe_pos = 0, fe_iter_idx = 0, access_flags = 0, property_guard = 0, extra = 0}} (gdb) p *z.value.str $27 = {gc = {refcount = 0, u = {v = {type = 6 '\006', flags = 2 '\002', gc_info = 0}, type_info = 518}}, h = 9223378990245265867, len = 6, val = "c"} (gdb) p *z.value.str.val@6 $28 = "change"写时复制(Copy on Write,也缩写为COW)的应用场景非常多, 比如Linux中对进程复制中内存使用的优化,在各种编程语言中,如C++的STL等等中均有类似的应用。 COW是常用的优化手段,可以归类于:资源延迟分配。只有在真正需要使用资源时才占用资源, 写时复制通常能减少资源的占用。