什么是PHP-Parser ?
PHP-Parser入门
use PhpParser\ParserFactory;
// Create a parser through ParserFactory, requesting the PREFER_PHP7 kind.
$parser = (new ParserFactory)->create(ParserFactory::PREFER_PHP7);
通过解析器的parse方法将PHP代码解析成抽象语法树:
<?php
use PhpParser\Error;
use PhpParser\ParserFactory;

require 'vendor/autoload.php';

// Read the file to analyse. file_get_contents() returns false when the file
// cannot be read, and false must not be handed to the parser.
$code = file_get_contents("./test.php");
if ($code === false) {
    echo "Unable to read ./test.php\n";
    return;
}

$parser = (new ParserFactory)->create(ParserFactory::PREFER_PHP7);

try {
    // parse() converts the source text into the AST.
    $ast = $parser->parse($code);
} catch (Error $error) {
    echo "Parse error: {$error->getMessage()}\n";
    // $ast was never assigned, so nothing after this point may rely on it.
    return;
}
//view.php
<?php
require 'vendor/autoload.php';

use PhpParser\Error;
use PhpParser\NodeDumper;
use PhpParser\ParserFactory;

// Load the source of sample.php. file_get_contents() returns false when the
// file cannot be read, and false must not be handed to the parser.
$code = file_get_contents('sample.php');
if ($code === false) {
    echo "Unable to read sample.php\n";
    return;
}

// Create the parser.
$parser = (new ParserFactory)->create(ParserFactory::PREFER_PHP7);

try {
    // Parse the contents of sample.php into an AST.
    $ast = $parser->parse($code);
} catch (Error $error) {
    echo "Parse error: {$error->getMessage()}\n";
    return;
}

// Print a textual dump of the AST; no transformation is applied to it here.
$dumper = new NodeDumper;
echo $dumper->dump($ast) . "\n";
<?php
// Sample input: the string 'assert' is assembled by concatenating two literals,
// stored in $a, and $a is then called as a function with $_POST['x'].
$a = 'a'.'ssert';
$a($_POST['x']);
=======
array(
0: Stmt_Expression(
expr: Expr_Assign(
var: Expr_Variable(
name: a
)
expr: Expr_BinaryOp_Concat(
left: Scalar_String(
value: a
)
right: Scalar_String(
value: ssert
)
)
)
)
1: Stmt_Expression(
expr: Expr_FuncCall(
name: Expr_Variable(
name: a
)
args: array(
0: Arg(
name: null
value: Expr_ArrayDimFetch(
var: Expr_Variable(
name: _POST
)
dim: Scalar_String(
value: x
)
)
byRef: false
unpack: false
)
)
)
)
)
// Pass the AST held in $ast to the standard pretty printer and echo the
// text that prettyPrintFile() returns.
$prettyPrinter = new PrettyPrinter\Standard;
$prettyCode = $prettyPrinter->prettyPrintFile($ast);
echo $prettyCode;
“PhpParser\NodeVisitor”接口,该接口定义了4个遍历方法:
// Called before the traversal starts.
public function beforeTraverse(array $nodes);
// Called before the children of the node are traversed.
public function enterNode(\PhpParser\Node $node);
// Called when the traverser leaves the current node.
public function leaveNode(\PhpParser\Node $node);
// Called once after the traversal.
public function afterTraverse(array $nodes);
PHP-Parser实战
1.字符二元操作符还原
针对字符串的异或、拼接、与或非等操作进行还原,基础样本如下:
<?php
// Basic sample: the string 'assert' is assembled from single-character
// literals with the concatenation operator, stored in $a, and $a is then
// called as a function with $_POST['x'].
$a = 'a'.'s'.'s'.'e'.'r'.'t';
$a($_POST['x']);
?>
首先输出AST进行查看。
array(
0: Stmt_Expression(
expr: Expr_Assign(
var: Expr_Variable(
name: a
)
expr: Expr_BinaryOp_Concat(
left: Expr_BinaryOp_Concat(
left: Expr_BinaryOp_Concat(
left: Expr_BinaryOp_Concat(
left: Expr_BinaryOp_Concat(
left: Scalar_String(
value: a
)
right: Scalar_String(
value: s
)
)
right: Scalar_String(
value: s
)
)
right: Scalar_String(
value: e
)
)
right: Scalar_String(
value: r
)
)
right: Scalar_String(
value: t
)
)
)
)
1: Stmt_Expression(
expr: Expr_FuncCall(
name: Expr_Variable(
name: a
)
args: array(
0: Arg(
name: null
value: Expr_ArrayDimFetch(
var: Expr_Variable(
name: _POST
)
dim: Scalar_String(
value: x
)
)
byRef: false
unpack: false
)
)
)
)
)
/**
 * Folds a concatenation of two string literals into one string literal.
 *
 * The replacement happens in leaveNode(), i.e. after the operands have been
 * visited, so an inner concatenation that was already folded to a literal lets
 * the enclosing concatenation fold as well.
 */
class BinaryOpReducer extends NodeVisitorAbstract
{
    /**
     * @param Node $node Node the traverser is leaving.
     * @return Node\Scalar\String_|null Folded literal, or null to keep the node unchanged.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\BinaryOp\Concat) {
            return null;
        }

        if (!$node->left instanceof Node\Scalar\String_ || !$node->right instanceof Node\Scalar\String_) {
            return null;
        }

        // Use the same Node\... alias as the checks above, so that the class
        // resolves identically wherever this visitor is declared.
        return new Node\Scalar\String_($node->left->value . $node->right->value);
    }
}
<?php
$a = 'assert';
$a($_POST['x']);
/**
 * Replaces every string literal with a base64_decode() call on the
 * base64-encoded form of that literal, e.g. "Threatbook" becomes
 * base64_decode('VGhyZWF0Ym9vaw==').
 */
class Base64Reducer extends NodeVisitorAbstract
{
    /**
     * @param Node $node Node the traverser is leaving.
     * @return Node\Expr\FuncCall|null Replacement call, or null to keep the node unchanged.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Scalar\String_) {
            return null;
        }

        $encoded = new Node\Scalar\String_(base64_encode($node->value));

        return new Node\Expr\FuncCall(
            new Node\Name("base64_decode"),
            [new Node\Arg($encoded)]
        );
    }
}
<?php
$str = "Threatbook";
?>
--After parser:--
$str = base64_decode('VGhyZWF0Ym9vaw==');
代码如下:
// Variable renaming
/**
 * Renames variables whose names contain characters other than ASCII letters,
 * digits and underscores.
 *
 * Each distinct name is mapped to "v_<n>", where <n> is a counter value
 * assigned the first time the name is seen; later occurrences of the same name
 * reuse the same replacement.
 */
class ReNameVariable extends NodeVisitorAbstract
{
    /** @var int Next sequence number to hand out. */
    public $Count = 0;

    /** @var array Original variable name => sequence number assigned to it. */
    public $NewName = [];

    /**
     * @param Node $node Node the traverser is leaving.
     * @return Node\Expr\Variable|null Renamed variable, or null to keep the node unchanged.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\Variable) {
            return null;
        }

        $name = $node->name;

        // A variable-variable ($$x, ${expr}) has an expression node as its
        // name; only literal string names can be matched and renamed.
        if (!is_string($name)) {
            return null;
        }

        // Names made only of letters, digits and underscores are kept.
        if (preg_match('/^[a-zA-Z0-9_]+$/', $name)) {
            return null;
        }

        // First occurrence: record the number assigned to this name.
        if (!array_key_exists($name, $this->NewName)) {
            $this->NewName[$name] = $this->Count++;
        }

        return new Node\Expr\Variable('v_' . $this->NewName[$name]);
    }
}
可以看到原本的不可见变量名已经被重命名成了“v_”格式的变量。同时可以观察到“GLOBALS”变量的键名也是乱码字符,借鉴变量名重命名的思路对所有“GLOBALS”数组的键名进行重命名:
和上面不同的是我们恢复的是二维数组,所以要多包含一层判断:
/**
 * Renames string keys of single-level array accesses ($name['key']) when the
 * key contains characters other than ASCII letters, digits and underscores.
 *
 * Keys are numbered separately for each array variable: the first unreadable
 * key seen on $x becomes 'arr_0', the next distinct one 'arr_1', and so on.
 * Repeated occurrences of the same key on the same variable reuse its number.
 */
class ReNameArrayKeyValue extends NodeVisitorAbstract
{
    /** @var array Array variable name => next sequence number for that variable. */
    private $Count = [];

    /** @var array Array variable name => (original key => assigned sequence number). */
    private $NewName = [];

    /**
     * @param Node $node Node the traverser is leaving.
     * @return Node\Expr\ArrayDimFetch|null Access with the renamed key, or null to keep the node unchanged.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\ArrayDimFetch) {
            return null;
        }

        // Only plain "$variable[...]" accesses are handled. This also skips the
        // outer level of a nested access, whose base is itself an array access.
        $base = $node->var;
        if (!$base instanceof Node\Expr\Variable || !is_string($base->name)) {
            return null;
        }

        // The key must be a string literal; "$a[]" has no key at all and other
        // expressions carry no literal value to rename.
        $dim = $node->dim;
        if (!$dim instanceof Node\Scalar\String_) {
            return null;
        }

        $name = $base->name;
        $key = $dim->value;

        // Keys made only of letters, digits and underscores are kept.
        if (preg_match('/^[a-zA-Z0-9_]+$/', $key)) {
            return null;
        }

        // First unreadable key seen on this variable: start its numbering.
        if (!isset($this->Count[$name])) {
            $this->Count[$name] = 0;
            $this->NewName[$name] = [];
        }

        // First occurrence of this key on this variable: assign its number.
        if (!isset($this->NewName[$name][$key])) {
            $this->NewName[$name][$key] = $this->Count[$name]++;
        }

        return new Node\Expr\ArrayDimFetch(
            new Node\Expr\Variable($name),
            new Node\Scalar\String_('arr_' . $this->NewName[$name][$key])
        );
    }
}
/**
 * Inlines the elements of an array that the analysed code builds with
 * unserialize(base64_decode('...')).
 *
 * enterNode() finds the assignment "$var = unserialize(base64_decode('...'))",
 * decodes the payload, remembers the variable name and the decoded array, and
 * replaces the assignment with "$var = 0". leaveNode() then replaces
 * "$var[key]" accesses with the literal stored under that key.
 */
class ArrayToConstant extends NodeVisitorAbstract
{
    /** @var string Name of the variable that received the decoded array ('' until one is found). */
    public $variableName = '';

    /** @var array Decoded array, indexed by its original keys. */
    public $constants = [];

    /**
     * @param Node $node Node the traverser is entering.
     * @return Node\Expr\Assign|null Neutralised assignment, or null to keep the node unchanged.
     */
    public function enterNode(Node $node)
    {
        if (!$node instanceof Node\Expr\Assign) {
            return null;
        }

        // The target must be a plain variable: leaveNode() identifies the
        // array by that variable's name.
        $target = $node->var;
        if (!$target instanceof Node\Expr\Variable || !is_string($target->name)) {
            return null;
        }

        // Both call styles are accepted for each call: unserialize(...) and
        // ('unserialize')(...).
        $outer = $node->expr;
        if (!$this->isCallTo($outer, 'unserialize')) {
            return null;
        }

        $inner = $outer->args[0]->value;
        if (!$this->isCallTo($inner, 'base64_decode')) {
            return null;
        }

        $payload = $inner->args[0]->value;
        if (!$payload instanceof Node\Scalar\String_) {
            return null;
        }

        $raw = base64_decode($payload->value);
        if ($raw === false) {
            return null;
        }

        // The payload comes from the file under analysis, so objects must not
        // be instantiated while decoding it.
        $decoded = unserialize($raw, ['allowed_classes' => false]);
        if (!is_array($decoded)) {
            return null;
        }

        $this->variableName = $target->name;
        $this->constants = $decoded;

        return new Node\Expr\Assign($target, new Node\Scalar\LNumber(0));
    }

    /**
     * @param Node $node Node the traverser is leaving.
     * @return Node|null Literal standing for the array element, or null to keep the node unchanged.
     */
    public function leaveNode(Node $node)
    {
        if ($this->variableName === '') {
            return null;
        }

        if (!$node instanceof Node\Expr\ArrayDimFetch) {
            return null;
        }

        $base = $node->var;
        if (!$base instanceof Node\Expr\Variable || $base->name !== $this->variableName) {
            return null;
        }

        // Only literal keys can be looked up.
        $dim = $node->dim;
        if (!$dim instanceof Node\Scalar\String_ && !$dim instanceof Node\Scalar\LNumber) {
            return null;
        }

        // Skip keys that the decoded array does not contain.
        if (!array_key_exists($dim->value, $this->constants)) {
            return null;
        }

        $val = $this->constants[$dim->value];

        if (is_string($val)) {
            return new Node\Scalar\String_($val);
        }
        if (is_float($val)) {
            return new Node\Scalar\DNumber($val);
        }
        if (is_int($val)) {
            return new Node\Scalar\LNumber($val);
        }
        if (is_bool($val)) {
            return new Node\Expr\ConstFetch(new Node\Name\FullyQualified($val ? 'true' : 'false'));
        }

        // null and nested arrays have no single-literal form here; keep the access.
        // NOTE(review): an access used as an assignment target would also be
        // replaced by a literal — confirm the analysed samples never write to the array.
        return null;
    }

    /**
     * Tells whether $expr is a call with exactly one ordinary argument to the
     * given function, written either as name(...) or as ('name')(...).
     *
     * @param mixed  $expr     Candidate node.
     * @param string $function Lower-case function name to match.
     * @return bool
     */
    private function isCallTo($expr, $function)
    {
        if (!$expr instanceof Node\Expr\FuncCall) {
            return false;
        }

        if (count($expr->args) !== 1 || !$expr->args[0] instanceof Node\Arg) {
            return false;
        }

        $callee = $expr->name;

        // PHP function names are case-insensitive, so compare in lower case.
        if ($callee instanceof Node\Name) {
            return strtolower($callee->toString()) === $function;
        }
        if ($callee instanceof Node\Scalar\String_) {
            return strtolower($callee->value) === $function;
        }

        return false;
    }
}
/**
 * Folds constant arithmetic of the shape "a + (b - c)" into a single number.
 *
 * Each operand must be an integer literal or a numeric string literal; the
 * leftmost operand may additionally be negated with a unary minus.
 */
class ExpressionToNumber extends NodeVisitorAbstract
{
    /**
     * @param Node $node Node the traverser is leaving.
     * @return Node\Scalar\LNumber|Node\Scalar\DNumber|null Folded number, or null to keep the node unchanged.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\BinaryOp\Plus) {
            return null;
        }

        $difference = $node->right;
        if (!$difference instanceof Node\Expr\BinaryOp\Minus) {
            return null;
        }

        $a = $this->numericValue($node->left, true);
        $b = $this->numericValue($difference->left, false);
        $c = $this->numericValue($difference->right, false);

        if ($a === null || $b === null || $c === null) {
            return null;
        }

        $result = $a + $b - $c;

        // The sum is a float when an operand was a float-like string or when
        // the integer arithmetic overflowed; emit the matching literal type.
        if (is_int($result)) {
            return new Node\Scalar\LNumber($result);
        }

        return new Node\Scalar\DNumber($result);
    }

    /**
     * Extracts the numeric value of a literal operand.
     *
     * @param mixed $expr          Operand node.
     * @param bool  $allowNegation Whether a unary minus around the literal is accepted.
     * @return int|float|null Numeric value, or null when the operand is not a foldable literal.
     */
    private function numericValue($expr, $allowNegation)
    {
        if ($allowNegation && $expr instanceof Node\Expr\UnaryMinus) {
            $inner = $this->numericValue($expr->expr, false);

            return $inner === null ? null : -$inner;
        }

        if ($expr instanceof Node\Scalar\LNumber) {
            return $expr->value;
        }

        // Non-numeric strings are not folded: doing arithmetic on them here
        // would raise a warning or a TypeError inside the visitor.
        if ($expr instanceof Node\Scalar\String_ && is_numeric($expr->value)) {
            return $expr->value + 0;
        }

        return null;
    }
}
/**
 * Replaces chr(<integer literal>) with the one-character string it returns.
 *
 * The call may be written as chr(65) or as ('chr')(65).
 */
class ChrReducer extends NodeVisitorAbstract
{
    /**
     * @param Node $node Node the traverser is leaving.
     * @return Node\Scalar\String_|null Resulting literal, or null to keep the node unchanged.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\FuncCall) {
            return null;
        }

        // The callee may be a name node, a string literal, or any other
        // expression; only the first two carry a function name to compare.
        $callee = $node->name;
        if ($callee instanceof Node\Scalar\String_) {
            $function = $callee->value;
        } elseif ($callee instanceof Node\Name) {
            // NOTE(review): inside a namespace an unqualified chr() could refer
            // to a user-defined function — confirm samples are not namespaced.
            $function = $callee->toString();
        } else {
            return null;
        }

        // PHP function names are case-insensitive.
        if (strtolower($function) !== 'chr') {
            return null;
        }

        if (count($node->args) !== 1) {
            return null;
        }

        $arg = $node->args[0];
        if (!$arg instanceof Node\Arg || !$arg->value instanceof Node\Scalar\LNumber) {
            return null;
        }

        return new Node\Scalar\String_(chr($arg->value->value));
    }
}
/**
 * Folds a concatenation whose two operands are both string literals into one
 * string literal.
 */
class ConcatReducer extends NodeVisitorAbstract
{
    /**
     * @param Node $node Node the traverser is leaving.
     * @return Node\Scalar\String_|null Folded literal, or null to keep the node unchanged.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\BinaryOp\Concat) {
            return null;
        }

        $left = $node->left;
        $right = $node->right;

        if (!$left instanceof Node\Scalar\String_ || !is_string($left->value)) {
            return null;
        }
        if (!$right instanceof Node\Scalar\String_ || !is_string($right->value)) {
            return null;
        }

        return new Node\Scalar\String_($left->value . $right->value);
    }
}
/**
 * Replaces ('str_rot13')('<string literal>') with the string literal that
 * str_rot13() returns for that argument.
 */
class Rot13Reducer extends NodeVisitorAbstract
{
    /**
     * @param Node $node Node the traverser is leaving.
     * @return Node\Scalar\String_|null Decoded literal, or null to keep the node unchanged.
     */
    public function leaveNode(Node $node)
    {
        if (!($node instanceof Node\Expr\FuncCall)) {
            return;
        }

        // Only calls whose callee is the string literal 'str_rot13' are handled.
        $callee = $node->name;
        if (!($callee instanceof Node\Scalar\String_)) {
            return;
        }
        if (!is_string($callee->value) || $callee->value !== 'str_rot13') {
            return;
        }

        if (count($node->args) !== 1) {
            return;
        }

        // The single argument must itself be a string literal.
        $argument = $node->args[0];
        if (!($argument instanceof Node\Arg)) {
            return;
        }

        $literal = $argument->value;
        if (!($literal instanceof Node\Scalar\String_) || !is_string($literal->value)) {
            return;
        }

        return new Node\Scalar\String_(str_rot13($literal->value));
    }
}
结语
参考链接