通过v8 0.1.5源码分析js的编译、执行过程

theanarkh

发布于 2020-02-18 12:44:39

2.3K0

发布于 2020-02-18 12:44:39

文章被收录于专栏：原创分享

我们从使用v8执行一段js代码开始，分析整个流程。

    // Embedder-side usage example: create a context, compile a one-line script
    // and run it.
    // NOTE(review): `isolate` is not declared in this excerpt; it is presumably a
    // v8::Isolate* created earlier by the embedder — confirm against the full sample.
    v8::Isolate::Scope isolate_scope(isolate);

    // Create a stack-allocated handle scope.
    v8::HandleScope handle_scope(isolate);

    // Create a new context.
    v8::Local<v8::Context> context = v8::Context::New(isolate);

    // Enter the context for compiling and running the hello world script.
    v8::Context::Scope context_scope(context);

    // Create a string containing the JavaScript source code.
    v8::Local<v8::String> source = v8::String::NewFromUtf8(isolate, "'Hello' + ', World!'", v8::NewStringType::kNormal).ToLocalChecked();

    // Compile the source code.
    v8::Local<v8::Script> script = v8::Script::Compile(context, source).ToLocalChecked();

    // Run the script to get the result.
    v8::Local<v8::Value> result = script->Run(context).ToLocalChecked();

我们主要关注Compile和Run这两个函数。这两个函数都属于Script这个类，我们看看定义。（注意：上面的示例使用的是新版v8的API，Compile和Run都带有context参数；下面分析的是0.1.5版本的定义，参数有所不同。）

// Public API class for a script, as quoted (abridged) by the article.
class Script {
 public:

  // Compiles `source` and returns the resulting Script handle.
  // `origin` and `pre_data` are optional and default to NULL.
  static Local<Script> Compile(Handle<String> source,ScriptOrigin* origin = NULL,ScriptData* pre_data = NULL);

  // Runs the script and returns its result as a Local<Value>.
  Local<Value> Run();
};

实现（省略不重要的代码）

// OpenHandle and ToLocal are the helpers V8 uses to convert between its public
// API classes and its internal classes. For example, the public class Object is
// only a thin external interface; according to the article, the work is
// ultimately done by the internal JSObject class.
//
// Compiles `source` into a boilerplate function, creates a function from that
// boilerplate and returns it wrapped as a Script (abridged by the article).
// NOTE(review): name_obj, line_offset, column_offset and pre_data are not
// declared in this excerpt; presumably they are derived from `origin` and
// `script_data` in the omitted code — confirm against the V8 0.1.5 source.
Local<Script> Script::Compile(v8::Handle<String> source,v8::ScriptOrigin* origin,v8::ScriptData* script_data) {

  // Convert the public string handle into an internal string handle.
  i::Handle<i::String> str = Utils::OpenHandle(*source);
  // Run the internal compiler on the source string.
  i::Handle<i::JSFunction> boilerplate = i::Compiler::Compile(str,name_obj,line_offset,column_offset,NULL,pre_data);
  // Create a function from the boilerplate, passing the current global context.
  i::Handle<i::JSFunction> result = i::Factory::NewFunctionFromBoilerplate(boilerplate, i::Top::global_context());
  // Convert the internal function handle to the public Script type.
  return Local<Script>(ToApi<Script>(result));
}

// Runs the compiled script: calls the underlying function with the current
// context's global object as receiver and no arguments, and returns the result.
// NOTE(review): has_pending_exception is not declared in this abridged excerpt.
Local<Value> Script::Run() {

  i::Object* raw_result = NULL;
  {
    HandleScope scope;
    // Open `this` as the internal JSFunction handle.
    i::Handle<i::JSFunction> fun = Utils::OpenHandle(this);
    i::Handle<i::Object> global(i::Top::context()->global());
    // Call with argc = 0 and args = NULL.
    i::Handle<i::Object> result = i::Execution::Call(fun, global, 0, NULL, &has_pending_exception);
    // Keep the raw pointer so the value can be re-wrapped after this block
    // (presumably because handles created here end with `scope` — TODO confirm).
    raw_result = *result;
  }
  // Re-wrap the raw result in a handle outside the inner block.
  i::Handle<i::Object> result(raw_result);
  return Utils::ToLocal(result);
}

对于一个js脚本，首先用Compile函数进行编译，然后用Run函数执行。我们从Compile函数开始，看一下整个编译的过程。

// Compiles `source`: creates a Script object describing it and delegates to
// MakeFunction with is_global = true and is_eval = false.
// NOTE(review): the call below uses `pre_data` while the parameter is named
// `input_pre_data`, and script_name / line_offset / column_offset are unused
// here; the code connecting them was presumably omitted by the article —
// confirm against the V8 0.1.5 source.
Handle<JSFunction> Compiler::Compile(Handle<String> source,
                                     Handle<String> script_name,
                                     int line_offset, int column_offset,
                                     v8::Extension* extension,
                                     ScriptDataImpl* input_pre_data) {

  // The VM is in the COMPILER state until exiting this function.
  VMState state(COMPILER);

  // Create a script object describing the script to be compiled.
  Handle<Script> script = Factory::NewScript(source);

  Handle<JSFunction> result = MakeFunction(true, false, script, extension, pre_data);

  return result;
}

我们看到Compile函数比较简单，主要是以源码字符串为参数，新建一个Script对象，然后调用了MakeFunction。

// Compiles a script in three steps: build the AST, generate code from it, and
// allocate a function boilerplate that carries the generated code.
// Returns the boilerplate function.
static Handle<JSFunction> MakeFunction(bool is_global,
                                       bool is_eval,
                                       Handle<Script> script,
                                       v8::Extension* extension,
                                       ScriptDataImpl* pre_data) {

  // Build AST.
  FunctionLiteral* lit = MakeAST(is_global, script, extension, pre_data);

  // Compile the code.
  Handle<Code> code = MakeCode(lit, script, is_eval);

  // Allocate function.
  Handle<JSFunction> fun = Factory::NewFunctionBoilerplate(lit->name(),
                                      lit->materialized_literal_count(),
                                      code);

  return fun;
}

MakeFunction函数主要是调用MakeAST函数构造ast树，然后调用MakeCode生成二进制代码。我们先看MakeAST。

// Parses the script's source text and returns the FunctionLiteral produced by
// the parser's ParseProgram.
// NOTE(review): allow_natives_syntax is not declared in this abridged excerpt.
FunctionLiteral* MakeAST(bool compile_in_global_context,
                         Handle<Script> script,
                         v8::Extension* extension,
                         ScriptDataImpl* pre_data) {

  AstBuildingParser parser(script, allow_natives_syntax, extension, pre_data);
  // Fetch the source string from the script object.
  Handle<String> source = Handle<String>(String::cast(script->source()));
  // Character input buffer over the source; handed to the parser as its stream.
  SafeStringInputBuffer input(source.location());
  FunctionLiteral* result = parser.ParseProgram(source, &input, compile_in_global_context);

  return result;
}

MakeAST函数新建一个parser，然后调用它的ParseProgram开始进行代码的解析。

// Parses a whole program and returns a FunctionLiteral holding the parsed
// statements, or NULL if parsing did not succeed (`ok` was cleared).
FunctionLiteral* Parser::ParseProgram(Handle<String> source,
                                      unibrow::CharacterStream* stream,
                                      bool in_global_context) {
  // Initialize the lexical scanner.
  scanner_.Init(source, stream, 0);

  // Global scope when compiling in the global context, eval scope otherwise.
  Scope::Type type = in_global_context ? Scope::GLOBAL_SCOPE : Scope::EVAL_SCOPE;
  Handle<String> no_name = factory()->EmptySymbol();

  FunctionLiteral* result = NULL;
  { 
    // Create a new scope.
    Scope* scope = factory()->NewScope(top_scope_, type, inside_with());
    // Make `scope` the parser's current scope; lexical_scope remembers the
    // previous scope and restores it on destruction (per the article;
    // LexicalScope itself is not shown in this excerpt).
    LexicalScope lexical_scope(this, scope);
    TemporaryScope temp_scope(this);
    // List that collects the parsed statements.
    ZoneListWrapper<Statement> body(16);
    bool ok = true;
    // Start parsing.
    ParseSourceElements(&body, Token::EOS, &ok);
    if (ok) {
      // Build the FunctionLiteral that is returned to the caller.
      result = NEW(FunctionLiteral(no_name, top_scope_,
                                   body.elements(),
                                   temp_scope.materialized_literal_count(),
                                   temp_scope.expected_property_count(),
                                   0, 0, source->length(), false));
    }
  }

  return result;
}

ParseProgram调用ParseSourceElements函数解析代码，body里保存解析的结果，最后生成一个FunctionLiteral对象返回。所以我们知道FunctionLiteral对象保存了解析的结果。接着看ParseSourceElements。

// Parses statements until `end_token` is the next token, appending each
// non-empty statement to `processor`. Returns 0.
// NOTE(review): CHECK_OK is a macro not shown in this excerpt; presumably it
// passes `ok` and bails out when parsing fails — confirm.
void* Parser::ParseSourceElements(ZoneListWrapper<Statement>* processor,
                                  int end_token,
                                  bool* ok) {
  // Point the parser's target_stack_ at this scope; it is restored on
  // destruction (per the article; TargetScope is not shown in this excerpt).
  TargetScope scope(this);

  while (peek() != end_token) {
    Statement* stat = ParseStatement(NULL, CHECK_OK);
    // Parsed successfully: append to the list.
    if (stat && !stat->IsEmpty()) processor->Add(stat);
  }
  return 0;
}

ParseSourceElements就是遍历代码，调用ParseStatement解析，把解析结果存到一个list里。

// Parses one statement by dispatching on the next token to the matching
// Parse* method, then records the statement's source position on the result.
Statement* Parser::ParseStatement(ZoneStringList* labels, bool* ok) {

  // Start position of the upcoming token; stored on the statement below.
  int statement_pos = scanner().peek_location().beg_pos;
  Statement* stmt = NULL;
  switch (peek()) {
    // Many cases, one per language construct (if, for, ...); only a few are
    // listed here.
    case Token::CONST:  // fall through
    case Token::VAR:
      stmt = ParseVariableStatement(ok);
      break;
    case Token::IF:
      stmt = ParseIfStatement(labels, ok);
      break;

    case Token::DO:
      stmt = ParseDoStatement(labels, ok);
      break;

    case Token::WHILE:
      stmt = ParseWhileStatement(labels, ok);
      break;
    case Token::FUNCTION:
      // Returns directly, so the position bookkeeping below is skipped for
      // function declarations.
      return ParseFunctionDeclaration(ok);

    default:
      stmt = ParseExpressionOrLabelledStatement(labels, ok);
  }

  // Store the source position of the statement
  if (stmt != NULL) stmt->set_statement_pos(statement_pos);
  return stmt;
}

这里就是根据当前解析的token，调用不同的解析方法，比如if，变量声明，函数等。我们看一下变量声明，大致了解一下ast里有什么。

// Parses a variable statement: the declarations followed by a semicolon.
// Returns the Block produced by ParseVariableDeclarations.
// NOTE(review): CHECK_OK is a macro not shown in this excerpt; presumably it
// passes `ok` and bails out when parsing fails — confirm.
Block* Parser::ParseVariableStatement(bool* ok) {

  // Out-parameter required by ParseVariableDeclarations; its value is unused here.
  Expression* dummy; 
  Block* result = ParseVariableDeclarations(true, &dummy, CHECK_OK);
  ExpectSemicolon(CHECK_OK);
  return result;
}

// Parses the comma-separated declarations of a variable statement and returns
// a Block containing one assignment statement per initialized variable.
//
// accept_IN is forwarded to ParseAssignmentExpression. `ok` is the error flag
// consumed by the CHECK_OK macro (not shown in this excerpt).
// NOTE(review): `var` is never written and `op` is never declared in this
// abridged excerpt; both are presumably handled in code the article omitted —
// confirm against the V8 0.1.5 source.
Block* Parser::ParseVariableDeclarations(bool accept_IN,
                                         Expression** var,
                                         bool* ok) {

  Variable::Mode mode = Variable::VAR;
  bool is_const = false;
  // ... (elided by the article: detect whether this is a const declaration)

  Block* block = NEW(Block(NULL, 1, true));
  VariableProxy* last_var = NULL;  // the last variable declared
  int nvars = 0;  // the number of variables declared
  do {
    // Parse variable name.
    if (nvars > 0) Consume(Token::COMMA);
    // The variable's name.
    Handle<String> name = ParseIdentifier(CHECK_OK);
    // Declare the variable in the current scope, i.e. record this name's
    // information in the scope object.
    last_var = Declare(name, mode, NULL, is_const, CHECK_OK);
    nvars++;

    Expression* value = NULL;
    int position = -1;

    // The variable has an initializer, e.g. `var a = 1;`.
    if (peek() == Token::ASSIGN) {
      Expect(Token::ASSIGN, CHECK_OK);
      position = scanner().location().beg_pos;
      // Parse the initializer expression.
      value = ParseAssignmentExpression(accept_IN, CHECK_OK);
    }
    // There is an initial value.
    if (value != NULL) {
      // last_var is the object representing the variable name.
      Assignment* assignment = NEW(Assignment(op, last_var, value, position));
      if (block) {
        block->AddStatement(NEW(ExpressionStatement(assignment)));
      }
    }
  } while (peek() == Token::COMMA);

  return block;
}

我们看上面对变量定义的解析过程，大致是：1. 解析变量名；2. 在当前scope定义一个变量，名字就是第1步解析出的变量名；3. 判断是否初始化了，即是否有值，有的话解析这个值；4. 有值则生成赋值语句加到block里；5. 返回这个block；6. 这个block又会挂载到ast树的父节点中。这时候的ast和scope链大概如下图所示。

（图片：在这里插入图片描述）

通过对整个代码的解析，最后整个树会变得越来越大。ast树里有各种各样的节点，在ast.h里有定义。这样ast树就构造完成了。我们回到最开始的地方。

 // Start parsing.
    ParseSourceElements(&body, Token::EOS, &ok);
    if (ok) {
      // Build the FunctionLiteral that is returned to the caller.
      result = NEW(FunctionLiteral(no_name, top_scope_,
                                   body.elements(),
                                   temp_scope.materialized_literal_count(),
                                   temp_scope.expected_property_count(),
                                   0, 0, source->length(), false));
    }

调用ParseSourceElements构造完ast树后，调用方新建一个FunctionLiteral对象返回给上层。我们一直往上回溯。到MakeAST->MakeFunction。

// Build the AST first, then generate code from the resulting FunctionLiteral.
FunctionLiteral* lit = MakeAST(is_global, script, extension, pre_data);
Handle<Code> code = MakeCode(lit, script, is_eval);

解析完ast后，以MakeAST返回的FunctionLiteral对象开始生成二进制代码。我们继续分析MakeCode。

// Allocates variables starting from the outermost scope, then hands the
// function literal to the code generator and returns the generated code.
static Handle<Code> MakeCode(FunctionLiteral* literal,
                             Handle<Script> script,
                             bool is_eval) {

  // Walk outward from the literal's scope until there is no outer scope.
  Scope* top = literal->scope();
  while (top->outer_scope() != NULL) top = top->outer_scope();
  top->AllocateVariables();

  // Generate code and return it.
  Handle<Code> result = CodeGenerator::MakeCode(literal, script, is_eval);
  return result;
}

生成代码涉及到平台，不同的平台对应不同的机器指令。我们以ia32为例。

// Forwards to the IA-32 code generator (the article uses ia32 as its example
// platform) and returns the code it produces.
Handle<Code> CodeGenerator::MakeCode(FunctionLiteral* fun, Handle<Script> script, bool is_eval) {
  Handle<Code> code = Ia32CodeGenerator::MakeCode(fun, script, is_eval);
  return code;
}

// Generates code for `flit` with a fresh Ia32CodeGenerator, fetches the code
// description from its assembler and creates the Code object that is returned.
Handle<Code> Ia32CodeGenerator::MakeCode(FunctionLiteral* flit,
                                         Handle<Script> script,
                                         bool is_eval) {
  // Generate code.
  // initial_buffer_size (4 * KB) is passed to the generator's constructor.
  const int initial_buffer_size = 4 * KB;
  Ia32CodeGenerator cgen(initial_buffer_size, script, is_eval);

  cgen.GenCode(flit);
  // Fill `desc` with the description of the code the assembler produced.
  CodeDesc desc;
  cgen.masm()->GetCode(&desc);

  // Create the Code object from the description, scope info and flags.
  ScopeInfo<> sinfo(flit->scope());
  Code::Flags flags = Code::ComputeFlags(Code::FUNCTION);
  Handle<Code> code = Factory::NewCode(desc, &sinfo, flags);

  Bootstrapper::AddFixup(*code, cgen.masm());
  return code;
}

主要是GenCode和GetCode这两个方法，GenCode是生成代码的，GetCode是把代码保存到desc变量里。我们看一下代码。

// The real function is long; only part of it is listed here.
// Generates code for a function literal by visiting the statements of its body.
void Ia32CodeGenerator::GenCode(FunctionLiteral* fun) {
  // NOTE(review): `scope` is not used in this abridged excerpt.
  Scope* scope = fun->scope();
  ZoneList<Statement*>* body = fun->body();
  // Walk the AST and generate machine code.
  VisitStatements(body);
}
// Ia32CodeGenerator inherits from Visitor (per the article).
// Visits every statement in the list, in order.
void Visitor::VisitStatements(ZoneList<Statement*>* statements) {
  for (int i = 0; i < statements->length(); i++) {
    Visit(statements->at(i));
  }
}
// Here `this` is the Ia32CodeGenerator object (per the article).
// Dispatches to the node by calling its Accept with this visitor.
void Visit(Node* node) { node->Accept(this); }

通过层层调用，最后执行node的Accept函数。也就是说，ast树上的每个节点都定义了Accept方法，用来处理自身并生成代码。所有节点的Accept函数都是通过宏定义实现的。

  NODE_LIST(V) V(Block) V(Declaration) ...

  #define DECL_ACCEPT(type)                \
      void type::Accept(Visitor* v) {        \
        if (v->CheckStackOverflow()) return; \
        v->Visit##type(this);                \
      }
    NODE_LIST(DECL_ACCEPT)
  #undef DECL_ACCEPT

宏展开后形如。

// Illustrative macro expansion; here v is the Ia32CodeGenerator object (per
// the article). Each Accept calls the visitor method named after its own type.
void Block::Accept(Visitor* v) { 
    if (v->CheckStackOverflow()) return; v->VisitBlock(this); 
} 
void Declaration::Accept(Visitor* v) { 
    if (v->CheckStackOverflow()) return; v->VisitDeclaration(this); 
}
...

我们看到ast各个节点的Accept函数并不自己实现具体逻辑，而是直接调用访问者对应的Visit函数。Visit函数会根据不同平台有不同的实现，因为它是生成代码的函数。这里是Ia32CodeGenerator类的实现，下面找一个看一下。

// Emits code for an array literal: loads the literal's result object, then for
// every element that is not a Literal node, loads the element's value and
// stores it into the elements array at the element's index.
// NOTE(review): `__` is a macro not shown in this excerpt; presumably it is
// shorthand for emitting instructions through masm_ — confirm.
void Ia32CodeGenerator::VisitArrayLiteral(ArrayLiteral* node) {
  Comment cmnt(masm_, "[ ArrayLiteral");
  // Load the resulting object.
  Load(node->result());
  for (int i = 0; i < node->values()->length(); i++) {
    Expression* value = node->values()->at(i);

    // Only elements that are not Literal nodes get code emitted here.
    if (value->AsLiteral() == NULL) {

      Load(value);

      // Get the value off the stack.
      __ pop(eax);
      // Fetch the object literal while leaving on the stack.
      __ mov(ecx, TOS);
      // Get the elements array.
      __ mov(ecx, FieldOperand(ecx, JSObject::kElementsOffset));

      // Write to the indexed properties array.
      int offset = i * kPointerSize + Array::kHeaderSize;
      __ mov(FieldOperand(ecx, offset), eax);

      // Update the write barrier for the array address.
      __ RecordWrite(ecx, offset, eax, ebx);
    }
  }
}

我们不需要看懂里面的意思，感受一下就行。我们看到里面是一些看起来像汇编的代码。比如push的实现：

// pc_ points into a buffer; EMIT writes one value through it and advances it,
// so machine instructions are appended one after another.
#define EMIT(x)  *pc_++ = (x)
// Emits a push of an immediate operand, choosing the encoding by operand size.
void Assembler::push(const Immediate& x) {
  EnsureSpace ensure_space(this);
  // Remember where this instruction starts.
  last_pc_ = pc_;
  if (x.is_int8()) {
    // Fits in 8 bits: opcode 0x6a (PUSH imm8 per the IA-32 manual), then the
    // immediate value.
    EMIT(0x6a);
    EMIT(x.x_);
  } else {
    // Otherwise: opcode 0x68 (PUSH imm32 per the IA-32 manual), then the
    // immediate written by emit().
    EMIT(0x68);
    emit(x);
  }
}

可以看到，就是不断地生成机器代码。遍历完ast树后，就得到一段二进制代码。我们往前回溯一下过程。下面再列一下开始遍历ast树生成代码的那段逻辑。

    // Recap of Ia32CodeGenerator::MakeCode (abridged): generate the code, fetch
    // its description into desc, and return the code object (its creation from
    // desc is not repeated in this excerpt).
    cgen.GenCode(flit);
    CodeDesc desc;
      cgen.masm()->GetCode(&desc);
      return code;

GenCode生成代码后，通过GetCode把代码信息保存到desc里，再根据desc创建并返回一个code对象。回到MakeFunction函数看剩余的代码。

// Recap of the rest of MakeFunction: generate the code, then allocate a
// function boilerplate from the literal's name, its materialized literal count
// and that code.
Handle<Code> code = MakeCode(lit, script, is_eval);
Handle<JSFunction> fun = Factory::NewFunctionBoilerplate(lit->name(),
                                      lit->materialized_literal_count(),
                                      code);

继续回溯到Compiler::Compile函数。它直接返回MakeFunction函数返回的result，编译结束。下面继续看Run函数。

// Executes the script: opens `this` as a JSFunction, calls it through
// i::Execution::Call with the current context's global object as receiver and
// zero arguments, and converts the result back to a public handle.
// NOTE(review): has_pending_exception is not declared in this abridged excerpt.
Local<Value> Script::Run() {

  i::Object* raw_result = NULL;
  {
    HandleScope scope;
    i::Handle<i::JSFunction> fun = Utils::OpenHandle(this);
    i::Handle<i::Object> global(i::Top::context()->global());
    i::Handle<i::Object> result = i::Execution::Call(fun, global, 0, NULL, &has_pending_exception);
    // Save the raw object pointer before leaving the inner block
    // (presumably because `scope` ends the handles created here — TODO confirm).
    raw_result = *result;
  }
  // Wrap the raw pointer in a new handle and convert it to Local<Value>.
  i::Handle<i::Object> result(raw_result);
  return Utils::ToLocal(result);
}

里面的this就是编译时返回的result，通过OpenHandle转成一个函数对象（JSFunction）。我们看到主要是调用i::Execution::Call执行。

// Calls `func` with the given receiver and arguments by forwarding to Invoke
// with construct = false.
Handle<Object> Execution::Call(Handle<JSFunction> func,
                               Handle<Object> receiver,
                               int argc,
                               Object*** args,
                               bool* pending_exception) {
  return Invoke(false, func, receiver, argc, args, pending_exception);
}

// Enters generated code through an entry stub: picks the construct stub or the
// plain JS entry stub, casts the stub's entry address to a function pointer and
// calls it with the target function's code entry, the function, the receiver
// and the arguments. Returns the call's result wrapped in a handle.
// NOTE(review): `value` is not declared and has_pending_exception is not used
// in this abridged excerpt — confirm against the V8 0.1.5 source.
static Handle<Object> Invoke(bool construct,
                             Handle<JSFunction> func,
                             Handle<Object> receiver,
                             int argc,
                             Object*** args,
                             bool* has_pending_exception) {

  // Entering JavaScript.
  VMState state(JS);
  // Function-pointer type used to call the entry stub.
  typedef Object* (*JSEntryFunction)(
    byte* entry,
    Object* function,
    Object* receiver,
    int argc,
    Object*** args);

  // Choose the entry stub depending on whether this is a construct call.
  Handle<Code> code;
  if (construct) {
    JSConstructEntryStub stub;
    code = stub.GetCode();
  } else {
    JSEntryStub stub;
    code = stub.GetCode();
  }

  {
    SaveContext save;
    NoHandleAllocation na;
    // Treat the stub's entry address as a callable function.
    JSEntryFunction entry = FUNCTION_CAST<JSEntryFunction>(code->entry());

    // Call the function through the right JS entry stub.
    value = CALL_GENERATED_CODE(entry, func->code()->entry(), *func,
                                *receiver, argc, args);
  }

  return Handle<Object>(value);
}

// Expands to a direct call of `entry` with the five arguments. Note that the
// macro body already ends with a semicolon.
#define CALL_GENERATED_CODE(entry, p0, p1, p2, p3, p4) entry(p0, p1, p2, p3, p4);

大概就是把函数指针指向一段生成好的机器代码，然后直接调用执行。这就是Run函数的逻辑。