Archive for category Computers and Internet

What Inception reminds me of?

Assassin's Creed 2

1 Comment

Fork 科普

From Wikipedia,

In computing, when a process forks, it creates a
copy of itself. More generally, a fork in
a multithreading environment means that a thread of
execution is duplicated, creating a child thread from the parent
thread.

Under Unix and Unix-like operating systems,
the parent and the child processes can tell each other apart by
examining the return value of the fork() system call. In
the child process, the return value of fork() is 0, whereas
the return value in the parent process is the PID of the
newly-created child process.

The fork operation creates a separate address space for
the child. The child process has an exact copy of all the memory
segments of the parent process, though if copy-on-write semantics
are implemented actual physical memory may not be assigned (i.e.,
both processes may share the same physical memory segments for a
while). Both the parent and child processes possess the same code
segments, but execute independently of each other.

简单的说,
就是子进程把父进程
copy了一下,
另起门户。

那么具体是怎么实现的呢?
各位大牛可能要说了, 不就是
syscall么,
id 是 sys_fork,
底层用do_fork实现。可是大家可曾想过,
这已经到了
kernel了, 算是bottom
half了吧, 那么用户空间怎么走到内核的呢?


我们来看个简单的程序

#include <stdio.h>   /* printf, stderr, fprintf */
#include <unistd.h>  /* _exit, fork */
#include <stdlib.h>  /* exit */
#include <errno.h>   /* errno */
 
/*
 * fork() demonstration: the process duplicates itself, then parent and
 * child each print a counter from 0 to 9, one line per second.
 *
 * Exit status: 0 from both parent and child on success, EXIT_FAILURE
 * from the (only) process if fork() fails.
 */
int main(void)
{
   pid_t  pid;

   /* Output from both the child and the parent process
    * will be written to the standard output,
    * as they both run at the same time.
    */
   pid = fork();
   if (pid == 0)
   {
      /* Child process:
       * When fork() returns 0, we are in
       * the child process.
       * Here we count up to ten, one each second.
       */
      int j;
      for (j = 0; j < 10; j++)
      {
         printf("child: %d\n", j);
         sleep(1);
      }
      /* _exit() does not flush stdio buffers, so push out the child's
       * own output first; otherwise it is lost whenever stdout is fully
       * buffered (pipe or file).  Nothing was written before fork(), so
       * no parent data can be duplicated by this flush.
       */
      fflush(stdout);
      _exit(0);  /* Note that we do not use exit() */
   }
   else if (pid > 0)
   {
      /* Parent process:
       * When fork() returns a positive number, we are in the parent process
       * (the fork return value is the PID of the newly-created child process).
       * Again we count up to ten.
       */
      int i;
      for (i = 0; i < 10; i++)
      {
         printf("parent: %d\n", i);
         sleep(1);
      }
      exit(0);
   }
   else
   {
      /* Error:
       * When fork() returns a negative number, an error happened
       * (for example, number of processes reached the limit).
       */
      fprintf(stderr, "can't fork, error %d\n", errno);
      exit(EXIT_FAILURE);
   }
}

这里可以看到fork
的定义在unistd.h中,
我们进去看看

/* Clone the calling process, creating an exact copy.
   Return -1 for errors, 0 to the new process,
   and the process ID of the new process to the old process.  */
extern __pid_t fork (void) __THROW;

#if defined __USE_BSD || defined __USE_XOPEN_EXTENDED
/* Clone the calling process, but without copying the whole address space.
   The calling process is suspended until the new process exits or is
   replaced by a call to `execve'.  Return -1 for errors, 0 to the new process,
   and the process ID of the new process to the old process.  */
extern __pid_t vfork (void) __THROW;
#endif

貌似跟到这里就跟丢了,
只好去
c库里面找找了,(鉴于我用的是Ubuntu
10 我们这里的c库也就是glibc
虽然可称得上最操蛋的
c库,
不过硬骨头啃完,还怕软的么
?)

# nm /usr/lib/libc.a | grep fork

nm: udiv_qrnnd.o: no symbols

nm: old_atexit.o: no symbols

U __unregister_atfork

nm: mp_clz_tab.o: no symbols

U __fork

U __linkin_atfork

000004cc b atfork_mem

000004e8 b atfork_recursive_cntr

00002080 t free_atfork

00004dc0 t malloc_atfork

fork.o:

00000000 W __fork

00000004 C __fork_generation_pointer

00000004 C __fork_handlers

U __fork_lock

00000000 T __libc_fork

00000000 W fork

vfork.o:

00000000 T __vfork

00000000 W vfork

nm: getopt_init.o: no symbols

nm: init-posix.o: no symbols

U __fork

U __vfork

U __fork

nm: lseek64.o: no symbols

nm: oldgetrlimit64.o: no symbols

U __fork

nm: internal_accept4.o: no symbols

register-atfork.o:

U __fork_handlers

00000000 B __fork_lock

00000000 T __linkin_atfork

00000030 T __register_atfork

00000020 b fork_handler_pool

unregister-atfork.o:

U __fork_handlers

U __fork_lock

00000000 T __unregister_atfork

U __fork_generation_pointer

U __register_atfork

U __fork

U __fork

nm: getutmpx.o: no symbols

可见fork的实现还是在c库中,
nptl/sysdeps/unix/sysv/linux/pt-fork.c(NPTL
== Native Posix Thread Library)

#include <unistd.h>

/* Thin wrapper: call __libc_fork and hand its return value straight
   back to the caller.  */
pid_t
__fork (void)
{
  const pid_t libc_result = __libc_fork ();

  return libc_result;
}

/* Make the name `fork' refer to the same definition as __fork.  */
strong_alias (__fork, fork)

不过这个也不是fork啊,
大家可以注意一下这个宏
, strong_alias(__fork, fork),
它的实现是,

/* Declare `fork' with the same type as __fork and bind it to the
   __fork definition through the GNU alias attribute.  */
extern __typeof__ (__fork) fork __attribute__ ((__alias__ ("__fork")));

还有个weak
alias,

void fork()__attribute__((weak,
alias("__fork")));

简单google了一下,
貌似这个
strong alias是为了防止c库符号被其他库符号覆盖掉而使用的,
如果
fork()被覆盖了,
还有
__fork()可以用,
不过
weak 和 strong
alias 的区别没搞清楚。

好了,
回到主线, 看看
__libc_fork()实现,
./nptl/sysdeps/unix/sysv/linux/fork.c

pid_t

__libc_fork (void)

{

pid_t pid;

/* We need to prevent the getpid()
code to update the PID field so

that, if a signal arrives in the
child very early and the signal

handler uses getpid(), the value
returned is correct. */

pid_t parentpid = THREAD_GETMEM
(THREAD_SELF, pid);

THREAD_SETMEM (THREAD_SELF, pid,
-parentpid);

#ifdef ARCH_FORK

pid = ARCH_FORK ();

#else

# error "ARCH_FORK must be defined
so that the CLONE_SETTID flag is used"

pid = INLINE_SYSCALL (fork, 0);

#endif

if (pid == 0)

{

struct pthread *self =
THREAD_SELF;

if (__fork_generation_pointer !=
NULL)

*__fork_generation_pointer += 4;

/* Adjust the PID field for the
new process. */

THREAD_SETMEM (self, pid,
THREAD_GETMEM (self, tid));

}

else

{

/* Restore the PID value. */

THREAD_SETMEM (THREAD_SELF, pid,
parentpid);

}

return pid;

}

weak_alias (__libc_fork, __fork)

libc_hidden_def (__fork)

weak_alias (__libc_fork, fork)

继续跟入i386
ARCH_FORK, ./nptl/sysdeps/unix/sysv/linux/i386/fork.c,

#include <sched.h>

#include <signal.h>

#include <sysdep.h>

#include <tls.h>

/* i386 definition of ARCH_FORK: issue the `clone' system call with five
   arguments -- flags CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | SIGCHLD,
   0, NULL, NULL, and the address of the calling thread's `tid' field.
   The whole invocation must be one logical line, hence the trailing
   backslashes.  */
#define ARCH_FORK() \
  INLINE_SYSCALL (clone, 5, \
                  CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | SIGCHLD, 0, \
                  NULL, NULL, &THREAD_SELF->tid)

#include "../fork.c"

这里的INLINE_SYSCALL实现是,
sysdeps/unix/sysv/linux/i386/sysdep.h

/* Perform system call NAME with NR arguments ARGS via INTERNAL_SYSCALL
   and translate the raw result into the C library convention: if
   INTERNAL_SYSCALL_ERROR_P reports an error, store the code obtained
   from INTERNAL_SYSCALL_ERRNO with __set_errno and evaluate to -1
   (0xffffffff converted to int); otherwise evaluate to the raw result
   as an int.  Uses a GNU statement expression and a named variadic
   parameter.  */
#define INLINE_SYSCALL(name, nr, args...) \
  ({ \
    unsigned int resultvar = INTERNAL_SYSCALL (name, , nr, args); \
    if (__builtin_expect (INTERNAL_SYSCALL_ERROR_P (resultvar, ), 0)) \
      { \
        __set_errno (INTERNAL_SYSCALL_ERRNO (resultvar, )); \
        resultvar = 0xffffffff; \
      } \
    (int) resultvar; })

这里又是INTERNAL_SYSCALL

/* INTERNAL_SYSCALL (name, err, nr, args...): raw i386 system call.

   Every variant moves the syscall number __NR_<name> into %eax, enters
   the kernel, and evaluates to the value left in %eax (output
   constraint "=a") as an int.  LOADARGS_<nr>, RESTOREARGS_<nr>,
   EXTRAVAR_<nr> and ASMFMT_<nr> are selected by the argument count NR
   and are defined outside this excerpt.  The `err' parameter is not
   used by any of the three bodies.

   The variants differ only in the kernel entry instruction.  */
#ifdef I386_USE_SYSENTER
# ifdef SHARED
/* Indirect call through the `sysinfo' slot of the thread control
   block, addressed relative to %gs.  */
#  define INTERNAL_SYSCALL(name, err, nr, args...) \
  ({ \
    register unsigned int resultvar; \
    EXTRAVAR_##nr \
    asm volatile ( \
    LOADARGS_##nr \
    "movl %1, %%eax\n\t" \
    "call *%%gs:%P2\n\t" \
    RESTOREARGS_##nr \
    : "=a" (resultvar) \
    : "i" (__NR_##name), "i" (offsetof (tcbhead_t, sysinfo)) \
      ASMFMT_##nr(args) : "memory", "cc"); \
    (int) resultvar; })
# else
/* Indirect call through the global _dl_sysinfo.  */
#  define INTERNAL_SYSCALL(name, err, nr, args...) \
  ({ \
    register unsigned int resultvar; \
    EXTRAVAR_##nr \
    asm volatile ( \
    LOADARGS_##nr \
    "movl %1, %%eax\n\t" \
    "call *_dl_sysinfo\n\t" \
    RESTOREARGS_##nr \
    : "=a" (resultvar) \
    : "i" (__NR_##name) ASMFMT_##nr(args) : "memory", "cc"); \
    (int) resultvar; })
# endif
#else
/* Software interrupt 0x80.  */
# define INTERNAL_SYSCALL(name, err, nr, args...) \
  ({ \
    register unsigned int resultvar; \
    EXTRAVAR_##nr \
    asm volatile ( \
    LOADARGS_##nr \
    "movl %1, %%eax\n\t" \
    "int $0x80\n\t" \
    RESTOREARGS_##nr \
    : "=a" (resultvar) \
    : "i" (__NR_##name) ASMFMT_##nr(args) : "memory", "cc"); \
    (int) resultvar; })
#endif

这里差不多就明白了,
I386_USE_SYSENTER没有define时,
就使用了经典的
intel系统调用代码int
$0x80了,这里的参数是__NR_clone.

不过I386_USE_SYSENTER是什么意思呢?
call *%%gs:%P2 和 call
*_dl_sysinfo 又是什么意思呢? 天色不早了, 下次再说吧

Leave a comment

cool keyboard

 

Leave a comment