Linux_Kernel_map4vehicle/srcxray.py

1488 lines
47 KiB
Python
Raw Permalink Normal View History

2018-07-19 02:46:21 +08:00
#!/usr/bin/python3
#
2018-07-19 04:48:22 +08:00
# srcxray - source code X-ray
2018-07-19 02:46:21 +08:00
#
# Analyzes interconnections between functions and structures in source code.
#
2020-07-12 16:59:45 +08:00
# Uses doxygen, git grep --show-function and cscope to
2018-07-19 02:46:21 +08:00
# reveal references between identifiers.
#
2020-07-12 16:59:45 +08:00
# Since 2018, Costa Shulyupin, costa@MakeLinux.net
2018-07-19 02:46:21 +08:00
#
2022-11-04 15:17:04 +08:00
# install system packages: python3-scipy cscope
2018-07-19 02:46:21 +08:00
2020-07-20 02:13:23 +08:00
import inspect
from inspect import (currentframe, getframeinfo, getouterframes, stack,
2020-07-21 02:31:20 +08:00
getmembers, isfunction)
2020-07-20 02:13:23 +08:00
import types
2018-07-19 12:52:33 +08:00
import random
import os
import sys
2020-07-20 02:13:23 +08:00
from sys import *
2018-07-19 12:52:33 +08:00
import collections
2018-08-07 13:59:50 +08:00
from munch import *
from subprocess import *
2018-07-19 12:52:33 +08:00
import re
2018-07-22 05:32:07 +08:00
import networkx as nx
2018-07-31 10:18:58 +08:00
# from networkx.drawing.nx_agraph import read_dot # changes order of successors
2018-10-30 04:09:05 +08:00
# from networkx.drawing.nx_pydot import read_dot # no bad
2018-07-26 13:35:40 +08:00
from networkx.generators.ego import *
2018-08-07 13:59:50 +08:00
from networkx.algorithms.dag import *
2018-07-31 10:18:58 +08:00
from networkx.utils import open_file, make_str
2018-07-26 13:35:40 +08:00
from pprint import pprint
import difflib
2018-07-29 04:02:03 +08:00
import glob
from pathlib import *
2018-08-27 14:08:45 +08:00
import pygraphviz # python3-pygraphviz
2020-07-28 04:40:40 +08:00
import graphviz
2018-08-08 14:08:05 +08:00
import unittest
import types
2019-11-06 17:25:59 +08:00
from xml.dom.minidom import parse
import xml.dom.minidom
import ast
2020-08-09 17:51:23 +08:00
import xml.etree.ElementTree as ET
2018-07-29 04:02:03 +08:00
2018-07-29 14:22:36 +08:00
# Module-wide settings and shared state used by the functions below.
default_root = 'starts'    # title line printed above multiple start nodes
stop = list()              # names call_tree prints but does not descend into
ignore = list()            # names skipped by the tree/graph functions
ignored = set()            # names from `ignore` that call_tree actually met
show_ignored = False       # when set, call_tree prints ignored names dimmed
level_limit = 6            # depth limit for the tree printers
lines = 0                  # number of lines printed so far
try:
    # `stty size` prints "rows cols". Its output is empty when stdin is
    # not a terminal (pipe, cron, CI), so the [0] index used to raise
    # IndexError at import time; fall back to 30 lines instead.
    lines_limit = int(os.popen('stty size', 'r').read().split()[0]) or 30
except (IndexError, ValueError):
    lines_limit = 30
cflow_structs = False      # cflow_preprocess: treat structs like functions
scaled = False
verbose = False            # enables log() output
files = collections.defaultdict(list)
2018-07-19 04:48:22 +08:00
2020-07-21 02:31:20 +08:00
2018-07-29 21:01:48 +08:00
def print_limited(a, out=None):
    # exits when reaches limit of printed lines
    #
    # Writes str(a) plus a newline to `out` (sys.stdout when `out` is
    # falsy) and counts the line in the global `lines`. Once the counter
    # exceeds lines_limit + 1 the line is still written, followed by a
    # tab-only line, and the process exits with status 1.
    global lines
    stream = out or sys.stdout
    lines += 1
    text = str(a) + '\n'
    if lines > lines_limit + 1:
        stream.write(text)
        stream.write('\t\n')
        sys.exit(1)
    stream.write(text)
2018-07-19 02:46:21 +08:00
2022-11-04 12:36:44 +08:00
2020-12-26 21:04:01 +08:00
def print_limited2(a, out=None):
    # Writes str(a) plus a newline to `out` (sys.stdout when `out` is
    # falsy) and counts the line in the global `lines`. It never exits:
    # once the counter exceeds lines_limit + 1 it lowers the global
    # level_limit to 2 instead.
    global lines, level_limit
    stream = out or sys.stdout
    lines += 1
    if lines > lines_limit + 1:
        level_limit = 2
    stream.write(str(a) + '\n')
2018-07-19 04:48:22 +08:00
2022-11-04 12:36:44 +08:00
2018-07-19 02:46:21 +08:00
def log(*args, **kwargs):
    # log with context function
    #
    # Does nothing (returns None) unless the global `verbose` is set.
    # Otherwise prints "<file>:<line> <caller function>" followed by the
    # message to stderr and returns the message text.
    # Extra keyword arguments are passed through to print().
    if not verbose:
        return
    # str(*args) treated a second argument as an encoding and raised
    # TypeError; join the stringified arguments instead (zero or one
    # argument gives the same text as before).
    s = ' '.join(str(a) for a in args).rstrip()
    # The caller's frame already carries the function name, so the
    # costly stack() walk is not needed.
    frameinfo = getframeinfo(currentframe().f_back)
    print("%s:%d %s" % (frameinfo.filename, frameinfo.lineno,
                        frameinfo.function),
          s, file=sys.stderr, **kwargs)
    return s
2018-07-19 02:46:21 +08:00
2018-07-19 12:52:33 +08:00
2018-07-19 02:46:21 +08:00
def popen(p):
    # shortcut for reading output of subcommand
    #
    # Logs the command, runs it through the shell and returns its
    # standard output decoded as UTF-8 and split into lines.
    log(p)
    raw = check_output(p, shell=True)
    text = raw.decode('utf-8')
    return text.splitlines()
2018-07-19 02:46:21 +08:00
2018-07-19 12:52:33 +08:00
2020-07-12 16:59:45 +08:00
def extract_referrer(line):
    # Extract referrer function from output of
    # git grep --show-function.
    # With quirks for linux kernel
    #
    # `line` is expected to look like "<file>=<line number>=<text>".
    # Returns the regex groups (file, line number, identifier) of the
    # first pattern that matches, or None when none does.
    cleanups = (
        (r'__ro_after_init', ''),
        (r'FNAME\((\w+)\)', r'\1'),
        (r'.*TRACE_EVENT.*', ''),
    )
    for pattern, replacement in cleanups:
        line = re.sub(pattern, replacement, line)
    file_num = r'(^[^\s]+)=(\d+)=[^,]*'
    tails = (
        r'\(\*(\b\w+)\)\s*[\(\[=][^;]*$',  # file=(*name)
        r'(\b\w+)\s*[\(\[=][^;]*$',
        r'struct (\b\w+)',
    )
    # The tails are tried in order; the first match wins.
    for tail in tails:
        m = re.match(file_num + tail, line)
        if m:
            return m.groups()
    return None
2018-07-19 02:46:21 +08:00
2018-07-19 12:52:33 +08:00
2020-07-12 16:59:45 +08:00
def extract_referrer_test():
    # unittest of extract_referrer
    #
    # A sample containing 'bad' must not yield an identifier containing
    # 'bad'; a sample containing 'good' must yield a result. Failures are
    # printed, the number of passed samples is logged.
    samples = {
        "f=1=good2()",
        "f=2=static int fastop(struct x86_emulate_ctxt *ctxt, "
        + "void (*fop)(struct fastop *))",
        "f=3=int good(a, bad (*func)(arg))",
        "f=4=EXPORT_SYMBOL_GPL(bad);",
        "f=5=bad (*good)()",
        "f=6=int FNAME(good)(a)",
        "f=7=TRACE_EVENT(bad)",
        "f:8: a=in bad()",
        "f=9=struct good",
    }
    passed = 0
    for sample in samples:
        r = extract_referrer(sample)
        if 'bad' in sample and r and 'bad' in r[2]:
            print("ERROR: ", sample, '->', r)
        elif 'good' in sample and not r:
            print("ERROR:", sample)
        else:
            passed += 1
    log(passed)
2018-07-19 02:46:21 +08:00
2018-07-19 12:52:33 +08:00
2020-07-12 16:59:45 +08:00
def func_referrers_git_grep(name):
    # Subfunction for searching referrers with
    # git grep --show-function.
    # Works slowly.
    # Obsoleted by doxygen_xml.
    #
    # Returns a list of (file, line number, function) groups as produced
    # by extract_referrer.
    command = (r'git grep --threads 1 --no-index --word-regexp '
               r'--show-function --line-number '
               r'"^\s.*\b%s" '
               r'**.\[hc\] **.cpp **.cc **.hh || true' % (name))
    # Filter out names in comment after function,
    # when comment start from ' *'
    # To see the problem try "git grep -p and"
    excludes = (
        r'.*:\s+\* .*%s',
        r'.*/\*.*%s',
        r'.*//.*%s',
        r'.*".*\b%s\b.*"',
    )
    found = []
    # `pending` is the referrer extracted from the previous output line;
    # it is confirmed or dropped while looking at the current line.
    pending = None
    for line in popen(command):
        if any(re.match(p % (name), line) for p in excludes):
            pending = None
        if pending and pending[2] != name and pending[2] not in ignore:
            found.append(pending)
        pending = extract_referrer(line)
        # pending is list of file line func
        if verbose and pending:
            location = "%s:%s" % (pending[0], pending[1])
            print("%-40s\t%s" % (location, pending[2]))
    return found
2018-07-19 12:52:33 +08:00
2018-07-19 02:46:21 +08:00
# Set once the "run cscope" recommendation has been printed.
cscope_warned = False


def func_referrers_cscope(name):
    # Subfunction for searching referrers with cscope.
    # Works fast.
    # Prefer to use doxygen_xml.
    #
    # Returns a list of [file, line_num, func] entries taken from
    # `cscope -d -L3`. Without a cscope.out file it returns [] and prints
    # a recommendation to stderr (only the first time).
    # When cscope finds nothing and the name is longer than 3 characters
    # it falls back to func_referrers_git_grep.
    global cscope_warned
    if not os.path.isfile('cscope.out'):
        if not cscope_warned:
            print("Recommended: cscope -Rcbk", file=sys.stderr)
            cscope_warned = True
        return []
    res = list()
    for l in popen(r'cscope -d -L3 "%s"' % (name)):
        log(l)
        m = re.match(r'([^ ]*) ([^ ]*) ([^ ]*) (.*)', l)
        if not m:
            # A line without the four expected fields used to crash on
            # m.groups(); skip it instead.
            continue
        file, func, line_num, _ = m.groups()
        if func in ignore:
            continue
        res.append([file, line_num, func])
    if not res and len(name) > 3:
        log(name)
        res = func_referrers_git_grep(name)
    log(res)
    return res
2018-07-19 12:52:33 +08:00
2020-07-12 16:59:45 +08:00
def referrers_tree(name, referrer=None, printed=None, level=0):
    '''
    prints text referrers outline.
    Ex: nfs_root_data
    Obsoleted by doxygen_xml.
    '''
    # `referrer` is a callable (or the name of one) returning entries
    # whose third item is the referring identifier; by default cscope is
    # used when cscope.out exists, git grep otherwise.
    # `printed` collects the names already expanded, `level` is the
    # current depth.
    if not referrer:
        if os.path.isfile('cscope.out'):
            referrer = func_referrers_cscope
        else:
            print("Using git grep only, recommended to run: cscope -Rcbk",
                  file=sys.stderr)
            referrer = func_referrers_git_grep
    if isinstance(referrer, str):
        # NOTE(review): eval() of a caller-supplied string.
        referrer = eval(referrer)
    printed = printed or set()
    indent = level*'\t'
    # definition
    # cscope -d -L1 "arv_camera_new"
    if level > level_limit - 2:
        print_limited(indent + name)
        return ''
    if name in printed:
        # ' ^' marks a name that was already expanded above
        print_limited(indent + name + ' ^')
        return
    printed.add(name)
    print_limited(indent + name)
    for entry in referrer(name):
        referrers_tree(entry[2], referrer, printed, level + 1)
2018-07-19 02:46:21 +08:00
2020-07-21 02:31:20 +08:00
2020-07-12 16:59:45 +08:00
def referrers(name):
    '''
    simply greps referrers of a symbol
    Ex: nfs_root_data
    Prefer to use doxygen_xml.
    '''
    # Prints the referring identifiers on one line, space separated.
    names = [entry[2] for entry in func_referrers_git_grep(name)]
    print(' '.join(names))
2018-07-19 12:52:33 +08:00
2020-07-12 16:59:45 +08:00
def referrers_dep(name, referrer=None, printed=None, level=0):
    # prints referrers tree in compact format of
    # dependency of make
    # Obsoleted by doxygen_xml.
    #
    # Each printed line reads "name: referrer1 referrer2 ...".
    # Returns '' when the depth limit is reached, None otherwise.
    if not referrer:
        if os.path.isfile('cscope.out'):
            referrer = func_referrers_cscope
        else:
            print("Using git grep only, recommended to run: cscope -Rcbk",
                  file=sys.stderr)
            referrer = func_referrers_git_grep
    if isinstance(referrer, str):
        # NOTE(review): eval() of a caller-supplied string.
        referrer = eval(referrer)
    printed = printed or set()
    if name in printed:
        return
    if level > level_limit - 2:
        return ''
    users = [entry[2] for entry in referrer(name)]
    if not users:
        # TODO: print terminal
        return
    printed.add(name)
    print("%s:" % (name), ' '.join(users))
    for user in users:
        referrers_dep(user, referrer, printed, level + 1)
2018-07-19 19:03:30 +08:00
2018-07-19 12:52:33 +08:00
def call_tree(node, printed=None, level=0):
    '''
    prints call tree of a function
    Ex: start_kernel
    Obsoleted by doxygen_xml.
    '''
    # Requires cscope.out; returns False when it is missing.
    # `printed` holds the functions already expanded anywhere in the
    # tree, `level` is the current depth.
    if not os.path.isfile('cscope.out'):
        print("Please run: cscope -Rcbk", file=sys.stderr)
        return False
    if printed is None:
        printed = set()
    indent = level*'\t'
    if node in printed:
        # ' ^' marks a function that was already expanded above
        print_limited2(indent + node + ' ^')
        return
    if level > level_limit - 2:
        print_limited2(indent + node)
        return ''
    print_limited2(indent + node)
    printed.add(node)
    child_indent = (level + 1)*'\t'
    # Callees already handled for this node; each one is processed once.
    seen = set()
    for line in popen('cscope -d -L2 "%s"' % (node)):
        callee = line.split()[1]
        if callee in seen:
            continue
        seen.add(callee)
        if callee in stop:
            # printed, but not descended into
            print_limited2(child_indent + callee)
            continue
        if callee in ignore:
            ignored.add(callee)
            if show_ignored:
                # dimmed with ANSI escape codes
                print_limited2(child_indent + '\033[2;30m' + callee
                               + '\033[0m')
            continue
        call_tree(callee, printed, level + 1)
2018-07-19 02:46:21 +08:00
2018-07-19 12:52:33 +08:00
2018-07-19 16:45:49 +08:00
def call_dep(node, printed=None, level=0):
    # prints call tree in compact format of dependency of make
    # Obsoleted by doxygen_xml.
    #
    # Each printed line reads "function: callee1 callee2 ...".
    # Requires cscope.out; returns False when it is missing.
    if not os.path.isfile('cscope.out'):
        print("Please run: cscope -Rcbk", file=sys.stderr)
        return False
    if printed is None:
        printed = set()
    if node in printed:
        return
    callees = [line.split()[1] for line in
               popen('cscope -d -L2 "%s"' % (node))]
    calls = [a for a in callees if a not in ignore]
    if not calls or level >= level_limit - 1:
        # TODO: print terminal
        return
    # drop duplicates, keep first-seen order
    unique = list(dict.fromkeys(calls))
    printed.add(node)
    print("%s:" % (node), ' '.join(unique))
    for a in unique:
        call_dep(a, printed, level + 1)
2018-07-19 16:45:49 +08:00
2018-07-22 05:32:07 +08:00
def my_graph(name=None):
    # common subfunction
    # Creates an empty networkx DiGraph carrying the given name.
    # g.graph.update({'node': {'shape': 'none', 'fontsize': 50}})
    # g.graph.update({'rankdir': 'LR', 'nodesep': 0, })
    return nx.DiGraph(name=name)
2020-07-22 00:29:21 +08:00
def reduce_graph(g, min_in_degree=None):
    '''
    removes leaves
    Ex2: \"write_dot(reduce_graph(read_dot('doxygen.dot')),'reduced.dot')\"
    '''
    # A leaf is a node without outgoing edges. Only leaves whose
    # in-degree is below min_in_degree are removed; by default the
    # threshold is above any possible in-degree, so all leaves go.
    # The graph is modified in place and returned.
    if not min_in_degree:
        min_in_degree = g.number_of_nodes() + 1
    log(g.number_of_edges())
    leaves = []
    for n, d in g.out_degree:
        if not d and g.in_degree(n) < min_in_degree:
            leaves.append(n)
    g.remove_nodes_from(leaves)
    print(g.number_of_edges())
    return g
2020-07-21 03:31:40 +08:00
def includes(sym):
    # subfunction, used in syscalls
    # extracts include files of a symbol
    #
    # Returns a comma-separated string of header names, or 'unexported'
    # when no header was found.
    res = []
    # First choice: #include lines near the top of the section 2 man
    # page. Both commands used to be formatted with the not yet assigned
    # loop variable `a` instead of `sym`, which raised UnboundLocalError.
    for a in popen('man -s 2 %s 2> /dev/null |'
                   ' head -n 20 | grep include || true' % (sym)):
        m = re.match('.*<(.*)>', a)
        if m:
            res.append(m.group(1))
    if not res:
        # Fallback: headers under /usr/include that contain " sym (".
        for a in popen('grep -l -r " %s *(" '
                       '/usr/include --include "*.h" '
                       '2> /dev/null || true' % (sym)):
            # shorten the path to the bits/..., sys/... or
            # /usr/include relative form
            a = re.sub(r'.*/(bits)', r'\1', a)
            a = re.sub(r'.*/(sys)', r'\1', a)
            a = re.sub(r'/usr/include/(.*)', r'\1', a)
            res.append(a)
    res = set(res)
    if len(res) > 1:
        # With several candidates, drop those whose file (matched by
        # base name) does not contain " sym (".
        r = set()
        for f in res:
            if 0 != os.system(
                    'grep " %s *(" --include "%s" -r /usr/include/ -q'
                    % (sym, os.path.basename(f))):
                r.add(f)
        res = res.difference(r)
    log(res)
    return ','.join(list(res)) if res else 'unexported'
2018-07-22 05:32:07 +08:00
def syscalls():
    # Experimental function for exporting syscalls info
    # from various sources.
    # Used in creation of
    # https://en.wikibooks.org/wiki/The_Linux_Kernel/Syscalls
    # Ex: srcxray.py "write_dot(syscalls(), 'syscalls.dot')"
    #
    # Builds a graph "<top dir>/" -> "<source file>" -> "sys_<name>" from
    # the SYSCALL_DEFINE lines found in the C files below the current
    # directory. Both helper list files are created on first use.
    sc = my_graph('syscalls')
    inc = 'includes.list'
    if not os.path.isfile(inc):
        os.system('ctags --langmap=c:+.h --c-kinds=+pex -I __THROW '
                  + ' -R -u -f- /usr/include/ | cut -f1,2 > '
                  + inc)
    scd = 'SYSCALL_DEFINE.list'
    if not os.path.isfile(scd):
        os.system("grep SYSCALL_DEFINE -r --include='*.c' > " + scd)
    # syscall names matching any of these are skipped
    skipped_names = ('^old', '^xnew', r'.*64', r'.*32$', r'.*16$')
    with open(scd, 'r') as f:
        # names already handled; pre-seeded with syscalls to leave out
        v = set(['sigsuspend', 'llseek', 'sysfs',
                 'sync_file_range2', 'ustat', 'bdflush'])
        for s in f:
            if any(x in s.lower() for x in ['compat', 'stub']):
                continue
            m = re.match(r'(.*?):.*SYSCALL.*\(([\w]+)', s)
            if not m:
                continue
            src, raw_name = m.group(1), m.group(2)
            if any(re.match(p, raw_name) for p in skipped_names):
                continue
            syscall = re.sub('^new', '', raw_name)
            path = src.split('/')
            # `and` binds tighter than `or`: the last two tests form one
            # alternative.
            if (src.startswith('mm/nommu.c')
                    or src.startswith('arch/x86/ia32')
                    or src.startswith('arch/')
                    or syscall.startswith('vm86')
                    and not src.startswith('arch/x86')):
                continue
            if syscall in v:
                continue
            v.add(syscall)
            sc.add_edge(path[0] + '/', src)
            sc.add_edge(src, 'sys_' + syscall)
    return sc
2018-07-29 04:02:03 +08:00
def cleanup(a):
    # cleanups graph file
    # wrapper for remove_nodes_from
    #
    # Removes the nodes listed in `ignore` from the graph given by `a`
    # and writes the result back to `a`; the edge count is printed before
    # and after.
    # NOTE(review): to_dg is defined outside the visible part of the
    # file; presumably it loads `a` as a DiGraph - confirm.
    log('')
    # The result used to be bound to `g` while the rest of the function
    # used `dg`, which raised NameError.
    dg = to_dg(a)
    print(dg.number_of_edges())
    dg.remove_nodes_from(ignore)
    print(dg.number_of_edges())
    write_dot(dg, a)
2018-08-07 13:05:52 +08:00
def sort_dict(d):
    # Returns the keys of `d` ordered by descending value; keys with
    # equal values keep their original relative order.
    ordered = sorted(d.items(), key=lambda item: item[1], reverse=True)
    return [key for key, _ in ordered]
2020-07-21 03:31:40 +08:00
def starts(dg):  # roots of trees in a graph
    # Maps every node without incoming edges to its out-degree.
    roots = {}
    for n, d in dg.in_degree:
        if not d:
            roots[n] = dg.out_degree(n)
    return roots
2020-07-21 02:31:20 +08:00
2020-07-22 00:29:42 +08:00
def exclude(i, excludes_re=()):
    # Tells whether name `i` has to be skipped: it is listed in the
    # global `ignore` or matches (re.match, i.e. from its start) one of
    # the regular expressions in `excludes_re`.
    # Returns an explicit bool; the previous version fell off the end and
    # returned None for "not excluded". The default is immutable now.
    if i in ignore:
        return True
    return any(re.match(e, i) for e in excludes_re or ())
2020-07-21 02:31:20 +08:00
2020-07-22 00:29:42 +08:00
def digraph_predecessors(dg, starts, levels=100, excludes_re=()):
    '''
    extracts referrers subgraph
    '''
    # Walks from the `starts` nodes against the edge direction, at most
    # `levels` steps, and returns a new DiGraph with the edges followed.
    # Every predecessor is visited once; names rejected by exclude() are
    # left out.
    # NOTE(review): to_dg is defined outside the visible part of the
    # file; presumably it accepts a graph or a file name - confirm.
    dg = to_dg(dg)
    passed = set()
    p = nx.DiGraph()
    for e in excludes_re:
        log(e)
    frontier = starts
    # Stop as soon as nothing is left to expand. The `levels > 0` test
    # keeps a negative `levels` from looping forever.
    while frontier and levels and levels > 0:
        following = set()
        for s in frontier:
            for i in dg.predecessors(s):
                if i in passed or exclude(i, excludes_re):
                    continue
                passed.add(i)
                following.add(i)
                p.add_edge(i, s)
        frontier = following
        levels -= 1
    return p
2018-07-26 13:34:40 +08:00
2020-07-22 00:29:21 +08:00
def digraph_tree(dg, starts=None):
    '''
    extract a subgraph from a graph
    Ex2: \"write_dot(digraph_tree(read_dot('doxygen.dot'), ['main']), 'main.dot')\"
    '''
    # Returns a new DiGraph holding what is reachable from the `starts`
    # nodes. Without `starts`, all nodes without incoming edges are
    # used, ordered by descending out-degree.
    tree = nx.DiGraph()
    if starts:
        roots = starts
    else:
        degree_of = {n: dg.out_degree(n)
                     for (n, d) in dg.in_degree if not d}
        roots = [n for n, _ in sorted(
            degree_of.items(), key=lambda k: k[1], reverse=True)]

    def sub(node):
        # Copies the successors of `node` recursively. Ignored names,
        # edges already copied and the roots themselves are not followed.
        tree.add_node(node)
        for o in dg.successors(node):
            if o in ignore or tree.has_edge(node, o) or o in roots:
                continue
            tree.add_edge(node, o)
            sub(o)

    if len(roots) == 1:
        # a single root is expanded even when it is listed in `ignore`
        sub(roots[0])
    elif len(roots) > 1:
        for o in roots:
            if o not in ignore:
                sub(o)
    return tree
2018-07-29 21:01:48 +08:00
def digraph_print(dg, starts=None, dst_fn=None, sort=False):
    '''
    prints graph as text tree
    Ex2: \"digraph_print(read_dot('a.dot'))\"
    '''
    # dg: graph to print.
    # starts: nodes to begin with; by default all nodes without incoming
    #     edges, ordered by descending out-degree.
    # dst_fn: optional name of an output file (stdout otherwise).
    # sort: order the successors by descending out-degree.
    dst = open(dst_fn, 'w') if dst_fn else None
    printed = set()

    def digraph_print_sub(path='', node=None, level=0):
        if node in ignore:
            return
        if node in printed:
            # ' ^' marks a node that was already expanded above
            print_limited2(level*'\t' + str(node) + ' ^', dst)
            return
        outs = {_: dg.out_degree(_) for _ in dg.successors(node)}
        if sort:
            outs = dict(sorted(
                outs.items(), key=lambda k: k[1], reverse=True))
        s = ''
        if 'rank' in dg.nodes[node]:
            s = str(dg.nodes[node]['rank'])
            # NOTE(review): `ranks` is not defined in this function or in
            # the visible part of the file - confirm a module-level
            # `ranks` exists.
            ranks[dg.nodes[node]['rank']].append(node)
        if not outs:
            # a leaf also shows the path leading to it
            s += ' @' + path
        # NOTE(review): for nodes with successors the original appended
        # an empty string in both branches of a level test; a truncation
        # marker may have been lost - confirm.
        print_limited2(level*'\t' + str(node) + s, dst)
        printed.add(node)
        if level > level_limit - 2:
            return ''
        for o in outs:
            digraph_print_sub(path + ' ' + str(node), o, level + 1)

    # try/finally closes the output file even when printing fails.
    try:
        if not starts:
            starts = {}
            for i in [n for (n, d) in dg.in_degree if not d]:
                starts[i] = dg.out_degree(i)
            starts = [a[0] for a in sorted(
                starts.items(), key=lambda k: k[1], reverse=True)]
        if len(starts) > 1:
            print_limited2(default_root, dst)
            for s in starts:
                # str() lets non-string node names be printed as well
                print_limited2('\t' + str(s) + ' ->', dst)
        passed = set()
        for o in starts:
            if o in passed:
                continue
            passed.add(o)
            if o in dg:
                digraph_print_sub('', o)
        # not yet printed rest:
        if lines < lines_limit:
            for o in dg.nodes():
                if o not in printed:
                    digraph_print_sub('', o)
    finally:
        if dst:
            dst.close()
    if dst_fn:
        print(dst_fn)
2018-07-22 05:32:07 +08:00
2018-07-22 21:33:13 +08:00
def cflow_preprocess(a):
    # prepare Linux source for better cflow parsing results
    #
    # Reads file `a` line by line (UTF-8, falling back to latin1),
    # applies the substitutions below in order and writes the result to
    # stdout.
    # treat structs like functions (only when cflow_structs is set)
    struct_rules = (
        (r"^static struct (\w+) = ", r"\1()"),
        (r"^static struct (\w+)\[\] = ", r"\1()"),
        (r"^static const struct (\w+)\[\] = ", r"\1()"),
        (r"^struct (.*) =", r"\1()"),
    )
    rules = (
        (r"^static __initdata int \(\*actions\[\]\)\(void\) = ",
         "int actions()"),  # init/initramfs.c
        (r"^static ", ""),
        (r"SENSOR_DEVICE_ATTR.*\((\w*),", r"void sensor_dev_attr_\1()("),
        (r"COMPAT_SYSCALL_DEFINE[0-9]\((\w*),", r"compat_sys_\1("),
        (r"SYSCALL_DEFINE[0-9]\((\w*)", r"sys_\1("),
        (r"__setup\(.*,(.*)\)", r"void __setup() {\1();}"),
        (r"^(\w*)param\(.*,(.*)\)", r"void \1param() {\2();}"),
        (r"^(\w*)initcall\((.*)\)", r"void \1initcall() {\2();}"),
        (r"^static ", ""),
        (r"^inline ", ""),
        (r"^const ", ""),
        (r"\b__initdata\b", ""),
        (r"DEFINE_PER_CPU\((.*),(.*)\)", r"\1 \2"),
        (r"^(\w+) {$", r"void \1() {"),
    )
    with open(a, 'rb') as f:
        for raw in f:
            try:
                s = raw.decode('utf-8')
            except UnicodeDecodeError:
                s = raw.decode('latin1')
            if cflow_structs:
                for pattern, replacement in struct_rules:
                    s = re.sub(pattern, replacement, s)
            for pattern, replacement in rules:
                s = re.sub(pattern, replacement, s)
            sys.stdout.write(s)
2018-07-29 04:03:44 +08:00
# export CPATH=:include:arch/x86/include:../build/include/:../build/arch/x86/include/generated/:include/uapi
# srcxray.py "'\n'.join(cflow('init/main.c'))"
2018-08-27 15:10:04 +08:00
def cflow(a=None):
    '''
    configure and use cflow on Linux sources
    '''
    # a: a source file, a directory, a list of files, or None. None
    #    means the files listed in cscope.files when that file exists,
    #    the C/C++ sources of the current directory otherwise.
    # Returns the output lines of the cflow command.
    cflow_param = {
        "modifier": "__init __inline__ noinline __initdata __randomize_layout asmlinkage __maybe_unused"
        " __visible __init __leaf__ __ref __latent_entropy __init_or_module libmosq_EXPORT",
        "wrapper": "__attribute__ __section__ "
        "TRACE_EVENT MODULE_AUTHOR MODULE_DESCRIPTION MODULE_LICENSE MODULE_LICENSE MODULE_SOFTDEP "
        "INIT_THREAD_INFO "
        "BUG READ_ONCE EEXIST MAJOR "
        "VM_FAULT_ERROR VM_FAULT_MAJOR VM_FAULT_RETRY VM_PFNMAP VM_READ VM_WRITE "
        "FAULT_FLAG_ALLOW_RETRY FAULT_FLAG_KILLABLE "
        "VM_BUG_ON_VMA FOLL_TOUCH FOLL_POPULATE FOLL_MLOCK VM_LOCKONFAULT VM_SHARED FOLL_WRITE "
        # The space after FOLL_FAST_ONLY was missing, which fused it with
        # TASK_SIZE into one bogus symbol.
        "FOLL_PIN FOLL_NUMA FOLL_GET FOLL_FORCE FOLL_LONGTERM FOLL_FAST_ONLY "
        "TASK_SIZE "
        "fallthrough EHWPOISON "
        "__assume_kmalloc_alignment __malloc "
        "__acquires __releases __ATTR",
        "type":
        "pgd_t p4d_t pud_t pmd_t pte_t vm_flags_t"
        # "wrapper": "__setup early_param"
    }

    if os.path.isfile('include/linux/cache.h'):
        # Add the macros defined in cache.h as further modifiers.
        for m in popen("ctags -x --c-kinds=d include/linux/cache.h | cut -d' ' -f 1 | sort -u"):
            if m in cflow_param['modifier']:
                print(m)
            else:
                # This used to append the argument `a` (None by default,
                # giving TypeError) instead of the macro name.
                cflow_param['modifier'] += ' ' + m
    if not a:
        a = "$(cat cscope.files)" if os.path.isfile(
            'cscope.files') else "*.c *.h *.cpp *.hh "
    elif isinstance(a, list):
        # A list cannot be concatenated to the command string below.
        a = ' '.join(str(f) for f in a)
    elif os.path.isdir(a):
        a = "$(find {0} -name '*.[ch]' -o -name '*.cpp' -o -name '*.hh')".format(a)
    # anything else (e.g. a file name) is passed through unchanged

    # One "--symbol=<name>:<kind> " option per configured word.
    symbols = ''.join("--symbol={0}:{1} ".format(w, p)
                      for p in cflow_param
                      for w in cflow_param[p].split())
    # "--depth=%d " %(level_limit+1) +
    # --debug=1
    command = (r"cflow -m _ignore_main_get_all_ -v "
               # + "-DCONFIG_KALLSYMSZ "
               + "--preprocess='srcxray.py cflow_preprocess' "
               + symbols
               + " --include=_sxt --brief --level-indent='0=\t' "
               + a)
    log(command)
    return popen(command)
2018-07-31 10:18:58 +08:00
def import_cflow(a=None, cflow_out=None):
    '''
    extract graph with cflow from Linux sources
    '''
    # a: passed through to cflow().
    # cflow_out: optional file name; the raw cflow output is saved there.
    # Returns a graph with an edge from each caller to its callee, taken
    # from the tab indentation of the cflow output.
    cf = my_graph()
    stack = list()  # names on the path from the top level to this line
    nprev = -1      # indentation depth of the previous line
    cflow_out = open(cflow_out, 'w') if cflow_out else None
    # try/finally closes the copy of the output, which used to stay open.
    try:
        for line in cflow(a):
            if cflow_out:
                cflow_out.write(line + '\n')
            # --print-level
            m = re.match(r'^([\t]*)([^(^ ^<]+)', str(line))
            if not m:
                raise Exception(line)
            n = len(m.group(1))
            name = str(m.group(2))
            if n <= nprev:
                # going up or staying level: drop the entries at this
                # depth and deeper
                stack = stack[:n - nprev - 1]
            # print(n, name, stack)
            if name not in ignore:
                if len(stack):
                    cf.add_edge(stack[-1], name)
            # ignored names are pushed too, so that the stack depth keeps
            # following the indentation
            stack.append(name)
            nprev = n
    finally:
        if cflow_out:
            cflow_out.close()
    return cf
2020-07-22 00:29:21 +08:00
def import_outline(outline_txt=None):
    '''
    converts outline to graph
    Ex2: \"write_dot(import_outline('outline.txt'),'outline.dot')\"
    '''
    # outline_txt: name of a text file in which the indentation (tabs or
    # spaces) gives the nesting.
    # Returns a graph with an edge from each line to the lines nested
    # directly below it.
    cf = my_graph()
    stack = list()  # names on the path from the top level to this line
    nprev = -1      # indentation width of the previous line
    with open(outline_txt, 'r') as f:
        for line in f:
            m = re.match(r'^([\t ]*)(.*)', str(line))
            if not m:
                raise Exception(line)
            n = len(m.group(1))
            name = str(m.group(2))
            if not name:
                continue
            # Strip the ' ^' mark. The pattern is a raw string now: '\^'
            # in a plain literal is an invalid escape sequence.
            name = re.sub(r' \^$', '', name)
            if n <= nprev:
                # going up or staying level: drop the entries at this
                # depth and deeper
                stack = stack[:n - nprev - 1]
            # print(n, name, stack)
            if name not in ignore:
                if len(stack):
                    cf.add_edge(stack[-1], name)
            stack.append(name)
            nprev = n
    return cf
2020-07-23 00:43:19 +08:00
def rank_couples(dg):
    '''
    put couples on same rank to reduce total number of ranks and make
    graph layout more compact
    '''
    # a=sys_clone;srcxray.py "write_dot(rank_couples(reduce_graph(remove_loops(read_dot('$a.dot')))),'$a.dot')"
    #
    # A couple is a node with exactly one successor whose only
    # predecessor is that node. Every node of such a chain gets the
    # attributes 'rank1' and 'rank2' set to the node the chain was
    # started from. The graph is modified in place and returned.
    ranked = set()
    for n in dg:
        if n in ranked:
            continue
        m = n
        # Nodes of the current chain; without this check a cycle of
        # 1-in/1-out nodes (or a self loop) was followed forever.
        chain = {n}
        while dg.out_degree(m) == 1:
            s = next(iter(dg.successors(m)))
            if s in chain or dg.in_degree(s) != 1:
                break
            ranked.update((m, s))
            for node in (m, s):
                dg.nodes[node]['rank1'] = dg.nodes[node]['rank2'] = n
            chain.add(s)
            m = s
    return dg
2018-08-07 14:06:07 +08:00
2020-07-23 00:43:19 +08:00
def add_rank(g):
    '''
    explicitly calculate and store ranks for further processing to
    improve xdot output
    '''
    #
    # srcxray.py "write_dot(add_rank('reduced.dot'), 'ranked.dot')"
    #
    # 'rank1' counts up from 1 starting at the nodes without incoming
    # edges and following the edges; 'rank2' counts down from -1
    # starting at the nodes without outgoing edges and going against
    # them. Both walks advance one level per loop pass. The last rank1
    # counter value is stored as 'max_rank' on the graph object.
    # NOTE(review): nothing stops a node from being visited again, so a
    # cycle reachable from a start node looks like it keeps this loop
    # running forever - confirm the input is acyclic.
    g = to_dg(g)
    rn1, rn2 = 1, -1
    r1 = [n for (n, d) in g.in_degree if not d]
    r2 = [n for (n, d) in g.out_degree if not d]
    while r1 or r2:
        if r1:
            following = set()
            for n in r1:
                attrs = g.nodes[n]
                attrs['rank1'] = max(rn1, attrs.get('rank1', rn1))
                following.update(g.successors(n))
            rn1 += 1
            r1 = following
        if r2:
            following = set()
            for n in r2:
                attrs = g.nodes[n]
                attrs['rank2'] = min(rn2, attrs.get('rank2', rn2))
                following.update(g.predecessors(n))
            rn2 -= 1
            r2 = following
    g.__dict__['max_rank'] = rn1
    return g
2018-07-31 10:18:58 +08:00
def write_dot(g, dot):
    '''
    writes a graph into a file with custom attributes
    '''
    # Other similar external functions to_agraph agwrite
    #
    # g   - a graphviz.Digraph (stored with its own save()) or a directed
    #       graph offering the networkx DiGraph interface.
    # dot - path of the output file; converted with str().
    # Reads the module-level settings `ignore` and `scaled`.
    # Side effect: for an nx.DiGraph the nodes named in `ignore` are removed
    # from g itself before writing.
    def rank(g, n):
        # Rank of node n derived from its rank1/rank2 attributes, or None
        # when an attribute (or the graph's max_rank) is missing.
        try:
            if g.nodes[n]['rank1'] == g.nodes[n]['rank2']:
                return g.nodes[n]['rank1']
            if g.nodes[n]['rank1'] < abs(g.nodes[n]['rank2']):
                return g.nodes[n]['rank1']
            else:
                return g.__dict__['max_rank'] + 1 + g.nodes[n]['rank2']
        except KeyError:
            return None

    def esc(s):
        # re.escape(n))
        return s

    if isinstance(g, graphviz.Digraph):
        g.save(dot)
        print(dot)
        return

    path = str(dot)
    # The context manager closes the file even if serialisation fails.
    with open(path, 'w') as out:
        out.write('strict digraph "None" {\n')
        out.write('rankdir=LR\nnodesep=0\n')
        # out.write('ranksep=50\n')
        out.write('node [fontname=Ubuntu,shape=none];\n')
        # out.write('edge [width=10000];\n')
        out.write('edge [width=1];\n')
        if isinstance(g, nx.DiGraph):
            g.remove_nodes_from(ignore)
        ranks = collections.defaultdict(list)
        for n in g.nodes():
            r = rank(g, n)
            if r:
                ranks[r].append(n)
            if not g.out_degree(n):
                continue
            out.write('"%s" -> { ' % esc(n))
            out.write(' '.join(['"%s"' % (esc(str(a)))
                                for a in g.successors(n)]))
            if scaled and r and int(r):
                out.write(' } [penwidth=%d label=%d];\n' % (100/r, r))
            else:
                out.write(' } ;\n')
            # predecessors are emitted as a dot comment only
            out.write('// "%s" <- { ' % esc(n))
            out.write(' '.join(['"%s"' % (esc(str(a)))
                                for a in g.predecessors(n)]))
            out.write(' } ;\n')
        print(ranks.keys())
        for r in ranks.keys():
            out.write("{ rank=same %s }\n" %
                      (' '.join(['"%s"' % (str(a)) for a in ranks[r]])))
        for n in g.nodes():
            # copy, so the computed fontsize is not stored in the graph
            prop = Munch(g._node[n])
            if scaled and len(ranks):
                # An earlier rank-based fontsize formula used to be assigned
                # here and was immediately overwritten; only the effective
                # value is kept.
                prop.fontsize = 30 + min(5 * len(g.edges(n)), 50)
                # prop.label = n + ' ' + str(rank(g,n))
            if prop:
                out.write('"%s" [%s]\n' % (esc(n), ','.join(
                    ['%s="%s"' % (a, str(prop[a])) for a in prop])))
            elif not g.number_of_edges():
                # edge-less graph: list bare nodes so they are not lost
                out.write('"%s"\n' % (n))
        out.write('}\n')
    print(path)
@open_file(0, mode='r')
def read_dot(dot):
    # faster custom version of eponymous function from external library
    # pydot.graph_from_dot_data parse_dot_data from_pydot
    #
    # dot - file name or open file (handled by the open_file decorator).
    # Returns an nx.DiGraph built line by line from three line shapes:
    #   "a" -> { "b" "c" }   edges to every listed target
    #   "a" -> "b"           a single edge
    #   "a"                  a bare node
    # Lines holding attributes or braces are skipped, self-loops are dropped.
    # Targets inside { } are split on whitespace, so names containing blanks
    # are not supported there.
    dg = nx.DiGraph()
    for a in dot:
        a = a.strip()
        if '->' in a:
            m = re.match(r'"?([^"]+)"? -> {(.+)}', a)
            if m:
                src = m.group(1)
                # Strip the quotes before comparing with the (unquoted)
                # source, otherwise quoted self-loops slip through.
                targets = [b.strip('"') for b in m.group(2).split()]
                dg.add_edges_from([(src, b) for b in targets if b != src])
            else:
                m = re.match(r'"?([^"]+)"? -> "?([^"]*)"?;?', a)
                if m:
                    if m.group(1) != m.group(2):
                        dg.add_edge(m.group(1), m.group(2))
                else:
                    log(a)
        elif re.match(r'.*[=\[\]{}]', a):
            continue
        else:
            m = re.match(r'"?([^"]+)"?', a)
            if m:
                dg.add_node(m.group(1))
    return dg
2018-08-07 13:55:42 +08:00
def to_dg(a):
    # Normalises the argument to a directed graph:
    #   an nx.DiGraph is returned unchanged,
    #   the name of an existing file is loaded with read_dot(),
    #   anything else raises Exception carrying the argument.
    if not isinstance(a, nx.DiGraph):
        if not os.path.isfile(a):
            raise (Exception(a))
        log(a)
        return read_dot(a)
    log(a)
    return a
2018-08-07 13:55:42 +08:00
2018-08-07 14:07:08 +08:00
def remove_loops(dg):
    # srcxray.py "write_dot(remove_loops(read_dot('reduced.dot')), 'no-loops.dot')"
    #
    # Iterative depth-first walk.  An edge that leads back to a node on the
    # current path is collected; all collected edges are removed from dg
    # after the walk.  The graph is modified in place and returned.
    back_edges = []
    seen = set()
    # A unique sentinel sits at the bottom of the path so that the
    # top-level iterator has something to pop when it is exhausted.
    trail = [object()]
    on_trail = set(trail)
    pending = [iter(dg)]
    while pending:
        for node in pending[-1]:
            if node in on_trail:
                back_edges.append((trail[-1], node))
            elif node not in seen:
                seen.add(node)
                trail.append(node)
                on_trail.add(node)
                pending.append(iter(dg[node]))
                break
        else:
            # iterator exhausted: leave the node it belonged to
            on_trail.remove(trail.pop())
            pending.pop()
    dg.remove_edges_from(back_edges)
    return dg
2018-10-30 04:09:05 +08:00
def remove_couples(dg):
    # Removes every edge n -> s where n has exactly one successor and s has
    # exactly one predecessor.  The removed edges are pretty-printed first.
    # The graph is modified in place and returned.
    def only_successor(node):
        return list(dg.successors(node))[0]

    couples = [(n, only_successor(n)) for n in dg
               if dg.out_degree(n) == 1
               and dg.in_degree(only_successor(n)) == 1]
    pprint(couples)
    dg.remove_edges_from(couples)
    return dg
2018-08-07 14:11:55 +08:00
def cflow_dir(a):
    # generates dot files with cflow for c files
    #
    # For every *.c file directly in directory a: reuse the .dot file next
    # to it when present, otherwise build the graph with import_cflow() and
    # store it.  All edges are merged into one index graph, which is written
    # to <a>/index.dot, printed as a tree to <a>/index.tree and returned.
    index = nx.DiGraph()
    for src in glob.glob(os.path.join(a, "*.c")):
        dot = str(Path(src).with_suffix(".dot"))
        if os.path.isfile(dot):
            print(dot)
            try:
                # g = nx.drawing.nx_agraph.read_dot(dot)
                g = read_dot(dot)
            except (TypeError, pygraphviz.agraph.DotError):
                print('nx_pydot <- nx_agraph')
                g = nx.drawing.nx_pydot.read_dot(dot)
        else:
            # c -> cflow and dot
            g = import_cflow(src, Path(src).with_suffix(".cflow"))
            write_dot(g, dot)
            tag_count = popen("ctags -x %s | wc -l" % (src))[0]
            callers = set(e[0] for e in g.edges())
            print(dot, tag_count, len(callers))
        # digraph_print(g, [], Path(src).with_suffix(".tree"))
        # index.add_nodes_from(g.nodes())
        index.add_edges_from(g.edges())
    write_dot(index, str(os.path.join(a, 'index.dot')))
    digraph_print(digraph_tree(index), [], os.path.join(a, 'index.tree'))
    return index
2018-07-29 04:16:48 +08:00
def cflow_linux():
    '''
    extracts with cflow various graphs from Linux kernel source
    '''
    # Loads the merged call graph from all.dot when it exists and is not
    # empty; otherwise builds it from the directories below with cflow_dir()
    # and stores it in all.dot.  Then removes loops and writes all.tree,
    # start_kernel*.dot and sys_clone.dot into the current directory.
    #
    # A wider selection used before:
    #   init kernel kernel/time fs fs/ext4 block ipc net lib security
    #   security/keys arch/x86/kernel drivers/char drivers/pci
    # other candidates: mm net/ipv4 crypto fs/notify/fanotify
    # fs/notify/inotify
    dirs = ('init kernel arch/x86/kernel fs ').split()
    dirs += ['mm']

    graph = None  # not called `all`, which would shadow the builtin
    try:
        print('loading all.dot')
        graph = read_dot('all.dot')
        # graph = nx.DiGraph(read_dot('all.dot'))
    except FileNotFoundError:
        pass
    log(graph)
    if not graph:
        graph = nx.DiGraph()
        for a in dirs:
            print(a)
            index = cflow_dir(a)
            # graph.add_nodes_from(index.nodes())
            graph.add_edges_from(index.edges())
        write_dot(graph, 'all.dot')
    remove_loops(graph)
    # Graph.nodes_with_selfloops() no longer exists as a method in current
    # networkx 2.x; the module-level function is the supported spelling.
    print('loops: ' + str(list(nx.nodes_with_selfloops(graph))))
    print('trees:')
    digraph_print(graph, ['x86_64_start_kernel', 'start_kernel', 'main',
                          'initcall', 'early_param',
                          '__setup', 'sys_write', 'write'],
                  'all.tree')
    start_kernel = digraph_tree(graph, ['start_kernel'])
    write_dot(start_kernel, 'start_kernel.dot')
    write_dot(reduce_graph(start_kernel), 'start_kernel-reduced.dot')
    write_dot(reduce_graph(reduce_graph(start_kernel)),
              'start_kernel-reduced2.dot')
    write_dot(reduce_graph(digraph_tree(graph, ['sys_clone'])),
              'sys_clone.dot')
2018-07-29 04:16:48 +08:00
2020-07-22 00:29:21 +08:00
def stats(graph):
    '''
    measures various simple statistical metrics of a graph
    Ex: graph.dot
    '''
    # graph - nx.DiGraph or name of a dot file (see to_dg).
    # Pretty-prints a dict of metrics and returns None.
    # Side effect: remove_loops() is applied to the graph in place before
    # the longest path is measured.
    dg = to_dg(graph)
    stat = Munch()
    in_degrees = dict()
    out_degrees = dict()
    leaves = set()
    roots = dict()
    stat.edge_nodes = 0
    stat.couples = 0
    for n in dg:
        n_in = dg.in_degree(n)
        n_out = dg.out_degree(n)
        if n_in == 1 and n_out == 1:
            stat.edge_nodes += 1
        if n_in:
            in_degrees[n] = n_in
        else:
            roots[n] = n_out
        if n_out:
            out_degrees[n] = n_out
        else:
            leaves.add(n)
        if n_out == 1 and dg.in_degree(list(dg.successors(n))[0]) == 1:
            stat.couples += 1
    # default=0 keeps an empty graph from raising ValueError
    stat.max_in_degree = max(dict(dg.in_degree).values(), default=0)
    stat.max_out_degree = max(dict(dg.out_degree).values(), default=0)
    stat.leaves = len(leaves)
    stat.roots = len(roots)
    stat.big_roots = ' '.join(sort_dict(roots)[:20])
    stat._popular = ' '.join(sort_dict(in_degrees)[:10])
    stat._biggest = ' '.join(sort_dict(out_degrees)[:10])
    remove_loops(dg)
    longest = dag_longest_path(dg)
    stat.dag_longest_path_len = len(longest)
    stat['__longest_path'] = ' '.join(longest[:10] + [''])
    stat['number_of_nodes'] = dg.number_of_nodes()
    stat['number_of_edges'] = dg.number_of_edges()
    # Graph.number_of_selfloops() no longer exists as a method in current
    # networkx 2.x; the module-level function is the supported spelling.
    stat['number_of_selfloops'] = nx.number_of_selfloops(dg)
    stat['order'] = dg.order()
    pprint(dict(stat))
2018-08-07 14:12:22 +08:00
def dot_expand(a, b):
    # nodes from a with edges from b
    #
    # Both arguments go through to_dg().  The result is a new my_graph()
    # holding the outgoing edges that b has for those nodes of a which are
    # also present in b.
    selection = to_dg(a)
    source = to_dg(b)
    expanded = my_graph()
    log(selection.nodes())
    shared = source.nbunch_iter(selection.nodes())
    expanded.add_edges_from(source.out_edges(shared))
    print(list(source.nbunch_iter(selection.nodes())))
    return expanded
2018-08-07 14:12:22 +08:00
2018-09-16 21:37:00 +08:00
def import_symbols():
    # extracts and import symbols from shared libraries
    #
    # Runs nm over all **/*.so.* files below the current directory and
    # builds a graph named 'symbols':
    #   library -> symbol   for undefined symbols (nm type U)
    #   symbol  -> library  for symbols of nm type T
    # Every line with a recognised library name is echoed to stdout.
    sym = my_graph('symbols')
    for line in popen('(shopt -s globstar; nm -D -C -A **/*.so.*)'):
        fields = line.split(maxsplit=2)
        lib = re.match(r'.*lib(.+).so.*:.*', fields[0])
        if lib is None:
            log(fields[0])
            continue
        name = lib.group(1)
        kind, symbol = fields[1], fields[2]
        if kind == 'U':
            sym.add_edge(name, symbol)
        elif kind == 'T':
            sym.add_edge(symbol, name)
        print(name, kind, symbol)
    return sym
2018-07-19 02:46:21 +08:00
# base name this script was invoked as; shown in usage and example text
me = os.path.basename(sys.argv[0])
2018-07-19 12:52:33 +08:00
2020-07-22 00:29:21 +08:00
def dir_tree(path='.'):
    '''
    scans directory into graph
    Ex2: "write_dot(dir_tree('.'),'tree.dot')"
    '''
    # Walks the tree below `path`; every directory at least two components
    # deep becomes an edge parent -> directory and gets its last component
    # as label.  Directories under .repo/ or deeper than the module-level
    # level_limit are skipped.  Once the graph has more than lines_limit
    # edges, an edge to '' is added as a marker and the walk stops.
    g = my_graph()
    for current, _dirs, _files, _fds in os.fwalk(path):
        parent = re.sub(r'^\.\/', '', os.path.split(current)[0])
        current = re.sub(r'^\.\/', '', current)
        parts = current.split(os.sep)
        if re.match(r'\.repo/', current) or len(parts) > level_limit:
            continue
        if len(parts) <= 1:
            continue
        if g.number_of_edges() > lines_limit:
            g.add_edge(parent, '')
            break
        g.add_edge(parent, current)
        g.add_node(current, label=parts[-1])
    print(g.number_of_edges())
    return g
2020-07-28 04:40:40 +08:00
def doxygen(*sources, output_dir='xml2'):
    '''
    extracts call graph from sources with doxygen
    Ex: *.c
    Ex2: "doxygen('init', output_dir='xml2')"
    '''
    # sources    - files or directories handed to doxygen as INPUT
    # output_dir - keyword-only: directory for the generated XML.  A second
    #              positional argument would be taken as one more source,
    #              which is why the example above names it explicitly.
    # The configuration is fed to `doxygen -` on stdin; doxygen's stdout is
    # captured and dropped and its exit status is not checked.
    inputs = ' '.join(sources)
    log(inputs)
    run(['doxygen', '-'], stdout=PIPE,
        input="INPUT=" + inputs + """
EXCLUDE_SYMBOLS=*310* *311* SOC_ENUM_SINGLE* EXPORT_SYMBOL*
CALL_GRAPH = YES
EXTRACT_ALL = YES
OPTIMIZE_OUTPUT_FOR_C = YES
EXTRACT_STATIC = YES
RECURSIVE = YES
EXCLUDE = html
DOT_GRAPH_MAX_NODES = 100
#GENERATE_TREEVIEW = YES
#HAVE_DOT = YES
#DOT_FONTSIZE = 15
#CALLER_GRAPH = YES
#INTERACTIVE_SVG = YES
#DOT_TRANSPARENT = YES
#DOT_MULTI_TARGETS = NO
#DOT_FONTNAME = Ubuntu
#CASE_SENSE_NAMES = YES
SOURCE_BROWSER = NO
GENERATE_HTML = NO
GENERATE_LATEX = NO
#QUIET = NO
GENERATE_XML=YES
MACRO_EXPANSION = YES
PREDEFINED = "DECLARE_COMPLETION(work)=struct completion work" \\
"__initdata= "
XML_OUTPUT=""" + output_dir + """
""", encoding='ascii')
    print(output_dir)
    #write_dot(doxygen_xml(output_dir), 'doxygen.dot')
2019-11-06 17:19:18 +08:00
def doxygen_xml(a):
    '''
    extracts call graph from xml directory generated by doxygen
    Ex2: \"write_dot(doxygen_xml('xml'), 'doxygen.dot')\"
    '''
    # a - directory with doxygen *.xml files, or a single xml file.
    # Every <memberdef> becomes a node; each <references> and <ref> element
    # inside it becomes an edge from the member to the referenced name.
    # 'main' is prefixed with its file name to keep several mains apart.
    # Members are also recorded per file in the module-level `files`.
    g = my_graph()
    for x in list(glob.glob(os.path.join(a, "*.xml")) + [a]):
        if not os.path.isfile(x):
            continue
        document = xml.dom.minidom.parse(x)
        for member in document.getElementsByTagName("memberdef"):
            name = member.getElementsByTagName("name")[0].firstChild.data
            location = member.getElementsByTagName("location")[0]
            file = location.getAttribute('file')
            if file not in files:
                log(file)
            if name == 'main':
                name = file + '::' + name
            files[file].append(name)
            for tag in ("references", "ref"):
                for r in member.getElementsByTagName(tag):
                    g.add_edge(name, r.firstChild.data)
            # referencedby
    print(g.number_of_edges())
    return g
2020-07-28 04:40:40 +08:00
def doxygen_xml_files(a):
    '''
    extracts call graph from xml directory generated by doxygen
    with files as clusters
    Ex2: \"write_dot(doxygen_xml_files('xml'), 'doxygen.dot')\"
    '''
    # a - directory with doxygen *.xml files, or a single xml file.
    # Returns a graphviz.Digraph with one cluster per source file holding
    # the members defined there, followed by the reference edges.
    # Members are also recorded per file in the module-level `files`.
    g = graphviz.Digraph()
    g.attr('graph', rankdir='LR', concentrate='true', ranksep='4')
    g.attr('node', shape='plaintext')
    members = collections.defaultdict(list)  # file -> member names
    edges = list()
    for x in list(glob.glob(os.path.join(a, "*.xml")) + [a]):
        # print(x)
        if not os.path.isfile(x):
            continue
        d = xml.dom.minidom.parse(x)
        for m in d.getElementsByTagName("memberdef"):
            n = m.getElementsByTagName("name")[0].firstChild.data
            file = (m.getElementsByTagName("location")[0]
                    .getAttribute('file'))
            if file not in files:
                log(file)
            if n == 'main':
                n = file + '::' + n
            files[file].append(n)
            members[file].append(n)
            for r in m.getElementsByTagName("references"):
                edges.append((n, r.firstChild.data))
            for r in m.getElementsByTagName("ref"):
                edges.append((n, r.firstChild.data))
    # graphviz copies a subgraph into its parent when the `with` block is
    # left, so a cluster has to be complete at that point; nodes added to it
    # afterwards would never reach the output.  Hence every cluster is built
    # only after all of its members are known.
    for file, names in members.items():
        with g.subgraph(
                name='cluster_' + file.replace('.', '_8')) as c:
            c.attr('graph', label=file, fontsize="50")
            for n in names:
                c.node(n)
    for (tail, head) in edges:
        g.edge(tail, head.replace('::', '__'))
    return g
2019-11-06 17:22:53 +08:00
def doxygen_length(a):
    '''
    calculates length of functions using doxygen xml
    '''
    # a - directory with doxygen *.xml files, or a single xml file.
    # For every member whose body spans at least 20 lines a diagnostic line
    # goes to stderr and "bodyfile:bodystart: name length SLOC" to stdout.
    # The returned graph is never filled in.
    g = my_graph()
    for x in list(glob.glob(os.path.join(a, "*.xml")) + [a]):
        if not os.path.isfile(x):
            continue
        document = xml.dom.minidom.parse(x)
        for member in document.getElementsByTagName("memberdef"):
            name = member.getElementsByTagName("name")[0].firstChild.data
            location = member.getElementsByTagName("location")[0]
            # <location file="common/log.cpp" line="21" column="1" bodyfile="common/log.cpp" bodystart="21" bodyend="49"/>
            end = location.getAttribute('bodyend')
            if not end or end == "-1":
                continue
            length = int(end) - int(location.getAttribute('bodystart'))
            if length < 20:
                continue
            print(location.getAttribute('bodystart'), name,
                  location.getAttribute('file'),
                  location.getAttribute('bodyfile'), x, file=sys.stderr)
            where = "{0}:{1}:".format(location.getAttribute('bodyfile'),
                                      location.getAttribute('bodystart'))
            print(where, name, length, "SLOC")
    return g
2020-08-09 17:51:23 +08:00
def svg_xml(svg=None) -> nx.DiGraph:
    '''
    extracts call graph from svg
    Ex2: \"write_dot(svg_xml('LKM.svg'), 'LKM.dot')\"
    '''
    if not svg:
        return doc()
    namespaces = {'ns0': 'http://www.w3.org/2000/svg',
                  'ns1': 'http://www.inkscape.org/namespaces/inkscape'}
    g = my_graph(svg)
    # NOTE(review): the DOM tree is parsed but not used afterwards; the
    # inkscape:connection-* attributes used to be read from it.
    xml.dom.minidom.parse(svg)
    tree = ET.parse(svg)
    root = tree.getroot()
    print(svg)
    print(tree)
    print(root)

    def text(c):
        # text of the first tspan of the <text> element with id c;
        # None when no such element exists
        for t in root.findall('.//ns0:text[@id="' + c + '"]', namespaces):
            return t.find('.//ns0:tspan', namespaces).text

    start_attr = "{" + namespaces['ns1'] + "}connection-start"
    end_attr = "{" + namespaces['ns1'] + "}connection-end"
    for connector in root.findall('.//ns0:path', namespaces):
        try:
            # the attribute values start with '#', which is cut off;
            # a path without them yields None and raises TypeError
            ends = (connector.get(start_attr)[1:],
                    connector.get(end_attr)[1:])
            print(text(ends[0]), text(ends[1]))
            g.add_edge(text(ends[0]), text(ends[1]))
        except (TypeError):
            pass
    return g
2020-08-09 16:43:47 +08:00
def doc(m=None):
    # Prints the documentation of function m.
    # Without m the function that called doc() is looked up by name in this
    # module and its whole docstring is printed with signature, the markers
    # 'Ex:' and 'Ex2:' being expanded into example command lines.
    # With m only its name and the first docstring line are printed.
    # Returns False when there is no docstring, otherwise None.
    full = not m
    if full:
        caller = stack()[1][3]
        m = dict(getmembers(modules[__name__]))[caller]
    d = inspect.getdoc(m)
    if not d:
        return False
    if not full:
        print(m.__name__, '-', d.partition('\n')[0])
        return
    example = '\033[3mExample:\033[0m ' + me
    expanded = (d.replace('Ex:', example + ' ' + m.__name__)
                .replace('Ex2:', example))
    print('\033[1m' + m.__name__ + '\033[0m' +
          str(inspect.signature(m)) + ' -',
          expanded)
2018-07-19 02:46:21 +08:00
def usage():
    # Lists every documented function defined in this module (name and
    # first docstring line) followed by a few hints.
    #print('Run', me, 'usage')
    for _name, member in getmembers(modules[__name__]):
        if not isfunction(member):
            continue
        if member.__module__ != __name__:
            continue
        doc(member)
    print("\nTry this: ")
    print("cd linux;", me, "unittest")
    print("\nEmergency termination: ^Z, kill %1")
    print()
2018-07-19 02:46:21 +08:00
2018-07-19 12:52:33 +08:00
2018-08-08 14:08:05 +08:00
class _unittest_autotest(unittest.TestCase):
    # Self test; the relative paths used below expect the current directory
    # to be the top of a Linux kernel source tree and srcxray.py in PATH.
    def test_1(self):
        extract_referrer_test()

        # write_dot / read_dot round trip
        write_dot(nx.DiGraph([(1, 2), (2, 3), (2, 4)]), 'test.dot')
        g = read_dot("test.dot")
        self.assertEqual(list(g.successors("2")), ["3", "4"])

        # referrers and calls, run from the init directory
        self.assertTrue(os.path.isdir('include/linux/'))
        os.chdir('init')
        out = popen('srcxray.py referrers_tree nfs_root_data')
        self.assertRegex(out[-1], r'.*prepare_namespace.*')
        out = popen('srcxray.py referrers_dep nfs_root_data')
        self.assertEqual('initrd_load: prepare_namespace', out[-1])
        out = popen('srcxray.py call_dep start_kernel')
        self.assertTrue('parse_early_options: parse_args' in out)
        out = popen('srcxray.py call_tree start_kernel')
        self.assertTrue('\tsched_init ⋮' in out)
        os.chdir('..')

        self.assertGreater(syscalls().number_of_edges(), 400)

        # digraph_print:
        out = popen("srcxray.py import_cflow init/do_mounts_initrd.c")
        self.assertEqual("\tkernel_do_mounts_initrd_sysctls_init ⋮", out[-1])
        out = popen('srcxray.py "nx.DiGraph([{1,2},{2,3},{2,4}])"')
        self.assertRegex(out[-1], "\t\t4.*")

        # doxygen based call graphs
        os.system('srcxray.py doxygen init')
        os.system(
            "srcxray.py \"write_dot(doxygen_xml('xml2'), 'call_graph_dx.dot')\"")
        edges = read_dot("call_graph_dx.dot").number_of_edges()
        self.assertGreater(edges, 400)
        os.system(
            "srcxray.py \"write_dot(doxygen_xml_files('xml2'), 'call_graph_dx_files.dot')\"")
        edges = read_dot("call_graph_dx_files.dot").number_of_edges()
        self.assertGreater(edges, 400)
        status = os.system(
            "grep DECLARE_COMPLETION call_graph_dx_files.dot")
        self.assertFalse(0 == status)
2018-08-08 14:08:05 +08:00
2018-07-19 02:46:21 +08:00
def _read_lines(path):
    # Lines of a text file, or None when the file does not exist.
    try:
        with open(path) as f:
            return f.read().splitlines()
    except FileNotFoundError:
        return None


def main():
    # Command line entry point.
    #   srcxray.py [--verbose] [--level_limit N] [--lines_limit N] command [args]
    # stop.txt and ignore.txt from the current directory, when present,
    # replace the module-level lists `stop` and `ignore`.
    # `command` is either a Python expression containing '(', the name of a
    # module that has main(), or the name of a function which is called with
    # the remaining arguments as strings.
    # The result is printed (graphs via digraph_print); a result of False
    # ends the process with os.EX_CONFIG.
    global stop, ignore, verbose, level_limit, lines_limit
    lines = _read_lines("stop.txt")
    if lines is not None:
        stop = lines
    lines = _read_lines("ignore.txt")
    if lines is not None:
        ignore = lines
    try:
        ret = False
        # Consume leading options; unknown ones are dropped silently.
        while len(sys.argv) > 1 and sys.argv[1].startswith('--'):
            opt = sys.argv[1][2:]
            log(opt)
            if opt == 'verbose':
                verbose = True
            elif opt in ('level_limit', 'lines_limit'):
                if len(sys.argv) < 3:
                    sys.exit('--%s requires an integer value' % opt)
                if opt == 'level_limit':
                    level_limit = int(sys.argv[2])
                else:
                    lines_limit = int(sys.argv[2])
                sys.argv = sys.argv[1:]
            sys.argv = sys.argv[1:]
        if len(sys.argv) == 1:
            # no command, with or without options
            usage()
        else:
            a1 = sys.argv[1]
            sys.argv = sys.argv[1:]
            # NOTE: the command line is evaluated as Python code on purpose;
            # never feed it untrusted input.
            if '(' in a1:
                ret = eval(a1)
                # ret = exec(sys.argv[1])
            elif len(sys.argv) == 1 and isinstance(eval(a1), types.ModuleType):
                ret = eval(a1 + ".main()")
            else:
                # repr() keeps quotes and backslashes inside an argument
                # from breaking the generated call
                ret = eval(a1 + '(' + ', '.join(repr(a)
                                                for a in sys.argv[1:]) + ')')
            if ignored:
                print("Ignored:", " ".join(ignored))
        if isinstance(ret, nx.DiGraph):
            digraph_print(ret)
        elif isinstance(ret, bool) and ret is False:
            sys.exit(os.EX_CONFIG)
        else:
            if (ret is not None):
                print(ret)
    except KeyboardInterrupt:
        log("\nInterrupted")
2020-06-24 13:09:25 +08:00
# -fdump-rtl-expand
2018-07-19 02:46:21 +08:00
2018-07-19 12:52:33 +08:00
2018-07-19 02:46:21 +08:00
# run the command line interface only when executed as a script
if __name__ == "__main__":
    main()