package.preload['bcc.vendor.argparse'] = (function (...) -- The MIT License (MIT) -- Copyright (c) 2013 - 2015 Peter Melnichenko -- Permission is hereby granted, free of charge, to any person obtaining a copy of -- this software and associated documentation files (the "Software"), to deal in -- the Software without restriction, including without limitation the rights to -- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -- the Software, and to permit persons to whom the Software is furnished to do so, -- subject to the following conditions: -- The above copyright notice and this permission notice shall be included in all -- copies or substantial portions of the Software. -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -- FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -- COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -- IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. local function deep_update(t1, t2) for k, v in pairs(t2) do if type(v) == "table" then v = deep_update({}, v) end t1[k] = v end return t1 end -- A property is a tuple {name, callback}. -- properties.args is number of properties that can be set as arguments -- when calling an object. local function class(prototype, properties, parent) -- Class is the metatable of its instances. local cl = {} cl.__index = cl if parent then cl.__prototype = deep_update(deep_update({}, parent.__prototype), prototype) else cl.__prototype = prototype end if properties then local names = {} -- Create setter methods and fill set of property names. for _, property in ipairs(properties) do local name, callback = property[1], property[2] cl[name] = function(self, value) if not callback(self, value) then self["_" .. 
name] = value end return self end names[name] = true end function cl.__call(self, ...) -- When calling an object, if the first argument is a table, -- interpret keys as property names, else delegate arguments -- to corresponding setters in order. if type((...)) == "table" then for name, value in pairs((...)) do if names[name] then self[name](self, value) end end else local nargs = select("#", ...) for i, property in ipairs(properties) do if i > nargs or i > properties.args then break end local arg = select(i, ...) if arg ~= nil then self[property[1]](self, arg) end end end return self end end -- If indexing class fails, fallback to its parent. local class_metatable = {} class_metatable.__index = parent function class_metatable.__call(self, ...) -- Calling a class returns its instance. -- Arguments are delegated to the instance. local object = deep_update({}, self.__prototype) setmetatable(object, self) return object(...) end return setmetatable(cl, class_metatable) end local function typecheck(name, types, value) for _, type_ in ipairs(types) do if type(value) == type_ then return true end end error(("bad property '%s' (%s expected, got %s)"):format(name, table.concat(types, " or "), type(value))) end local function typechecked(name, ...) local types = {...} return {name, function(_, value) typecheck(name, types, value) end} end local multiname = {"name", function(self, value) typecheck("name", {"string"}, value) for alias in value:gmatch("%S+") do self._name = self._name or alias table.insert(self._aliases, alias) end -- Do not set _name as with other properties. return true end} local function parse_boundaries(str) if tonumber(str) then return tonumber(str), tonumber(str) end if str == "*" then return 0, math.huge end if str == "+" then return 1, math.huge end if str == "?" 
then return 0, 1 end if str:match "^%d+%-%d+$" then local min, max = str:match "^(%d+)%-(%d+)$" return tonumber(min), tonumber(max) end if str:match "^%d+%+$" then local min = str:match "^(%d+)%+$" return tonumber(min), math.huge end end local function boundaries(name) return {name, function(self, value) typecheck(name, {"number", "string"}, value) local min, max = parse_boundaries(value) if not min then error(("bad property '%s'"):format(name)) end self["_min" .. name], self["_max" .. name] = min, max end} end local actions = {} local option_action = {"action", function(_, value) typecheck("action", {"function", "string"}, value) if type(value) == "string" and not actions[value] then error(("unknown action '%s'"):format(value)) end end} local option_init = {"init", function(self) self._has_init = true end} local option_default = {"default", function(self, value) if type(value) ~= "string" then self._init = value self._has_init = true return true end end} local add_help = {"add_help", function(self, value) typecheck("add_help", {"boolean", "string", "table"}, value) if self._has_help then table.remove(self._options) self._has_help = false end if value then local help = self:flag() :description "Show this help message and exit." 
:action(function() print(self:get_help()) os.exit(0) end) if value ~= true then help = help(value) end if not help._name then help "-h" "--help" end self._has_help = true end end} local Parser = class({ _arguments = {}, _options = {}, _commands = {}, _mutexes = {}, _require_command = true, _handle_options = true }, { args = 3, typechecked("name", "string"), typechecked("description", "string"), typechecked("epilog", "string"), typechecked("usage", "string"), typechecked("help", "string"), typechecked("require_command", "boolean"), typechecked("handle_options", "boolean"), typechecked("action", "function"), typechecked("command_target", "string"), add_help }) local Command = class({ _aliases = {} }, { args = 3, multiname, typechecked("description", "string"), typechecked("epilog", "string"), typechecked("target", "string"), typechecked("usage", "string"), typechecked("help", "string"), typechecked("require_command", "boolean"), typechecked("handle_options", "boolean"), typechecked("action", "function"), typechecked("command_target", "string"), add_help }, Parser) local Argument = class({ _minargs = 1, _maxargs = 1, _mincount = 1, _maxcount = 1, _defmode = "unused", _show_default = true }, { args = 5, typechecked("name", "string"), typechecked("description", "string"), option_default, typechecked("convert", "function", "table"), boundaries("args"), typechecked("target", "string"), typechecked("defmode", "string"), typechecked("show_default", "boolean"), typechecked("argname", "string", "table"), option_action, option_init }) local Option = class({ _aliases = {}, _mincount = 0, _overwrite = true }, { args = 6, multiname, typechecked("description", "string"), option_default, typechecked("convert", "function", "table"), boundaries("args"), boundaries("count"), typechecked("target", "string"), typechecked("defmode", "string"), typechecked("show_default", "boolean"), typechecked("overwrite", "boolean"), typechecked("argname", "string", "table"), option_action, option_init 
}, Argument) function Argument:_get_argument_list() local buf = {} local i = 1 while i <= math.min(self._minargs, 3) do local argname = self:_get_argname(i) if self._default and self._defmode:find "a" then argname = "[" .. argname .. "]" end table.insert(buf, argname) i = i+1 end while i <= math.min(self._maxargs, 3) do table.insert(buf, "[" .. self:_get_argname(i) .. "]") i = i+1 if self._maxargs == math.huge then break end end if i < self._maxargs then table.insert(buf, "...") end return buf end function Argument:_get_usage() local usage = table.concat(self:_get_argument_list(), " ") if self._default and self._defmode:find "u" then if self._maxargs > 1 or (self._minargs == 1 and not self._defmode:find "a") then usage = "[" .. usage .. "]" end end return usage end function actions.store_true(result, target) result[target] = true end function actions.store_false(result, target) result[target] = false end function actions.store(result, target, argument) result[target] = argument end function actions.count(result, target, _, overwrite) if not overwrite then result[target] = result[target] + 1 end end function actions.append(result, target, argument, overwrite) result[target] = result[target] or {} table.insert(result[target], argument) if overwrite then table.remove(result[target], 1) end end function actions.concat(result, target, arguments, overwrite) if overwrite then error("'concat' action can't handle too many invocations") end result[target] = result[target] or {} for _, argument in ipairs(arguments) do table.insert(result[target], argument) end end function Argument:_get_action() local action, init if self._maxcount == 1 then if self._maxargs == 0 then action, init = "store_true", nil else action, init = "store", nil end else if self._maxargs == 0 then action, init = "count", 0 else action, init = "append", {} end end if self._action then action = self._action end if self._has_init then init = self._init end if type(action) == "string" then action = 
actions[action] end return action, init end -- Returns placeholder for `narg`-th argument. function Argument:_get_argname(narg) local argname = self._argname or self:_get_default_argname() if type(argname) == "table" then return argname[narg] else return argname end end function Argument:_get_default_argname() return "<" .. self._name .. ">" end function Option:_get_default_argname() return "<" .. self:_get_default_target() .. ">" end -- Returns label to be shown in the help message. function Argument:_get_label() return self._name end function Option:_get_label() local variants = {} local argument_list = self:_get_argument_list() table.insert(argument_list, 1, nil) for _, alias in ipairs(self._aliases) do argument_list[1] = alias table.insert(variants, table.concat(argument_list, " ")) end return table.concat(variants, ", ") end function Command:_get_label() return table.concat(self._aliases, ", ") end function Argument:_get_description() if self._default and self._show_default then if self._description then return ("%s (default: %s)"):format(self._description, self._default) else return ("default: %s"):format(self._default) end else return self._description or "" end end function Command:_get_description() return self._description or "" end function Option:_get_usage() local usage = self:_get_argument_list() table.insert(usage, 1, self._name) usage = table.concat(usage, " ") if self._mincount == 0 or self._default then usage = "[" .. usage .. 
"]" end return usage end function Argument:_get_default_target() return self._name end function Option:_get_default_target() local res for _, alias in ipairs(self._aliases) do if alias:sub(1, 1) == alias:sub(2, 2) then res = alias:sub(3) break end end res = res or self._name:sub(2) return (res:gsub("-", "_")) end function Option:_is_vararg() return self._maxargs ~= self._minargs end function Parser:_get_fullname() local parent = self._parent local buf = {self._name} while parent do table.insert(buf, 1, parent._name) parent = parent._parent end return table.concat(buf, " ") end function Parser:_update_charset(charset) charset = charset or {} for _, command in ipairs(self._commands) do command:_update_charset(charset) end for _, option in ipairs(self._options) do for _, alias in ipairs(option._aliases) do charset[alias:sub(1, 1)] = true end end return charset end function Parser:argument(...) local argument = Argument(...) table.insert(self._arguments, argument) return argument end function Parser:option(...) local option = Option(...) if self._has_help then table.insert(self._options, #self._options, option) else table.insert(self._options, option) end return option end function Parser:flag(...) return self:option():args(0)(...) end function Parser:command(...) local command = Command():add_help(true)(...) command._parent = self table.insert(self._commands, command) return command end function Parser:mutex(...) local options = {...} for i, option in ipairs(options) do assert(getmetatable(option) == Option, ("bad argument #%d to 'mutex' (Option expected)"):format(i)) end table.insert(self._mutexes, options) return self end local max_usage_width = 70 local usage_welcome = "Usage: " function Parser:get_usage() if self._usage then return self._usage end local lines = {usage_welcome .. self:_get_fullname()} local function add(s) if #lines[#lines]+1+#s <= max_usage_width then lines[#lines] = lines[#lines] .. " " .. s else lines[#lines+1] = (" "):rep(#usage_welcome) .. 
s end end -- This can definitely be refactored into something cleaner local mutex_options = {} local vararg_mutexes = {} -- First, put mutexes which do not contain vararg options and remember those which do for _, mutex in ipairs(self._mutexes) do local buf = {} local is_vararg = false for _, option in ipairs(mutex) do if option:_is_vararg() then is_vararg = true end table.insert(buf, option:_get_usage()) mutex_options[option] = true end local repr = "(" .. table.concat(buf, " | ") .. ")" if is_vararg then table.insert(vararg_mutexes, repr) else add(repr) end end -- Second, put regular options for _, option in ipairs(self._options) do if not mutex_options[option] and not option:_is_vararg() then add(option:_get_usage()) end end -- Put positional arguments for _, argument in ipairs(self._arguments) do add(argument:_get_usage()) end -- Put mutexes containing vararg options for _, mutex_repr in ipairs(vararg_mutexes) do add(mutex_repr) end for _, option in ipairs(self._options) do if not mutex_options[option] and option:_is_vararg() then add(option:_get_usage()) end end if #self._commands > 0 then if self._require_command then add("") else add("[]") end add("...") end return table.concat(lines, "\n") end local margin_len = 3 local margin_len2 = 25 local margin = (" "):rep(margin_len) local margin2 = (" "):rep(margin_len2) local function make_two_columns(s1, s2) if s2 == "" then return margin .. s1 end s2 = s2:gsub("\n", "\n" .. margin2) if #s1 < (margin_len2-margin_len) then return margin .. s1 .. (" "):rep(margin_len2-margin_len-#s1) .. s2 else return margin .. s1 .. "\n" .. margin2 .. 
s2 end end function Parser:get_help() if self._help then return self._help end local blocks = {self:get_usage()} if self._description then table.insert(blocks, self._description) end local labels = {"Arguments:", "Options:", "Commands:"} for i, elements in ipairs{self._arguments, self._options, self._commands} do if #elements > 0 then local buf = {labels[i]} for _, element in ipairs(elements) do table.insert(buf, make_two_columns(element:_get_label(), element:_get_description())) end table.insert(blocks, table.concat(buf, "\n")) end end if self._epilog then table.insert(blocks, self._epilog) end return table.concat(blocks, "\n\n") end local function get_tip(context, wrong_name) local context_pool = {} local possible_name local possible_names = {} for name in pairs(context) do if type(name) == "string" then for i = 1, #name do possible_name = name:sub(1, i - 1) .. name:sub(i + 1) if not context_pool[possible_name] then context_pool[possible_name] = {} end table.insert(context_pool[possible_name], name) end end end for i = 1, #wrong_name + 1 do possible_name = wrong_name:sub(1, i - 1) .. wrong_name:sub(i + 1) if context[possible_name] then possible_names[possible_name] = true elseif context_pool[possible_name] then for _, name in ipairs(context_pool[possible_name]) do possible_names[name] = true end end end local first = next(possible_names) if first then if next(possible_names, first) then local possible_names_arr = {} for name in pairs(possible_names) do table.insert(possible_names_arr, "'" .. name .. "'") end table.sort(possible_names_arr) return "\nDid you mean one of these: " .. table.concat(possible_names_arr, " ") .. "?" else return "\nDid you mean '" .. first .. "'?" 
end else return "" end end

-- ElementState tracks one Argument/Option element during a single parse run:
-- how many times it was invoked, which raw arguments it has accumulated, and
-- how the final value is committed into the result table via its action.
local ElementState = class({
   invocations = 0
})

-- Bind this state to the parse `state` and the element it tracks; resolves
-- the result-table key and the element's (action, initial value) pair.
function ElementState:__call(state, element)
   self.state = state
   self.result = state.result
   self.element = element
   self.target = element._target or element:_get_default_target()
   self.action, self.result[self.target] = element:_get_action()
   return self
end

-- Forward errors to the parse state's error handler.
function ElementState:error(fmt, ...)
   self.state:error(fmt, ...)
end

-- Convert a raw command-line string using the element's `convert` property:
-- either a function returning value[, errmsg] or a lookup table. Reports a
-- parse error when conversion yields nil.
function ElementState:convert(argument)
   local converter = self.element._convert
   if converter then
      local ok, err
      if type(converter) == "function" then
         ok, err = converter(argument)
      else
         ok = converter[argument]
      end
      if ok == nil then
         self:error(err and "%s" or "malformed argument '%s'", err or argument)
      end
      argument = ok
   end
   return argument
end

-- Return the element's default value, but only when its defmode contains
-- `mode` ("u" = apply when element unused, "a" = apply per missing argument).
function ElementState:default(mode)
   return self.element._defmode:find(mode) and self.element._default
end

-- Format a count bound for error messages, e.g. "at least 1 time",
-- "at most 3 arguments", "2 times".
local function bound(noun, min, max, is_max)
   local res = ""
   if min ~= max then
      res = "at " .. (is_max and "most" or "least") .. " "
   end
   local number = is_max and max or min
   return res .. tostring(number) .. " " .. noun ..
      (number == 1 and "" or "s")
end

-- Begin one invocation of the element (e.g. the option was seen on the
-- command line). Returns whether the invocation is still "open", i.e.
-- waiting to receive more arguments via :pass().
function ElementState:invoke(alias)
   self.open = true
   self.name = ("%s '%s'"):format(alias and "option" or "argument", alias or self.element._name)
   self.overwrite = false
   -- Exceeding maxcount is allowed only for options with `overwrite = true`;
   -- otherwise it is a usage error.
   if self.invocations >= self.element._maxcount then
      if self.element._overwrite then
         self.overwrite = true
      else
         self:error("%s must be used %s", self.name, bound("time", self.element._mincount, self.element._maxcount, true))
      end
   else
      self.invocations = self.invocations + 1
   end
   self.args = {}
   -- Flag-like elements take no arguments: commit immediately.
   if self.element._maxargs <= 0 then
      self:close()
   end
   return self.open
end

-- Feed one (converted) argument into the open invocation; closes it once
-- maxargs is reached. Returns whether the invocation is still open.
function ElementState:pass(argument)
   argument = self:convert(argument)
   table.insert(self.args, argument)
   if #self.args >= self.element._maxargs then
      self:close()
   end
   return self.open
end

-- Pad the current invocation with the element's default value until the
-- minimum number of arguments is satisfied (used by "a"/"u" defmodes).
function ElementState:complete_invocation()
   while #self.args < self.element._minargs do
      self:pass(self.element._default)
   end
end

-- Finish the current invocation: validate the argument count (or fill in
-- defaults when defmode contains "a"), then commit the collected value(s)
-- into the result table through the element's action.
function ElementState:close()
   if self.open then
      self.open = false
      if #self.args < self.element._minargs then
         if self:default("a") then
            self:complete_invocation()
         else
            if #self.args == 0 then
               if getmetatable(self.element) == Argument then
                  self:error("missing %s", self.name)
               elseif self.element._maxargs == 1 then
                  self:error("%s requires an argument", self.name)
               end
            end
            self:error("%s requires %s", self.name, bound("argument", self.element._minargs, self.element._maxargs))
         end
      end
      local args = self.args
      -- Single-argument elements store the bare value rather than a table.
      if self.element._maxargs <= 1 then
         args = args[1]
      end
      -- Exception: an element with optional single argument (minargs == 0,
      -- maxargs == 1) that may repeat (mincount ~= maxcount) keeps the table
      -- form so repeated invocations can accumulate.
      if self.element._maxargs == 1 and self.element._minargs == 0 and self.element._mincount ~= self.element._maxcount then
         args = self.args
      end
      self.action(self.result, self.target, args, self.overwrite)
   end
end

-- ParseState drives one run of Parser:parse: it holds the result table,
-- per-element states for options and positional arguments, mutex bookkeeping,
-- and command actions to run after parsing finishes.
local ParseState = class({
   result = {},
   options = {},
   arguments = {},
   argument_i = 1,
   element_to_mutexes = {},
   mutex_to_used_option = {},
   command_actions = {}
})

-- Initialize the state for `parser`: remember the error handler, collect the
-- set of option-prefix characters, then activate the root parser.
function ParseState:__call(parser, error_handler)
   self.parser = parser
   self.error_handler = error_handler
   self.charset = parser:_update_charset()
   self:switch(parser)
   return self
end
function ParseState:error(fmt, ...) self.error_handler(self.parser, fmt:format(...)) end function ParseState:switch(parser) self.parser = parser if parser._action then table.insert(self.command_actions, {action = parser._action, name = parser._name}) end for _, option in ipairs(parser._options) do option = ElementState(self, option) table.insert(self.options, option) for _, alias in ipairs(option.element._aliases) do self.options[alias] = option end end for _, mutex in ipairs(parser._mutexes) do for _, option in ipairs(mutex) do if not self.element_to_mutexes[option] then self.element_to_mutexes[option] = {} end table.insert(self.element_to_mutexes[option], mutex) end end for _, argument in ipairs(parser._arguments) do argument = ElementState(self, argument) table.insert(self.arguments, argument) argument:invoke() end self.handle_options = parser._handle_options self.argument = self.arguments[self.argument_i] self.commands = parser._commands for _, command in ipairs(self.commands) do for _, alias in ipairs(command._aliases) do self.commands[alias] = command end end end function ParseState:get_option(name) local option = self.options[name] if not option then self:error("unknown option '%s'%s", name, get_tip(self.options, name)) else return option end end function ParseState:get_command(name) local command = self.commands[name] if not command then if #self.commands > 0 then self:error("unknown command '%s'%s", name, get_tip(self.commands, name)) else self:error("too many arguments") end else return command end end function ParseState:invoke(option, name) self:close() if self.element_to_mutexes[option.element] then for _, mutex in ipairs(self.element_to_mutexes[option.element]) do local used_option = self.mutex_to_used_option[mutex] if used_option and used_option ~= option then self:error("option '%s' can not be used together with %s", name, used_option.name) else self.mutex_to_used_option[mutex] = option end end end if option:invoke(name) then self.option = option 
end end function ParseState:pass(arg) if self.option then if not self.option:pass(arg) then self.option = nil end elseif self.argument then if not self.argument:pass(arg) then self.argument_i = self.argument_i + 1 self.argument = self.arguments[self.argument_i] end else local command = self:get_command(arg) self.result[command._target or command._name] = true if self.parser._command_target then self.result[self.parser._command_target] = command._name end self:switch(command) end end function ParseState:close() if self.option then self.option:close() self.option = nil end end function ParseState:finalize() self:close() for i = self.argument_i, #self.arguments do local argument = self.arguments[i] if #argument.args == 0 and argument:default("u") then argument:complete_invocation() else argument:close() end end if self.parser._require_command and #self.commands > 0 then self:error("a command is required") end for _, option in ipairs(self.options) do local name = option.name or ("option '%s'"):format(option.element._name) if option.invocations == 0 then if option:default("u") then option:invoke(name) option:complete_invocation() option:close() end end local mincount = option.element._mincount if option.invocations < mincount then if option:default("a") then while option.invocations < mincount do option:invoke(name) option:close() end elseif option.invocations == 0 then self:error("missing %s", name) else self:error("%s must be used %s", name, bound("time", mincount, option.element._maxcount)) end end end for i = #self.command_actions, 1, -1 do self.command_actions[i].action(self.result, self.command_actions[i].name) end end function ParseState:parse(args) for _, arg in ipairs(args) do local plain = true if self.handle_options then local first = arg:sub(1, 1) if self.charset[first] then if #arg > 1 then plain = false if arg:sub(2, 2) == first then if #arg == 2 then self:close() self.handle_options = false else local equals = arg:find "=" if equals then local name = 
arg:sub(1, equals - 1) local option = self:get_option(name) if option.element._maxargs <= 0 then self:error("option '%s' does not take arguments", name) end self:invoke(option, name) self:pass(arg:sub(equals + 1)) else local option = self:get_option(arg) self:invoke(option, arg) end end else for i = 2, #arg do local name = first .. arg:sub(i, i) local option = self:get_option(name) self:invoke(option, name) if i ~= #arg and option.element._maxargs > 0 then self:pass(arg:sub(i + 1)) break end end end end end end if plain then self:pass(arg) end end self:finalize() return self.result end function Parser:error(msg) io.stderr:write(("%s\n\nError: %s\n"):format(self:get_usage(), msg)) os.exit(1) end -- Compatibility with strict.lua and other checkers: local default_cmdline = rawget(_G, "arg") or {} function Parser:_parse(args, error_handler) return ParseState(self, error_handler):parse(args or default_cmdline) end function Parser:parse(args) return self:_parse(args, self.error) end local function xpcall_error_handler(err) return tostring(err) .. "\noriginal " .. debug.traceback("", 2):sub(2) end function Parser:pparse(args) local parse_error local ok, result = xpcall(function() return self:_parse(args, function(_, err) parse_error = err error(err, 0) end) end, xpcall_error_handler) if ok then return true, result elseif not parse_error then error(result, 0) else return false, parse_error end end return function(...) return Parser(default_cmdline[0]):add_help(true)(...) end end) package.preload['bcc.vendor.posix'] = (function (...) --[[ Copyright 2016 GitHub, Inc Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]
local ffi = require("ffi")

-- Avoid duplicate declarations if syscall library is present
local has_syscall, _ = pcall(require, "syscall")
if not has_syscall then
  ffi.cdef [[
  typedef int clockid_t;
  typedef long time_t;

  struct timespec {
    time_t tv_sec;
    long tv_nsec;
  };

  int clock_gettime(clockid_t clk_id, struct timespec *tp);
  int clock_nanosleep(clockid_t clock_id, int flags,
    const struct timespec *request, struct timespec *remain);
  ]]
end

ffi.cdef [[
int get_nprocs(void);
uint64_t strtoull(const char *nptr, char **endptr, int base);
]]

-- clockid_t values for clock_gettime/clock_nanosleep (Linux).
local CLOCK = {
  REALTIME                  = 0,
  MONOTONIC                 = 1,
  PROCESS_CPUTIME_ID        = 2,
  THREAD_CPUTIME_ID         = 3,
  MONOTONIC_RAW             = 4,
  REALTIME_COARSE           = 5,
  MONOTONIC_COARSE          = 6,
}

-- Return the current time of `clock` (default CLOCK.MONOTONIC_RAW) in
-- nanoseconds as a Lua number. Raises on clock_gettime() failure.
local function time_ns(clock)
  local ts = ffi.new("struct timespec[1]")
  assert(ffi.C.clock_gettime(clock or CLOCK.MONOTONIC_RAW, ts) == 0,
    "clock_gettime() failed: "..ffi.errno())
  return tonumber(ts[0].tv_sec * 1e9 + ts[0].tv_nsec)
end

-- Sleep for `seconds` (may be fractional) on `clock` (default
-- CLOCK.MONOTONIC) using clock_nanosleep(); the return value of the
-- syscall is deliberately ignored (best-effort sleep).
local function sleep(seconds, clock)
  -- math.modf splits into integral part `s` and fractional part `frac`.
  local s, frac = math.modf(seconds)
  local ts = ffi.new("struct timespec[1]")
  ts[0].tv_sec = s
  -- BUG FIX: the fractional part must be *multiplied* by 1e9 to obtain
  -- nanoseconds. The previous `frac / 1e9` always truncated to 0 in the
  -- `long` tv_nsec field, so e.g. sleep(0.5) returned immediately.
  ts[0].tv_nsec = frac * 1e9
  ffi.C.clock_nanosleep(clock or CLOCK.MONOTONIC, 0, ts, nil)
end

-- Number of processors configured, via get_nprocs(3).
local function cpu_count()
  return tonumber(ffi.C.get_nprocs())
end

-- Parse string `n` as an unsigned 64-bit integer in `base` (default 10),
-- returning a uint64_t cdata (strtoull semantics).
local function tonumber64(n, base)
  assert(type(n) == "string")
  return ffi.C.strtoull(n, nil, base or 10)
end

return {
  time_ns=time_ns,
  sleep=sleep,
  CLOCK=CLOCK,
  cpu_count=cpu_count,
  tonumber64=tonumber64,
}
end)
package.preload['bcc.vendor.middleclass'] = (function (...)
local middleclass = { _VERSION = 'middleclass v4.0.0', _DESCRIPTION = 'Object Orientation for Lua', _URL = 'https://github.com/kikito/middleclass', _LICENSE = [[ MIT LICENSE Copyright (c) 2011 Enrique GarcĂ­a Cota Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
]] } local function _createIndexWrapper(aClass, f) if f == nil then return aClass.__instanceDict else return function(self, name) local value = aClass.__instanceDict[name] if value ~= nil then return value elseif type(f) == "function" then return (f(self, name)) else return f[name] end end end end local function _propagateInstanceMethod(aClass, name, f) f = name == "__index" and _createIndexWrapper(aClass, f) or f aClass.__instanceDict[name] = f for subclass in pairs(aClass.subclasses) do if rawget(subclass.__declaredMethods, name) == nil then _propagateInstanceMethod(subclass, name, f) end end end local function _declareInstanceMethod(aClass, name, f) aClass.__declaredMethods[name] = f if f == nil and aClass.super then f = aClass.super.__instanceDict[name] end _propagateInstanceMethod(aClass, name, f) end local function _tostring(self) return "class " .. self.name end local function _call(self, ...) return self:new(...) end local function _createClass(name, super) local dict = {} dict.__index = dict local aClass = { name = name, super = super, static = {}, __instanceDict = dict, __declaredMethods = {}, subclasses = setmetatable({}, {__mode='k'}) } if super then setmetatable(aClass.static, { __index = function(_,k) return rawget(dict,k) or super.static[k] end }) else setmetatable(aClass.static, { __index = function(_,k) return rawget(dict,k) end }) end setmetatable(aClass, { __index = aClass.static, __tostring = _tostring, __call = _call, __newindex = _declareInstanceMethod }) return aClass end local function _includeMixin(aClass, mixin) assert(type(mixin) == 'table', "mixin must be a table") for name,method in pairs(mixin) do if name ~= "included" and name ~= "static" then aClass[name] = method end end for name,method in pairs(mixin.static or {}) do aClass.static[name] = method end if type(mixin.included)=="function" then mixin:included(aClass) end return aClass end local DefaultMixin = { __tostring = function(self) return "instance of " .. 
tostring(self.class) end, initialize = function(self, ...) end, isInstanceOf = function(self, aClass) return type(self) == 'table' and type(self.class) == 'table' and type(aClass) == 'table' and ( aClass == self.class or type(aClass.isSubclassOf) == 'function' and self.class:isSubclassOf(aClass) ) end, static = { allocate = function(self) assert(type(self) == 'table', "Make sure that you are using 'Class:allocate' instead of 'Class.allocate'") return setmetatable({ class = self }, self.__instanceDict) end, new = function(self, ...) assert(type(self) == 'table', "Make sure that you are using 'Class:new' instead of 'Class.new'") local instance = self:allocate() instance:initialize(...) return instance end, subclass = function(self, name) assert(type(self) == 'table', "Make sure that you are using 'Class:subclass' instead of 'Class.subclass'") assert(type(name) == "string", "You must provide a name(string) for your class") local subclass = _createClass(name, self) for methodName, f in pairs(self.__instanceDict) do _propagateInstanceMethod(subclass, methodName, f) end subclass.initialize = function(instance, ...) return self.initialize(instance, ...) end self.subclasses[subclass] = true self:subclassed(subclass) return subclass end, subclassed = function(self, other) end, isSubclassOf = function(self, other) return type(other) == 'table' and type(self) == 'table' and type(self.super) == 'table' and ( self.super == other or type(self.super.isSubclassOf) == 'function' and self.super:isSubclassOf(other) ) end, include = function(self, ...) 
assert(type(self) == 'table', "Make sure you that you are using 'Class:include' instead of 'Class.include'") for _,mixin in ipairs({...}) do _includeMixin(self, mixin) end return self end } } function middleclass.class(name, super) assert(type(name) == 'string', "A name (string) is needed for the new class") return super and super:subclass(name) or _includeMixin(_createClass(name), DefaultMixin) end setmetatable(middleclass, { __call = function(_, ...) return middleclass.class(...) end }) return middleclass end) package.preload['bcc.vendor.json'] = (function (...) --[[ json.lua A compact pure-Lua JSON library. This code is in the public domain: https://gist.github.com/tylerneylon/59f4bcf316be525b30ab The main functions are: json.stringify, json.parse. ## json.stringify: This expects the following to be true of any tables being encoded: * They only have string or number keys. Number keys must be represented as strings in json; this is part of the json spec. * They are not recursive. Such a structure cannot be specified in json. A Lua table is considered to be an array if and only if its set of keys is a consecutive sequence of positive integers starting at 1. Arrays are encoded like so: `[2, 3, false, "hi"]`. Any other type of Lua table is encoded as a json object, encoded like so: `{"key1": 2, "key2": false}`. Because the Lua nil value cannot be a key, and as a table value is considered equivalent to a missing key, there is no way to express the json "null" value in a Lua table. The only way this will output "null" is if your entire input obj is nil itself. An empty Lua table, {}, could be considered either a json object or array - it's an ambiguous edge case. We choose to treat this as an object as it is the more general type. To be clear, none of the above considerations is a limitation of this code. Rather, it is what we get when we completely observe the json specification for as arbitrary a Lua object as json is capable of expressing. 
## json.parse: This function parses json, with the exception that it does not pay attention to \u-escaped unicode code points in strings. It is difficult for Lua to return null as a value. In order to prevent the loss of keys with a null value in a json string, this function uses the one-off table value json.null (which is just an empty table) to indicate null values. This way you can check if a value is null with the conditional `val == json.null`. If you have control over the data and are using Lua, I would recommend just avoiding null values in your data to begin with. --]] local json = {} -- Internal functions. local function kind_of(obj) if type(obj) ~= 'table' then return type(obj) end local i = 1 for _ in pairs(obj) do if obj[i] ~= nil then i = i + 1 else return 'table' end end if i == 1 then return 'table' else return 'array' end end local function escape_str(s) local in_char = {'\\', '"', '/', '\b', '\f', '\n', '\r', '\t'} local out_char = {'\\', '"', '/', 'b', 'f', 'n', 'r', 't'} for i, c in ipairs(in_char) do s = s:gsub(c, '\\' .. out_char[i]) end return s end -- Returns pos, did_find; there are two cases: -- 1. Delimiter found: pos = pos after leading space + delim; did_find = true. -- 2. Delimiter not found: pos = pos after leading space; did_find = false. -- This throws an error if err_if_missing is true and the delim is not found. local function skip_delim(str, pos, delim, err_if_missing) pos = pos + #str:match('^%s*', pos) if str:sub(pos, pos) ~= delim then if err_if_missing then error('Expected ' .. delim .. ' near position ' .. pos) end return pos, false end return pos + 1, true end -- Expects the given pos to be the first character after the opening quote. -- Returns val, pos; the returned pos is after the closing quote character. local function parse_str_val(str, pos, val) val = val or '' local early_end_error = 'End of input found while parsing string.' 
if pos > #str then error(early_end_error) end
  local c = str:sub(pos, pos)
  if c == '"' then return val, pos + 1 end
  if c ~= '\\' then return parse_str_val(str, pos + 1, val .. c) end
  -- We must have a \ character.
  local esc_map = {b = '\b', f = '\f', n = '\n', r = '\r', t = '\t'}
  local nextc = str:sub(pos + 1, pos + 1)
  -- BUGFIX: string.sub returns '' (never nil) past the end of the string,
  -- so the original `if not nextc` check could never fire; a backslash as
  -- the last input character silently escaped nothing. Compare to ''.
  if nextc == '' then error(early_end_error) end
  return parse_str_val(str, pos + 2, val .. (esc_map[nextc] or nextc))
end

-- Returns val, pos; the returned pos is after the number's final character.
local function parse_num_val(str, pos)
  local num_str = str:match('^-?%d+%.?%d*[eE]?[+-]?%d*', pos)
  local val = tonumber(num_str)
  if not val then error('Error parsing number at position ' .. pos .. '.') end
  return val, pos + #num_str
end

-- Public values and functions.

-- Encode a Lua value as a JSON string. `as_key` restricts the value to
-- something representable as a JSON object key (strings and numbers).
function json.stringify(obj, as_key)
  local s = {}  -- We'll build the string as an array of strings to be concatenated.
  local kind = kind_of(obj)  -- This is 'array' if it's an array or type(obj) otherwise.
  if kind == 'array' then
    if as_key then error('Can\'t encode array as key.') end
    s[#s + 1] = '['
    for i, val in ipairs(obj) do
      if i > 1 then s[#s + 1] = ', ' end
      s[#s + 1] = json.stringify(val)
    end
    s[#s + 1] = ']'
  elseif kind == 'table' then
    if as_key then error('Can\'t encode table as key.') end
    s[#s + 1] = '{'
    for k, v in pairs(obj) do
      -- s already holds '{' plus previous pairs; #s > 1 means "not first pair".
      if #s > 1 then s[#s + 1] = ', ' end
      s[#s + 1] = json.stringify(k, true)
      s[#s + 1] = ':'
      s[#s + 1] = json.stringify(v)
    end
    s[#s + 1] = '}'
  elseif kind == 'string' then
    return '"' .. escape_str(obj) .. '"'
  elseif kind == 'number' then
    if as_key then return '"' .. tostring(obj) .. '"' end
    return tostring(obj)
  elseif kind == 'boolean' then
    return tostring(obj)
  elseif kind == 'nil' then
    return 'null'
  else
    error('Unjsonifiable type: ' .. kind .. '.')
  end
  return table.concat(s)
end

json.null = {}  -- This is a one-off table to represent the null value.
function json.parse(str, pos, end_delim) pos = pos or 1 if pos > #str then error('Reached unexpected end of input.') end local pos = pos + #str:match('^%s*', pos) -- Skip whitespace. local first = str:sub(pos, pos) if first == '{' then -- Parse an object. local obj, key, delim_found = {}, true, true pos = pos + 1 while true do key, pos = json.parse(str, pos, '}') if key == nil then return obj, pos end if not delim_found then error('Comma missing between object items.') end pos = skip_delim(str, pos, ':', true) -- true -> error if missing. obj[key], pos = json.parse(str, pos) pos, delim_found = skip_delim(str, pos, ',') end elseif first == '[' then -- Parse an array. local arr, val, delim_found = {}, true, true pos = pos + 1 while true do val, pos = json.parse(str, pos, ']') if val == nil then return arr, pos end if not delim_found then error('Comma missing between array items.') end arr[#arr + 1] = val pos, delim_found = skip_delim(str, pos, ',') end elseif first == '"' then -- Parse a string. return parse_str_val(str, pos + 1) elseif first == '-' or first:match('%d') then -- Parse a number. return parse_num_val(str, pos) elseif first == end_delim then -- End of an object or array. return nil, pos + 1 else -- Parse true, false, or null. local literals = {['true'] = true, ['false'] = false, ['null'] = json.null} for lit_str, lit_val in pairs(literals) do local lit_end = pos + #lit_str - 1 if str:sub(pos, lit_end) == lit_str then return lit_val, lit_end + 1 end end local pos_info_str = 'position ' .. pos .. ': ' .. str:sub(pos, pos + 10) error('Invalid json syntax starting at ' .. pos_info_str) end end return json end) package.preload['bcc.vendor.helpers'] = (function (...) do local ffi = require("ffi") local ptrtype = ffi.typeof("uint64_t") local strformat = string.format function string.format(format, ...) 
local args = {...} local match_no = 1 local newfmt, count = string.gsub(format, "()%%(.-)(%a)", function(_, mods, t) local n = match_no match_no = match_no + 1 if t == 'p' and ffi.istype(ptrtype, args[n]) then local lo = tonumber(args[n] % 4294967296ULL) local hi = tonumber(args[n] / 4294967296ULL) args[n] = (hi == 0) and strformat("%x", lo) or strformat("%x%08x", hi, lo) return "%"..mods.."s" end end) if count == 0 then return strformat(format, ...) else return strformat(newfmt, unpack(args,1,select('#',...))) end end end function string.starts(s, p) return string.sub(s, 1, string.len(p)) == p end function string.lstrip(s, p) return string.sub(s, string.len(p) + 1) end function string.ends(s, e) return e == '' or string.sub(s, -string.len(e))==e end function string.escape(s) return s:gsub('[%-%.%+%[%]%(%)%$%^%%%?%*]','%%%1') end --- split a string into a list of strings separated by a delimiter. -- @param s The input string -- @param re A Lua string pattern; defaults to '%s+' -- @param plain don't use Lua patterns -- @param n optional maximum number of splits -- @return a list-like table -- @raise error if s is not a string function string.split(s,re,plain,n) local find,sub,append = string.find, string.sub, table.insert local i1,ls = 1,{} if not re then re = '%s+' end if re == '' then return {s} end while true do local i2,i3 = find(s,re,i1,plain) if not i2 then local last = sub(s,i1) if last ~= '' then append(ls,last) end if #ls == 1 and ls[1] == '' then return {} else return ls end end append(ls,sub(s,i1,i2-1)) if n and #ls == n then ls[#ls] = sub(s,i1) return ls end i1 = i3+1 end end function table.count(T) local count = 0 for _ in pairs(T) do count = count + 1 end return count end function table.bsearch(list, value, mkval) local low = 1 local high = #list while low <= high do local mid = math.floor((low+high)/2) local this = mkval and mkval(list[mid]) or list[mid] if this > value then high = mid - 1 elseif this < value then low = mid + 1 else return mid end end 
return low - 1 end

--- Return a new list with all elements of `a` followed by all of `b`.
-- If `b` is nil or empty, `a` itself is returned (not a copy).
function table.join(a, b)
  assert(a)
  if b == nil or #b == 0 then return a end
  local res = {}
  for _, v in ipairs(a) do
    table.insert(res, v)
  end
  for _, v in ipairs(b) do
    table.insert(res, v)
  end
  return res
end

--- Drain an iterator function into a list. Each call's results are
-- collected into a table and passed through `build_fn` (identity by
-- default); iteration stops when the first result is nil.
function table.build(iterator_fn, build_fn)
  build_fn = (build_fn or function(arg) return arg end)
  local res = {}
  while true do
    local vars = {iterator_fn()}
    if vars[1] == nil then break end
    table.insert(res, build_fn(vars))
  end
  return res
end

--- Collect the values of a table into a list (order unspecified).
function table.values(T)
  local V = {}
  for k, v in pairs(T) do
    table.insert(V, v)
  end
  return V
end

--- Iterate over a list of {first, second} pairs, yielding both elements.
function table.tuples(T)
  local i = 0
  -- BUGFIX: the original read the undefined global `t` (via the deprecated
  -- table.getn and `t[i][...]`); with the strict _G metatable installed
  -- below, any call raised "attempt to read undeclared variable t".
  local n = #T
  return function ()
    i = i + 1
    if i <= n then
      return T[i][1], T[i][2]
    end
  end
end

-- "fmt % arg(s)" sugar for string.format, installed on the string metatable.
getmetatable("").__mod = function(a, b)
  if not b then
    return a
  elseif type(b) == "table" then
    return string.format(a, unpack(b))
  else
    return string.format(a, b)
  end
end

--- Check whether `path` can be opened for reading.
function os.exists(path)
  local f = io.open(path, "r")
  if f ~= nil then
    io.close(f)
    return true
  else
    return false
  end
end

--- Run a shell command built with string.format(...) and return its stdout.
function os.spawn(...)
  local cmd = string.format(...)
  local proc = assert(io.popen(cmd))
  local out = proc:read("*a")
  proc:close()
  return out
end

-- Write a formatted message to stderr with a timestamp and the caller's
-- file:line (uses debug.getinfo on the calling frame). No-op unless
-- log.enabled is set.
local function logline(...)
  if not log.enabled then return end

  local c_green = "\27[32m"
  local c_grey = "\27[1;30m"
  local c_clear = "\27[0m"
  local msg = string.format(...)
  local info = debug.getinfo(2, "Sln")
  local line = string.format("%s[%s:%s]%s %s", c_grey,
    info.short_src:match("^.+/(.+)$"), info.currentline, c_clear, info.name)

  io.stderr:write(
    string.format("%s[%s]%s %s: %s\n",
      c_green, os.date("%H:%M:%S"), c_clear, line, msg))
end

-- Strict globals: any read or write of an undeclared global raises.
setmetatable(_G, {
  __newindex = function (_, n)
    error("attempt to write to undeclared variable "..n, 2)
  end,
  __index = function (_, n)
    error("attempt to read undeclared variable "..n, 2)
  end,
})

rawset(_G, "log", { info = logline, enabled = false })
rawset(_G, "class", require("bcc.vendor.middleclass"))
end)
package.preload['bcc.init'] = (function (...)
--[[ Copyright 2016 GitHub, Inc Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ]] require("bcc.vendor.helpers") return { BPF = require("bcc.bpf") } end) package.preload['bcc.run'] = (function (...) --[[ Copyright 2016 GitHub, Inc Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
]] return function() require("bcc.vendor.helpers") local standalone = rawget(_G, "BCC_STANDALONE") local progname = standalone or "bcc-probe" local function print_usage() io.stderr:write(string.format( "usage: %s [[--version|--verbose] --] path_to_script.lua [...]\n", progname)) os.exit(1) end local function print_version() local jit = require("jit") print(string.format("%s %s -- Running on %s (%s/%s)", progname, rawget(_G, "BCC_VERSION") or "HEAD", jit.version, jit.os, jit.arch)) os.exit(0) end while arg[1] and string.starts(arg[1], "-") do local k = table.remove(arg, 1) if k == "--" then break elseif standalone == nil and string.starts(k, "--so-path=") then rawset(_G, "LIBBCC_SO_PATH", string.lstrip(k, "--so-path=")) elseif k == "--llvm-debug" then rawset(_G, "LIBBCC_LLVM_DEBUG", 1) elseif k == "-V" or k == "--verbose" then log.enabled = true elseif k == "-v" or k == "--version" then print_version() else print_usage() end end local tracefile = table.remove(arg, 1) if not tracefile then print_usage() end local BPF = require("bcc.bpf") BPF.script_root(tracefile) local USDT = require("bcc.usdt") local utils = { argparse = require("bcc.vendor.argparse"), posix = require("bcc.vendor.posix"), USDT = USDT, } local command = dofile(tracefile) local res, err = xpcall(command, debug.traceback, BPF, utils) if not res and err ~= "interrupted!" then io.stderr:write("[ERROR] "..err.."\n") end BPF.cleanup() USDT.cleanup() return res, err end end) package.preload['bcc.bpf'] = (function (...) --[[ Copyright 2016 GitHub, Inc Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. ]] local ffi = require("ffi") local libbcc = require("bcc.libbcc") local TracerPipe = require("bcc.tracerpipe") local Table = require("bcc.table") local Sym = require("bcc.sym") local Bpf = class("BPF") Bpf.static.open_kprobes = {} Bpf.static.open_uprobes = {} Bpf.static.perf_buffers = {} Bpf.static.KPROBE_LIMIT = 1000 Bpf.static.tracer_pipe = nil Bpf.static.DEFAULT_CFLAGS = { '-D__HAVE_BUILTIN_BSWAP16__', '-D__HAVE_BUILTIN_BSWAP32__', '-D__HAVE_BUILTIN_BSWAP64__', } function Bpf.static.check_probe_quota(n) local cur = table.count(Bpf.static.open_kprobes) + table.count(Bpf.static.open_uprobes) assert(cur + n <= Bpf.static.KPROBE_LIMIT, "number of open probes would exceed quota") end function Bpf.static.cleanup() local function detach_all(probe_type, all_probes) for key, fd in pairs(all_probes) do libbcc.bpf_close_perf_event_fd(fd) -- skip bcc-specific kprobes if not key:starts("bcc:") then if probe_type == "kprobes" then libbcc.bpf_detach_kprobe(key) elseif probe_type == "uprobes" then libbcc.bpf_detach_uprobe(key) end end all_probes[key] = nil end end detach_all("kprobes", Bpf.static.open_kprobes) detach_all("uprobes", Bpf.static.open_uprobes) for key, perf_buffer in pairs(Bpf.static.perf_buffers) do libbcc.perf_reader_free(perf_buffer) Bpf.static.perf_buffers[key] = nil end if Bpf.static.tracer_pipe ~= nil then Bpf.static.tracer_pipe:close() end end function Bpf.static.SymbolCache(pid) return Sym.create_cache(pid) end function Bpf.static.num_open_uprobes() return table.count(Bpf.static.open_uprobes) end function Bpf.static.num_open_kprobes() return table.count(Bpf.static.open_kprobes) end Bpf.static.SCRIPT_ROOT = "./" function Bpf.static.script_root(root) local dir, file = root:match'(.*/)(.*)' Bpf.static.SCRIPT_ROOT = dir or "./" return Bpf end local function _find_file(script_root, filename) if filename == nil then return nil end if os.exists(filename) then return 
filename end if not filename:starts("/") then filename = script_root .. filename if os.exists(filename) then return filename end end assert(nil, "failed to find file "..filename.." (root="..script_root..")") end function Bpf:initialize(args) self.funcs = {} self.tables = {} if args.usdt and args.text then args.text = args.usdt:_get_text() .. args.text end local cflags = table.join(Bpf.DEFAULT_CFLAGS, args.cflags) local cflags_ary = ffi.new("const char *[?]", #cflags, cflags) local llvm_debug = rawget(_G, "LIBBCC_LLVM_DEBUG") or args.debug or 0 assert(type(llvm_debug) == "number") if args.text then log.info("\n%s\n", args.text) self.module = libbcc.bpf_module_create_c_from_string(args.text, llvm_debug, cflags_ary, #cflags, true) elseif args.src_file then local src = _find_file(Bpf.SCRIPT_ROOT, args.src_file) if src:ends(".b") then local hdr = _find_file(Bpf.SCRIPT_ROOT, args.hdr_file) self.module = libbcc.bpf_module_create_b(src, hdr, llvm_debug) else self.module = libbcc.bpf_module_create_c(src, llvm_debug, cflags_ary, #cflags, true) end end assert(self.module ~= nil, "failed to compile BPF module") if args.usdt then args.usdt:_attach_uprobes(self) end end function Bpf:load_funcs(prog_type) prog_type = prog_type or "BPF_PROG_TYPE_KPROBE" local result = {} local fn_count = tonumber(libbcc.bpf_num_functions(self.module)) for i = 0,fn_count-1 do local name = ffi.string(libbcc.bpf_function_name(self.module, i)) table.insert(result, self:load_func(name, prog_type)) end return result end function Bpf:load_func(fn_name, prog_type) if self.funcs[fn_name] ~= nil then return self.funcs[fn_name] end assert(libbcc.bpf_function_start(self.module, fn_name) ~= nil, "unknown program: "..fn_name) local fd = libbcc.bcc_prog_load(prog_type, fn_name, libbcc.bpf_function_start(self.module, fn_name), libbcc.bpf_function_size(self.module, fn_name), libbcc.bpf_module_license(self.module), libbcc.bpf_module_kern_version(self.module), 0, nil, 0) assert(fd >= 0, "failed to load BPF program 
"..fn_name) log.info("loaded %s (%d)", fn_name, fd) local fn = {bpf=self, name=fn_name, fd=fd} self.funcs[fn_name] = fn return fn end function Bpf:dump_func(fn_name) local start = libbcc.bpf_function_start(self.module, fn_name) assert(start ~= nil, "unknown program") local len = libbcc.bpf_function_size(self.module, fn_name) return ffi.string(start, tonumber(len)) end function Bpf:attach_uprobe(args) Bpf.check_probe_quota(1) local path, addr = Sym.check_path_symbol(args.name, args.sym, args.addr, args.pid, args.sym_off) local fn = self:load_func(args.fn_name, 'BPF_PROG_TYPE_KPROBE') local ptype = args.retprobe and "r" or "p" local ev_name = string.format("%s_%s_0x%p", ptype, path:gsub("[^%a%d]", "_"), addr) local retprobe = args.retprobe and 1 or 0 local res = libbcc.bpf_attach_uprobe(fn.fd, retprobe, ev_name, path, addr, args.pid or -1) assert(res >= 0, "failed to attach BPF to uprobe") self:probe_store("uprobe", ev_name, res) return self end function Bpf:attach_kprobe(args) -- TODO: allow the caller to glob multiple functions together Bpf.check_probe_quota(1) local fn = self:load_func(args.fn_name, 'BPF_PROG_TYPE_KPROBE') local event = args.event or "" local ptype = args.retprobe and "r" or "p" local ev_name = string.format("%s_%s", ptype, event:gsub("[%+%.]", "_")) local offset = args.fn_offset or 0 local retprobe = args.retprobe and 1 or 0 local maxactive = args.maxactive or 0 local res = libbcc.bpf_attach_kprobe(fn.fd, retprobe, ev_name, event, offset, maxactive) assert(res >= 0, "failed to attach BPF to kprobe") self:probe_store("kprobe", ev_name, res) return self end function Bpf:pipe() if Bpf.tracer_pipe == nil then Bpf.tracer_pipe = TracerPipe:new() end return Bpf.tracer_pipe end function Bpf:get_table(name, key_type, leaf_type) if self.tables[name] == nil then self.tables[name] = Table(self, name, key_type, leaf_type) end return self.tables[name] end function Bpf:probe_store(t, id, fd) if t == "kprobe" then Bpf.open_kprobes[id] = fd elseif t == "uprobe" 
then Bpf.open_uprobes[id] = fd else error("unknown probe type '%s'" % t) end log.info("%s -> %s", id, fd) end function Bpf:perf_buffer_store(id, reader) Bpf.perf_buffers[id] = reader log.info("%s -> %s", id, reader) end function Bpf:probe_lookup(t, id) if t == "kprobe" then return Bpf.open_kprobes[id] elseif t == "uprobe" then return Bpf.open_uprobes[id] else return nil end end function Bpf:_perf_buffer_array() local perf_buffer_count = table.count(Bpf.perf_buffers) local readers = ffi.new("struct perf_reader*[?]", perf_buffer_count) local n = 0 for _, r in pairs(Bpf.perf_buffers) do readers[n] = r n = n + 1 end assert(n == perf_buffer_count) return readers, n end function Bpf:perf_buffer_poll_loop() local perf_buffers, perf_buffer_count = self:_perf_buffer_array() return pcall(function() while true do libbcc.perf_reader_poll(perf_buffer_count, perf_buffers, -1) end end) end function Bpf:kprobe_poll_loop() return self:perf_buffer_poll_loop() end function Bpf:perf_buffer_poll(timeout) local perf_buffers, perf_buffer_count = self:_perf_buffer_array() libbcc.perf_reader_poll(perf_buffer_count, perf_buffers, timeout or -1) end function Bpf:kprobe_poll(timeout) self:perf_buffer_poll(timeout) end return Bpf end) package.preload['bcc.sym'] = (function (...) --[[ Copyright 2016 GitHub, Inc Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
]] local ffi = require("ffi") local libbcc = require("bcc.libbcc") local SYM = ffi.typeof("struct bcc_symbol[1]") local function create_cache(pid) return { _CACHE = libbcc.bcc_symcache_new(pid or -1, nil), resolve = function(self, addr) local sym = SYM() if libbcc.bcc_symcache_resolve(self._CACHE, addr, sym) < 0 then return "[unknown]", 0x0 end local name_res = ffi.string(sym[0].demangle_name) libbcc.bcc_symbol_free_demangle_name(sym); return name_res, sym[0].offset end } end local function check_path_symbol(module, symname, addr, pid, sym_off) local sym = SYM() local module_path local new_addr if libbcc.bcc_resolve_symname(module, symname, addr or 0x0, pid or 0, nil, sym) < 0 then if sym[0].module == nil then error("could not find library '%s' in the library path" % module) else module_path = ffi.string(sym[0].module) libbcc.bcc_procutils_free(sym[0].module) error("failed to resolve symbol '%s' in '%s'" % { symname, module_path}) end end new_addr = sym[0].offset + (sym_off or 0) module_path = ffi.string(sym[0].module) libbcc.bcc_procutils_free(sym[0].module) return module_path, new_addr end return { create_cache=create_cache, check_path_symbol=check_path_symbol } end) package.preload['bcc.libbcc'] = (function (...) --[[ Copyright 2016 GitHub, Inc Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
]] local ffi = require("ffi") ffi.cdef[[ enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, BPF_PROG_TYPE_SOCKET_FILTER, BPF_PROG_TYPE_KPROBE, BPF_PROG_TYPE_SCHED_CLS, BPF_PROG_TYPE_SCHED_ACT, }; int bcc_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, int map_flags); int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags); int bpf_lookup_elem(int fd, void *key, void *value); int bpf_delete_elem(int fd, void *key); int bpf_get_next_key(int fd, void *key, void *next_key); int bcc_prog_load(enum bpf_prog_type prog_type, const char *name, const struct bpf_insn *insns, int insn_len, const char *license, unsigned kern_version, int log_level, char *log_buf, unsigned log_buf_size); int bpf_attach_socket(int sockfd, int progfd); /* create RAW socket and bind to interface 'name' */ int bpf_open_raw_sock(const char *name); typedef void (*perf_reader_raw_cb)(void *cb_cookie, void *raw, int raw_size); typedef void (*perf_reader_lost_cb)(void *cb_cookie, uint64_t lost); int bpf_attach_kprobe(int progfd, int attach_type, const char *ev_name, const char *fn_name, uint64_t fn_offset, int maxactive); int bpf_detach_kprobe(const char *ev_name); int bpf_attach_uprobe(int progfd, int attach_type, const char *ev_name, const char *binary_path, uint64_t offset, int pid); int bpf_detach_uprobe(const char *ev_name); void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb, perf_reader_lost_cb lost_cb, void *cb_cookie, int pid, int cpu, int page_cnt); int bpf_close_perf_event_fd(int fd); ]] ffi.cdef[[ void * bpf_module_create_b(const char *filename, const char *proto_filename, unsigned flags); void * bpf_module_create_c(const char *filename, unsigned flags, const char *cflags[], int ncflags, bool allow_rlimit); void * bpf_module_create_c_from_string(const char *text, unsigned flags, const char *cflags[], int ncflags, bool allow_rlimit); void bpf_module_destroy(void *program); char * bpf_module_license(void *program); unsigned 
bpf_module_kern_version(void *program); size_t bpf_num_functions(void *program); const char * bpf_function_name(void *program, size_t id); void * bpf_function_start_id(void *program, size_t id); void * bpf_function_start(void *program, const char *name); size_t bpf_function_size_id(void *program, size_t id); size_t bpf_function_size(void *program, const char *name); size_t bpf_num_tables(void *program); size_t bpf_table_id(void *program, const char *table_name); int bpf_table_fd(void *program, const char *table_name); int bpf_table_fd_id(void *program, size_t id); int bpf_table_type(void *program, const char *table_name); int bpf_table_type_id(void *program, size_t id); size_t bpf_table_max_entries(void *program, const char *table_name); size_t bpf_table_max_entries_id(void *program, size_t id); int bpf_table_flags(void *program, const char *table_name); int bpf_table_flags_id(void *program, size_t id); const char * bpf_table_name(void *program, size_t id); const char * bpf_table_key_desc(void *program, const char *table_name); const char * bpf_table_key_desc_id(void *program, size_t id); const char * bpf_table_leaf_desc(void *program, const char *table_name); const char * bpf_table_leaf_desc_id(void *program, size_t id); size_t bpf_table_key_size(void *program, const char *table_name); size_t bpf_table_key_size_id(void *program, size_t id); size_t bpf_table_leaf_size(void *program, const char *table_name); size_t bpf_table_leaf_size_id(void *program, size_t id); int bpf_table_key_snprintf(void *program, size_t id, char *buf, size_t buflen, const void *key); int bpf_table_leaf_snprintf(void *program, size_t id, char *buf, size_t buflen, const void *leaf); int bpf_table_key_sscanf(void *program, size_t id, const char *buf, void *key); int bpf_table_leaf_sscanf(void *program, size_t id, const char *buf, void *leaf); ]] ffi.cdef[[ struct perf_reader; void perf_reader_free(void *ptr); int perf_reader_mmap(struct perf_reader *reader); int perf_reader_poll(int 
num_readers, struct perf_reader **readers, int timeout); int perf_reader_fd(struct perf_reader *reader); void perf_reader_set_fd(struct perf_reader *reader, int fd); ]] ffi.cdef[[ struct bcc_symbol { const char *name; const char *demangle_name; const char *module; uint64_t offset; }; struct bcc_symbol_option { int use_debug_file; int check_debug_file_crc; int lazy_symbolize; uint32_t use_symbol_type; }; int bcc_resolve_symname(const char *module, const char *symname, const uint64_t addr, int pid, struct bcc_symbol_option *option, struct bcc_symbol *sym); void bcc_procutils_free(const char *ptr); void *bcc_symcache_new(int pid, struct bcc_symbol_option *option); void bcc_symbol_free_demangle_name(struct bcc_symbol *sym); int bcc_symcache_resolve(void *symcache, uint64_t addr, struct bcc_symbol *sym); void bcc_symcache_refresh(void *resolver); ]] ffi.cdef[[ void *bcc_usdt_new_frompid(int pid); void *bcc_usdt_new_frompath(const char *path); void bcc_usdt_close(void *usdt); int bcc_usdt_enable_probe(void *, const char *, const char *); char *bcc_usdt_genargs(void *); typedef void (*bcc_usdt_uprobe_cb)(const char *, const char *, uint64_t, int); void bcc_usdt_foreach_uprobe(void *usdt, bcc_usdt_uprobe_cb callback); ]] if rawget(_G, "BCC_STANDALONE") then return ffi.C else return ffi.load( os.getenv("LIBBCC_SO_PATH") or rawget(_G, "LIBBCC_SO_PATH") or "bcc") end end) package.preload['bcc.tracerpipe'] = (function (...) --[[ Copyright 2016 GitHub, Inc Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
]]

local TracerPipe = class("TracerPipe")

TracerPipe.static.TRACEFS = "/sys/kernel/debug/tracing"
-- Pattern matching one trace_pipe line: task-pid [cpu] flags timestamp: ...: msg
TracerPipe.static.fields = "%s+(.-)%-(%d+)%s+%[(%d+)%]%s+(....)%s+([%d%.]+):.-:%s+(.+)"

-- Close the trace_pipe handle if it was opened.
function TracerPipe:close()
  if self.pipe ~= nil then
    self.pipe:close()
  end
end

-- Lazily open the kernel trace_pipe and cache the file handle.
function TracerPipe:open()
  if self.pipe == nil then
    self.pipe = assert(io.open(TracerPipe.TRACEFS .. "/trace_pipe"))
  end
  return self.pipe
end

function TracerPipe:readline()
  return self:open():read()
end

-- Read lines until one matches the trace format, then return its parsed
-- fields: task, pid, cpu, flags, timestamp, message.
function TracerPipe:trace_fields()
  while true do
    local line = self:readline()
    if line == nil then
      -- BUGFIX: the original only returned on a nil line in nonblocking
      -- mode and then fell through to line:starts(), indexing a nil value
      -- in blocking mode. Handle EOF explicitly for both modes (with a
      -- clear error instead of a nil-index crash when blocking).
      if self.nonblocking then return nil end
      error("unexpected end of trace_pipe stream")
    end
    if not line:starts("CPU:") then
      local task, pid, cpu, flags, ts, msg = line:match(TracerPipe.fields)
      if task ~= nil then
        return task, tonumber(pid), tonumber(cpu), flags, tonumber(ts), msg
      end
    end
  end
end

function TracerPipe:initialize(nonblocking)
  self.nonblocking = nonblocking
end

return TracerPipe
end)
package.preload['bcc.table'] = (function (...)
--[[
Copyright 2016 GitHub, Inc

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]

-- bcc.table: Lua wrappers over BPF maps exposed by libbcc.
-- (Tail of the module; the preload header and license open before this chunk.)

local ffi = require("ffi")
local libbcc = require("bcc.libbcc")
local Posix = require("bcc.vendor.posix")

-- Base class for all BPF table (map) wrappers. `class` is provided by the
-- class library bundled elsewhere in this file.
local BaseTable = class("BaseTable")

-- BPF map type ids (mirror enum bpf_map_type in the kernel headers).
BaseTable.static.BPF_MAP_TYPE_HASH = 1
BaseTable.static.BPF_MAP_TYPE_ARRAY = 2
BaseTable.static.BPF_MAP_TYPE_PROG_ARRAY = 3
BaseTable.static.BPF_MAP_TYPE_PERF_EVENT_ARRAY = 4
BaseTable.static.BPF_MAP_TYPE_PERCPU_HASH = 5
BaseTable.static.BPF_MAP_TYPE_PERCPU_ARRAY = 6
BaseTable.static.BPF_MAP_TYPE_STACK_TRACE = 7
BaseTable.static.BPF_MAP_TYPE_CGROUP_ARRAY = 8
BaseTable.static.BPF_MAP_TYPE_LRU_HASH = 9
BaseTable.static.BPF_MAP_TYPE_LRU_PERCPU_HASH = 10
BaseTable.static.BPF_MAP_TYPE_LPM_TRIE = 11

--- Bind this wrapper to an existing BPF map.
-- @param t_type    expected map type id (validated against the module)
-- @param bpf       owning BPF object (provides .module)
-- @param map_id    table id inside the compiled module
-- @param map_fd    file descriptor of the map
-- @param key_type  C type name of the key
-- @param leaf_type C type name of the value
function BaseTable:initialize(t_type, bpf, map_id, map_fd, key_type, leaf_type)
  assert(t_type == libbcc.bpf_table_type_id(bpf.module, map_id))
  self.t_type = t_type
  self.bpf = bpf
  self.map_id = map_id
  self.map_fd = map_fd
  -- One-element array ctypes so keys/values can be passed by reference to C.
  self.c_key = ffi.typeof(key_type.."[1]")
  self.c_leaf = ffi.typeof(leaf_type.."[1]")
end

--- Render a key as a human-readable string via libbcc's type printer.
function BaseTable:key_sprintf(key)
  local pkey = self.c_key(key)
  -- 8 bytes of text per key byte is the buffer headroom used throughout.
  local buf_len = ffi.sizeof(self.c_key) * 8
  local pbuf = ffi.new("char[?]", buf_len)
  local res = libbcc.bpf_table_key_snprintf(
    self.bpf.module, self.map_id, pbuf, buf_len, pkey)
  assert(res == 0, "could not print key")
  return ffi.string(pbuf)
end

--- Render a value (leaf) as a human-readable string.
function BaseTable:leaf_sprintf(leaf)
  local pleaf = self.c_leaf(leaf)
  local buf_len = ffi.sizeof(self.c_leaf) * 8
  local pbuf = ffi.new("char[?]", buf_len)
  local res = libbcc.bpf_table_leaf_snprintf(
    self.bpf.module, self.map_id, pbuf, buf_len, pleaf)
  assert(res == 0, "could not print leaf")
  return ffi.string(pbuf)
end

--- Parse a key from its string form; returns the key as cdata.
function BaseTable:key_scanf(key_str)
  local pkey = self.c_key()
  local res = libbcc.bpf_table_key_sscanf(
    self.bpf.module, self.map_id, key_str, pkey)
  assert(res == 0, "could not scanf key")
  return pkey[0]
end

--- Parse a value (leaf) from its string form; returns the value as cdata.
function BaseTable:leaf_scanf(leaf_str)
  local pleaf = self.c_leaf()
  local res = libbcc.bpf_table_leaf_sscanf(
    self.bpf.module, self.map_id, leaf_str, pleaf)
  assert(res == 0, "could not scanf leaf")
  return pleaf[0]
end

--- Look up a key; returns the value, or nil if the key is absent.
function BaseTable:get(key)
  local pkey = self.c_key(key)
  local pvalue = self.c_leaf()
  if libbcc.bpf_lookup_elem(self.map_fd, pkey, pvalue) < 0 then
    return nil
  end
  return pvalue[0]
end

--- Insert or overwrite a key/value pair (flags=0, i.e. BPF_ANY).
function BaseTable:set(key, value)
  local pkey = self.c_key(key)
  local pvalue = self.c_leaf(value)
  assert(libbcc.bpf_update_elem(self.map_fd, pkey, pvalue, 0) == 0,
    "could not update table")
end

--- Find a key value that is NOT present in the map, to seed get-next-key
-- iteration. Tries keys filled with 0x00, 0x55 and 0xff byte patterns.
function BaseTable:_empty_key()
  local pkey = self.c_key()
  local pvalue = self.c_leaf()
  for _, v in ipairs({0x0, 0x55, 0xff}) do
    ffi.fill(pkey, ffi.sizeof(pkey[0]), v)
    if libbcc.bpf_lookup_elem(self.map_fd, pkey, pvalue) < 0 then
      return pkey
    end
  end
  error("failed to find an empty key for table iteration")
end

--- Iterator over all keys: `for k in t:keys() do ... end`.
function BaseTable:keys()
  local pkey = self:_empty_key()
  return function()
    local pkey_next = self.c_key()
    if libbcc.bpf_get_next_key(self.map_fd, pkey, pkey_next) < 0 then
      return nil
    end
    pkey = pkey_next
    return pkey[0]
  end
end

--- Iterator over all (key, value) pairs: `for k, v in t:items() do ... end`.
function BaseTable:items()
  local pkey = self:_empty_key()
  return function()
    local pkey_next = self.c_key()
    local pvalue = self.c_leaf()
    if libbcc.bpf_get_next_key(self.map_fd, pkey, pkey_next) < 0 then
      return nil
    end
    pkey = pkey_next
    assert(libbcc.bpf_lookup_elem(self.map_fd, pkey, pvalue) == 0)
    return pkey[0], pvalue[0]
  end
end

-- Wrapper for BPF_MAP_TYPE_HASH maps.
local HashTable = class("HashTable", BaseTable)

function HashTable:initialize(bpf, map_id, map_fd, key_type, leaf_type)
  BaseTable.initialize(self, BaseTable.BPF_MAP_TYPE_HASH,
    bpf, map_id, map_fd, key_type, leaf_type)
end

--- Remove a key; returns true on success, false if the key was absent.
function HashTable:delete(key)
  local pkey = self.c_key(key)
  return libbcc.bpf_delete_elem(self.map_fd, pkey) == 0
end

--- Count entries by walking all keys (O(n) — BPF maps expose no size call).
-- BUGFIX: the original called self:each(), which is not defined anywhere in
-- this module and would raise "attempt to call method 'each' (a nil value)".
function HashTable:size()
  local n = 0
  for _ in self:keys() do
    n = n + 1
  end
  return n
end

-- Common base for fixed-size, integer-indexed map types.
local BaseArray = class("BaseArray", BaseTable)

function BaseArray:initialize(t_type, bpf, map_id, map_fd, key_type, leaf_type)
  BaseTable.initialize(self, t_type, bpf, map_id, map_fd, key_type, leaf_type)
  self.max_entries = tonumber(
    libbcc.bpf_table_max_entries_id(self.bpf.module, self.map_id))
end

--- Validate an index and map negative indices (Python-style) to the end of
-- the array. Returns the 0-based index actually used by the kernel.
function BaseArray:_normalize_key(key)
  -- BUGFIX: error message had an unbalanced parenthesis.
  assert(type(key) == "number", "invalid key (expected a number)")
  if key < 0 then
    key = self.max_entries + key
  end
  assert(key < self.max_entries,
    string.format("out of range (%d >= %d)", key, self.max_entries))
  return key
end

function BaseArray:get(key)
  return BaseTable.get(self, self:_normalize_key(key))
end

function BaseArray:set(key, value)
  return BaseTable.set(self, self:_normalize_key(key), value)
end

-- Array maps do not support element deletion.
function BaseArray:delete(key)
  assert(nil, "unsupported")
end

--- Iterate values in index order; stops at the first failed lookup or at
-- max_entries. With with_index=true yields (1-based index, value) pairs.
function BaseArray:items(with_index)
  local pkey = self.c_key()
  local max = self.max_entries
  local n = 0 -- TODO
  return function()
    local pvalue = self.c_leaf()
    if n == max then return nil end
    pkey[0] = n
    n = n + 1
    if libbcc.bpf_lookup_elem(self.map_fd, pkey, pvalue) ~= 0 then
      return nil
    end
    if with_index then
      return n, pvalue[0] -- return 1-based index
    else
      return pvalue[0]
    end
  end
end

-- Wrapper for BPF_MAP_TYPE_ARRAY maps.
local Array = class("Array", BaseArray)

function Array:initialize(bpf, map_id, map_fd, key_type, leaf_type)
  BaseArray.initialize(self, BaseTable.BPF_MAP_TYPE_ARRAY,
    bpf, map_id, map_fd, key_type, leaf_type)
end

-- Wrapper for BPF_MAP_TYPE_PERF_EVENT_ARRAY maps (perf ring buffers).
local PerfEventArray = class("PerfEventArray", BaseArray)

function PerfEventArray:initialize(bpf, map_id, map_fd, key_type, leaf_type)
  BaseArray.initialize(self, BaseTable.BPF_MAP_TYPE_PERF_EVENT_ARRAY,
    bpf, map_id, map_fd, key_type, leaf_type)
  -- Keeps a Lua reference to each FFI callback so it isn't collected while
  -- the C side still holds a pointer to it.
  self._callbacks = {}
end

-- Stable identifier for the reader of one (map, cpu) pair.
local function _perf_id(id, cpu)
  return string.format("bcc:perf_event_array:%d:%d", tonumber(id), cpu or 0)
end

--- Open one per-CPU perf buffer and wire its fd into this map at index cpu.
-- @param cpu      CPU index (0-based)
-- @param callback invoked as callback(cpu, decoded_event) per sample
-- @param ctype    FFI pointer ctype used to decode raw sample data
-- @param page_cnt ring size in pages (default 8)
-- @param lost_cb  optional callback(cookie, n) for lost-sample counts
function PerfEventArray:_open_perf_buffer(cpu, callback, ctype, page_cnt, lost_cb)
  local _cb = ffi.cast("perf_reader_raw_cb",
    function (cookie, data, size)
      callback(cpu, ctype(data)[0])
    end)

  local _lost_cb = nil
  if lost_cb then
    _lost_cb = ffi.cast("perf_reader_lost_cb",
      function (cookie, lost)
        lost_cb(cookie, lost)
      end)
  end

  -- default to 8 pages per buffer
  local reader = libbcc.bpf_open_perf_buffer(_cb, _lost_cb, nil, -1, cpu, page_cnt or 8)
  assert(reader, "failed to open perf buffer")

  local fd = libbcc.perf_reader_fd(reader)
  self:set(cpu, fd)
  self.bpf:perf_buffer_store(_perf_id(self.map_id, cpu), reader)
  self._callbacks[cpu] = _cb
end

--- Open a perf buffer on every online CPU.
-- data_type is the C type name of the event struct; data_params are optional
-- ffi.typeof parameters (e.g. VLA sizes).
function PerfEventArray:open_perf_buffer(callback, data_type, data_params, page_cnt, lost_cb)
  assert(data_type, "a data type is needed for callback conversion")
  local ctype = ffi.typeof(data_type.."*", unpack(data_params or {}))
  for i = 0, Posix.cpu_count() - 1 do
    self:_open_perf_buffer(i, callback, ctype, page_cnt, lost_cb)
  end
end

-- Wrapper for BPF_MAP_TYPE_STACK_TRACE maps.
local StackTrace = class("StackTrace", BaseTable)

-- Maximum number of frames stored per stack id.
StackTrace.static.MAX_STACK = 127

function StackTrace:initialize(bpf, map_id, map_fd, key_type, leaf_type)
  BaseTable.initialize(self, BaseTable.BPF_MAP_TYPE_STACK_TRACE,
    bpf, map_id, map_fd, key_type, leaf_type)
  self._stackp = self.c_leaf() -- FIXME: not threadsafe
end

--- Iterator over the instruction pointers of stack `id`, or nil when the
-- id is not present in the map.
function StackTrace:walk(id)
  local pkey = self.c_key(id)
  local pstack = self._stackp
  local i = 0

  if libbcc.bpf_lookup_elem(self.map_fd, pkey, pstack) < 0 then
    return nil
  end

  return function()
    if i >= StackTrace.MAX_STACK then return nil end
    local addr = pstack[0].ip[i]
    if addr == 0 then return nil end
    i = i + 1
    return addr
  end
end

--- Return stack `id` as a Lua list of addresses, optionally mapped through
-- `resolver`. Returns an empty list when the id cannot be found.
function StackTrace:get(id, resolver)
  local stack = {}
  local it = self:walk(id)
  -- BUGFIX: walk() returns nil on lookup failure; the original iterated it
  -- unconditionally and crashed with "attempt to call a nil value".
  if it then
    for addr in it do
      -- NOTE: if resolver returns nil/false the raw address is kept.
      table.insert(stack, resolver and resolver(addr) or addr)
    end
  end
  return stack
end

--- Turn libbcc's JSON type description into a C declaration string usable
-- with ffi.typeof (e.g. "struct { int a; char b[4]; }").
local function _decode_table_type(desc)
  local json = require("bcc.vendor.json")
  local json_desc = ffi.string(desc)

  local function _dec(t)
    -- Leaves are plain C type names.
    if type(t) == "string" then return t end

    local fields = {}
    local struct = t[3] or "struct"

    for _, value in ipairs(t[2]) do
      local f = nil
      if #value == 2 then
        -- {name, type}: plain field
        f = string.format("%s %s;", _dec(value[2]), value[1])
      elseif #value == 3 then
        if type(value[3]) == "table" then
          -- {name, type, {n}}: array field
          f = string.format("%s %s[%d];", _dec(value[2]), value[1], value[3][1])
        elseif type(value[3]) == "number" then
          -- {name, type, bits}: bitfield
          local base_type = _dec(value[2])
          assert(base_type == "int" or base_type == "unsigned int",
            "bitfields can only appear in [unsigned] int types")
          f = string.format("%s %s:%d;", base_type, value[1], value[3])
        end
      end
      assert(f ~= nil, "failed to decode type "..json_desc)
      table.insert(fields, f)
    end

    assert(struct == "struct" or struct == "struct_packed" or struct == "union",
      "unknown complex type: "..struct)
    -- NOTE(review): "struct_packed" is emitted as a plain struct here; the
    -- packing attribute is presumably irrelevant at this layer -- confirm.
    if struct == "union" then
      return string.format("union { %s }", table.concat(fields, " "))
    else
      return string.format("struct { %s }", table.concat(fields, " "))
    end
  end

  return _dec(json.parse(json_desc))
end

--- Factory: build the right wrapper object for the named table in `bpf`,
-- or nil when the module has no such table. key_type/leaf_type override the
-- declared C types; when nil they are decoded from the module's metadata.
local function NewTable(bpf, name, key_type, leaf_type)
  local id = libbcc.bpf_table_id(bpf.module, name)
  local fd = libbcc.bpf_table_fd(bpf.module, name)
  if fd < 0 then return nil end

  local t_type = libbcc.bpf_table_type_id(bpf.module, id)
  -- Renamed from `table` to avoid shadowing the string/table stdlib.
  local Table = nil

  if t_type == BaseTable.BPF_MAP_TYPE_HASH then
    Table = HashTable
  elseif t_type == BaseTable.BPF_MAP_TYPE_ARRAY then
    Table = Array
  elseif t_type == BaseTable.BPF_MAP_TYPE_PERF_EVENT_ARRAY then
    Table = PerfEventArray
  elseif t_type == BaseTable.BPF_MAP_TYPE_STACK_TRACE then
    Table = StackTrace
  end

  -- BUGFIX: the original used Python-style `"..." % t_type`, which is not a
  -- valid Lua operation on strings and would itself error when triggered.
  assert(Table, string.format("unsupported table type %d", tonumber(t_type)))

  if key_type == nil then
    local desc = libbcc.bpf_table_key_desc(bpf.module, name)
    assert(desc, "Failed to load BPF table description for "..name)
    key_type = _decode_table_type(desc)
  end

  if leaf_type == nil then
    local desc = libbcc.bpf_table_leaf_desc(bpf.module, name)
    assert(desc, "Failed to load BPF table description for "..name)
    leaf_type = _decode_table_type(desc)
  end

  -- `log` is a global provided elsewhere in this bundle.
  log.info("key = %s value = %s", key_type, leaf_type)
  return Table:new(bpf, id, fd, key_type, leaf_type)
end

return NewTable
end)
package.preload['bcc.usdt'] = (function (...) --[[
Copyright 2016 GitHub, Inc

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License. ]]

-- bcc.usdt: wrapper around libbcc's USDT (user statically-defined tracing)
-- context API.

local ffi = require("ffi")
local libbcc = require("bcc.libbcc")

local Usdt = class("USDT")

-- All live USDT contexts, so they can be torn down together via cleanup().
Usdt.static.open_contexts = {}

-- Close every context created so far.
function Usdt.static.cleanup()
  for _, context in ipairs(Usdt.static.open_contexts) do
    context:_cleanup()
  end
end

--- Create a USDT context from a live process (args.pid) or an on-disk
-- binary (args.path). At least one of the two must be given; pid wins
-- when both are present.
function Usdt:initialize(args)
  assert(args.pid or args.path)

  if args.pid then
    self.pid = args.pid
    self.context = libbcc.bcc_usdt_new_frompid(args.pid)
  elseif args.path then
    self.path = args.path
    self.context = libbcc.bcc_usdt_new_frompath(args.path)
  end

  assert(self.context ~= nil, "failed to create USDT context")
  -- NOTE(review): Usdt.open_contexts presumably resolves to the same table
  -- as Usdt.static.open_contexts through the class library's static lookup
  -- -- confirm against the bundled class implementation.
  table.insert(Usdt.open_contexts, self)
end

-- Attach the BPF function args.fn_name to the USDT probe args.probe.
function Usdt:enable_probe(args)
  assert(args.probe and args.fn_name)
  assert(libbcc.bcc_usdt_enable_probe(
    self.context, args.probe, args.fn_name) == 0)
end

-- Release the underlying C context; the wrapper must not be used afterwards.
function Usdt:_cleanup()
  libbcc.bcc_usdt_close(self.context)
  self.context = nil
end

-- Return libbcc's generated argument-access code for this context as a Lua
-- string. (The local is named `argc` but holds the generated text returned
-- by bcc_usdt_genargs, not a count.)
function Usdt:_get_text()
  local argc = libbcc.bcc_usdt_genargs(self.context)
  assert(argc ~= nil)
  return ffi.string(argc)
end

-- Collect every uprobe location registered in this context, then attach
-- them all through the given bpf object.
function Usdt:_attach_uprobes(bpf)
  local uprobes = {}
  -- The FFI callback only records locations; attachment happens after the
  -- foreach completes, which is why cb can be freed before the attach loop.
  local cb = ffi.cast("bcc_usdt_uprobe_cb",
    function(binpath, fn_name, addr, pid)
      table.insert(uprobes, {name=ffi.string(binpath), addr=addr,
        fn_name=ffi.string(fn_name), pid=pid})
    end)

  libbcc.bcc_usdt_foreach_uprobe(self.context, cb)
  cb:free()

  for _, args in ipairs(uprobes) do
    bpf:attach_uprobe(args)
  end
end

return Usdt
end)
package.preload['bpf'] = (function (...) --[[
Copyright 2016 Marek Vavrusa

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License. ]] return require('bpf.bpf') end) package.preload['bpf.bpf'] = (function (...) --[[ Copyright 2016 Marek Vavrusa Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ]] -- LuaJIT to BPF bytecode compiler. -- -- The code generation phase is currently one-pass and produces: -- * Compiled code in BPF bytecode format (https://www.kernel.org/doc/Documentation/networking/filter.txt) -- * Variables with liveness analysis and other meta (spill information, compile-time value) -- -- The code generator optimises as much as possible in single pass: -- * Fold compile-time expressions and constant propagation -- * Basic control flow analysis with dead code elimination (based on compile-time expressions) -- * Single-pass optimistic register allocation -- -- The first pass doesn't have variable lifetime visibility yet, so it relies on rewriter for further -- optimisations such as: -- * Dead store elimination (first-pass doesn't know if/when the variable is going to be used) -- * Common sub-expression elimination (relies on DCE and liveness analysis) -- * Orphan JMP elimination (removing this in first pass would break previous JMP targets) -- * Better register allocation (needs to be recomputed after optimisations) local ffi = require('ffi') local bit = require('bit') local S = require('syscall') local bytecode = require('bpf.ljbytecode') local cdef = require('bpf.cdef') local proto = require('bpf.proto') local builtins = 
require('bpf.builtins') -- Constants local ALWAYS, NEVER = -1, -2 local BPF = ffi.typeof('struct bpf') local HELPER = ffi.typeof('struct bpf_func_id') -- Symbolic table of constant expressions over numbers local const_expr = { ADD = function (a, b) return a + b end, SUB = function (a, b) return a - b end, DIV = function (a, b) return a / b end, MOD = function (a, b) return a % b end, JEQ = function (a, b) return a == b end, JNE = function (a, b) return a ~= b end, JGE = function (a, b) return a >= b end, JGT = function (a, b) return a > b end, } local const_width = { [1] = BPF.B, [2] = BPF.H, [4] = BPF.W, [8] = BPF.DW, } -- Built-ins that are strict only (never compile-time expandable) local builtins_strict = { [ffi.new] = true, [print] = true, } -- Deep copy a table local function table_copy(t) local copy = {} for n,v in pairs(t) do if type(v) == 'table' then v = table_copy(v) end copy[n] = v end return copy end -- Return true if the constant part is a proxy local function is_proxy(x) return type(x) == 'table' and (x.__dissector or x.__map or x.__base) end -- Create compiler closure local function create_emitter(env, stackslots, params, param_types) local V = {} -- Variable tracking / register allocator local code = { -- Generated code pc = 0, bc_pc = 0, insn = ffi.new('struct bpf_insn[4096]'), fixup = {}, reachable = true, seen_cmp = nil, } local Vstate = {} -- Track variable layout at basic block exits -- Anything below this stack offset is free to use by caller -- @note: There is no tracking memory allocator, so the caller may -- lower it for persistent objects, but such memory will never -- be reclaimed and the caller is responsible for resetting stack -- top whenever the memory below is free to be reused local stack_top = (stackslots + 1) * ffi.sizeof('uint64_t') local function emit(op, dst, src, off, imm) local ins = code.insn[code.pc] ins.code = op ins.dst_reg = dst ins.src_reg = src ins.off = off ins.imm = imm code.pc = code.pc + 1 end local function 
reg_spill(var) local vinfo = V[var] assert(vinfo.reg, 'attempt to spill VAR that doesn\'t have an allocated register') vinfo.spill = (var + 1) * ffi.sizeof('uint64_t') -- Index by (variable number) * (register width) emit(BPF.MEM + BPF.STX + BPF.DW, 10, vinfo.reg, -vinfo.spill, 0) vinfo.reg = nil end local function reg_fill(var, reg) local vinfo = V[var] assert(reg, 'attempt to fill variable to register but not register is allocated') assert(vinfo.spill, 'attempt to fill register with a VAR that isn\'t spilled') emit(BPF.MEM + BPF.LDX + BPF.DW, reg, 10, -vinfo.spill, 0) vinfo.reg = reg vinfo.spill = nil end -- Allocate a register (lazy simple allocator) local function reg_alloc(var, reg) -- Specific register requested, must spill/move existing variable if reg then for k,v in pairs(V) do -- Spill any variable that has this register if v.reg == reg and not v.shadow then reg_spill(k) break end end return reg end -- Find free or least recently used slot local last, last_seen, used = nil, 0xffff, 0 for k,v in pairs(V) do if v.reg then if not v.live_to or v.live_to < last_seen then last, last_seen = k, v.live_to or last_seen end used = bit.bor(used, bit.lshift(1, v.reg)) end end -- Attempt to select a free register from R7-R9 (callee saved) local free = bit.bnot(used) if bit.band(free, 0x80) ~= 0 then reg = 7 elseif bit.band(free,0x100) ~= 0 then reg = 8 elseif bit.band(free,0x200) ~= 0 then reg = 9 end -- Select another variable to be spilled if not reg then assert(last) reg = V[last].reg reg_spill(last) end assert(reg, 'VAR '..var..'fill/spill failed') return reg end -- Set new variable local function vset(var, reg, const, vtype) -- Must materialise all variables shadowing this variable slot, as it will be overwritten if V[var] and V[var].reg then for _, vinfo in pairs(V) do -- Shadowing variable MUST share the same type and attributes, -- but the register assignment may have changed if vinfo.shadow == var then vinfo.reg = V[var].reg vinfo.shadow = nil end end end -- 
Get precise type for CDATA or attempt to narrow numeric constant if not vtype and type(const) == 'cdata' then vtype = ffi.typeof(const) end V[var] = {reg=reg, const=const, type=vtype} -- Track variable source if V[var].const and type(const) == 'table' then V[var].source = V[var].const.source end end -- Materialize (or register) a variable in a register -- If the register is nil, then the a new register is assigned (if not already assigned) local function vreg(var, reg, reserve, vtype) local vinfo = V[var] assert(vinfo, 'VAR '..var..' not registered') vinfo.live_to = code.pc-1 if (vinfo.reg and not reg) and not vinfo.shadow then return vinfo.reg end reg = reg_alloc(var, reg) -- Materialize variable shadow copy local src = vinfo while src.shadow do src = V[src.shadow] end if reserve then -- luacheck: ignore -- No load to register occurs elseif src.reg then emit(BPF.ALU64 + BPF.MOV + BPF.X, reg, src.reg, 0, 0) elseif src.spill then vinfo.spill = src.spill reg_fill(var, reg) elseif src.const then vtype = vtype or src.type if type(src.const) == 'table' and src.const.__base then -- Load pointer type emit(BPF.ALU64 + BPF.MOV + BPF.X, reg, 10, 0, 0) emit(BPF.ALU64 + BPF.ADD + BPF.K, reg, 0, 0, -src.const.__base) elseif type(src.const) == 'table' and src.const.__dissector then -- Load dissector offset (imm32), but keep the constant part (dissector proxy) emit(BPF.ALU64 + BPF.MOV + BPF.K, reg, 0, 0, src.const.off or 0) elseif vtype and ffi.sizeof(vtype) == 8 then -- IMM64 must be done in two instructions with imm64 = (lo(imm32), hi(imm32)) emit(BPF.LD + BPF.DW, reg, 0, 0, ffi.cast('uint32_t', src.const)) emit(0, 0, 0, 0, ffi.cast('uint32_t', bit.rshift(bit.rshift(src.const, 16), 16))) vinfo.const = nil -- The variable is live else emit(BPF.ALU64 + BPF.MOV + BPF.K, reg, 0, 0, src.const) vinfo.const = nil -- The variable is live end else assert(false, 'VAR '..var..' 
has neither register nor constant value') end vinfo.reg = reg vinfo.shadow = nil vinfo.live_from = code.pc-1 vinfo.type = vtype or vinfo.type return reg end -- Copy variable local function vcopy(dst, src) if dst == src then return end V[dst] = {reg=V[src].reg, const=V[src].const, shadow=src, source=V[src].source, type=V[src].type} end -- Dereference variable of pointer type local function vderef(dst_reg, src_reg, vinfo) -- Dereference map pointers for primitive types -- BPF doesn't allow pointer arithmetics, so use the entry value assert(type(vinfo.const) == 'table' and vinfo.const.__dissector, 'cannot dereference a non-pointer variable') local vtype = vinfo.const.__dissector local w = ffi.sizeof(vtype) assert(const_width[w], 'NYI: sizeof('..tostring(vtype)..') not 1/2/4/8 bytes') if dst_reg ~= src_reg then emit(BPF.ALU64 + BPF.MOV + BPF.X, dst_reg, src_reg, 0, 0) -- dst = src end -- Optimize the NULL check away if provably not NULL if not vinfo.source or vinfo.source:find('_or_null', 1, true) then emit(BPF.JMP + BPF.JEQ + BPF.K, src_reg, 0, 1, 0) -- if (src != NULL) end emit(BPF.MEM + BPF.LDX + const_width[w], dst_reg, src_reg, 0, 0) -- dst = *src; end -- Allocate a space for variable local function valloc(size, blank) local base = stack_top assert(stack_top + size < 512 * 1024, 'exceeded maximum stack size of 512kB') stack_top = stack_top + size -- Align to 8 byte boundary stack_top = math.ceil(stack_top/8)*8 -- Current kernel version doesn't support ARG_PTR_TO_RAW_STACK -- so we always need to have memory initialized, remove this when supported if blank then if type(blank) == 'string' then local sp = 0 while sp < size do -- TODO: no BPF_ST + BPF_DW instruction yet local as_u32 = ffi.new('uint32_t [1]') local sub = blank:sub(sp+1, sp+ffi.sizeof(as_u32)) ffi.copy(as_u32, sub, #sub) emit(BPF.MEM + BPF.ST + BPF.W, 10, 0, -(stack_top-sp), as_u32[0]) sp = sp + ffi.sizeof(as_u32) end elseif type(blank) == 'boolean' then reg_alloc(stackslots, 0) emit(BPF.ALU64 + BPF.MOV 
+ BPF.K, 0, 0, 0, 0) for sp = base+8,stack_top,8 do emit(BPF.MEM + BPF.STX + BPF.DW, 10, 0, -sp, 0) end else error('NYI: will with unknown type '..type(blank)) end end return stack_top end -- Turn variable into scalar in register (or constant) local function vscalar(a, w) assert(const_width[w], 'sizeof(scalar variable) must be 1/2/4/8') local src_reg -- If source is a pointer, we must dereference it first if cdef.isptr(V[a].type) then src_reg = vreg(a) local tmp_reg = reg_alloc(stackslots, 1) -- Clone variable in tmp register emit(BPF.ALU64 + BPF.MOV + BPF.X, tmp_reg, src_reg, 0, 0) vderef(tmp_reg, tmp_reg, V[a]) src_reg = tmp_reg -- Materialize and dereference it -- Source is a value on stack, we must load it first elseif type(V[a].const) == 'table' and V[a].const.__base > 0 then src_reg = vreg(a) emit(BPF.MEM + BPF.LDX + const_width[w], src_reg, 10, -V[a].const.__base, 0) V[a].type = V[a].const.__dissector V[a].const = nil -- Value is dereferenced -- If source is an imm32 number, avoid register load elseif type(V[a].const) == 'number' and w < 8 then return nil, V[a].const -- Load variable from any other source else src_reg = vreg(a) end return src_reg, nil end -- Emit compensation code at the end of basic block to unify variable set layout on all block exits -- 1. we need to free registers by spilling -- 2. 
fill registers to match other exits from this BB local function bb_end(Vcomp) for i,v in pairs(V) do if Vcomp[i] and Vcomp[i].spill and not v.spill then -- Materialize constant or shadowing variable to be able to spill if not v.reg and (v.shadow or cdef.isimmconst(v)) then vreg(i) end reg_spill(i) end end for i,v in pairs(V) do if Vcomp[i] and Vcomp[i].reg and not v.reg then vreg(i, Vcomp[i].reg) end -- Compensate variable metadata change if Vcomp[i] and Vcomp[i].source then V[i].source = Vcomp[i].source end end end local function CMP_STR(a, b, op) assert(op == 'JEQ' or op == 'JNE', 'NYI: only equivallence stack/string only supports == or ~=') -- I have no better idea how to implement it than unrolled XOR loop, as we can fixup only one JMP -- So: X(a,b) = a[0] ^ b[0] | a[1] ^ b[1] | ... -- EQ(a,b) <=> X == 0 -- This could be optimised by placing early exits by rewriter in second phase for long strings local base, size = V[a].const.__base, math.min(#b, ffi.sizeof(V[a].type)) local acc, tmp = reg_alloc(stackslots, 0), reg_alloc(stackslots+1, 1) local sp = 0 emit(BPF.ALU64 + BPF.MOV + BPF.K, acc, 0, 0, 0) while sp < size do -- Load string chunk as imm32 local as_u32 = ffi.new('uint32_t [1]') local sub = b:sub(sp+1, sp+ffi.sizeof(as_u32)) ffi.copy(as_u32, sub, #sub) -- TODO: make this faster by interleaved load/compare steps with DW length emit(BPF.MEM + BPF.LDX + BPF.W, tmp, 10, -(base-sp), 0) emit(BPF.ALU64 + BPF.XOR + BPF.K, tmp, 0, 0, as_u32[0]) emit(BPF.ALU64 + BPF.OR + BPF.X, acc, tmp, 0, 0) sp = sp + ffi.sizeof(as_u32) end emit(BPF.JMP + BPF[op] + BPF.K, acc, 0, 0xffff, 0) code.seen_cmp = code.pc-1 end local function CMP_REG(a, b, op) -- Fold compile-time expressions if V[a].const and V[b].const and not (is_proxy(V[a].const) or is_proxy(V[b].const)) then code.seen_cmp = const_expr[op](V[a].const, V[b].const) and ALWAYS or NEVER else -- Comparison against compile-time string or stack memory if V[b].const and type(V[b].const) == 'string' then return CMP_STR(a, 
V[b].const, op) end -- The 0xFFFF target here has no significance, it's just a placeholder for -- compiler to replace it's absolute offset to LJ bytecode insn with a relative -- offset in BPF program code, verifier will accept only programs with valid JMP targets local a_reg, b_reg = vreg(a), vreg(b) emit(BPF.JMP + BPF[op] + BPF.X, a_reg, b_reg, 0xffff, 0) code.seen_cmp = code.pc-1 end end local function CMP_IMM(a, b, op) local c = V[a].const if c and not is_proxy(c) then -- Fold compile-time expressions code.seen_cmp = const_expr[op](c, b) and ALWAYS or NEVER else -- Convert imm32 to number if type(b) == 'string' then if #b == 1 then b = b:byte() elseif cdef.isptr(V[a].type) then -- String comparison between stack/constant string return CMP_STR(a, b, op) elseif #b <= 4 then -- Convert to u32 with network byte order local imm = ffi.new('uint32_t[1]') ffi.copy(imm, b, #b) b = builtins.hton(imm[0]) else error('NYI: compare register with string, where #string > sizeof(u32)') end end -- The 0xFFFF target here has no significance, it's just a placeholder for -- compiler to replace it's absolute offset to LJ bytecode insn with a relative -- offset in BPF program code, verifier will accept only programs with valid JMP targets local reg = vreg(a) emit(BPF.JMP + BPF[op] + BPF.K, reg, 0, 0xffff, b) code.seen_cmp = code.pc-1 -- Remember NULL pointer checks as BPF prohibits pointer comparisons -- and repeated checks wouldn't pass the verifier, only comparisons -- against constants are checked. 
if op == 'JEQ' and tonumber(b) == 0 and V[a].source then local pos = V[a].source:find('_or_null', 1, true) if pos then code.seen_null_guard = a end -- Inverse NULL pointer check (if a ~= nil) elseif op == 'JNE' and tonumber(b) == 0 and V[a].source then local pos = V[a].source:find('_or_null', 1, true) if pos then code.seen_null_guard = a code.seen_null_guard_inverse = true end end end end local function ALU_IMM(dst, a, b, op) -- Fold compile-time expressions if V[a].const and not is_proxy(V[a].const) then assert(cdef.isimmconst(V[a]), 'VAR '..a..' must be numeric') vset(dst, nil, const_expr[op](V[a].const, b)) -- Now we need to materialize dissected value at DST, and add it else vcopy(dst, a) local dst_reg = vreg(dst) if cdef.isptr(V[a].type) then vderef(dst_reg, dst_reg, V[a]) V[dst].type = V[a].const.__dissector else V[dst].type = V[a].type end emit(BPF.ALU64 + BPF[op] + BPF.K, dst_reg, 0, 0, b) end end local function ALU_REG(dst, a, b, op) -- Fold compile-time expressions if V[a].const and not (is_proxy(V[a].const) or is_proxy(V[b].const)) then assert(cdef.isimmconst(V[a]), 'VAR '..a..' must be numeric') assert(cdef.isimmconst(V[b]), 'VAR '..b..' 
must be numeric') if type(op) == 'string' then op = const_expr[op] end vcopy(dst, a) V[dst].const = op(V[a].const, V[b].const) else local src_reg = b and vreg(b) or 0 -- SRC is optional for unary operations if b and cdef.isptr(V[b].type) then -- We have to allocate a temporary register for dereferencing to preserve -- pointer in source variable that MUST NOT be altered reg_alloc(stackslots, 2) vderef(2, src_reg, V[b]) src_reg = 2 end vcopy(dst, a) -- DST may alias B, so copy must occur after we materialize B local dst_reg = vreg(dst) if cdef.isptr(V[a].type) then vderef(dst_reg, dst_reg, V[a]) V[dst].type = V[a].const.__dissector end emit(BPF.ALU64 + BPF[op] + BPF.X, dst_reg, src_reg, 0, 0) V[stackslots].reg = nil -- Free temporary registers end end local function ALU_IMM_NV(dst, a, b, op) -- Do DST = IMM(a) op VAR(b) where we can't invert because -- the registers are u64 but immediates are u32, so complement -- arithmetics wouldn't work vset(stackslots+1, nil, a) ALU_REG(dst, stackslots+1, b, op) end local function LD_ABS(dst, w, off) assert(off, 'LD_ABS called without offset') if w < 8 then local dst_reg = vreg(dst, 0, true, builtins.width_type(w)) -- Reserve R0 emit(BPF.LD + BPF.ABS + const_width[w], dst_reg, 0, 0, off) if w > 1 and ffi.abi('le') then -- LD_ABS has htonl() semantics, reverse emit(BPF.ALU + BPF.END + BPF.TO_BE, dst_reg, 0, 0, w * 8) end elseif w == 8 then -- LD_ABS|IND prohibits DW, we need to do two W loads and combine them local tmp_reg = vreg(stackslots, 0, true, builtins.width_type(w)) -- Reserve R0 emit(BPF.LD + BPF.ABS + const_width[4], tmp_reg, 0, 0, off + 4) if ffi.abi('le') then -- LD_ABS has htonl() semantics, reverse emit(BPF.ALU + BPF.END + BPF.TO_BE, tmp_reg, 0, 0, 32) end ALU_IMM(stackslots, stackslots, 32, 'LSH') local dst_reg = vreg(dst, 0, true, builtins.width_type(w)) -- Reserve R0, spill tmp variable emit(BPF.LD + BPF.ABS + const_width[4], dst_reg, 0, 0, off) if ffi.abi('le') then -- LD_ABS has htonl() semantics, reverse 
emit(BPF.ALU + BPF.END + BPF.TO_BE, dst_reg, 0, 0, 32) end ALU_REG(dst, dst, stackslots, 'OR') V[stackslots].reg = nil -- Free temporary registers else assert(w < 8, 'NYI: only LD_ABS of 1/2/4/8 is supported') end end local function LD_IND(dst, src, w, off) local src_reg = vreg(src) -- Must materialize first in case dst == src local dst_reg = vreg(dst, 0, true, builtins.width_type(w)) -- Reserve R0 emit(BPF.LD + BPF.IND + const_width[w], dst_reg, src_reg, 0, off or 0) if w > 1 and ffi.abi('le') then -- LD_ABS has htonl() semantics, reverse emit(BPF.ALU + BPF.END + BPF.TO_BE, dst_reg, 0, 0, w * 8) end end local function LD_MEM(dst, src, w, off) local src_reg = vreg(src) -- Must materialize first in case dst == src local dst_reg = vreg(dst, nil, true, builtins.width_type(w)) -- Reserve R0 emit(BPF.MEM + BPF.LDX + const_width[w], dst_reg, src_reg, off or 0, 0) end -- @note: This is specific now as it expects registers reserved local function LD_IMM_X(dst_reg, src_type, imm, w) if w == 8 then -- IMM64 must be done in two instructions with imm64 = (lo(imm32), hi(imm32)) emit(BPF.LD + const_width[w], dst_reg, src_type, 0, ffi.cast('uint32_t', imm)) -- Must shift in two steps as bit.lshift supports [0..31] emit(0, 0, 0, 0, ffi.cast('uint32_t', bit.lshift(bit.lshift(imm, 16), 16))) else emit(BPF.LD + const_width[w], dst_reg, src_type, 0, imm) end end local function BUILTIN(func, ...) local builtin_export = { -- Compiler primitives (work with variable slots, emit instructions) V=V, vreg=vreg, vset=vset, vcopy=vcopy, vderef=vderef, valloc=valloc, emit=emit, reg_alloc=reg_alloc, reg_spill=reg_spill, tmpvar=stackslots, const_width=const_width, -- Extensions and helpers (use with care) LD_IMM_X = LD_IMM_X, } func(builtin_export, ...) 
end local function LOAD(dst, src, off, vtype) local base = V[src].const assert(base and base.__dissector, 'NYI: load() on variable that doesn\'t have dissector') assert(V[src].source, 'NYI: load() on variable with unknown source') -- Cast to different type if requested vtype = vtype or base.__dissector local w = ffi.sizeof(vtype) assert(const_width[w], 'NYI: load() supports 1/2/4/8 bytes at a time only, wanted ' .. tostring(w)) -- Packet access with a dissector (use BPF_LD) if V[src].source:find('ptr_to_pkt', 1, true) then if base.off then -- Absolute address to payload LD_ABS(dst, w, off + base.off) else -- Indirect address to payload LD_IND(dst, src, w, off) end -- Direct access to first argument (skb fields, pt regs, ...) elseif V[src].source:find('ptr_to_ctx', 1, true) then LD_MEM(dst, src, w, off) -- Direct skb access with a dissector (use BPF_MEM) elseif V[src].source:find('ptr_to_skb', 1, true) then LD_MEM(dst, src, w, off) -- Pointer to map-backed memory (use BPF_MEM) elseif V[src].source:find('ptr_to_map_value', 1, true) then LD_MEM(dst, src, w, off) -- Indirect read using probe (uprobe or kprobe, uses helper) elseif V[src].source:find('ptr_to_probe', 1, true) then BUILTIN(builtins[builtins.probe_read], nil, dst, src, vtype, off) V[dst].source = V[src].source -- Builtin handles everything else error('NYI: load() on variable from ' .. 
V[src].source) end -- (tail of an emitter helper whose definition starts above this chunk)
	V[dst].type = vtype
	V[dst].const = nil -- Dissected value is not constant anymore
end

--- Translate a LuaJIT CALL at slot `a` with `d-1` arguments.
-- Dispatches, in order: registered builtin (function form), ALU op with an
-- immediate or register operand, dissector `.slice(a, b)` retrieval, strict
-- builtins, and finally plain compile-time evaluation when every argument is
-- a compile-time constant. Asserts at most one return value (b-1 <= 1).
local function CALL(a, b, d)
	assert(b-1 <= 1, 'NYI: CALL with >1 return values')
	-- Perform either compile-time, helper, or builtin
	local func = V[a].const
	-- Gather all arguments and check if they're constant
	local args, const, nargs = {}, true, d - 1
	for i = a+1, a+d-1 do
		table.insert(args, V[i].const)
		if not V[i].const or is_proxy(V[i].const) then const = false end
	end
	local builtin = builtins[func]
	if not const or nargs == 0 then
		if builtin and type(builtin) == 'function' then
			-- Function-form builtin: pass variable slot numbers, not values
			args = {a}
			for i = a+1, a+nargs do table.insert(args, i) end
			BUILTIN(builtin, unpack(args))
		elseif V[a+2] and V[a+2].const then -- var OP imm
			ALU_IMM(a, a+1, V[a+2].const, builtin)
		elseif nargs <= 2 then -- var OP var
			ALU_REG(a, a+1, V[a+2] and a+2, builtin)
		else
			error('NYI: CALL non-builtin with 3 or more arguments')
		end
	-- Call on dissector implies slice retrieval
	elseif type(func) == 'table' and func.__dissector then
		assert(nargs >= 2, 'NYI: .slice(a, b) must have at least two arguments')
		assert(V[a+1].const and V[a+2].const, 'NYI: slice() arguments must be constant')
		local off = V[a+1].const
		local vtype = builtins.width_type(V[a+2].const - off)
		-- Access to packet via packet (use BPF_LD)
		if V[a].source and V[a].source:find('ptr_to_', 1, true) then
			LOAD(a, a, off, vtype)
		else
			error('NYI: .slice(a, b) on non-pointer memory ' .. (V[a].source or 'unknown'))
		end
	-- Strict builtins cannot be expanded on compile-time
	elseif builtins_strict[func] and builtin then
		args = {a}
		for i = a+1, a+nargs do table.insert(args, i) end
		BUILTIN(builtin, unpack(args))
	-- Attempt compile-time call expansion (expects all argument compile-time known)
	else
		assert(const, 'NYI: CALL attempted on constant arguments, but at least one argument is not constant')
		V[a].const = func(unpack(args))
	end
end

--- Prepare a BPF map helper call: load the map fd into R1 (pseudo map-fd
-- load) and materialize the key, leaving R2 = pointer to key on the stack.
-- @param map_var slot holding the map object (its .const carries fd/key_type)
-- @param key optional slot holding the key value
-- @param imm optional immediate key value (overrides a constant key slot)
local function MAP_INIT(map_var, key, imm)
	local map = V[map_var].const
	vreg(map_var, 1, true, ffi.typeof('uint64_t'))
	-- Reserve R1 and load ptr for process-local map fd
	LD_IMM_X(1, BPF.PSEUDO_MAP_FD, map.fd, ffi.sizeof(V[map_var].type))
	V[map_var].reg = nil -- R1 will be invalidated after CALL, forget register allocation
	-- Reserve R2 and load R2 = key pointer
	local key_size = ffi.sizeof(map.key_type)
	local w = const_width[key_size] or BPF.DW
	local pod_type = const_width[key_size] -- nil when key is not 1/2/4/8 bytes wide
	local sp = stack_top + key_size -- Must use stack below spill slots
	-- Store immediate value on stack
	reg_alloc(stackslots, 2) -- Spill anything in R2 (unnamed tmp variable)
	local key_base = key and V[key].const
	imm = imm or key_base
	if imm and (not key or not is_proxy(key_base)) then
		assert(pod_type, 'NYI: map[const K], K width must be 1/2/4/8')
		emit(BPF.MEM + BPF.ST + w, 10, 0, -sp, imm)
	-- Key is in register, spill it
	elseif V[key].reg and pod_type then
		if cdef.isptr(V[key].type) then
			-- There is already a pointer in register, dereference before spilling
			emit(BPF.MEM + BPF.LDX + w, 2, V[key].reg, 0, 0)
			emit(BPF.MEM + BPF.STX + w, 10, 2, -sp, 0)
		else -- Variable in register is POD, spill it on the stack
			emit(BPF.MEM + BPF.STX + w, 10, V[key].reg, -sp, 0)
		end
	-- Key is spilled from register to stack
	elseif V[key].spill then
		sp = V[key].spill
	-- Key is already on stack, write to base-relative address
	elseif key_base.__base then
		assert(key_size == ffi.sizeof(V[key].type), 'VAR '..key..' type incompatible with BPF map key type')
		sp = key_base.__base
	else
		error('VAR '..key..' is neither const-expr/register/stack/spilled')
	end
	-- If [FP+K] addressing, emit it: R2 = R10 - sp
	if sp then
		emit(BPF.ALU64 + BPF.MOV + BPF.X, 2, 10, 0, 0)
		emit(BPF.ALU64 + BPF.ADD + BPF.K, 2, 0, 0, -sp)
	end
end

--- Emit map_lookup_elem(map, key); `dst` becomes a pointer-or-null
-- ('ptr_to_map_value_or_null') dissected as the map's value type.
local function MAP_GET(dst, map_var, key, imm)
	local map = V[map_var].const
	MAP_INIT(map_var, key, imm)
	-- Flag as pointer type and associate dissector for map value type
	vreg(dst, 0, true, ffi.typeof('uint8_t *'))
	V[dst].const = {__dissector=map.val_type}
	V[dst].source = 'ptr_to_map_value_or_null'
	emit(BPF.JMP + BPF.CALL, 0, 0, 0, HELPER.map_lookup_elem)
	V[stackslots].reg = nil -- Free temporary registers
end

--- Emit map_delete_elem(map, key).
local function MAP_DEL(map_var, key, key_imm)
	-- Set R0, R1 (map fd, preempt R0)
	reg_alloc(stackslots, 0) -- Spill anything in R0 (unnamed tmp variable)
	MAP_INIT(map_var, key, key_imm)
	emit(BPF.JMP + BPF.CALL, 0, 0, 0, HELPER.map_delete_elem)
	V[stackslots].reg = nil -- Free temporary registers
end

--- Emit map_update_elem(map, key, value, BPF_ANY) from the value in slot
-- `src`; degrades to MAP_DEL when the value has the nil ('void') type.
-- R1 = map fd, R2 = key ptr (via MAP_INIT), R3 = value ptr, R4 = flags.
local function MAP_SET(map_var, key, key_imm, src)
	local map = V[map_var].const
	-- Delete when setting nil
	if V[src].type == ffi.typeof('void') then
		return MAP_DEL(map_var, key, key_imm)
	end
	-- Set R0, R1 (map fd, preempt R0)
	reg_alloc(stackslots, 0) -- Spill anything in R0 (unnamed tmp variable)
	MAP_INIT(map_var, key, key_imm)
	reg_alloc(stackslots, 4) -- Spill anything in R4 (unnamed tmp variable)
	emit(BPF.ALU64 + BPF.MOV + BPF.K, 4, 0, 0, 0) -- BPF_ANY, create new element or update existing
	-- Reserve R3 for value pointer
	reg_alloc(stackslots, 3) -- Spill anything in R3 (unnamed tmp variable)
	local val_size = ffi.sizeof(map.val_type)
	local w = const_width[val_size] or BPF.DW
	local pod_type = const_width[val_size]
	-- Stack pointer must be aligned to both key/value size and have enough headroom for (key, value)
	local sp = stack_top + ffi.sizeof(map.key_type) + val_size
	sp = sp + (sp % val_size)
	local base = V[src].const
	if base and not is_proxy(base) then
		-- Constant value: store the immediate on stack
		assert(pod_type, 'NYI: MAP[K] = imm V; V width must be 1/2/4/8')
		emit(BPF.MEM + BPF.ST + w, 10, 0, -sp, base)
	-- Value is in register, spill it
	elseif V[src].reg and pod_type then
		-- Value is a pointer, dereference it and spill it
		if cdef.isptr(V[src].type) then
			vderef(3, V[src].reg, V[src])
			emit(BPF.MEM + BPF.STX + w, 10, 3, -sp, 0)
		else
			emit(BPF.MEM + BPF.STX + w, 10, V[src].reg, -sp, 0)
		end
	-- We get a pointer to spilled register on stack
	elseif V[src].spill then
		-- If variable is a pointer, we can load it to R3 directly (save "LEA")
		if cdef.isptr(V[src].type) then
			reg_fill(src, 3)
			-- If variable is a stack pointer, we don't have to check it
			if base.__base then
				emit(BPF.JMP + BPF.CALL, 0, 0, 0, HELPER.map_update_elem)
				return
			end
			vderef(3, V[src].reg, V[src])
			emit(BPF.MEM + BPF.STX + w, 10, 3, -sp, 0)
		else
			sp = V[src].spill
		end
	-- Value is already on stack, write to base-relative address
	elseif base.__base then
		if val_size ~= ffi.sizeof(V[src].type) then
			local err = string.format('VAR %d type (%s) incompatible with BPF map value type (%s): expected %d, got %d', src, V[src].type, map.val_type, val_size, ffi.sizeof(V[src].type))
			error(err)
		end
		sp = base.__base
	-- Value is constant, materialize it on stack
	else
		error('VAR '.. src ..' is neither const-expr/register/stack/spilled')
	end
	-- R3 = R10 - sp (pointer to value), then call the helper
	emit(BPF.ALU64 + BPF.MOV + BPF.X, 3, 10, 0, 0)
	emit(BPF.ALU64 + BPF.ADD + BPF.K, 3, 0, 0, -sp)
	emit(BPF.JMP + BPF.CALL, 0, 0, 0, HELPER.map_update_elem)
	V[stackslots].reg = nil -- Free temporary registers
end

-- Finally - this table translates LuaJIT bytecode into code emitter actions.
local BC = { -- Constants KNUM = function(a, _, c, _) -- KNUM if c < 2147483648 then vset(a, nil, c, ffi.typeof('int32_t')) else vset(a, nil, c, ffi.typeof('uint64_t')) end end, KSHORT = function(a, _, _, d) -- KSHORT vset(a, nil, d, ffi.typeof('int16_t')) end, KCDATA = function(a, _, c, _) -- KCDATA -- Coerce numeric types if possible local ct = ffi.typeof(c) if ffi.istype(ct, ffi.typeof('uint64_t')) or ffi.istype(ct, ffi.typeof('int64_t')) then vset(a, nil, c, ct) elseif tonumber(c) ~= nil then -- TODO: this should not be possible vset(a, nil, tonumber(c), ct) else error('NYI: cannot use CDATA constant of type ' .. ct) end end, KPRI = function(a, _, _, d) -- KPRI -- KNIL is 0, must create a special type to identify it local vtype = (d < 1) and ffi.typeof('void') or ffi.typeof('uint8_t') vset(a, nil, (d < 2) and 0 or 1, vtype) end, KSTR = function(a, _, c, _) -- KSTR vset(a, nil, c, ffi.typeof('const char[?]')) end, MOV = function(a, _, _, d) -- MOV var, var vcopy(a, d) end, -- Comparison ops -- Note: comparisons are always followed by JMP opcode, that -- will fuse following JMP to JMP+CMP instruction in BPF -- Note: we're narrowed to integers, so operand/operator inversion is legit ISLT = function(a, _, _, d) return CMP_REG(d, a, 'JGE') end, -- (a < d) (inverted) ISGE = function(a, _, _, d) return CMP_REG(a, d, 'JGE') end, -- (a >= d) ISGT = function(a, _, _, d) return CMP_REG(a, d, 'JGT') end, -- (a > d) ISEQV = function(a, _, _, d) return CMP_REG(a, d, 'JEQ') end, -- (a == d) ISNEV = function(a, _, _, d) return CMP_REG(a, d, 'JNE') end, -- (a ~= d) ISEQS = function(a, _, c, _) return CMP_IMM(a, c, 'JEQ') end, -- (a == str(c)) ISNES = function(a, _, c, _) return CMP_IMM(a, c, 'JNE') end, -- (a ~= str(c)) ISEQN = function(a, _, c, _) return CMP_IMM(a, c, 'JEQ') end, -- (a == c) ISNEN = function(a, _, c, _) return CMP_IMM(a, c, 'JNE') end, -- (a ~= c) IST = function(_, _, _, d) return CMP_IMM(d, 0, 'JNE') end, -- (d) ISF = function(_, _, _, d) return CMP_IMM(d, 0, 
'JEQ') end, -- (not d) ISEQP = function(a, _, c, _) return CMP_IMM(a, c, 'JEQ') end, -- ISEQP (a == c) -- Binary operations with RHS constants ADDVN = function(a, b, c, _) return ALU_IMM(a, b, c, 'ADD') end, SUBVN = function(a, b, c, _) return ALU_IMM(a, b, c, 'SUB') end, MULVN = function(a, b, c, _) return ALU_IMM(a, b, c, 'MUL') end, DIVVN = function(a, b, c, _) return ALU_IMM(a, b, c, 'DIV') end, MODVN = function(a, b, c, _) return ALU_IMM(a, b, c, 'MOD') end, -- Binary operations with LHS constants -- Cheat code: we're narrowed to integer arithmetic, so MUL+ADD are commutative ADDNV = function(a, b, c, _) return ALU_IMM(a, b, c, 'ADD') end, -- ADDNV MULNV = function(a, b, c, _) return ALU_IMM(a, b, c, 'MUL') end, -- MULNV SUBNV = function(a, b, c, _) return ALU_IMM_NV(a, c, b, 'SUB') end, -- SUBNV DIVNV = function(a, b, c, _) return ALU_IMM_NV(a, c, b, 'DIV') end, -- DIVNV -- Binary operations between registers ADDVV = function(a, b, _, d) return ALU_REG(a, b, d, 'ADD') end, SUBVV = function(a, b, _, d) return ALU_REG(a, b, d, 'SUB') end, MULVV = function(a, b, _, d) return ALU_REG(a, b, d, 'MUL') end, DIVVV = function(a, b, _, d) return ALU_REG(a, b, d, 'DIV') end, MODVV = function(a, b, _, d) return ALU_REG(a, b, d, 'MOD') end, -- Strings CAT = function(a, b, _, d) -- CAT A = B ~ D assert(V[b].const and V[d].const, 'NYI: CAT only works on compile-time expressions') assert(type(V[b].const) == 'string' and type(V[d].const) == 'string', 'NYI: CAT only works on compile-time strings') vset(a, nil, V[b].const .. 
V[d].const) end, -- Tables GGET = function (a, _, c, _) -- GGET (A = GLOBAL[c]) if env[c] ~= nil then vset(a, nil, env[c]) else error(string.format("undefined global '%s'", c)) end end, UGET = function (a, _, c, _) -- UGET (A = UPVALUE[c]) if env[c] ~= nil then vset(a, nil, env[c]) else error(string.format("undefined upvalue '%s'", c)) end end, TSETB = function (a, b, _, d) -- TSETB (B[D] = A) assert(V[b] and type(V[b].const) == 'table', 'NYI: B[D] where B is not Lua table, BPF map, or pointer') local vinfo = V[b].const if vinfo.__map then -- BPF map read (constant) return MAP_SET(b, nil, d, a) -- D is literal elseif vinfo.__dissector then assert(vinfo.__dissector, 'NYI: B[D] where B does not have a known element size') local w = ffi.sizeof(vinfo.__dissector) -- TODO: support vectorized moves larger than register width assert(const_width[w], 'B[C] = A, sizeof(A) must be 1/2/4/8') local src_reg, const = vscalar(a, w) -- If changing map value, write to absolute address + offset if V[b].source and V[b].source:find('ptr_to_map_value', 1, true) then local dst_reg = vreg(b) -- Optimization: immediate values (imm32) can be stored directly if type(const) == 'number' then emit(BPF.MEM + BPF.ST + const_width[w], dst_reg, 0, d, const) else emit(BPF.MEM + BPF.STX + const_width[w], dst_reg, src_reg, d, 0) end -- Table is already on stack, write to vinfo-relative address elseif vinfo.__base then -- Optimization: immediate values (imm32) can be stored directly if type(const) == 'number' then emit(BPF.MEM + BPF.ST + const_width[w], 10, 0, -vinfo.__base + (d * w), const) else emit(BPF.MEM + BPF.STX + const_width[w], 10, src_reg, -vinfo.__base + (d * w), 0) end else error('NYI: B[D] where B is not Lua table, BPF map, or pointer') end elseif vinfo and vinfo and V[a].const then vinfo[V[d].const] = V[a].const else error('NYI: B[D] where B is not Lua table, BPF map, or pointer') end end, TSETV = function (a, b, _, d) -- TSETV (B[D] = A) assert(V[b] and type(V[b].const) == 'table', 'NYI: 
B[D] where B is not Lua table, BPF map, or pointer') local vinfo = V[b].const if vinfo.__map then -- BPF map read (constant) return MAP_SET(b, d, nil, a) -- D is variable elseif vinfo.__dissector then assert(vinfo.__dissector, 'NYI: B[D] where B does not have a known element size') local w = ffi.sizeof(vinfo.__dissector) -- TODO: support vectorized moves larger than register width assert(const_width[w], 'B[C] = A, sizeof(A) must be 1/2/4/8') local src_reg, const = vscalar(a, w) -- If changing map value, write to absolute address + offset if V[b].source and V[b].source:find('ptr_to_map_value', 1, true) then -- Calculate variable address from two registers local tmp_var = stackslots + 1 vset(tmp_var, nil, d) ALU_REG(tmp_var, tmp_var, b, 'ADD') local dst_reg = vreg(tmp_var) V[tmp_var].reg = nil -- Only temporary allocation -- Optimization: immediate values (imm32) can be stored directly if type(const) == 'number' and w < 8 then emit(BPF.MEM + BPF.ST + const_width[w], dst_reg, 0, 0, const) else emit(BPF.MEM + BPF.STX + const_width[w], dst_reg, src_reg, 0, 0) end -- Table is already on stack, write to vinfo-relative address elseif vinfo.__base then -- Calculate variable address from two registers local tmp_var = stackslots + 1 vcopy(tmp_var, d) -- Element position if w > 1 then ALU_IMM(tmp_var, tmp_var, w, 'MUL') -- multiply by element size end local dst_reg = vreg(tmp_var) -- add R10 (stack pointer) emit(BPF.ALU64 + BPF.ADD + BPF.X, dst_reg, 10, 0, 0) V[tmp_var].reg = nil -- Only temporary allocation -- Optimization: immediate values (imm32) can be stored directly if type(const) == 'number' and w < 8 then emit(BPF.MEM + BPF.ST + const_width[w], dst_reg, 0, -vinfo.__base, const) else emit(BPF.MEM + BPF.STX + const_width[w], dst_reg, src_reg, -vinfo.__base, 0) end else error('NYI: B[D] where B is not Lua table, BPF map, or pointer') end elseif vinfo and V[d].const and V[a].const then vinfo[V[d].const] = V[a].const else error('NYI: B[D] where B is not Lua table, BPF map, 
or pointer') end end, TSETS = function (a, b, c, _) -- TSETS (B[C] = A) assert(V[b] and V[b].const, 'NYI: B[D] where B is not Lua table, BPF map, or pointer') local base = V[b].const if base.__dissector then local ofs,bpos = ffi.offsetof(base.__dissector, c) assert(not bpos, 'NYI: B[C] = A, where C is a bitfield') local w = builtins.sizeofattr(base.__dissector, c) -- TODO: support vectorized moves larger than register width assert(const_width[w], 'B[C] = A, sizeof(A) must be 1/2/4/8') local src_reg, const = vscalar(a, w) -- If changing map value, write to absolute address + offset if V[b].source and V[b].source:find('ptr_to_map_value', 1, true) then local dst_reg = vreg(b) -- Optimization: immediate values (imm32) can be stored directly if type(const) == 'number' and w < 8 then emit(BPF.MEM + BPF.ST + const_width[w], dst_reg, 0, ofs, const) else emit(BPF.MEM + BPF.STX + const_width[w], dst_reg, src_reg, ofs, 0) end -- Table is already on stack, write to base-relative address elseif base.__base then -- Optimization: immediate values (imm32) can be stored directly if type(const) == 'number' and w < 8 then emit(BPF.MEM + BPF.ST + const_width[w], 10, 0, -base.__base + ofs, const) else emit(BPF.MEM + BPF.STX + const_width[w], 10, src_reg, -base.__base + ofs, 0) end else error('NYI: B[C] where B is not Lua table, BPF map, or pointer') end elseif V[a].const then base[c] = V[a].const else error('NYI: B[C] where B is not Lua table, BPF map, or pointer') end end, TGETB = function (a, b, _, d) -- TGETB (A = B[D]) local base = V[b].const assert(type(base) == 'table', 'NYI: B[C] where C is string and B not Lua table or BPF map') if a ~= b then vset(a) end if base.__map then -- BPF map read (constant) MAP_GET(a, b, nil, d) -- Pointer access with a dissector (traditional uses BPF_LD, direct uses BPF_MEM) elseif V[b].source and V[b].source:find('ptr_to_') then local vtype = base.__dissector and base.__dissector or ffi.typeof('uint8_t') LOAD(a, b, d, vtype) -- Specialise PTR[0] as 
dereference operator elseif cdef.isptr(V[b].type) and d == 0 then vcopy(a, b) local dst_reg = vreg(a) vderef(dst_reg, dst_reg, V[a]) V[a].type = V[a].const.__dissector else error('NYI: A = B[D], where B is not Lua table or packet dissector or pointer dereference') end end, TGETV = function (a, b, _, d) -- TGETV (A = B[D]) local base = V[b].const assert(type(base) == 'table', 'NYI: B[C] where C is string and B not Lua table or BPF map') if a ~= b then vset(a) end if base.__map then -- BPF map read MAP_GET(a, b, d) -- Pointer access with a dissector (traditional uses BPF_LD, direct uses BPF_MEM) elseif V[b].source and V[b].source:find('ptr_to_') then local vtype = base.__dissector and base.__dissector or ffi.typeof('uint8_t') LOAD(a, b, d, vtype) -- Constant dereference elseif type(V[d].const) == 'number' then V[a].const = base[V[d].const] else error('NYI: A = B[D], where B is not Lua table or packet dissector or pointer dereference') end end, TGETS = function (a, b, c, _) -- TGETS (A = B[C]) local base = V[b].const assert(type(base) == 'table', 'NYI: B[C] where C is string and B not Lua table or BPF map') if a ~= b then vset(a) end if base.__dissector then local ofs,bpos,bsize = ffi.offsetof(base.__dissector, c) -- Resolve table key using metatable if not ofs and type(base.__dissector[c]) == 'string' then c = base.__dissector[c] ofs,bpos,bsize = ffi.offsetof(base.__dissector, c) end if not ofs and proto[c] then -- Load new dissector on given offset BUILTIN(proto[c], a, b, c) else -- Loading register from offset is a little bit tricky as there are -- several data sources and value loading modes with different restrictions -- such as checking pointer values for NULL compared to using stack. assert(ofs, tostring(base.__dissector)..'.'..c..' 
attribute not exists') if a ~= b then vset(a) end -- Dissected value is probably not constant anymore local new_const = nil local w, atype = builtins.sizeofattr(base.__dissector, c) -- [SP+K] addressing using R10 (stack pointer) -- Doesn't need to be checked for NULL if base.__base and base.__base > 0 then if cdef.isptr(atype) then -- If the member is pointer type, update base pointer with offset new_const = {__base = base.__base-ofs} else local dst_reg = vreg(a, nil, true) emit(BPF.MEM + BPF.LDX + const_width[w], dst_reg, 10, -base.__base+ofs, 0) end -- Pointer access with a dissector (traditional uses BPF_LD, direct uses BPF_MEM) elseif V[b].source and V[b].source:find('ptr_to_') then LOAD(a, b, ofs, atype) else error('NYI: B[C] where B is not Lua table, BPF map, or pointer') end -- Bitfield, must be further narrowed with a bitmask/shift if bpos then local mask = 0 for i=bpos+1,bpos+bsize do mask = bit.bor(mask, bit.lshift(1, w*8-i)) end emit(BPF.ALU64 + BPF.AND + BPF.K, vreg(a), 0, 0, mask) -- Free optimization: single-bit values need just boolean result if bsize > 1 then local shift = w*8-bsize-bpos if shift > 0 then emit(BPF.ALU64 + BPF.RSH + BPF.K, vreg(a), 0, 0, shift) end end end V[a].type = atype V[a].const = new_const V[a].source = V[b].source -- Track direct access to skb data -- see https://www.kernel.org/doc/Documentation/networking/filter.txt "Direct packet access" if ffi.istype(base.__dissector, ffi.typeof('struct sk_buff')) then -- Direct access to skb uses skb->data and skb->data_end -- which are encoded as u32, but are actually pointers if c == 'data' or c == 'data_end' then V[a].const = {__dissector = ffi.typeof('uint8_t')} V[a].source = 'ptr_to_skb' end end end else V[a].const = base[c] end end, -- Loops and branches CALLM = function (a, b, _, d) -- A = A(A+1, ..., A+D+MULTRES) -- NYI: Support single result only CALL(a, b, d+2) end, CALL = function (a, b, _, d) -- A = A(A+1, ..., A+D-1) CALL(a, b, d) end, JMP = function (a, _, c, _) -- JMP -- 
Discard unused slots after jump for i, _ in pairs(V) do if i >= a and i < stackslots then V[i] = nil end end -- Cross basic block boundary if the jump target isn't provably unreachable local val = code.fixup[c] or {} if code.seen_cmp and code.seen_cmp ~= ALWAYS then if code.seen_cmp ~= NEVER then -- Do not emit the jump or fixup -- Store previous CMP insn for reemitting after compensation code local jmpi = ffi.new('struct bpf_insn', code.insn[code.pc-1]) code.pc = code.pc - 1 -- First branch point, emit compensation code local Vcomp = Vstate[c] if not Vcomp then -- Select scratch register (R0-5) that isn't used as operand -- in the CMP instruction, as the variable may not be live, after -- the JMP, but it may be used in the JMP+CMP instruction itself local tmp_reg = 0 for reg = 0, 5 do if reg ~= jmpi.dst_reg and reg ~= jmpi.src_reg then tmp_reg = reg break end end -- Force materialization of constants at the end of BB for i, v in pairs(V) do if not v.reg and cdef.isimmconst(v) then vreg(i, tmp_reg) -- Load to TMP register (not saved) reg_spill(i) -- Spill caller-saved registers end end -- Record variable state Vstate[c] = V Vcomp = V V = table_copy(V) -- Variable state already set, emit specific compensation code else bb_end(Vcomp) end -- Record pointer NULL check from condition -- If the condition checks pointer variable against NULL, -- we can assume it will not be NULL in the fall-through block if code.seen_null_guard then local var = code.seen_null_guard -- The null guard can have two forms: -- if x == nil then goto -- if x ~= nil then goto -- First form guarantees that the variable will be non-nil on the following instruction -- Second form guarantees that the variable will be non-nil at the jump target local vinfo = code.seen_null_guard_inverse and Vcomp[var] or V[var] if vinfo.source then local pos = vinfo.source:find('_or_null', 1, true) if pos then vinfo.source = vinfo.source:sub(1, pos - 1) end end end -- Reemit CMP insn emit(jmpi.code, jmpi.dst_reg, 
jmpi.src_reg, jmpi.off, jmpi.imm) -- Fuse JMP into previous CMP opcode, mark JMP target for fixup -- as we don't knot the relative offset in generated code yet table.insert(val, code.pc-1) code.fixup[c] = val end code.seen_cmp = nil code.seen_null_guard = nil code.seen_null_guard_inverse = nil elseif c == code.bc_pc + 1 then -- luacheck: ignore 542 -- Eliminate jumps to next immediate instruction -- e.g. 0002 JMP 1 => 0003 else -- We need to synthesise a condition that's always true, however -- BPF prohibits pointer arithmetic to prevent pointer leaks -- so we have to clear out one register and use it for cmp that's always true local dst_reg = reg_alloc(stackslots) V[stackslots].reg = nil -- Only temporary allocation -- First branch point, emit compensation code local Vcomp = Vstate[c] if not Vcomp then -- Force materialization of constants at the end of BB for i, v in pairs(V) do if not v.reg and cdef.isimmconst(v) then vreg(i, dst_reg) -- Load to TMP register (not saved) reg_spill(i) -- Spill caller-saved registers end end -- Record variable state Vstate[c] = V V = table_copy(V) -- Variable state already set, emit specific compensation code else bb_end(Vcomp) end emit(BPF.ALU64 + BPF.MOV + BPF.K, dst_reg, 0, 0, 0) emit(BPF.JMP + BPF.JEQ + BPF.K, dst_reg, 0, 0xffff, 0) table.insert(val, code.pc-1) -- Fixup JMP target code.reachable = false -- Code following the JMP is not reachable code.fixup[c] = val end end, RET1 = function (a, _, _, _) -- RET1 -- Free optimisation: spilled variable will not be filled again for i, v in pairs(V) do if i ~= a then v.reg = nil end end if V[a].reg ~= 0 then vreg(a, 0) end -- Convenience: dereference pointer variables -- e.g. 
'return map[k]' will return actual map value, not pointer if cdef.isptr(V[a].type) then vderef(0, 0, V[a]) end emit(BPF.JMP + BPF.EXIT, 0, 0, 0, 0) code.reachable = false end, RET0 = function (_, _, _, _) -- RET0 emit(BPF.ALU64 + BPF.MOV + BPF.K, 0, 0, 0, 0) emit(BPF.JMP + BPF.EXIT, 0, 0, 0, 0) code.reachable = false end, compile = function () return code end } -- Composite instructions function BC.CALLT(a, _, _, d) -- Tailcall: return A(A+1, ..., A+D-1) CALL(a, 1, d) BC.RET1(a) end -- Always initialize R6 with R1 context emit(BPF.ALU64 + BPF.MOV + BPF.X, 6, 1, 0, 0) -- Register R6 as context variable (first argument) if params and params > 0 then vset(0, 6, param_types[1] or proto.skb) assert(V[0].source == V[0].const.source) -- Propagate source annotation from typeinfo end -- Register tmpvars vset(stackslots) vset(stackslots+1) return setmetatable(BC, { __index = function (_, k, _) if type(k) == 'number' then local op_str = string.sub(require('jit.vmdef').bcnames, 6*k+1, 6*k+6) error(string.format("NYI: opcode '0x%02x' (%-04s)", k, op_str)) end end, __call = function (t, op, a, b, c, d) code.bc_pc = code.bc_pc + 1 -- Exitting BB straight through, emit compensation code if Vstate[code.bc_pc] then if code.reachable then -- Instruction is reachable from previous line -- so we must make the variable allocation consistent -- with the variable allocation at the jump source -- e.g. 0001 x:R0 = 5 -- 0002 if rand() then goto 0005 -- 0003 x:R0 -> x:stack -- 0004 y:R0 = 5 -- 0005 x:? = 10 <-- x was in R0 before jump, and stack after jump bb_end(Vstate[code.bc_pc]) else -- Instruction isn't reachable from previous line, restore variable layout -- e.g. 
RET or condition-less JMP on previous line V = table_copy(Vstate[code.bc_pc]) end end -- Perform fixup of jump targets -- We need to do this because the number of consumed and emitted -- bytecode instructions is different local fixup = code.fixup[code.bc_pc] if fixup ~= nil then -- Patch JMP source insn with relative offset for _,pc in ipairs(fixup) do code.insn[pc].off = code.pc - 1 - pc end code.fixup[code.bc_pc] = nil code.reachable = true end -- Execute if code.reachable then assert(t[op], string.format('NYI: instruction %s, parameters: %s,%s,%s,%s', op,a,b,c,d)) return t[op](a, b, c, d) end end, }) end -- Emitted code dump local function dump_mem(cls, ins, _, fuse) -- This is a very dense MEM instruction decoder without much explanation -- Refer to https://www.kernel.org/doc/Documentation/networking/filter.txt for instruction format local mode = bit.band(ins.code, 0xe0) if mode == BPF.XADD then cls = 5 end -- The only mode local op_1 = {'LD', 'LDX', 'ST', 'STX', '', 'XADD'} local op_2 = {[0]='W', [8]='H', [16]='B', [24]='DW'} local name = op_1[cls+1] .. 
op_2[bit.band(ins.code, 0x18)] local off = tonumber(ffi.cast('int16_t', ins.off)) -- Reinterpret as signed local dst = cls < 2 and 'R'..ins.dst_reg or string.format('[R%d%+d]', ins.dst_reg, off) local src = cls % 2 == 0 and '#'..ins.imm or 'R'..ins.src_reg if cls == BPF.LDX then src = string.format('[R%d%+d]', ins.src_reg, off) end if mode == BPF.ABS then src = string.format('skb[%d]', ins.imm) end if mode == BPF.IND then src = string.format('skb[R%d%+d]', ins.src_reg, ins.imm) end return string.format('%s\t%s\t%s', fuse and '' or name, fuse and '' or dst, src) end local function dump_alu(cls, ins, pc) local alu = {'ADD', 'SUB', 'MUL', 'DIV', 'OR', 'AND', 'LSH', 'RSH', 'NEG', 'MOD', 'XOR', 'MOV', 'ARSH', 'END' } local jmp = {'JA', 'JEQ', 'JGT', 'JGE', 'JSET', 'JNE', 'JSGT', 'JSGE', 'CALL', 'EXIT'} local helper = {'unspec', 'map_lookup_elem', 'map_update_elem', 'map_delete_elem', 'probe_read', 'ktime_get_ns', 'trace_printk', 'get_prandom_u32', 'get_smp_processor_id', 'skb_store_bytes', 'l3_csum_replace', 'l4_csum_replace', 'tail_call', 'clone_redirect', 'get_current_pid_tgid', 'get_current_uid_gid', 'get_current_comm', 'get_cgroup_classid', 'skb_vlan_push', 'skb_vlan_pop', 'skb_get_tunnel_key', 'skb_set_tunnel_key', 'perf_event_read', 'redirect', 'get_route_realm', 'perf_event_output', 'skb_load_bytes'} local op = 0 -- This is a very dense ALU instruction decoder without much explanation -- Refer to https://www.kernel.org/doc/Documentation/networking/filter.txt for instruction format for i = 0,13 do if 0x10 * i == bit.band(ins.code, 0xf0) then op = i + 1 break end end local name = (cls == 5) and jmp[op] or alu[op] local src = (bit.band(ins.code, 0x08) == BPF.X) and 'R'..ins.src_reg or '#'..ins.imm local target = (cls == 5 and op < 9) and string.format('\t=> %04d', pc + ins.off + 1) or '' if cls == 5 and op == 9 then target = string.format('\t; %s', helper[ins.imm + 1] or tostring(ins.imm)) end return string.format('%s\t%s\t%s%s', name, 'R'..ins.dst_reg, src, target) 
end local function dump_string(code, off, hide_counter) if not code then return end local cls_map = { [0] = dump_mem, [1] = dump_mem, [2] = dump_mem, [3] = dump_mem, [4] = dump_alu, [5] = dump_alu, [7] = dump_alu, } local result = {} local fused = false for i = off or 0, code.pc - 1 do local ins = code.insn[i] local cls = bit.band(ins.code, 0x07) local line = cls_map[cls](cls, ins, i, fused) if hide_counter then table.insert(result, line) else table.insert(result, string.format('%04u\t%s', i, line)) end fused = string.find(line, 'LDDW', 1) end return table.concat(result, '\n') end local function dump(code) if not code then return end print(string.format('-- BPF %s:0-%u', code.insn, code.pc)) print(dump_string(code)) end local function compile(prog, params) -- Create code emitter sandbox, include caller locals local env = { pkt=proto.pkt, eth=proto.pkt, BPF=BPF, ffi=ffi } -- Include upvalues up to 4 nested scopes back -- the narrower scope overrides broader scope for k = 5, 2, -1 do local i = 1 while true do local ok, n, v = pcall(debug.getlocal, k, i) if not ok or not n then break end env[n] = v i = i + 1 end end setmetatable(env, { __index = function (_, k) return proto[k] or builtins[k] or _G[k] end }) -- Create code emitter and compile LuaJIT bytecode if type(prog) == 'string' then prog = loadstring(prog) end -- Create error handler to print traceback local funci, pc = bytecode.funcinfo(prog), 0 local E = create_emitter(env, funci.stackslots, funci.params, params or {}) local on_err = function (e) funci = bytecode.funcinfo(prog, pc) local from, to = 0, 0 for _ = 1, funci.currentline do from = to to = string.find(funci.source, '\n', from+1, true) or 0 end print(funci.loc..':'..string.sub(funci.source, from+1, to-1)) print('error: '..e) print(debug.traceback()) end for _,op,a,b,c,d in bytecode.decoder(prog) do local ok, _, err = xpcall(E,on_err,op,a,b,c,d) if not ok then return nil, err end end return E:compile() end -- BPF map interface local bpf_map_mt = { __gc 
= function (map) S.close(map.fd) end, __len = function(map) return map.max_entries end, __index = function (map, k) if type(k) == 'string' then -- Return iterator if k == 'pairs' then return function(t, key) -- Get next key local next_key = ffi.new(ffi.typeof(t.key)) local cur_key if key then cur_key = t.key t.key[0] = key else cur_key = ffi.new(ffi.typeof(t.key)) end local ok, err = S.bpf_map_op(S.c.BPF_CMD.MAP_GET_NEXT_KEY, map.fd, cur_key, next_key) if not ok then return nil, err end -- Get next value assert(S.bpf_map_op(S.c.BPF_CMD.MAP_LOOKUP_ELEM, map.fd, next_key, map.val)) return next_key[0], map.val[0] end, map, nil -- Read for perf event map elseif k == 'reader' then return function (pmap, pid, cpu, event_type) -- Caller must either specify PID or CPU if not pid or pid < 0 then assert((cpu and cpu >= 0), 'NYI: creating composed reader for all CPUs') pid = -1 end -- Create BPF output reader local pe = S.t.perf_event_attr1() pe[0].type = 'software' pe[0].config = 'sw_bpf_output' pe[0].sample_type = 'raw' pe[0].sample_period = 1 pe[0].wakeup_events = 1 local reader, err = S.t.perf_reader(S.perf_event_open(pe, pid, cpu or -1)) if not reader then return nil, tostring(err) end -- Register event reader fd in BPF map assert(cpu < pmap.max_entries, string.format('BPF map smaller than read CPU %d', cpu)) pmap[cpu] = reader.fd -- Open memory map and start reading local ok, err = reader:start() assert(ok, tostring(err)) ok, err = reader:mmap() assert(ok, tostring(err)) return cdef.event_reader(reader, event_type) end -- Signalise this is a map type end return k == '__map' end -- Retrieve key map.key[0] = k local ok, err = S.bpf_map_op(S.c.BPF_CMD.MAP_LOOKUP_ELEM, map.fd, map.key, map.val) if not ok then return nil, err end return ffi.new(map.val_type, map.val[0]) end, __newindex = function (map, k, v) map.key[0] = k if v == nil then return S.bpf_map_op(map.fd, S.c.BPF_CMD.MAP_DELETE_ELEM, map.key, nil) end map.val[0] = v return 
S.bpf_map_op(S.c.BPF_CMD.MAP_UPDATE_ELEM, map.fd, map.key, map.val) end, } -- Linux tracing interface local function trace_check_enabled(path) path = path or '/sys/kernel/debug/tracing' if S.statfs(path) then return true end return nil, 'debugfs not accessible: "mount -t debugfs nodev /sys/kernel/debug"? missing sudo?' end -- Tracepoint interface local tracepoint_mt = { __index = { bpf = function (t, prog) if type(prog) ~= 'table' then -- Create protocol parser with source probe prog = compile(prog, {proto.type(t.type, {source='ptr_to_probe'})}) end -- Load the BPF program local prog_fd, err, log = S.bpf_prog_load(S.c.BPF_PROG.TRACEPOINT, prog.insn, prog.pc) assert(prog_fd, tostring(err)..': '..tostring(log)) -- Open tracepoint and attach t.reader:setbpf(prog_fd:getfd()) table.insert(t.progs, prog_fd) return prog_fd end, } } -- Open tracepoint local function tracepoint_open(path, pid, cpu, group_fd) -- Open tracepoint and compile tracepoint type local tp = assert(S.perf_tracepoint('/sys/kernel/debug/tracing/events/'..path)) local tp_type = assert(cdef.tracepoint_type(path)) -- Open tracepoint reader and create interface local reader = assert(S.perf_attach_tracepoint(tp, pid, cpu, group_fd)) return setmetatable({tp=tp,type=tp_type,reader=reader,progs={}}, tracepoint_mt) end local function trace_bpf(ptype, pname, pdef, retprobe, prog, pid, cpu, group_fd) -- Load BPF program if type(prog) ~= 'table' then prog = compile(prog, {proto.pt_regs}) end local prog_fd, err, log = S.bpf_prog_load(S.c.BPF_PROG.KPROBE, prog.insn, prog.pc) assert(prog_fd, tostring(err)..': '..tostring(log)) -- Open tracepoint and attach local tp, err = S.perf_probe(ptype, pname, pdef, retprobe) if not tp then prog_fd:close() return nil, tostring(err) end local reader, err = S.perf_attach_tracepoint(tp, pid, cpu, group_fd, {sample_type='raw, callchain'}) if not reader then prog_fd:close() S.perf_probe(ptype, pname, false) return nil, tostring(err) end local ok, err = reader:setbpf(prog_fd:getfd()) 
if not ok then
  -- setbpf() failed: tear everything down in reverse order before reporting
  prog_fd:close()
  reader:close()
  S.perf_probe(ptype, pname, false)
  return nil, tostring(err)..' (kernel version should be at least 4.1)'
end
-- Create GC closure for reader to close BPF program
-- and detach probe in correct order
ffi.gc(reader, function ()
  prog_fd:close()
  reader:close()
  S.perf_probe(ptype, pname, false)
end)
return {reader=reader, prog=prog_fd, probe=pname, probe_type=ptype}
end
-- Module interface
return setmetatable({
  new = create_emitter,
  dump = dump,
  dump_string = dump_string,
  maps = {},
  -- Create a BPF map of the given type. Key/value ctypes default to
  -- uint32_t and max_entries to 4096. Returns a map wrapper (bpf_map_mt)
  -- holding single-element key/value cdata buffers, or nil, err.
  map = function (type, max_entries, key_ctype, val_ctype)
    if not key_ctype then key_ctype = ffi.typeof('uint32_t') end
    if not val_ctype then val_ctype = ffi.typeof('uint32_t') end
    if not max_entries then max_entries = 4096 end
    -- Special case for BPF_MAP_STACK_TRACE
    if S.c.BPF_MAP[type] == S.c.BPF_MAP.STACK_TRACE then
      key_ctype = ffi.typeof('int32_t')
      val_ctype = ffi.typeof('struct bpf_stacktrace')
    end
    local fd, err = S.bpf_map_create(S.c.BPF_MAP[type], ffi.sizeof(key_ctype), ffi.sizeof(val_ctype), max_entries)
    if not fd then return nil, tostring(err) end
    local map = setmetatable({
      max_entries = max_entries,
      key = ffi.new(ffi.typeof('$ [1]', key_ctype)),
      val = ffi.new(ffi.typeof('$ [1]', val_ctype)),
      map_type = S.c.BPF_MAP[type],
      key_type = key_ctype,
      val_type = val_ctype,
      fd = fd:nogc():getfd(),
    }, bpf_map_mt)
    return map
  end,
  -- Attach a socket filter program. `sock` may be an interface name,
  -- a raw fd number, or an ljsyscall fd object. Returns the program fd.
  socket = function (sock, prog)
    -- Expect socket type, if sock is string then assume it's
    -- an interface name (e.g. 'lo'), if it's a number then typecast it as a socket
    local ok, err
    if type(sock) == 'string' then
      local iface = assert(S.nl.getlink())[sock]
      assert(iface, sock..' is not interface name')
      sock, err = S.socket('packet', 'raw')
      assert(sock, tostring(err))
      ok, err = sock:bind(S.t.sockaddr_ll({protocol='all', ifindex=iface.index}))
      assert(ok, tostring(err))
    elseif type(sock) == 'number' then
      sock = S.t.fd(sock):nogc()
    elseif ffi.istype(S.t.fd, sock) then -- luacheck: ignore
      -- No cast required
    else
      return nil, 'socket must either be an fd number, an interface name, or an ljsyscall socket'
    end
    -- Load program and attach it to socket
    if type(prog) ~= 'table' then
      prog = compile(prog, {proto.skb})
    end
    local prog_fd, err, log = S.bpf_prog_load(S.c.BPF_PROG.SOCKET_FILTER, prog.insn, prog.pc)
    assert(prog_fd, tostring(err)..': '..tostring(log))
    assert(sock:setsockopt('socket', 'attach_bpf', prog_fd:getfd()))
    return prog_fd, err
  end,
  -- Open a tracepoint and (optionally) attach a BPF program to it.
  tracepoint = function(tp, prog, pid, cpu, group_fd)
    assert(trace_check_enabled())
    -- Return tracepoint instance if no program specified
    -- this allows free specialisation of arg0 to tracepoint type
    local probe = tracepoint_open(tp, pid, cpu, group_fd)
    -- Load the BPF program
    if prog then
      probe:bpf(prog)
    end
    return probe
  end,
  -- Attach a kprobe; `tp` has the form 'probe_name:definition'.
  kprobe = function(tp, prog, retprobe, pid, cpu, group_fd)
    assert(trace_check_enabled())
    -- Open tracepoint and attach
    local pname, pdef = tp:match('([^:]+):(.+)')
    return trace_bpf('kprobe', pname, pdef, retprobe, prog, pid, cpu, group_fd)
  end,
  -- Attach an uprobe; `tp` has the form '/path/to/elf:symbol'.
  -- The symbol is resolved through libelf and rebased to a file offset.
  uprobe = function(tp, prog, retprobe, pid, cpu, group_fd)
    assert(trace_check_enabled())
    -- Translate symbol to address
    local obj, sym_want = tp:match('([^:]+):(.+)')
    if not S.statfs(obj) then return nil, S.t.error(S.c.E.NOENT) end
    -- Resolve Elf object (no support for anything else)
    local elf = require('bpf.elf').open(obj)
    local sym = elf:resolve(sym_want)
    if not sym then return nil, 'no such symbol' end
    sym = sym.st_value - elf:loadaddr()
    -- Format as hex in two 32-bit halves (Lua numbers lose precision above 2^53)
    local sym_addr = string.format('%x%04x', tonumber(bit.rshift(sym, 32)),
                                   tonumber(ffi.cast('uint32_t', sym)))
    -- Convert it to expected uprobe format
    local pname = string.format('%s_%s', obj:gsub('.*/', ''),
sym_addr)
    local pdef = obj..':0x'..sym_addr
    return trace_bpf('uprobe', pname, pdef, retprobe, prog, pid, cpu, group_fd)
  end,
  -- Open the kernel trace pipe for reading trace_printk output.
  tracelog = function(path)
    assert(trace_check_enabled())
    path = path or '/sys/kernel/debug/tracing/trace_pipe'
    return io.open(path, 'r')
  end,
  ntoh = builtins.ntoh, hton = builtins.hton,
}, {
  -- Calling the module itself compiles a Lua function into a BPF program.
  __call = function (_, prog) return compile(prog) end,
})
end)
package.preload['bpf.builtins'] = (function (...)
--[[
Copyright 2016 Marek Vavrusa

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]
local ffi = require('ffi')
local bit = require('bit')
local cdef = require('bpf.cdef')
local BPF, HELPER = ffi.typeof('struct bpf'), ffi.typeof('struct bpf_func_id')
-- Byte width -> BPF load/store size constant
local const_width = {
  [1] = BPF.B, [2] = BPF.H, [4] = BPF.W, [8] = BPF.DW,
}
-- Byte width -> unsigned integer ctype
local const_width_type = {
  [1] = ffi.typeof('uint8_t'), [2] = ffi.typeof('uint16_t'), [4] = ffi.typeof('uint32_t'), [8] = ffi.typeof('uint64_t'),
}
-- Built-ins that will be translated into BPF instructions
-- i.e. bit.bor(0xf0, 0x0f) becomes {'alu64, or, k', reg(0xf0), reg(0x0f), 0, 0}
local builtins = {
  [bit.lshift] = 'LSH',
  [bit.rshift] = 'RSH',
  [bit.band] = 'AND',
  [bit.bnot] = 'NEG',
  [bit.bor] = 'OR',
  [bit.bxor] = 'XOR',
  [bit.arshift] = 'ARSH',
  -- Extensions and intrinsics
}
-- Return the ctype for a byte width, falling back to a byte array for
-- widths without a native integer type.
local function width_type(w)
  -- Note: ffi.typeof doesn't accept '?' as template
  return const_width_type[w] or ffi.typeof(string.format('uint8_t [%d]', w))
end
builtins.width_type = width_type
-- Return struct member size/type (requires LuaJIT 2.1+)
-- I am ashamed that there's no easier way around it.
local function sizeofattr(ct, name)
  if not ffi.typeinfo then
    error('LuaJIT 2.1+ is required for ffi.typeinfo')
  end
  local cinfo = ffi.typeinfo(ct)
  -- Walk the sibling chain until the member with the given name is found
  while true do
    cinfo = ffi.typeinfo(cinfo.sib)
    if not cinfo then return end
    if cinfo.name == name then break end
  end
  local size = math.max(1, ffi.typeinfo(cinfo.sib or ct).size - cinfo.size)
  -- Guess type name
  return size, builtins.width_type(size)
end
builtins.sizeofattr = sizeofattr
-- Byte-order conversions for little endian
local function ntoh(x, w)
  if w then x = ffi.cast(const_width_type[w/8], x) end
  return bit.bswap(x)
end
local function hton(x, w)
  return ntoh(x, w)
end
builtins.ntoh = ntoh
builtins.hton = hton
builtins[ntoh] = function (e, dst, a, w)
  -- This is trickery, but TO_LE means cpu_to_le(),
  -- and we want exactly the opposite as network is always 'be'
  w = w or ffi.sizeof(e.V[a].type)*8
  if w == 8 then return end -- NOOP
  assert(w <= 64, 'NYI: hton(a[, width]) - operand larger than register width')
  -- Allocate registers and execute
  e.vcopy(dst, a)
  e.emit(BPF.ALU + BPF.END + BPF.TO_BE, e.vreg(dst), 0, 0, w)
end
builtins[hton] = function (e, dst, a, w)
  w = w or ffi.sizeof(e.V[a].type)*8
  if w == 8 then return end -- NOOP
  assert(w <= 64, 'NYI: hton(a[, width]) - operand larger than register width')
  -- Allocate registers and execute
  e.vcopy(dst, a)
  e.emit(BPF.ALU + BPF.END + BPF.TO_LE, e.vreg(dst), 0, 0, w)
end
-- Byte-order conversions for big endian are no-ops
if ffi.abi('be') then
  ntoh = function (x, w)
    return w and ffi.cast(const_width_type[w/8], x) or x
  end
  hton = ntoh
  builtins[ntoh] = function(_, _, _) return end
  builtins[hton] = function(_, _, _) return end
end
-- Other built-ins
local function xadd() error('NYI') end
builtins.xadd = xadd
builtins[xadd] = function (e, ret, a,
b, off)
  local vinfo = e.V[a].const
  assert(vinfo and vinfo.__dissector, 'xadd(a, b[, offset]) called on non-pointer')
  local w = ffi.sizeof(vinfo.__dissector)
  -- Calculate structure attribute offsets
  if e.V[off] and type(e.V[off].const) == 'string' then
    local ct, field = vinfo.__dissector, e.V[off].const
    off = ffi.offsetof(ct, field)
    assert(off, 'xadd(a, b, offset) - offset is not valid in given structure')
    w = sizeofattr(ct, field)
  end
  assert(w == 4 or w == 8, 'NYI: xadd() - 1 and 2 byte atomic increments are not supported')
  -- Allocate registers and execute
  local src_reg = e.vreg(b)
  local dst_reg = e.vreg(a)
  -- Set variable for return value and call
  e.vset(ret)
  e.vreg(ret, 0, true, ffi.typeof('int32_t'))
  -- Optimize the NULL check away if provably not NULL
  if not e.V[a].source or e.V[a].source:find('_or_null', 1, true) then
    e.emit(BPF.JMP + BPF.JEQ + BPF.K, dst_reg, 0, 1, 0) -- if (dst != NULL)
  end
  e.emit(BPF.XADD + BPF.STX + const_width[w], dst_reg, src_reg, off or 0, 0)
end
-- Stub: probe_read() is only meaningful once translated to BPF bytecode
local function probe_read() error('NYI') end
builtins.probe_read = probe_read
-- Emit a bpf_probe_read(dst_stack, size, src [+ ofs]) helper call.
-- `dst` must live on the BPF stack; if it has no stack allocation yet,
-- one is created with the given `vtype`.
builtins[probe_read] = function (e, ret, dst, src, vtype, ofs)
  e.reg_alloc(e.tmpvar, 1)
  -- Load stack pointer to dst, since only load to stack memory is supported
  -- we have to use allocated stack memory or create a new allocation and convert
  -- to pointer type
  e.emit(BPF.ALU64 + BPF.MOV + BPF.X, 1, 10, 0, 0)
  -- BUGFIX: the original `not e.V[dst].const.__base > 0` parsed as
  -- `(not __base) > 0` because `not` binds tighter than `>`, raising
  -- "attempt to compare boolean with number" whenever `const` was truthy.
  -- The intent is: allocate stack memory unless dst already has a valid base.
  if not e.V[dst].const
     or not (e.V[dst].const.__base and e.V[dst].const.__base > 0) then
    builtins[ffi.new](e, dst, vtype) -- Allocate stack memory
  end
  e.emit(BPF.ALU64 + BPF.ADD + BPF.K, 1, 0, 0, -e.V[dst].const.__base)
  -- Set stack memory maximum size bound
  e.reg_alloc(e.tmpvar, 2)
  if not vtype then
    vtype = cdef.typename(e.V[dst].type)
    -- Dereference pointer type to pointed type for size calculation
    if vtype:sub(-1) == '*' then vtype = vtype:sub(0, -2) end
  end
  local w = ffi.sizeof(vtype)
  e.emit(BPF.ALU64 + BPF.MOV + BPF.K, 2, 0, 0, w)
  -- Set source pointer
  if e.V[src].reg then
    e.reg_alloc(e.tmpvar, 3) -- Copy from original register
e.emit(BPF.ALU64 + BPF.MOV + BPF.X, 3, e.V[src].reg, 0, 0)
else
  e.vreg(src, 3)
  e.reg_spill(src) -- Spill to avoid overwriting
end
if ofs and ofs > 0 then
  e.emit(BPF.ALU64 + BPF.ADD + BPF.K, 3, 0, 0, ofs)
end
-- Call probe read helper
ret = ret or e.tmpvar
e.vset(ret)
e.vreg(ret, 0, true, ffi.typeof('int32_t'))
e.emit(BPF.JMP + BPF.CALL, 0, 0, 0, HELPER.probe_read)
e.V[e.tmpvar].reg = nil -- Free temporary registers
end
-- ffi.cast(ctype, x): retype a tracked variable without changing its value;
-- for pointer types this also records the origin of the data.
builtins[ffi.cast] = function (e, dst, ct, x)
  assert(e.V[ct].const, 'ffi.cast(ctype, x) called with bad ctype')
  e.vcopy(dst, x)
  if e.V[x].const and type(e.V[x].const) == 'table' then
    e.V[dst].const.__dissector = ffi.typeof(e.V[ct].const)
  end
  e.V[dst].type = ffi.typeof(e.V[ct].const)
  -- Specific types also encode source of the data
  -- This is because BPF has different helpers for reading
  -- different data sources, so variables must track origins.
  -- struct pt_regs - source of the data is probe
  -- struct skb - source of the data is socket buffer
  -- struct X - source of the data is probe/tracepoint
  if ffi.typeof(e.V[ct].const) == ffi.typeof('struct pt_regs') then
    e.V[dst].source = 'ptr_to_probe'
  end
end
-- ffi.new(ctype): allocate BPF stack memory for a non-pointer ctype
builtins[ffi.new] = function (e, dst, ct, x)
  if type(ct) == 'number' then
    ct = ffi.typeof(e.V[ct].const) -- Get ctype from variable
  end
  assert(not x, 'NYI: ffi.new(ctype, ...) - initializer is not supported')
  assert(not cdef.isptr(ct, true), 'NYI: ffi.new(ctype, ...) - ctype MUST NOT be a pointer')
  e.vset(dst, nil, ct)
  e.V[dst].source = 'ptr_to_stack'
  e.V[dst].const = {__base = e.valloc(ffi.sizeof(ct), true), __dissector = ct}
  -- Set array dissector if created an array
  -- e.g. if ct is 'char [2]', then dissector is 'char'
  local elem_type = tostring(ct):match('ctype<(.+)%s%[(%d+)%]>')
  if elem_type then
    e.V[dst].const.__dissector = ffi.typeof(elem_type)
  end
end
-- ffi.copy(dst, src): copy between pointers; when src originates from a
-- probe this lowers to a bpf_probe_read() into dst's stack memory.
builtins[ffi.copy] = function (e, ret, dst, src)
  assert(cdef.isptr(e.V[dst].type), 'ffi.copy(dst, src) - dst MUST be a pointer type')
  assert(cdef.isptr(e.V[src].type), 'ffi.copy(dst, src) - src MUST be a pointer type')
  -- Specific types also encode source of the data
  -- struct pt_regs - source of the data is probe
  -- struct skb - source of the data is socket buffer
  if e.V[src].source and e.V[src].source:find('ptr_to_probe', 1, true) then
    e.reg_alloc(e.tmpvar, 1)
    -- Load stack pointer to dst, since only load to stack memory is supported
    -- we have to either use spilled variable or allocated stack memory offset
    e.emit(BPF.ALU64 + BPF.MOV + BPF.X, 1, 10, 0, 0)
    if e.V[dst].spill then
      e.emit(BPF.ALU64 + BPF.ADD + BPF.K, 1, 0, 0, -e.V[dst].spill)
    elseif e.V[dst].const.__base then
      e.emit(BPF.ALU64 + BPF.ADD + BPF.K, 1, 0, 0, -e.V[dst].const.__base)
    else error('ffi.copy(dst, src) - can\'t get stack offset of dst') end
    -- Set stack memory maximum size bound
    local dst_tname = cdef.typename(e.V[dst].type)
    if dst_tname:sub(-1) == '*' then dst_tname = dst_tname:sub(0, -2) end
    e.reg_alloc(e.tmpvar, 2)
    e.emit(BPF.ALU64 + BPF.MOV + BPF.K, 2, 0, 0, ffi.sizeof(dst_tname))
    -- Set source pointer
    if e.V[src].reg then
      e.reg_alloc(e.tmpvar, 3) -- Copy from original register
      e.emit(BPF.ALU64 + BPF.MOV + BPF.X, 3, e.V[src].reg, 0, 0)
    else
      e.vreg(src, 3)
      e.reg_spill(src) -- Spill to avoid overwriting
    end
    -- Call probe read helper
    e.vset(ret)
    e.vreg(ret, 0, true, ffi.typeof('int32_t'))
    e.emit(BPF.JMP + BPF.CALL, 0, 0, 0, HELPER.probe_read)
    e.V[e.tmpvar].reg = nil -- Free temporary registers
  elseif e.V[src].const and e.V[src].const.__map then
    error('NYI: ffi.copy(dst, src) - src is backed by BPF map')
  elseif e.V[src].const and e.V[src].const.__dissector then
    error('NYI: ffi.copy(dst, src) - src is backed by socket buffer')
  else
    -- TODO: identify cheap register move
    -- TODO: identify copy to/from stack
    error('NYI: ffi.copy(dst, src) - src is neither BPF map/socket buffer or probe')
  end
end
-- print(format, ...) builtin changes semantics from Lua print(...)
-- the first parameter has to be format and only reduced set of conversion specificers
-- is allowed: %d %u %x %ld %lu %lx %lld %llu %llx %p %s
builtins[print] = function (e, ret, fmt, a1, a2, a3)
  -- Load format string and length
  -- NOTE(review): both calls below allocate register 1; the second was
  -- presumably meant for a different register — confirm against emitter.
  e.reg_alloc(e.V[e.tmpvar], 1)
  e.reg_alloc(e.V[e.tmpvar+1], 1)
  if type(e.V[fmt].const) == 'string' then
    local src = e.V[fmt].const
    local len = #src + 1
    local dst = e.valloc(len, src)
    -- TODO: this is materialize step
    e.V[fmt].const = {__base=dst}
    e.V[fmt].type = ffi.typeof('char ['..len..']')
  elseif e.V[fmt].const.__base then -- luacheck: ignore
    -- NOP
  else
    error('NYI: print(fmt, ...) - format variable is not literal/stack memory')
  end
  -- Prepare helper call
  e.emit(BPF.ALU64 + BPF.MOV + BPF.X, 1, 10, 0, 0)
  e.emit(BPF.ALU64 + BPF.ADD + BPF.K, 1, 0, 0, -e.V[fmt].const.__base)
  e.emit(BPF.ALU64 + BPF.MOV + BPF.K, 2, 0, 0, ffi.sizeof(e.V[fmt].type))
  if a1 then
    local args = {a1, a2, a3}
    assert(#args <= 3, 'print(fmt, ...) - maximum of 3 arguments supported')
    for i, arg in ipairs(args) do
      e.vcopy(e.tmpvar, arg)  -- Copy variable
      e.vreg(e.tmpvar, 3+i-1) -- Materialize it in arg register
    end
  end
  -- Call helper
  e.vset(ret)
  e.vreg(ret, 0, true, ffi.typeof('int32_t')) -- Return is integer
  e.emit(BPF.JMP + BPF.CALL, 0, 0, 0, HELPER.trace_printk)
  e.V[e.tmpvar].reg = nil -- Free temporary registers
end
-- Implements bpf_perf_event_output(ctx, map, flags, var, vlen) on perf event map
local function perf_submit(e, dst, map_var, src)
  -- Set R2 = map fd (indirect load)
  local map = e.V[map_var].const
  e.vcopy(e.tmpvar, map_var)
  e.vreg(e.tmpvar, 2, true, ffi.typeof('uint64_t'))
  e.LD_IMM_X(2, BPF.PSEUDO_MAP_FD, map.fd, ffi.sizeof('uint64_t'))
  -- Set R1 = ctx
  e.reg_alloc(e.tmpvar, 1) -- Spill anything in R1 (unnamed tmp variable)
  e.emit(BPF.ALU64 + BPF.MOV + BPF.X, 1, 6, 0, 0) -- CTX is always in R6, copy
  -- Set R3 = flags
  e.vset(e.tmpvar, nil, 0) -- BPF_F_CURRENT_CPU
  e.vreg(e.tmpvar, 3, false, ffi.typeof('uint64_t'))
  -- Set R4 = pointer to src on stack
  assert(e.V[src].const.__base, 'NYI: submit(map, var) - variable is not on stack')
  e.emit(BPF.ALU64 + BPF.MOV + BPF.X, 4, 10, 0, 0)
  e.emit(BPF.ALU64 + BPF.ADD + BPF.K, 4, 0, 0, -e.V[src].const.__base)
  -- Set R5 = src length
  e.emit(BPF.ALU64 + BPF.MOV + BPF.K, 5, 0, 0, ffi.sizeof(e.V[src].type))
  -- Set R0 = ret and call
  e.vset(dst)
  e.vreg(dst, 0, true, ffi.typeof('int32_t')) -- Return is integer
  e.emit(BPF.JMP + BPF.CALL, 0, 0, 0, HELPER.perf_event_output)
  e.V[e.tmpvar].reg = nil -- Free temporary registers
end
-- Implements bpf_skb_load_bytes(ctx, off, var, vlen) on skb->data
local function load_bytes(e, dst, off, var)
  -- Set R2 = offset
  e.vset(e.tmpvar, nil, off)
  e.vreg(e.tmpvar, 2, false, ffi.typeof('uint64_t'))
  -- Set R1 = ctx
  e.reg_alloc(e.tmpvar, 1) -- Spill anything in R1 (unnamed tmp variable)
  e.emit(BPF.ALU64 + BPF.MOV + BPF.X, 1, 6, 0, 0) -- CTX is always in R6, copy
  -- Set R3 = pointer to var on stack
  assert(e.V[var].const.__base, 'NYI: load_bytes(off, var, len) - variable is not on stack')
  e.emit(BPF.ALU64 + BPF.MOV + BPF.X, 3, 10, 0, 0)
  e.emit(BPF.ALU64 + BPF.ADD + BPF.K, 3, 0, 0, -e.V[var].const.__base)
  -- Set R4 = var length
  e.emit(BPF.ALU64 + BPF.MOV + BPF.K, 4, 0, 0, ffi.sizeof(e.V[var].type))
  -- Set R0 = ret and call
  e.vset(dst)
  e.vreg(dst, 0, true, ffi.typeof('int32_t')) -- Return is integer
  e.emit(BPF.JMP + BPF.CALL, 0, 0, 0, HELPER.skb_load_bytes)
  e.V[e.tmpvar].reg = nil -- Free temporary registers
end
-- Implements bpf_get_stack_id()
local function stack_id(e, ret, map_var, key)
  -- Set R2 = map fd (indirect load)
  local map = e.V[map_var].const
  e.vcopy(e.tmpvar, map_var)
  e.vreg(e.tmpvar, 2, true, ffi.typeof('uint64_t'))
  e.LD_IMM_X(2, BPF.PSEUDO_MAP_FD, map.fd, ffi.sizeof('uint64_t'))
  -- Set R1 = ctx
  e.reg_alloc(e.tmpvar, 1) -- Spill anything in R1 (unnamed tmp variable)
  e.emit(BPF.ALU64 + BPF.MOV + BPF.X, 1, 6, 0, 0) -- CTX is always in R6, copy
  -- Load flags in R3 (immediate value or key)
  local imm = e.V[key].const
  assert(tonumber(imm), 'NYI: stack_id(map, var), var must be constant number')
  e.reg_alloc(e.tmpvar, 3) -- Spill anything in R3 (unnamed tmp variable)
  e.LD_IMM_X(3, 0, imm, 8)
  -- Return R0 as signed integer
  e.vset(ret)
  e.vreg(ret, 0, true, ffi.typeof('int32_t'))
  e.emit(BPF.JMP + BPF.CALL, 0, 0, 0, HELPER.get_stackid)
  e.V[e.tmpvar].reg = nil -- Free temporary registers
end
-- table.insert(table, value) keeps semantics with the exception of BPF maps
-- map `perf_event` -> submit inserted value
builtins[table.insert] = function (e, dst, map_var, value)
  assert(e.V[map_var].const.__map, 'NYI: table.insert() supported only on BPF maps')
  return perf_submit(e, dst, map_var, value)
end
-- bpf_get_current_comm(buffer) - write current process name to byte buffer
local function comm() error('NYI') end
builtins[comm] = function (e, ret, dst)
  -- Set R1 = buffer
  assert(e.V[dst].const.__base, 'NYI: comm(buffer) - buffer variable is not on stack')
  e.reg_alloc(e.tmpvar, 1) -- Spill
e.emit(BPF.ALU64 + BPF.MOV + BPF.X, 1, 10, 0, 0)
e.emit(BPF.ALU64 + BPF.ADD + BPF.K, 1, 0, 0, -e.V[dst].const.__base)
-- Set R2 = length
e.reg_alloc(e.tmpvar, 2) -- Spill
e.emit(BPF.ALU64 + BPF.MOV + BPF.K, 2, 0, 0, ffi.sizeof(e.V[dst].type))
-- Return is integer
e.vset(ret)
e.vreg(ret, 0, true, ffi.typeof('int32_t'))
e.emit(BPF.JMP + BPF.CALL, 0, 0, 0, HELPER.get_current_comm)
e.V[e.tmpvar].reg = nil -- Free temporary registers
end
-- Math library built-ins
math.log2 = function () error('NYI') end
-- Integer log2: emits a branchless-ish highest-set-bit search.
builtins[math.log2] = function (e, dst, x)
  -- Classic integer bits subdivison algorithm to find the position
  -- of the highest bit set, adapted for BPF bytecode-friendly operations.
  -- https://graphics.stanford.edu/~seander/bithacks.html
  -- r = 0
  local r = e.vreg(dst, nil, true)
  e.emit(BPF.ALU64 + BPF.MOV + BPF.K, r, 0, 0, 0)
  -- v = x
  e.vcopy(e.tmpvar, x)
  local v = e.vreg(e.tmpvar, 2)
  if cdef.isptr(e.V[x].const) then
    -- No pointer arithmetics, dereference
    e.vderef(v, v, {const = {__dissector=ffi.typeof('uint64_t')}})
  end
  -- Invert value to invert all tests, otherwise we would need and+jnz
  e.emit(BPF.ALU64 + BPF.NEG + BPF.K, v, 0, 0, 0) -- v = ~v
  -- Unrolled test cases, converted masking to arithmetic as we don't have "if !(a & b)"
  -- As we're testing inverted value, we have to use arithmetic shift to copy MSB
  for i=4,0,-1 do
    local k = bit.lshift(1, i)
    e.emit(BPF.JMP + BPF.JGT + BPF.K, v, 0, 2, bit.bnot(bit.lshift(1, k))) -- if !upper_half(x)
    e.emit(BPF.ALU64 + BPF.ARSH + BPF.K, v, 0, 0, k) -- v >>= k
    e.emit(BPF.ALU64 + BPF.OR + BPF.K, r, 0, 0, k)   -- r |= k
  end
  -- No longer constant, cleanup tmpvars
  e.V[dst].const = nil
  e.V[e.tmpvar].reg = nil
end
-- Integer log10 derived from log2 using a fixed-point multiplier.
builtins[math.log10] = function (e, dst, x)
  -- Compute log2(x) and transform
  builtins[math.log2](e, dst, x)
  -- Relationship: log10(v) = log2(v) / log2(10)
  local r = e.V[dst].reg
  e.emit(BPF.ALU64 + BPF.ADD + BPF.K, r, 0, 0, 1)    -- Compensate round-down
  e.emit(BPF.ALU64 + BPF.MUL + BPF.K, r, 0, 0, 1233) -- log2(10) ~ 1233>>12
e.emit(BPF.ALU64 + BPF.RSH + BPF.K, r, 0, 0, 12)
end
-- Natural logarithm derived from log2 using a fixed-point multiplier.
builtins[math.log] = function (e, dst, x)
  -- Compute log2(x) and transform
  builtins[math.log2](e, dst, x)
  -- Relationship: ln(v) = log2(v) / log2(e)
  local r = e.V[dst].reg
  e.emit(BPF.ALU64 + BPF.ADD + BPF.K, r, 0, 0, 1)    -- Compensate round-down
  e.emit(BPF.ALU64 + BPF.MUL + BPF.K, r, 0, 0, 2839) -- log2(e) ~ 2839>>12
  e.emit(BPF.ALU64 + BPF.RSH + BPF.K, r, 0, 0, 12)
end
-- Call-type helpers
-- Emit a parameterless BPF helper call `h` and bind its result to `dst`.
local function call_helper(e, dst, h, vtype)
  e.vset(dst)
  e.vreg(dst, 0, true, vtype or ffi.typeof('uint64_t'))
  e.emit(BPF.JMP + BPF.CALL, 0, 0, 0, h)
  e.V[dst].const = nil -- Target is not a function anymore
end
-- Stubs: these are only meaningful once translated to BPF helper calls
local function cpu() error('NYI') end
local function rand() error('NYI') end
local function time() error('NYI') end
local function pid_tgid() error('NYI') end
local function uid_gid() error('NYI') end
-- Export helpers and builtin variants
builtins.cpu = cpu
builtins.time = time
builtins.pid_tgid = pid_tgid
builtins.uid_gid = uid_gid
builtins.comm = comm
builtins.perf_submit = perf_submit
builtins.stack_id = stack_id
builtins.load_bytes = load_bytes
builtins[cpu] = function (e, dst) return call_helper(e, dst, HELPER.get_smp_processor_id) end
builtins[rand] = function (e, dst) return call_helper(e, dst, HELPER.get_prandom_u32, ffi.typeof('uint32_t')) end
builtins[time] = function (e, dst) return call_helper(e, dst, HELPER.ktime_get_ns) end
builtins[pid_tgid] = function (e, dst) return call_helper(e, dst, HELPER.get_current_pid_tgid) end
builtins[uid_gid] = function (e, dst) return call_helper(e, dst, HELPER.get_current_uid_gid) end
builtins[perf_submit] = function (e, dst, map, value) return perf_submit(e, dst, map, value) end
builtins[stack_id] = function (e, dst, map, key) return stack_id(e, dst, map, key) end
-- NOTE(review): load_bytes() is defined with four parameters; the trailing
-- `len` is forwarded here but ignored by the implementation above.
builtins[load_bytes] = function (e, dst, off, var, len) return load_bytes(e, dst, off, var, len) end
return builtins
end)
package.preload['bpf.cdef'] = (function (...)
--[[
Copyright 2016 Marek Vavrusa

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]
local ffi = require('ffi')
local bit = require('bit')
local has_syscall, S = pcall(require, 'syscall')
local M = {}
-- BPF instruction encoding constants, eBPF command numbers and helper ids,
-- exposed as static struct members readable through ffi.typeof instances.
ffi.cdef [[
struct bpf {
  /* Instruction classes */
  static const int LD = 0x00;
  static const int LDX = 0x01;
  static const int ST = 0x02;
  static const int STX = 0x03;
  static const int ALU = 0x04;
  static const int JMP = 0x05;
  static const int ALU64 = 0x07;
  /* ld/ldx fields */
  static const int W = 0x00;
  static const int H = 0x08;
  static const int B = 0x10;
  static const int ABS = 0x20;
  static const int IND = 0x40;
  static const int MEM = 0x60;
  static const int LEN = 0x80;
  static const int MSH = 0xa0;
  /* alu/jmp fields */
  static const int ADD = 0x00;
  static const int SUB = 0x10;
  static const int MUL = 0x20;
  static const int DIV = 0x30;
  static const int OR = 0x40;
  static const int AND = 0x50;
  static const int LSH = 0x60;
  static const int RSH = 0x70;
  static const int NEG = 0x80;
  static const int MOD = 0x90;
  static const int XOR = 0xa0;
  static const int JA = 0x00;
  static const int JEQ = 0x10;
  static const int JGT = 0x20;
  static const int JGE = 0x30;
  static const int JSET = 0x40;
  static const int K = 0x00;
  static const int X = 0x08;
  static const int JNE = 0x50;  /* jump != */
  static const int JSGT = 0x60; /* SGT is signed '>', GT in x86 */
  static const int JSGE = 0x70; /* SGE is signed '>=', GE in x86 */
  static const int CALL = 0x80; /* function call */
  static const int EXIT = 0x90; /* function return */
  /* ld/ldx fields */
  static const int DW = 0x18;   /* double word */
  static const int XADD = 0xc0; /* exclusive add */
  /* alu/jmp fields */
  static const int MOV = 0xb0;  /* mov reg to reg */
  static const int ARSH = 0xc0; /* sign extending arithmetic shift right */
  /* change endianness of a register */
  static const int END = 0xd0;
  /* flags for endianness conversion: */
  static const int TO_LE = 0x00; /* convert to little-endian */
  static const int TO_BE = 0x08; /* convert to big-endian */
  /* misc */
  static const int PSEUDO_MAP_FD = 0x01;
  /* helper functions */
  static const int F_CURRENT_CPU = 0xffffffff;
  static const int F_USER_STACK = 1 << 8;
  static const int F_FAST_STACK_CMP = 1 << 9;
  static const int F_REUSE_STACKID = 1 << 10;
  /* special offsets for ancillary data */
  static const int NET_OFF = -0x100000;
  static const int LL_OFF = -0x200000;
};
/* eBPF commands */
struct bpf_cmd {
  static const int MAP_CREATE = 0;
  static const int MAP_LOOKUP_ELEM = 1;
  static const int MAP_UPDATE_ELEM = 2;
  static const int MAP_DELETE_ELEM = 3;
  static const int MAP_GET_NEXT_KEY = 4;
  static const int PROG_LOAD = 5;
  static const int OBJ_PIN = 6;
  static const int OBJ_GET = 7;
};
/* eBPF helpers */
struct bpf_func_id {
  static const int unspec = 0;
  static const int map_lookup_elem = 1;
  static const int map_update_elem = 2;
  static const int map_delete_elem = 3;
  static const int probe_read = 4;
  static const int ktime_get_ns = 5;
  static const int trace_printk = 6;
  static const int get_prandom_u32 = 7;
  static const int get_smp_processor_id = 8;
  static const int skb_store_bytes = 9;
  static const int l3_csum_replace = 10;
  static const int l4_csum_replace = 11;
  static const int tail_call = 12;
  static const int clone_redirect = 13;
  static const int get_current_pid_tgid = 14;
  static const int get_current_uid_gid = 15;
  static const int get_current_comm = 16;
  static const int get_cgroup_classid = 17;
  static const int skb_vlan_push = 18;
  static const int skb_vlan_pop = 19;
  static const int skb_get_tunnel_key = 20;
  static const int skb_set_tunnel_key = 21;
  static const int perf_event_read = 22;
  static const int redirect = 23;
  static const int get_route_realm = 24;
  static const int perf_event_output = 25;
  static const int skb_load_bytes = 26;
  static const int get_stackid = 27;
};
/* BPF_MAP_STACK_TRACE structures and constants */
static const int BPF_MAX_STACK_DEPTH = 127;
struct bpf_stacktrace {
  uint64_t ip[BPF_MAX_STACK_DEPTH];
};
]]
-- Compatibility: ljsyscall doesn't have support for BPF syscall
if not has_syscall or not S.bpf then
  error("ljsyscall doesn't support bpf(), must be updated")
else
  local strflag = require('syscall.helpers').strflag
  -- Compatibility: ljsyscall<=0.12
  if not S.c.BPF_MAP.LRU_HASH then
    S.c.BPF_MAP = strflag {
      UNSPEC = 0, HASH = 1, ARRAY = 2, PROG_ARRAY = 3, PERF_EVENT_ARRAY = 4,
      PERCPU_HASH = 5, PERCPU_ARRAY = 6, STACK_TRACE = 7, CGROUP_ARRAY = 8,
      LRU_HASH = 9, LRU_PERCPU_HASH = 10, LPM_TRIE = 11, ARRAY_OF_MAPS = 12,
      HASH_OF_MAPS = 13, DEVMAP = 14, SOCKMAP = 15, CPUMAP = 16,
    }
  end
  if not S.c.BPF_PROG.TRACEPOINT then
    S.c.BPF_PROG = strflag {
      UNSPEC = 0, SOCKET_FILTER = 1, KPROBE = 2, SCHED_CLS = 3, SCHED_ACT = 4,
      TRACEPOINT = 5, XDP = 6, PERF_EVENT = 7, CGROUP_SKB = 8, CGROUP_SOCK = 9,
      LWT_IN = 10, LWT_OUT = 11, LWT_XMIT = 12, SOCK_OPS = 13, SK_SKB = 14,
      CGROUP_DEVICE = 15, SK_MSG = 16, RAW_TRACEPOINT = 17, CGROUP_SOCK_ADDR = 18,
    }
  end
end
-- Compatibility: metatype for stacktrace
-- Iterator over stack frames; stops at first zero instruction pointer.
local function stacktrace_iter(t, i)
  i = i + 1
  if i < #t and t.ip[i] > 0 then
    return i, t.ip[i]
  end
end
ffi.metatype('struct bpf_stacktrace', {
  __len = function (t) return ffi.sizeof(t.ip) / ffi.sizeof(t.ip[0]) end,
  __ipairs = function (t) return stacktrace_iter, t, -1 end,
})
-- Reflect cdata type
function M.typename(v)
  if not v or type(v) ~= 'cdata' then return nil end
  return string.match(tostring(ffi.typeof(v)), '<([^>]+)')
end
-- Reflect if cdata type can be pointer (accepts array or pointer)
function M.isptr(v, noarray)
  local ctname = M.typename(v)
  if ctname then
ctname = string.sub(ctname, -1)
ctname = ctname == '*' or (not noarray and ctname == ']')
end
return ctname
end
-- Return true if variable is a non-nil constant that can be used as immediate value
-- e.g. result of KSHORT and KNUM
function M.isimmconst(v)
  return (type(v.const) == 'number' and not ffi.istype(v.type, ffi.typeof('void')))
    or type(v.const) == 'cdata' and ffi.istype(v.type, ffi.typeof('uint64_t')) -- Lua numbers are at most 52 bits
    or type(v.const) == 'cdata' and ffi.istype(v.type, ffi.typeof('int64_t'))
end
-- Return running kernel version packed as 0xXXYYZZ from 'X.Y.Z'.
function M.osversion()
  -- We have no better way to extract current kernel hex-string other
  -- than parsing headers, compiling a helper function or reading /proc
  local ver_str, count = S.sysctl('kernel.version'):match('%d+.%d+.%d+'), 2
  if not ver_str then
    -- kernel.version is freeform, fallback to kernel.osrelease
    ver_str = S.sysctl('kernel.osrelease'):match('%d+.%d+.%d+')
  end
  local version = 0
  for i in ver_str:gmatch('%d+') do
    -- Convert 'X.Y.Z' to 0xXXYYZZ
    version = bit.bor(version, bit.lshift(tonumber(i), 8*count))
    count = count - 1
  end
  return version
end
-- Wrap a perf reader with an iterator interface; `event_type` optionally
-- names the C type of sampled messages (declared via ffi.cdef elsewhere).
function M.event_reader(reader, event_type)
  -- Caller can specify event message binary format
  if event_type then
    assert(type(event_type) == 'string' and ffi.typeof(event_type), 'not a valid type for event reader')
    event_type = ffi.typeof(event_type ..
'*') -- Convert type to pointer-to-type end -- Wrap reader in interface that can interpret read event messages return setmetatable({reader=reader,type=event_type}, {__index = { block = function(_ --[[self]]) return S.select { readfds = {reader.fd} } end, next = function(_ --[[self]], k) local len, ev = reader:next(k) -- Filter out only sample frames while ev and ev.type ~= S.c.PERF_RECORD.SAMPLE do len, ev = reader:next(len) end if ev and event_type then -- The perf event reader returns framed data with header and variable length -- This is going skip the frame header and cast data to given type ev = ffi.cast(event_type, ffi.cast('char *', ev) + ffi.sizeof('struct perf_event_header') + ffi.sizeof('uint32_t')) end return len, ev end, read = function(self) return self.next, self, nil end, }}) end function M.tracepoint_type(tp) -- Read tracepoint format string local fp = assert(io.open('/sys/kernel/debug/tracing/events/'..tp..'/format', 'r')) local fmt = fp:read '*a' fp:close() -- Parse struct fields local fields = {} for f in fmt:gmatch 'field:([^;]+;)' do table.insert(fields, f) end return string.format('struct { %s }', table.concat(fields)) end return M end) package.preload['bpf.elf'] = (function (...) --[[ Copyright 2016 Marek Vavrusa Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ]] -- This is a tiny wrapper over libelf to extract load address -- and offsets of dynamic symbols local S = require('syscall') local ffi = require('ffi') ffi.cdef [[ /* Type for a 16-bit quantity. 
*/ typedef uint16_t Elf32_Half; typedef uint16_t Elf64_Half; /* Types for signed and unsigned 32-bit quantities. */ typedef uint32_t Elf32_Word; typedef int32_t Elf32_Sword; typedef uint32_t Elf64_Word; typedef int32_t Elf64_Sword; /* Types for signed and unsigned 64-bit quantities. */ typedef uint64_t Elf32_Xword; typedef int64_t Elf32_Sxword; typedef uint64_t Elf64_Xword; typedef int64_t Elf64_Sxword; /* Type of addresses. */ typedef uint32_t Elf32_Addr; typedef uint64_t Elf64_Addr; /* Type of file offsets. */ typedef uint32_t Elf32_Off; typedef uint64_t Elf64_Off; /* Type for section indices, which are 16-bit quantities. */ typedef uint16_t Elf32_Section; typedef uint16_t Elf64_Section; /* Constants */ struct Elf_Cmd { static const int READ = 1; static const int RDWR = 2; static const int WRITE = 3; static const int CLR = 4; static const int SET = 5; static const int FDDONE = 6; static const int FDREAD = 7; static const int READ_MMAP = 8; static const int RDWR_MMAP = 9; static const int WRITE_MMAP =10; static const int READ_MMAP_PRIVATE =11; static const int EMPTY =12; static const int NUM =13; }; /* Descriptor for the ELF file. */ typedef struct Elf Elf; /* Descriptor for ELF file section. */ typedef struct Elf_Scn Elf_Scn; /* Container type for metatable */ struct Elf_object { int fd; Elf *elf; }; /* Program segment header. */ typedef struct { Elf64_Word p_type; /* Segment type */ Elf64_Word p_flags; /* Segment flags */ Elf64_Off p_offset; /* Segment file offset */ Elf64_Addr p_vaddr; /* Segment virtual address */ Elf64_Addr p_paddr; /* Segment physical address */ Elf64_Xword p_filesz; /* Segment size in file */ Elf64_Xword p_memsz; /* Segment size in memory */ Elf64_Xword p_align; /* Segment alignment */ } Elf64_Phdr; typedef Elf64_Phdr GElf_Phdr; /* Section header. 
*/ typedef struct { Elf64_Word sh_name; /* Section name (string tbl index) */ Elf64_Word sh_type; /* Section type */ Elf64_Xword sh_flags; /* Section flags */ Elf64_Addr sh_addr; /* Section virtual addr at execution */ Elf64_Off sh_offset; /* Section file offset */ Elf64_Xword sh_size; /* Section size in bytes */ Elf64_Word sh_link; /* Link to another section */ Elf64_Word sh_info; /* Additional section information */ Elf64_Xword sh_addralign; /* Section alignment */ Elf64_Xword sh_entsize; /* Entry size if section holds table */ } Elf64_Shdr; typedef Elf64_Shdr GElf_Shdr; /* Descriptor for data to be converted to or from memory format. */ typedef struct { void *d_buf; /* Pointer to the actual data. */ int d_type; /* Type of this piece of data. */ unsigned int d_version; /* ELF version. */ size_t d_size; /* Size in bytes. */ uint64_t d_off; /* Offset into section. */ size_t d_align; /* Alignment in section. */ } Elf_Data; /* Symbol table entry. */ typedef struct { Elf64_Word st_name; /* Symbol name (string tbl index) */ unsigned char st_info; /* Symbol type and binding */ unsigned char st_other; /* Symbol visibility */ Elf64_Section st_shndx; /* Section index */ Elf64_Addr st_value; /* Symbol value */ Elf64_Xword st_size; /* Symbol size */ } Elf64_Sym; typedef Elf64_Sym GElf_Sym; /* Coordinate ELF library and application versions. */ unsigned int elf_version (unsigned int __version); /* Return descriptor for ELF file to work according to CMD. */ Elf *elf_begin (int __fildes, int __cmd, Elf *__ref); /* Free resources allocated for ELF. */ int elf_end (Elf *__elf); /* Get the number of program headers in the ELF file. If the file uses more headers than can be represented in the e_phnum field of the ELF header the information from the sh_info field in the zeroth section header is used. */ int elf_getphdrnum (Elf *__elf, size_t *__dst); /* Retrieve program header table entry. 
GElf_Phdr *gelf_getphdr (Elf *__elf, int __ndx, GElf_Phdr *__dst);
/* Retrieve section header. */
GElf_Shdr *gelf_getshdr (Elf_Scn *__scn, GElf_Shdr *__dst);
/* Retrieve symbol information from the symbol table at the given index. */
GElf_Sym *gelf_getsym (Elf_Data *__data, int __ndx, GElf_Sym *__dst);
/* Get section with next section index. */
Elf_Scn *elf_nextscn (Elf *__elf, Elf_Scn *__scn);
/* Get data from section while translating from file representation to memory representation. */
Elf_Data *elf_getdata (Elf_Scn *__scn, Elf_Data *__data);
/* Return pointer to string at OFFSET in section INDEX. */
char *elf_strptr (Elf *__elf, size_t __index, size_t __offset);
]]
local elf = ffi.load('elf')

-- libelf constants mirrored in Lua: EV = library versions, PT = program
-- header types, SHT = section header types (only the subset used below).
local EV = { NONE=0, CURRENT=1, NUM=2 }
local PT = { NULL=0, LOAD=1, DYNAMIC=2, INTERP=3, NOTE=4, SHLIB=5, PHDR=6, TLS=7, NUM=8 }
local SHT = { NULL=0, PROGBITS=1, SYMTAB=2, STRTAB=3, RELA=4, HASH=5,
              DYNAMIC=6, NOTE=7, NOBITS=8, REL=9, SHLIB=10, DYNSYM=11,
              INIT_ARRAY=14, FINI_ARRAY=15, PREINIT_ARRAY=16, GROUP=17,
              SYMTAB_SHNDX=18, NUM=19 }
local ELF_C = ffi.new('struct Elf_Cmd')
local M = {}

-- Optional poor man's C++ demangler: prefer $CPP_DEMANGLER, otherwise look
-- for c++filt on $PATH (guard against PATH being unset).
local cpp_demangler = os.getenv('CPP_DEMANGLER')
if not cpp_demangler then
  for prefix in string.gmatch(os.getenv('PATH') or '', '[^;:]+') do
    if S.statfs(prefix..'/c++filt') then
      cpp_demangler = prefix..'/c++filt'
      break
    end
  end
end
-- Default demangler is the identity; replaced below when c++filt is found.
local cpp_demangle = function (name) return name end
if cpp_demangler then
  cpp_demangle = function (name)
    -- SECURITY: the symbol name comes from an untrusted ELF string table and
    -- is passed through io.popen (a shell) — single-quote it so shell
    -- metacharacters in a crafted symbol cannot inject commands.
    local quoted = "'" .. name:gsub("'", "'\\''") .. "'"
    local cmd = string.format('%s -p %s', cpp_demangler, quoted)
    local fp = assert(io.popen(cmd, 'r'))
    local output = fp:read('*all')
    fp:close()
    -- Trim trailing whitespace/newline from c++filt output.
    return output:match '^(.-)%s*$'
  end
end

-- Metatable for ELF object
ffi.metatype('struct Elf_object', {
  __gc = function (t) t:close() end,
  __index = {
    -- Release the libelf handle and close the fd (idempotent).
    close = function (t)
      if t.elf ~= nil then
        elf.elf_end(t.elf)
        S.close(t.fd)
        t.elf = nil
      end
    end,
    -- Load library load address
    loadaddr = function(t)
      local phnum = ffi.new('size_t [1]')
      -- BUGFIX: elf_getphdrnum() returns an int (0 on success, -1 on error),
      -- never nil, so the previous '== nil' check could never fire and
      -- failures were silently ignored.
      if elf.elf_getphdrnum(t.elf, phnum) ~= 0 then
        return nil, 'cannot get phdrnum'
      end
      local header = ffi.new('GElf_Phdr [1]')
      for i = 0, tonumber(phnum[0])-1 do
        if elf.gelf_getphdr(t.elf, i, header) ~= nil
           and header[0].p_type == PT.LOAD then
          -- Virtual address of the first PT_LOAD segment is the load base.
          return header[0].p_vaddr
        end
      end
    end,
    -- Resolve symbol address: scan SYMTAB/DYNSYM sections for a symbol whose
    -- (demangled) name equals `k`, or matches `k` as a Lua pattern when
    -- `pattern` is truthy. Returns the GElf_Sym on success.
    resolve = function (t, k, pattern)
      local section = elf.elf_nextscn(t.elf, nil)
      while section ~= nil do
        local header = ffi.new('GElf_Shdr [1]')
        if elf.gelf_getshdr(section, header) ~= nil then
          if header[0].sh_type == SHT.SYMTAB or header[0].sh_type == SHT.DYNSYM then
            local data = elf.elf_getdata(section, nil)
            while data ~= nil do
              -- Entry size must evenly divide the data block.
              if data.d_size % header[0].sh_entsize > 0 then
                return nil, 'bad section header entity size'
              end
              local symcount = tonumber(data.d_size / header[0].sh_entsize)
              local sym = ffi.new('GElf_Sym [1]')
              for i = 0, symcount - 1 do
                if elf.gelf_getsym(data, i, sym) ~= nil then
                  local name = elf.elf_strptr(t.elf, header[0].sh_link, sym[0].st_name)
                  if name ~= nil then
                    -- Demangle C++ symbols if necessary
                    name = ffi.string(name)
                    if name:sub(1,2) == '_Z' then
                      name = cpp_demangle(name)
                    end
                    -- Match symbol name against pattern
                    if pattern and string.match(name, k) or k == name then
                      return sym[0]
                    end
                  end
                end
              end
              data = elf.elf_getdata(section, data)
            end
          end
        end
        section = elf.elf_nextscn(t.elf, section)
      end
    end,
  }
})

-- Open an ELF object at `path` read-only; returns a GC-managed
-- 'struct Elf_object' or nil, err.
function M.open(path)
  if elf.elf_version(EV.CURRENT) == EV.NONE then
    return nil, 'bad version'
  end
  local fd, err = S.open(path, 'rdonly')
  if not fd then return nil, err end
  local pt = ffi.new('Elf *')
  pt = elf.elf_begin(fd:getfd(), ELF_C.READ, pt)
  -- BUGFIX: a NULL pointer cdata is truthy in LuaJIT, so 'if not pt' could
  -- never detect elf_begin() failure; compare against nil instead (cdata
  -- NULL pointers do compare equal to nil).
  if pt == nil then
    fd:close()
    return nil, 'cannot open elf object'
  end
  -- nogc(): ownership of the fd moves to the Elf_object finalizer.
  return ffi.new('struct Elf_object', fd:nogc():getfd(), pt)
end

return M
end)
package.preload['bpf.ljbytecode'] = (function (...)
--[[
Copyright 2016 Marek Vavrusa
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]
local jutil = require("jit.util")
local vmdef = require("jit.vmdef")
local bit = require('bit')
local shr, band = bit.rshift, bit.band

-- Decode LuaJIT 2.0 Byte Format
-- Reference: http://wiki.luajit.org/Bytecode-2.0
-- Thanks to LJ, we get code in portable bytecode with constants folded, basic
-- virtual registers allocated etc.
-- No SSA IR, type inference or advanced optimizations because the code wasn't traced yet.

-- Decode the single 32-bit bytecode instruction of `func` at position `pc`.
-- Returns: pc, opcode name (string), and operands a, b, c, d
-- (b and c may be nil depending on the operand mode), or nil past the end.
local function decode_ins(func, pc)
  -- funcbc() yields the raw instruction word and its operand-mode byte.
  local ins, m = jutil.funcbc(func, pc)
  if not ins then return nil end
  -- Split mode byte: ma = mode of A (bits 0-2), mb = mode of B (bits 3-6),
  -- mc = mode of C/D (bits 7-10); op is the low byte of the instruction.
  local op, ma, mb, mc = band(ins, 0xff), band(m, 7), band(m, 15*8), band(m, 15*128)
  -- Layout: op (8 bits) | A (8 bits) | D (16 bits) -- or B (8) + C (8).
  local a, b, c, d = band(shr(ins, 8), 0xff), nil, nil, shr(ins, 16)
  if mb ~= 0 then
    -- Three-operand form: D narrows to its low byte (C), B is the top byte.
    d = band(d, 0xff)
    b = shr(ins, 24)
  end
  if ma == 5 then -- BCMuv
    -- Operand A is an upvalue index: translate to the upvalue's name.
    a = jutil.funcuvname(func, a)
  end
  -- Resolve operand C/D according to its mode: jump target, constant-table
  -- lookup (cdata/int/str use different signs/offsets), or upvalue name.
  if mc == 13*128 then -- BCMjump
    c = pc+d-0x7fff
  elseif mc == 14*128 then -- BCMcdata
    c = jutil.funck(func, -d-1)
  elseif mc == 9*128 then -- BCMint
    c = jutil.funck(func, d)
  elseif mc == 10*128 then -- BCMstr
    c = jutil.funck(func, -d-1)
  elseif mc == 5*128 then -- BCMuv
    c = jutil.funcuvname(func, d)
  end
  -- Convert version-specific opcode to string
  -- (vmdef.bcnames is a flat string of 6-char opcode names).
  op = 6*op
  op = string.sub(vmdef.bcnames, op+1, op+6):match('[^%s]+')
  return pc, op, a, b, c, d
end

-- Decoder closure: returns an iterator yielding one decoded instruction per
-- call, starting at pc=1, until decode_ins returns nil.
local function decoder(func)
  local pc = 0
  return function ()
    pc = pc + 1
    return decode_ins(func, pc)
  end
end

-- Hexdump generated code
local function dump(func)
  return require('jit.bc').dump(func)
end

return {
  decode = decode_ins,
  decoder = decoder,
  dump = dump,
  funcinfo = function (...)
    return jutil.funcinfo(...)
end, } end) package.preload['bpf.proto'] = (function (...) --[[ Copyright 2016 Marek Vavrusa Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ]] local ffi = require('ffi') local BPF = ffi.typeof('struct bpf') ffi.cdef [[ struct sk_buff { uint32_t len; uint32_t pkt_type; uint32_t mark; uint32_t queue_mapping; uint32_t protocol; uint32_t vlan_present; uint32_t vlan_tci; uint32_t vlan_proto; uint32_t priority; uint32_t ingress_ifindex; uint32_t ifindex; uint32_t tc_index; uint32_t cb[5]; uint32_t hash; uint32_t tc_classid; uint32_t data; uint32_t data_end; uint32_t napi_id; /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */ uint32_t family; uint32_t remote_ip4; /* Stored in network byte order */ uint32_t local_ip4; /* Stored in network byte order */ uint32_t remote_ip6[4]; /* Stored in network byte order */ uint32_t local_ip6[4]; /* Stored in network byte order */ uint32_t remote_port; /* Stored in network byte order */ uint32_t local_port; /* stored in host byte order */ /* ... here. 
*/ uint32_t data_meta; }; struct net_off_t { uint8_t ver:4; } __attribute__((packed)); struct eth_t { uint8_t dst[6]; uint8_t src[6]; uint16_t type; } __attribute__((packed)); struct dot1q_t { uint16_t pri:3; uint16_t cfi:1; uint16_t vlanid:12; uint16_t type; } __attribute__((packed)); struct arp_t { uint16_t htype; uint16_t ptype; uint8_t hlen; uint8_t plen; uint16_t oper; uint8_t sha[6]; uint32_t spa; uint8_t tha[6]; uint32_t tpa; } __attribute__((packed)); struct ip_t { uint8_t ver:4; uint8_t hlen:4; uint8_t tos; uint16_t tlen; uint16_t identification; uint16_t ffo_unused:1; uint16_t df:1; uint16_t mf:1; uint16_t foffset:13; uint8_t ttl; uint8_t proto; uint16_t hchecksum; uint32_t src; uint32_t dst; } __attribute__((packed)); struct icmp_t { uint8_t type; uint8_t code; uint16_t checksum; } __attribute__((packed)); struct ip6_t { uint32_t ver:4; uint32_t priority:8; uint32_t flow_label:20; uint16_t payload_len; uint8_t next_header; uint8_t hop_limit; uint64_t src_hi; uint64_t src_lo; uint64_t dst_hi; uint64_t dst_lo; } __attribute__((packed)); struct ip6_opt_t { uint8_t next_header; uint8_t ext_len; uint8_t pad[6]; } __attribute__((packed)); struct icmp6_t { uint8_t type; uint8_t code; uint16_t checksum; } __attribute__((packed)); struct udp_t { uint16_t src_port; uint16_t dst_port; uint16_t length; uint16_t crc; } __attribute__((packed)); struct tcp_t { uint16_t src_port; uint16_t dst_port; uint32_t seq_num; uint32_t ack_num; uint8_t offset:4; uint8_t reserved:4; uint8_t flag_cwr:1; uint8_t flag_ece:1; uint8_t flag_urg:1; uint8_t flag_ack:1; uint8_t flag_psh:1; uint8_t flag_rst:1; uint8_t flag_syn:1; uint8_t flag_fin:1; uint16_t rcv_wnd; uint16_t cksum; uint16_t urg_ptr; } __attribute__((packed)); struct vxlan_t { uint32_t rsv1:4; uint32_t iflag:1; uint32_t rsv2:3; uint32_t rsv3:24; uint32_t key:24; uint32_t rsv4:8; } __attribute__((packed)); ]] -- Architecture-specific ptrace register layout local S = require('syscall') local arch = S.abi.arch local parm_to_reg 
= {}
if arch == 'x64' then
  -- x86-64 kernel pt_regs layout; field names follow the kernel's struct.
  ffi.cdef [[
  struct pt_regs {
    unsigned long r15;
    unsigned long r14;
    unsigned long r13;
    unsigned long r12;
    unsigned long bp;
    unsigned long bx;
    unsigned long r11;
    unsigned long r10;
    unsigned long r9;
    unsigned long r8;
    unsigned long ax;
    unsigned long cx;
    unsigned long dx;
    unsigned long si;
    unsigned long di;
    unsigned long orig_ax;
    unsigned long ip;
    unsigned long cs;
    unsigned long flags;
    unsigned long sp;
    unsigned long ss;
  };]]
  -- System V AMD64 calling convention: parmN = Nth function argument.
  -- NOTE(review): 'ret' maps to 'sp' here, not 'ax' — TODO confirm intent
  -- (return-address slot vs. return value) against callers.
  parm_to_reg = {parm1='di', parm2='si', parm3='dx', parm4='cx', parm5='r8', ret='sp', fp='bp'}
else
  -- Unknown architecture: declare an empty pt_regs so the metatype below
  -- still attaches; any register lookup then fails the assert.
  ffi.cdef 'struct pt_regs {};'
end
-- Map symbolic registers to architecture ABI
ffi.metatype('struct pt_regs', {
  __index = function (_ --[[t]],k)
    return assert(parm_to_reg[k], 'no such register: '..k)
  end,
})

local M = {}

-- Dissector interface
-- Chain a new protocol dissector of `type` onto variable `src`, writing the
-- result into `dst`: the parent dissector's `field` method emits the code
-- that advances the offset past the parent's header.
local function dissector(type, e, dst, src, field)
  local parent = e.V[src].const
  -- Create new dissector variable
  e.vcopy(dst, src)
  -- Compute and materialize new dissector offset from parent
  e.V[dst].const = {off=e.V[src].const.off, __dissector=e.V[src].const.__dissector}
  parent.__dissector[field](e, dst)
  e.V[dst].const.__dissector = type
end
M.dissector = dissector

-- Get current effective offset, load field value at an offset relative to it and
-- add its value to compute next effective offset (e.g. udp_off = ip_off + pkt[ip_off].hlen)
-- `mask`/`shift` post-process the loaded field (shift < 0 means right shift).
local function next_offset(e, var, type, off, mask, shift)
  local d = e.V[var].const
  -- Materialize relative offset value in R0
  local dst_reg, tmp_reg
  if d.off then
    -- Compile-time base offset: absolute packet load.
    dst_reg = e.vreg(var, 0, true)
    tmp_reg = dst_reg -- Use target register to avoid copy
    -- NOTE(review): 'd.off + off or 0' parses as '(d.off + off) or 0', so
    -- the 'or 0' is dead and off=nil would raise — callers always pass off.
    e.emit(BPF.LD + BPF.ABS + e.const_width[ffi.sizeof(type)], tmp_reg, 0, 0, d.off + off or 0)
  else
    -- Runtime base offset: indirect load relative to the variable's register.
    tmp_reg = e.vreg(e.tmpvar, 0, true, type) -- Reserve R0 for temporary relative offset
    dst_reg = e.vreg(var) -- Must rematerialize (if it was spilled by tmp var)
    e.emit(BPF.LD + BPF.IND + e.const_width[ffi.sizeof(type)], tmp_reg, dst_reg, 0, off or 0)
  end
  -- Finalize relative offset
  if mask then
    e.emit(BPF.ALU + BPF.AND + BPF.K, tmp_reg, 0, 0, mask)
  end
  if shift and shift ~= 0 then
    local op = BPF.LSH
    if shift < 0 then
      op = BPF.RSH
      shift = -shift
    end
    e.emit(BPF.ALU + op + BPF.K, tmp_reg, 0, 0, shift)
  end
  -- Add to base offset to turn it into effective address
  if dst_reg ~= tmp_reg then
    e.emit(BPF.ALU + BPF.ADD + BPF.X, dst_reg, tmp_reg, 0, 0)
  else
    e.emit(BPF.ALU + BPF.ADD + BPF.K, dst_reg, 0, 0, d.off)
  end
  -- Discard temporary allocations
  d.off = nil
  e.V[e.tmpvar].reg = nil
end

-- Advance variable `var`'s offset by a fixed `off`, either at compile time
-- (constant offset) or by emitting a runtime add.
local function next_skip(e, var, off)
  local d = e.V[var].const
  if not d.off then
    local dst_reg = e.vreg(var)
    e.emit(BPF.ALU64 + BPF.ADD + BPF.K, dst_reg, 0, 0, off)
  else
    d.off = d.off + off
  end
end

local function skip_eth(e, dst)
  -- IP starts right after ETH header (fixed size)
  local d = e.V[dst].const
  d.off = d.off + ffi.sizeof('struct eth_t')
end

-- Export types
-- Wrap a C type string into a dissector type descriptor (table with a
-- __dissector ctype plus optional extra attributes in `t`).
M.type = function(typestr, t)
  t = t or {}
  t.__dissector=ffi.typeof(typestr)
  return t
end
M.skb = M.type('struct sk_buff', {source='ptr_to_ctx'})
M.pt_regs = M.type('struct pt_regs', {source='ptr_to_probe'})
M.pkt = M.type('struct eth_t', {off=0, source='ptr_to_pkt'}) -- skb needs special accessors
-- M.eth = function (...) return dissector(ffi.typeof('struct eth_t'), ...) end
M.dot1q = function (...) return dissector(ffi.typeof('struct dot1q_t'), ...)
end
-- Per-protocol dissector constructors: each forwards to dissector() with the
-- matching packed ctype so scripts can write e.g. pkt.ip.udp.
M.arp = function (...) return dissector(ffi.typeof('struct arp_t'), ...) end
M.icmp = function (...) return dissector(ffi.typeof('struct icmp_t'), ...) end
M.ip = function (...) return dissector(ffi.typeof('struct ip_t'), ...) end
M.icmp6 = function (...) return dissector(ffi.typeof('struct icmp6_t'), ...) end
M.ip6 = function (...) return dissector(ffi.typeof('struct ip6_t'), ...) end
M.ip6_opt = function (...) return dissector(ffi.typeof('struct ip6_opt_t'), ...) end
M.udp = function (...) return dissector(ffi.typeof('struct udp_t'), ...) end
M.tcp = function (...) return dissector(ffi.typeof('struct tcp_t'), ...) end
M.vxlan = function (...) return dissector(ffi.typeof('struct vxlan_t'), ...) end
M.data = function (...) return dissector(ffi.typeof('uint8_t'), ...) end
M.net_off = function (...) return dissector(ffi.typeof('struct net_off_t'), ...) end

-- Metatables
-- Each __index table lists the protocols reachable from this header; the
-- method emits the code that skips over this header to reach the next one.
ffi.metatype(ffi.typeof('struct eth_t'), {
  __index = {
    ip = skip_eth,
    ip6 = skip_eth,
    net_off = function (e, dst)
      next_skip(e, dst, BPF.NET_OFF)
    end,
  }
})
-- net_off_t already points at the network header, so transitions are no-ops.
ffi.metatype(ffi.typeof('struct net_off_t'), {
  __index = {
    ip = function () end,
    ip6 = function () end,
  }
})
ffi.metatype(ffi.typeof('struct ip_t'), {
  __index = {
    -- Skip IP header length (stored as number of words)
    -- e.g. hlen = 5, Header Length = 5 x sizeof(u32) = 20 octets
    -- Mask first nibble and shift by 2 (multiplication by 4)
    icmp = function(e, dst) next_offset(e, dst, ffi.typeof('uint8_t'), 0, 0x0f, 2) end,
    udp = function(e, dst) next_offset(e, dst, ffi.typeof('uint8_t'), 0, 0x0f, 2) end,
    tcp = function(e, dst) next_offset(e, dst, ffi.typeof('uint8_t'), 0, 0x0f, 2) end,
  }
})
ffi.metatype(ffi.typeof('struct ip6_t'), {
  __index = {
    -- Skip fixed IPv6 header length (40 bytes)
    -- The caller must check the value of `next_header` to skip any extension headers
    icmp6 = function(e, dst) next_skip(e, dst, ffi.sizeof('struct ip6_t'), 0) end,
    udp = function(e, dst) next_skip(e, dst, ffi.sizeof('struct ip6_t'), 0) end,
    tcp = function(e, dst) next_skip(e, dst, ffi.sizeof('struct ip6_t'), 0) end,
    ip6_opt = function(e, dst) next_skip(e, dst, ffi.sizeof('struct ip6_t'), 0) end,
  }
})
local ip6_opt_ext_len_off = ffi.offsetof('struct ip6_opt_t', 'ext_len')
ffi.metatype(ffi.typeof('struct ip6_opt_t'), {
  __index = {
    -- Skip IPv6 extension header length (field `ext_len`)
    icmp6 = function(e, dst) next_offset(e, dst, ffi.typeof('uint8_t'), ip6_opt_ext_len_off) end,
    udp = function(e, dst) next_offset(e, dst, ffi.typeof('uint8_t'), ip6_opt_ext_len_off) end,
    tcp = function(e, dst) next_offset(e, dst, ffi.typeof('uint8_t'), ip6_opt_ext_len_off) end,
    ip6_opt = function(e, dst) next_offset(e, dst, ffi.typeof('uint8_t'), ip6_opt_ext_len_off) end,
  }
})
ffi.metatype(ffi.typeof('struct tcp_t'), {
  __index = {
    -- Skip TCP header length (stored as number of words)
    -- e.g.
hlen = 5, Header Length = 5 x sizeof(u32) = 20 octets data = function(e, dst) next_offset(e, dst, ffi.typeof('uint8_t'), ffi.offsetof('struct tcp_t', 'offset'), 0xf0, -2) end, } }) ffi.metatype(ffi.typeof('struct udp_t'), { __index = { -- Skip UDP header length (8 octets) data = function(e, dst) next_skip(e, dst, ffi.sizeof('struct udp_t')) end, } }) -- Constants M.c = { eth = { -- Constants http://standards.ieee.org/regauth/ethertype ip = 0x0800, -- IP (v4) protocol ip6 = 0x86dd, -- IP (v6) protocol arp = 0x0806, -- Address resolution protocol revarp = 0x8035, -- Reverse addr resolution protocol vlan = 0x8100, -- IEEE 802.1Q VLAN tagging }, ip = { -- Reserved Addresses addr_any = 0x00000000, -- 0.0.0.0 addr_broadcast = 0xffffffff, -- 255.255.255.255 addr_loopback = 0x7f000001, -- 127.0.0.1 addr_mcast_all = 0xe0000001, -- 224.0.0.1 addr_mcast_local = 0xe00000ff, -- 224.0.0.255 -- Type of service (ip_tos), RFC 1349 ("obsoleted by RFC 2474") tos_default = 0x00, -- default tos_lowdelay = 0x10, -- low delay tos_throughput = 0x08, -- high throughput tos_reliability = 0x04, -- high reliability tos_lowcost = 0x02, -- low monetary cost - XXX tos_ect = 0x02, -- ECN-capable transport tos_ce = 0x01, -- congestion experienced -- Fragmentation flags (ip_off) rf = 0x8000, -- reserved df = 0x4000, -- don't fragment mf = 0x2000, -- more fragments (not last frag) offmask = 0x1fff, -- mask for fragment offset -- Time-to-live (ip_ttl), seconds ttl_default = 64, -- default ttl, RFC 1122, RFC 1340 ttl_max = 255, -- maximum ttl -- Protocol (ip_p) - http://www.iana.org/assignments/protocol-numbers proto_ip = 0, -- dummy for IP proto_hopopts = 0, -- IPv6 hop-by-hop options proto_icmp = 1, -- ICMP proto_igmp = 2, -- IGMP proto_ggp = 3, -- gateway-gateway protocol proto_ipip = 4, -- IP in IP proto_st = 5, -- ST datagram mode proto_tcp = 6, -- TCP proto_cbt = 7, -- CBT proto_egp = 8, -- exterior gateway protocol proto_igp = 9, -- interior gateway protocol proto_bbnrcc = 10, -- BBN RCC 
monitoring proto_nvp = 11, -- Network Voice Protocol proto_pup = 12, -- PARC universal packet proto_argus = 13, -- ARGUS proto_emcon = 14, -- EMCON proto_xnet = 15, -- Cross Net Debugger proto_chaos = 16, -- Chaos proto_udp = 17, -- UDP proto_mux = 18, -- multiplexing proto_dcnmeas = 19, -- DCN measurement proto_hmp = 20, -- Host Monitoring Protocol proto_prm = 21, -- Packet Radio Measurement proto_idp = 22, -- Xerox NS IDP proto_trunk1 = 23, -- Trunk-1 proto_trunk2 = 24, -- Trunk-2 proto_leaf1 = 25, -- Leaf-1 proto_leaf2 = 26, -- Leaf-2 proto_rdp = 27, -- "Reliable Datagram" proto proto_irtp = 28, -- Inet Reliable Transaction proto_tp = 29, -- ISO TP class 4 proto_netblt = 30, -- Bulk Data Transfer proto_mfpnsp = 31, -- MFE Network Services proto_meritinp= 32, -- Merit Internodal Protocol proto_sep = 33, -- Sequential Exchange proto proto_3pc = 34, -- Third Party Connect proto proto_idpr = 35, -- Interdomain Policy Route proto_xtp = 36, -- Xpress Transfer Protocol proto_ddp = 37, -- Datagram Delivery Proto proto_cmtp = 38, -- IDPR Ctrl Message Trans proto_tppp = 39, -- TP++ Transport Protocol proto_il = 40, -- IL Transport Protocol proto_ip6 = 41, -- IPv6 proto_sdrp = 42, -- Source Demand Routing proto_routing = 43, -- IPv6 routing header proto_fragment= 44, -- IPv6 fragmentation header proto_rsvp = 46, -- Reservation protocol proto_gre = 47, -- General Routing Encap proto_mhrp = 48, -- Mobile Host Routing proto_ena = 49, -- ENA proto_esp = 50, -- Encap Security Payload proto_ah = 51, -- Authentication Header proto_inlsp = 52, -- Integated Net Layer Sec proto_swipe = 53, -- SWIPE proto_narp = 54, -- NBMA Address Resolution proto_mobile = 55, -- Mobile IP, RFC 2004 proto_tlsp = 56, -- Transport Layer Security proto_skip = 57, -- SKIP proto_icmp6 = 58, -- ICMP for IPv6 proto_none = 59, -- IPv6 no next header proto_dstopts = 60, -- IPv6 destination options proto_anyhost = 61, -- any host internal proto proto_cftp = 62, -- CFTP proto_anynet = 63, -- any local network 
proto_expak = 64, -- SATNET and Backroom EXPAK proto_kryptolan = 65, -- Kryptolan proto_rvd = 66, -- MIT Remote Virtual Disk proto_ippc = 67, -- Inet Pluribus Packet Core proto_distfs = 68, -- any distributed fs proto_satmon = 69, -- SATNET Monitoring proto_visa = 70, -- VISA Protocol proto_ipcv = 71, -- Inet Packet Core Utility proto_cpnx = 72, -- Comp Proto Net Executive proto_cphb = 73, -- Comp Protocol Heart Beat proto_wsn = 74, -- Wang Span Network proto_pvp = 75, -- Packet Video Protocol proto_brsatmon= 76, -- Backroom SATNET Monitor proto_sunnd = 77, -- SUN ND Protocol proto_wbmon = 78, -- WIDEBAND Monitoring proto_wbexpak = 79, -- WIDEBAND EXPAK proto_eon = 80, -- ISO CNLP proto_vmtp = 81, -- Versatile Msg Transport proto_svmtp = 82, -- Secure VMTP proto_vines = 83, -- VINES proto_ttp = 84, -- TTP proto_nsfigp = 85, -- NSFNET-IGP proto_dgp = 86, -- Dissimilar Gateway Proto proto_tcf = 87, -- TCF proto_eigrp = 88, -- EIGRP proto_ospf = 89, -- Open Shortest Path First proto_spriterpc= 90, -- Sprite RPC Protocol proto_larp = 91, -- Locus Address Resolution proto_mtp = 92, -- Multicast Transport Proto proto_ax25 = 93, -- AX.25 Frames proto_ipipencap= 94, -- yet-another IP encap proto_micp = 95, -- Mobile Internet Ctrl proto_sccsp = 96, -- Semaphore Comm Sec Proto proto_etherip = 97, -- Ethernet in IPv4 proto_encap = 98, -- encapsulation header proto_anyenc = 99, -- private encryption scheme proto_gmtp = 100, -- GMTP proto_ifmp = 101, -- Ipsilon Flow Mgmt Proto proto_pnni = 102, -- PNNI over IP proto_pim = 103, -- Protocol Indep Multicast proto_aris = 104, -- ARIS proto_scps = 105, -- SCPS proto_qnx = 106, -- QNX proto_an = 107, -- Active Networks proto_ipcomp = 108, -- IP Payload Compression proto_snp = 109, -- Sitara Networks Protocol proto_compaqpeer= 110, -- Compaq Peer Protocol proto_ipxip = 111, -- IPX in IP proto_vrrp = 112, -- Virtual Router Redundancy proto_pgm = 113, -- PGM Reliable Transport proto_any0hop = 114, -- 0-hop protocol proto_l2tp = 115, -- 
Layer 2 Tunneling Proto proto_ddx = 116, -- D-II Data Exchange (DDX) proto_iatp = 117, -- Interactive Agent Xfer proto_stp = 118, -- Schedule Transfer Proto proto_srp = 119, -- SpectraLink Radio Proto proto_uti = 120, -- UTI proto_smp = 121, -- Simple Message Protocol proto_sm = 122, -- SM proto_ptp = 123, -- Performance Transparency proto_isis = 124, -- ISIS over IPv4 proto_fire = 125, -- FIRE proto_crtp = 126, -- Combat Radio Transport proto_crudp = 127, -- Combat Radio UDP proto_sscopmce= 128, -- SSCOPMCE proto_iplt = 129, -- IPLT proto_sps = 130, -- Secure Packet Shield proto_pipe = 131, -- Private IP Encap in IP proto_sctp = 132, -- Stream Ctrl Transmission proto_fc = 133, -- Fibre Channel proto_rsvpign = 134, -- RSVP-E2E-IGNORE proto_raw = 255, -- Raw IP packets proto_reserved= 255, -- Reserved }, } return M end) --[[ Copyright 2016 GitHub, Inc Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
]]
-- bcc-probe entry point: parse leading CLI flags, load the user's trace
-- script, and run it under xpcall with the BPF/USDT utilities.
return function()
  require("bcc.vendor.helpers")

  local standalone = rawget(_G, "BCC_STANDALONE")
  local progname = standalone or "bcc-probe"

  -- Emit usage on stderr and terminate with failure status.
  local function usage_exit()
    io.stderr:write(string.format(
      "usage: %s [[--version|--verbose] --] path_to_script.lua [...]\n", progname))
    os.exit(1)
  end

  -- Print version banner (BCC revision + LuaJIT target) and exit cleanly.
  local function version_exit()
    local jit = require("jit")
    print(string.format("%s %s -- Running on %s (%s/%s)",
      progname, rawget(_G, "BCC_VERSION") or "HEAD",
      jit.version, jit.os, jit.arch))
    os.exit(0)
  end

  -- Consume leading option arguments; a literal "--" ends option parsing.
  while arg[1] and string.starts(arg[1], "-") do
    local opt = table.remove(arg, 1)
    if opt == "--" then
      break
    elseif standalone == nil and string.starts(opt, "--so-path=") then
      -- Only honored when not built as a standalone binary.
      rawset(_G, "LIBBCC_SO_PATH", string.lstrip(opt, "--so-path="))
    elseif opt == "--llvm-debug" then
      rawset(_G, "LIBBCC_LLVM_DEBUG", 1)
    elseif opt == "-V" or opt == "--verbose" then
      log.enabled = true
    elseif opt == "-v" or opt == "--version" then
      version_exit()
    else
      usage_exit()
    end
  end

  -- First remaining argument is the trace script path.
  local tracefile = table.remove(arg, 1)
  if not tracefile then usage_exit() end

  local BPF = require("bcc.bpf")
  BPF.script_root(tracefile)

  local USDT = require("bcc.usdt")
  local utils = {
    argparse = require("bcc.vendor.argparse"),
    posix = require("bcc.vendor.posix"),
    USDT = USDT,
  }

  -- Run the user script's returned function with a traceback on error;
  -- "interrupted!" (Ctrl-C) is not reported as an error.
  local command = dofile(tracefile)
  local ok, err = xpcall(command, debug.traceback, BPF, utils)

  if not ok and err ~= "interrupted!" then
    io.stderr:write("[ERROR] "..err.."\n")
  end

  BPF.cleanup()
  USDT.cleanup()
  return ok, err
end