diff options
author | Evadne Wu <ev@radi.ws> | 2019-07-03 21:18:28 +0100 |
---|---|---|
committer | James Every <devstopfix@gmail.com> | 2019-07-03 21:18:28 +0100 |
commit | 46375802f5c69b407140aab51854943e1c8e363c (patch) | |
tree | f0aa7c89fcad13ba423604c914ab7c1aa3a528e3 |
Prototype using erlexec
-rw-r--r-- | .formatter.exs | 4 | ||||
-rw-r--r-- | .gitignore | 26 | ||||
-rw-r--r-- | Makefile | 22 | ||||
-rw-r--r-- | README.md | 21 | ||||
-rw-r--r-- | lib/gen_magic.ex | 27 | ||||
-rw-r--r-- | lib/gen_magic/apprentice_server.ex | 100 | ||||
-rw-r--r-- | lib/gen_magic/configuration.ex | 34 | ||||
-rw-r--r-- | mix.exs | 28 | ||||
-rw-r--r-- | mix.lock | 5 | ||||
-rw-r--r-- | src/apprentice.c | 216 | ||||
-rw-r--r-- | test/gen_magic_test.exs | 8 | ||||
-rw-r--r-- | test/test_helper.exs | 1 |
12 files changed, 492 insertions, 0 deletions
diff --git a/.formatter.exs b/.formatter.exs new file mode 100644 index 0000000..d2cda26 --- /dev/null +++ b/.formatter.exs @@ -0,0 +1,4 @@ +# Used by "mix format" +[ + inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] +] diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..648beab --- /dev/null +++ b/.gitignore @@ -0,0 +1,26 @@ +# The directory Mix will write compiled artifacts to. +/_build/ +*.o +priv/ + +# If you run "mix test --cover", coverage assets end up here. +/cover/ + +# The directory Mix downloads your dependencies sources to. +/deps/ + +# Where third-party dependencies like ExDoc output generated docs. +/doc/ + +# Ignore .fetch files in case you like to edit your project deps locally. +/.fetch + +# If the VM crashes, it generates a dump, let's ignore it too. +erl_crash.dump + +# Also ignore archive artifacts (built via "mix archive.build"). +*.ez + +# Ignore package tarball (built via "mix hex.build"). +gen_magic-*.tar + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..9e366c4 --- /dev/null +++ b/Makefile @@ -0,0 +1,22 @@ +CC = gcc +CFLAGS = -std=c99 -g -Wall +LDFLAGS = -lm -lmagic +HEADER_FILES = src +C_SOURCE_FILES = src/apprentice.c +OBJECT_FILES = $(C_SOURCE_FILES:.c=.o) +EXECUTABLE_DIRECTORY = priv +EXECUTABLE = $(EXECUTABLE_DIRECTORY)/apprentice + +all: $(C_SOURCE_FILES) $(EXECUTABLE) + +$(EXECUTABLE): $(OBJECT_FILES) $(EXECUTABLE_DIRECTORY) + $(CC) $(OBJECT_FILES) -o $@ $(LDFLAGS) + +$(EXECUTABLE_DIRECTORY): + mkdir -p $(EXECUTABLE_DIRECTORY) + +.o: + $(CC) $(CFLAGS) $< -o $@ + +clean: + rm -f $(EXECUTABLE) $(OBJECT_FILES) $(BEAM_FILES) diff --git a/README.md b/README.md new file mode 100644 index 0000000..9a4cf4d --- /dev/null +++ b/README.md @@ -0,0 +1,21 @@ +# GenMagic + +**TODO: Add description** + +## Installation + +If [available in Hex](https://hex.pm/docs/publish), the package can be installed +by adding `gen_magic` to your list of dependencies in `mix.exs`: + +```elixir +def deps do + [ + {:gen_magic, "~> 0.1.0"} + ] +end +``` + +Documentation can be generated with [ExDoc](https://github.com/elixir-lang/ex_doc) +and published on [HexDocs](https://hexdocs.pm). Once published, the docs can +be found at [https://hexdocs.pm/gen_magic](https://hexdocs.pm/gen_magic). + diff --git a/lib/gen_magic.ex b/lib/gen_magic.ex new file mode 100644 index 0000000..3711dfa --- /dev/null +++ b/lib/gen_magic.ex @@ -0,0 +1,27 @@ +defmodule GenMagic do + @moduledoc """ + Top-level namespace for GenMagic, the libMagic client for Elixir. + """ + + @doc """ + Top-level convenience function which creates an ad-hoc process. Usually + this will be wrapped in a pool established by the author of the application + that uses the library. + """ + def perform(path) do + {:ok, pid} = __MODULE__.ApprenticeServer.start_link() + result = GenServer.call(pid, {:perform, path}) + :ok = GenServer.stop(pid) + result + end + + def perform_infinite(path) do + {:ok, pid} = __MODULE__.ApprenticeServer.start_link() + perform_infinite(path, pid) + end + + defp perform_infinite(path, pid, count \\ 0) do + IO.inspect [count, GenServer.call(pid, {:perform, path})] + perform_infinite(path, pid, count + 1) + end +end diff --git a/lib/gen_magic/apprentice_server.ex b/lib/gen_magic/apprentice_server.ex new file mode 100644 index 0000000..5aee590 --- /dev/null +++ b/lib/gen_magic/apprentice_server.ex @@ -0,0 +1,100 @@ +defmodule GenMagic.ApprenticeServer do + @moduledoc """ + Provides access to the underlying libMagic client which performs file introspection. + """ + + alias GenMagic.Configuration + use GenServer + + def start_link(args \\ []) do + GenServer.start_link(__MODULE__, args) + end + + defmodule State do + defstruct pid: nil, ospid: nil, started: false, count: 0 + end + + def init(_) do + {:ok, %State{}} + end + + def handle_call(message, from, %{started: false} = state) do + case start(state) do + {:ok, state} -> handle_call(message, from, state) + {:error, _} = error -> {:reply, error, state} + end + end + + def handle_call({:perform, path}, _, state) do + max_count = Configuration.get_recycle_threshold() + + case {run(path, state), state.count + 1} do + {{:error, :worker_failure} = reply, _} -> + {:reply, reply, stop(state)} + {reply, ^max_count} -> + {:reply, reply, stop(state)} + {reply, count} -> + {:reply, reply, %{state | count: count}} + end + end + + def handle_info({:DOWN, _, :process, pid, :normal}, state) do + case state.pid do + ^pid -> {:noreply, %State{}} + _ -> {:noreply, state} + end + end + + defp start(%{started: false} = state) do + worker_command = Configuration.get_worker_command() + worker_options = [stdin: true, stdout: true, stderr: true, monitor: true] + worker_timeout = Configuration.get_worker_timeout() + {:ok, pid, ospid} = Exexec.run(worker_command, worker_options) + state = %{state | started: true, pid: pid, ospid: ospid} + + receive do + {:stdout, ^ospid, "ok\n"} -> {:ok, state} + {:stdout, ^ospid, "ok\r\n"} -> {:ok, state} + after worker_timeout -> + {:error, :worker_failure} + end + end + + defp stop(%{started: true} = state) do + :normal = Exexec.stop_and_wait(state.ospid) + %State{} + end + + defp run(path, %{pid: pid, ospid: ospid} = _state) do + worker_timeout = Configuration.get_worker_timeout() + :ok = Exexec.send(pid, "file; " <> path <> "\n") + + receive do + {stream, ^ospid, message} -> + handle_response(stream, message) + after worker_timeout -> + {:error, :worker_failure} + end + end + + defp handle_response(:stdout, "ok; " <> message) do + case message |> String.trim |> String.split("\t") do + [mime_type, encoding, content] -> {:ok, [mime_type: mime_type, encoding: encoding, content: content]} + _ -> {:error, :malformed_response} + end + end + + defp handle_response(:stderr, "error; " <> message) do + {:error, String.trim(message)} + end + + # TODO handle late responses under load + # 17:13:47.808 [error] GenServer #PID<0.199.0> terminating + # ** (FunctionClauseError) no function clause matching in GenMagic.ApprenticeServer.handle_info/2 + # (gen_magic) lib/gen_magic/apprentice_server.ex:41: GenMagic.ApprenticeServer.handle_info({:stderr, 12304, "\n"}, %GenMagic.ApprenticeServer.State{count: 2, ospid: 12304, pid: #PID<0.243.0>, started: true}) + # (stdlib) gen_server.erl:637: :gen_server.try_dispatch/4 + # (stdlib) gen_server.erl:711: :gen_server.handle_msg/6 + # (stdlib) proc_lib.erl:249: :proc_lib.init_p_do_apply/3 + # Last message: {:stderr, 12304, "\n"} + +end diff --git a/lib/gen_magic/configuration.ex b/lib/gen_magic/configuration.ex new file mode 100644 index 0000000..b815aad --- /dev/null +++ b/lib/gen_magic/configuration.ex @@ -0,0 +1,34 @@ +defmodule GenMagic.Configuration do + @moduledoc """ + Convenience module which returns information from configuration. + """ + + @otp_app Mix.Project.config[:app] + + def get_worker_command do + database_paths = get_database_paths() + worker_path = Path.join(:code.priv_dir(@otp_app), get_worker_name()) + worker_arguments = Enum.map(database_paths, & "--file " <> &1) + Enum.join([worker_path | worker_arguments], " ") + end + + def get_worker_name do + get_env(:worker_name) + end + + def get_worker_timeout do + get_env(:worker_timeout) + end + + def get_recycle_threshold do + get_env(:recycle_threshold) + end + + def get_database_paths do + get_env(:database_patterns) |> Enum.flat_map(&Path.wildcard/1) + end + + defp get_env(key) do + Application.get_env(@otp_app, key) + end +end @@ -0,0 +1,28 @@ +defmodule GenMagic.MixProject do + use Mix.Project + + def project do + [ + app: :gen_magic, + version: "0.1.0", + elixir: "~> 1.8", + start_permanent: Mix.env() == :prod, + compilers: [:elixir_make] ++ Mix.compilers, + deps: deps() + ] + end + + def application do + [ + extra_applications: [:logger] + ] + end + + defp deps do + [ + {:elixir_make, "~> 0.4", runtime: false}, + {:exexec, "~> 0.2.0"}, + {:erlexec, "~> 1.10.0"} + ] + end +end diff --git a/mix.lock b/mix.lock new file mode 100644 index 0000000..ac0e602 --- /dev/null +++ b/mix.lock @@ -0,0 +1,5 @@ +%{ + "elixir_make": {:hex, :elixir_make, "0.6.0", "38349f3e29aff4864352084fc736fa7fa0f2995a819a737554f7ebd28b85aaab", [:mix], [], "hexpm"}, + "erlexec": {:hex, :erlexec, "1.10.0", "cba7924cf526097d2082ceb0ec34e7db6bca2624b8f3867fb3fa89c4cf25d227", [:rebar3], [], "hexpm"}, + "exexec": {:hex, :exexec, "0.2.0", "a6ffc48cba3ac9420891b847e4dc7120692fb8c08c9e82220ebddc0bb8d96103", [:mix], [{:erlexec, "~> 1.10", [hex: :erlexec, repo: "hexpm", optional: false]}], "hexpm"}, +} diff --git a/src/apprentice.c b/src/apprentice.c new file mode 100644 index 0000000..04f08aa --- /dev/null +++ b/src/apprentice.c @@ -0,0 +1,216 @@ +#include <errno.h> +#include <getopt.h> +#include <libgen.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <magic.h> + +#define USAGE "--file <path/to/magic.mgc> [--file <path/to/custom.mgc> ...]" +#define DELIMINTER "\t" + +#define ANSI_INFO "\x1b[37m" // gray +#define ANSI_OK "\x1b[32m" // green +#define ANSI_ERROR "\x1b[31m" // red +#define ANSI_IGNORE "\x1b[90m" // red +#define ANSI_RESET "\x1b[0m" + +#define MAGIC_FLAGS_COMMON (MAGIC_CHECK|MAGIC_ERROR) +magic_t magic_setup(int flags); + +void setup_environment(); +void setup_options(int argc, char **argv); +void setup_options_file(char *optarg); +void setup_system(); +void process_line(char *line); +void process_file(char *path); +void print_info(const char *format, ...); +void print_ok(const char *format, ...); +void print_error(const char *format, ...); + +struct file { + char *path; + struct file *next; +}; + +static struct file* magic_database; +static magic_t magic_mime_type; // MAGIC_MIME_TYPE +static magic_t magic_mime_encoding; // MAGIC_MIME_ENCODING +static magic_t magic_type_name; // MAGIC_NONE + +int main (int argc, char **argv) { + setup_environment(); + setup_options(argc, argv); + setup_system(); + printf("ok\n"); + fflush(stdout); + + char line[4096]; + while (fgets(line, 4096, stdin)) { + process_line(line); + } + + return 0; +} + +void setup_environment() { + // setbuf(stdout, NULL); + opterr = 0; +} + +void setup_options(int argc, char **argv) { + const char *option_string = "f:"; + static struct option long_options[] = { + {"file", required_argument, 0, 'f'}, + {0, 0, 0, 0} + }; + + int option_character; + while (1) { + int option_index = 0; + option_character = getopt_long(argc, argv, option_string, long_options, &option_index); + if (-1 == option_character) { + break; + } + switch (option_character) { + case 'f': { + setup_options_file(optarg); + break; + } + case '?': + default: { + print_info("%s %s\n", basename(argv[0]), USAGE); + exit(1); + break; + } + } + } +} + +void setup_options_file(char *optarg) { + print_info("Magic Database: %s", optarg); + if (0 != access(optarg, R_OK)) { + print_error("no_database"); + exit(1); + } + struct file *next = malloc(sizeof(struct file)); + next->path = strdup(optarg); + next->next = magic_database; + magic_database = next; +} + +void setup_system() { + print_info("Starting System"); + magic_mime_encoding = magic_setup(MAGIC_FLAGS_COMMON|MAGIC_MIME_ENCODING); + magic_mime_type = magic_setup(MAGIC_FLAGS_COMMON|MAGIC_MIME_TYPE); + magic_type_name = magic_setup(MAGIC_FLAGS_COMMON|MAGIC_NONE); +} + +magic_t magic_setup(int flags) { + magic_t magic = magic_open(flags); + struct file *current_database = magic_database; + while (current_database) { + if (isatty(STDERR_FILENO)) { + fprintf(stderr, ANSI_IGNORE); + } + magic_load(magic, current_database->path); + if (isatty(STDERR_FILENO)) { + fprintf(stderr, ANSI_RESET); + } + current_database = current_database->next; + } + return magic; +} + +void process_line(char *line) { + char path[4096]; + + if (0 == strcmp(line, "exit\n")) { + exit(0); + } + + if (1 != sscanf(line, "file; %[^\n]s", path)) { + print_error("bad_path"); + return; + } + + if (0 != access(path, R_OK)) { + print_error("no_file"); + return; + } + + process_file(path); +} + +void process_file(char *path) { + const char *mime_type_result = magic_file(magic_mime_type, path); + const char *mime_type_error = magic_error(magic_mime_type); + const char *mine_encoding_result = magic_file(magic_mime_encoding, path); + const char *mine_encoding_error = magic_error(magic_mime_encoding); + const char *type_name_result = magic_file(magic_type_name, path); + const char *type_name_error = magic_error(magic_type_name); + + if (mime_type_error) { + print_error(mime_type_error); + return; + } + + if (mine_encoding_error) { + print_error(mine_encoding_error); + return; + } + + if (type_name_error) { + print_error(type_name_error); + return; + } + + print_ok("%s%s%s%s%s", mime_type_result, DELIMINTER, mine_encoding_result, DELIMINTER, type_name_result); +} + +void print_info(const char *format, ...) { + if (!isatty(STDOUT_FILENO)) { + return; + } + + printf(ANSI_INFO "[INFO] " ANSI_RESET); + va_list arguments; + va_start(arguments, format); + vprintf(format, arguments); + va_end(arguments); + printf("\n"); +} + +void print_ok(const char *format, ...) { + if (isatty(STDOUT_FILENO)) { + printf(ANSI_OK "[OK] " ANSI_RESET); + } else { + printf("ok; "); + } + + va_list arguments; + va_start(arguments, format); + vprintf(format, arguments); + va_end(arguments); + printf("\n"); + fflush(stdout); +} + +void print_error(const char *format, ...) { + if (isatty(STDERR_FILENO)) { + fprintf(stderr, ANSI_ERROR "[ERROR] " ANSI_RESET); + } else { + fprintf(stderr, "error; "); + } + + va_list arguments; + va_start(arguments, format); + vfprintf(stderr, format, arguments); + va_end(arguments); + fprintf(stderr, "\n"); + fflush(stderr); +} diff --git a/test/gen_magic_test.exs b/test/gen_magic_test.exs new file mode 100644 index 0000000..675dd81 --- /dev/null +++ b/test/gen_magic_test.exs @@ -0,0 +1,8 @@ +defmodule GenMagicTest do + use ExUnit.Case + doctest GenMagic + + test "greets the world" do + assert GenMagic.hello() == :world + end +end diff --git a/test/test_helper.exs b/test/test_helper.exs new file mode 100644 index 0000000..869559e --- /dev/null +++ b/test/test_helper.exs @@ -0,0 +1 @@ +ExUnit.start() |