summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvadne Wu <ev@radi.ws>2019-07-03 21:18:28 +0100
committerJames Every <devstopfix@gmail.com>2019-07-03 21:18:28 +0100
commit46375802f5c69b407140aab51854943e1c8e363c (patch)
treef0aa7c89fcad13ba423604c914ab7c1aa3a528e3
Prototype using erlexec
-rw-r--r--.formatter.exs4
-rw-r--r--.gitignore26
-rw-r--r--Makefile22
-rw-r--r--README.md21
-rw-r--r--lib/gen_magic.ex27
-rw-r--r--lib/gen_magic/apprentice_server.ex100
-rw-r--r--lib/gen_magic/configuration.ex34
-rw-r--r--mix.exs28
-rw-r--r--mix.lock5
-rw-r--r--src/apprentice.c216
-rw-r--r--test/gen_magic_test.exs8
-rw-r--r--test/test_helper.exs1
12 files changed, 492 insertions, 0 deletions
diff --git a/.formatter.exs b/.formatter.exs
new file mode 100644
index 0000000..d2cda26
--- /dev/null
+++ b/.formatter.exs
@@ -0,0 +1,4 @@
+# Used by "mix format"
+[
+ inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
+]
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..648beab
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,26 @@
+# The directory Mix will write compiled artifacts to.
+/_build/
+*.o
+priv/
+
+# If you run "mix test --cover", coverage assets end up here.
+/cover/
+
+# The directory Mix downloads your dependencies sources to.
+/deps/
+
+# Where third-party dependencies like ExDoc output generated docs.
+/doc/
+
+# Ignore .fetch files in case you like to edit your project deps locally.
+/.fetch
+
+# If the VM crashes, it generates a dump, let's ignore it too.
+erl_crash.dump
+
+# Also ignore archive artifacts (built via "mix archive.build").
+*.ez
+
+# Ignore package tarball (built via "mix hex.build").
+gen_magic-*.tar
+
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..9e366c4
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,22 @@
+# Builds the libmagic worker executable (priv/apprentice) from src/apprentice.c.
+# Uses GNU make features (pattern rules, order-only prerequisites).
+CC = gcc
+CFLAGS = -std=c99 -g -Wall
+LDFLAGS = -lm -lmagic
+HEADER_FILES = src
+C_SOURCE_FILES = src/apprentice.c
+OBJECT_FILES = $(C_SOURCE_FILES:.c=.o)
+EXECUTABLE_DIRECTORY = priv
+EXECUTABLE = $(EXECUTABLE_DIRECTORY)/apprentice
+
+# Neither target produces a file of the same name.
+.PHONY: all clean
+
+all: $(EXECUTABLE)
+
+# The output directory is an order-only prerequisite: it has to exist, but a
+# change of its timestamp must not force the executable to be relinked.
+$(EXECUTABLE): $(OBJECT_FILES) | $(EXECUTABLE_DIRECTORY)
+	$(CC) $(OBJECT_FILES) -o $@ $(LDFLAGS)
+
+$(EXECUTABLE_DIRECTORY):
+	mkdir -p $(EXECUTABLE_DIRECTORY)
+
+# Compile (without linking) each C source into its object file.
+%.o: %.c
+	$(CC) $(CFLAGS) -c $< -o $@
+
+clean:
+	rm -f $(EXECUTABLE) $(OBJECT_FILES)
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9a4cf4d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,21 @@
+# GenMagic
+
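+GenMagic is a libMagic client for Elixir. It reports the MIME type, encoding and content description of a file by delegating to an external worker program linked against libmagic.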
+
+## Installation
+
+If [available in Hex](https://hex.pm/docs/publish), the package can be installed
+by adding `gen_magic` to your list of dependencies in `mix.exs`:
+
+```elixir
+def deps do
+ [
+ {:gen_magic, "~> 0.1.0"}
+ ]
+end
+```
+
+Documentation can be generated with [ExDoc](https://github.com/elixir-lang/ex_doc)
+and published on [HexDocs](https://hexdocs.pm). Once published, the docs can
+be found at [https://hexdocs.pm/gen_magic](https://hexdocs.pm/gen_magic).
+
diff --git a/lib/gen_magic.ex b/lib/gen_magic.ex
new file mode 100644
index 0000000..3711dfa
--- /dev/null
+++ b/lib/gen_magic.ex
@@ -0,0 +1,27 @@
+defmodule GenMagic do
+  @moduledoc """
+  Top-level namespace for GenMagic, the libMagic client for Elixir.
+  """
+
+  alias __MODULE__.ApprenticeServer
+
+  @doc """
+  Inspects the file at `path` using a throwaway server process.
+
+  A `GenMagic.ApprenticeServer` is started, asked once to `{:perform, path}`,
+  and stopped again; the server's reply is returned unchanged. Applications
+  will usually keep servers in a pool of their own instead of calling this.
+  """
+  def perform(path) do
+    {:ok, server} = ApprenticeServer.start_link()
+    reply = GenServer.call(server, {:perform, path})
+    :ok = GenServer.stop(server)
+    reply
+  end
+
+  @doc """
+  Inspects `path` over and over on a single server, printing `[count, reply]`
+  for every round. This function never returns.
+  """
+  def perform_infinite(path) do
+    {:ok, server} = ApprenticeServer.start_link()
+
+    0
+    |> Stream.iterate(&(&1 + 1))
+    |> Enum.each(fn iteration ->
+      IO.inspect([iteration, GenServer.call(server, {:perform, path})])
+    end)
+  end
+end
diff --git a/lib/gen_magic/apprentice_server.ex b/lib/gen_magic/apprentice_server.ex
new file mode 100644
index 0000000..5aee590
--- /dev/null
+++ b/lib/gen_magic/apprentice_server.ex
@@ -0,0 +1,100 @@
+defmodule GenMagic.ApprenticeServer do
+  @moduledoc """
+  Provides access to the underlying libMagic client which performs file introspection.
+
+  The external worker is launched lazily on the first call. Every
+  `{:perform, path}` call writes one `file; <path>` line to the worker and
+  waits for its answer. The worker is stopped after a failed request or once
+  the configured recycle threshold of requests has been served; the next call
+  launches a fresh one.
+  """
+
+  use GenServer
+
+  alias GenMagic.Configuration
+
+  defmodule State do
+    @moduledoc false
+    defstruct pid: nil, ospid: nil, started: false, count: 0
+  end
+
+  def start_link(args \\ []) do
+    GenServer.start_link(__MODULE__, args)
+  end
+
+  @impl true
+  def init(_) do
+    {:ok, %State{}}
+  end
+
+  @impl true
+  def handle_call(message, from, %{started: false} = state) do
+    case start(state) do
+      {:ok, state} -> handle_call(message, from, state)
+      {:error, _} = error -> {:reply, error, state}
+    end
+  end
+
+  def handle_call({:perform, path}, _, state) do
+    max_count = Configuration.get_recycle_threshold()
+
+    case {run(path, state), state.count + 1} do
+      {{:error, :worker_failure} = reply, _} ->
+        {:reply, reply, stop(state)}
+
+      {reply, ^max_count} ->
+        {:reply, reply, stop(state)}
+
+      {reply, count} ->
+        {:reply, reply, %{state | count: count}}
+    end
+  end
+
+  # The current worker went down, whatever the reason: forget it so that the
+  # next call starts a new one.
+  @impl true
+  def handle_info({:DOWN, _, :process, pid, _reason}, %{pid: pid}) do
+    {:noreply, %State{}}
+  end
+
+  # Anything else is stale: output fragments that arrived after their request
+  # was answered, or notifications about a worker that was already replaced.
+  # Dropping them keeps the server alive instead of raising FunctionClauseError.
+  def handle_info(_message, state) do
+    {:noreply, state}
+  end
+
+  # Launches the worker and waits for its "ok" handshake line.
+  defp start(%{started: false} = state) do
+    worker_command = Configuration.get_worker_command()
+    worker_options = [stdin: true, stdout: true, stderr: true, monitor: true]
+    worker_timeout = Configuration.get_worker_timeout()
+    {:ok, pid, ospid} = Exexec.run(worker_command, worker_options)
+    started = %{state | started: true, pid: pid, ospid: ospid}
+
+    receive do
+      {:stdout, ^ospid, "ok\n"} -> {:ok, started}
+      {:stdout, ^ospid, "ok\r\n"} -> {:ok, started}
+    after
+      worker_timeout ->
+        # The handshake never came; do not leave the OS process running.
+        _ = Exexec.stop(ospid)
+        {:error, :worker_failure}
+    end
+  end
+
+  defp stop(%{started: true} = state) do
+    :normal = Exexec.stop_and_wait(state.ospid)
+    %State{}
+  end
+
+  # Sends one request and waits up to the configured timeout for its answer.
+  defp run(path, %{pid: pid, ospid: ospid} = _state) do
+    worker_timeout = Configuration.get_worker_timeout()
+    :ok = Exexec.send(pid, "file; " <> path <> "\n")
+    deadline = System.monotonic_time(:millisecond) + worker_timeout
+    await_response(ospid, deadline)
+  end
+
+  # Output chunks that are not a recognisable answer (for example a lone "\n"
+  # left over from an earlier error line) are skipped; the wait continues
+  # until the deadline passes.
+  defp await_response(ospid, deadline) do
+    remaining = max(deadline - System.monotonic_time(:millisecond), 0)
+
+    receive do
+      {stream, ^ospid, message} ->
+        case handle_response(stream, message) do
+          :ignore -> await_response(ospid, deadline)
+          reply -> reply
+        end
+    after
+      remaining -> {:error, :worker_failure}
+    end
+  end
+
+  defp handle_response(:stdout, "ok; " <> message) do
+    case message |> String.trim() |> String.split("\t") do
+      [mime_type, encoding, content] ->
+        {:ok, [mime_type: mime_type, encoding: encoding, content: content]}
+
+      _ ->
+        {:error, :malformed_response}
+    end
+  end
+
+  defp handle_response(:stderr, "error; " <> message) do
+    {:error, String.trim(message)}
+  end
+
+  defp handle_response(_stream, _message) do
+    :ignore
+  end
+end
diff --git a/lib/gen_magic/configuration.ex b/lib/gen_magic/configuration.ex
new file mode 100644
index 0000000..b815aad
--- /dev/null
+++ b/lib/gen_magic/configuration.ex
@@ -0,0 +1,34 @@
+defmodule GenMagic.Configuration do
+  @moduledoc """
+  Convenience module which returns information from configuration.
+
+  Every value is read from the application environment of the application
+  named in the Mix project; a key that has not been configured yields `nil`.
+  """
+
+  @otp_app Mix.Project.config()[:app]
+
+  @doc """
+  Builds the command line for the worker: the executable inside the
+  application's `priv` directory followed by one `--file <path>` argument per
+  database path, all joined with single spaces.
+  """
+  def get_worker_command do
+    file_flags = for path <- get_database_paths(), do: "--file " <> path
+    executable = Path.join(:code.priv_dir(@otp_app), get_worker_name())
+    Enum.join([executable | file_flags], " ")
+  end
+
+  @doc "Returns the configured `:worker_name`."
+  def get_worker_name, do: get_env(:worker_name)
+
+  @doc "Returns the configured `:worker_timeout`."
+  def get_worker_timeout, do: get_env(:worker_timeout)
+
+  @doc "Returns the configured `:recycle_threshold`."
+  def get_recycle_threshold, do: get_env(:recycle_threshold)
+
+  @doc """
+  Expands every configured `:database_patterns` entry with `Path.wildcard/1`
+  and returns the matching paths as one flat list.
+  """
+  def get_database_paths do
+    patterns = get_env(:database_patterns)
+    Enum.flat_map(patterns, &Path.wildcard/1)
+  end
+
+  defp get_env(key), do: Application.get_env(@otp_app, key)
+end
diff --git a/mix.exs b/mix.exs
new file mode 100644
index 0000000..4ad7c93
--- /dev/null
+++ b/mix.exs
@@ -0,0 +1,28 @@
+defmodule GenMagic.MixProject do
+  use Mix.Project
+
+  # Project definition. The :elixir_make compiler runs ahead of the standard
+  # Mix compilers.
+  def project do
+    [
+      app: :gen_magic,
+      version: "0.1.0",
+      elixir: "~> 1.8",
+      start_permanent: Mix.env() == :prod,
+      compilers: [:elixir_make | Mix.compilers()],
+      deps: deps()
+    ]
+  end
+
+  # OTP application definition.
+  def application do
+    [extra_applications: [:logger]]
+  end
+
+  # Dependencies; elixir_make is needed at build time only.
+  defp deps do
+    [
+      {:elixir_make, "~> 0.4", runtime: false},
+      {:exexec, "~> 0.2.0"},
+      {:erlexec, "~> 1.10.0"}
+    ]
+  end
+end
diff --git a/mix.lock b/mix.lock
new file mode 100644
index 0000000..ac0e602
--- /dev/null
+++ b/mix.lock
@@ -0,0 +1,5 @@
+%{
+ "elixir_make": {:hex, :elixir_make, "0.6.0", "38349f3e29aff4864352084fc736fa7fa0f2995a819a737554f7ebd28b85aaab", [:mix], [], "hexpm"},
+ "erlexec": {:hex, :erlexec, "1.10.0", "cba7924cf526097d2082ceb0ec34e7db6bca2624b8f3867fb3fa89c4cf25d227", [:rebar3], [], "hexpm"},
+ "exexec": {:hex, :exexec, "0.2.0", "a6ffc48cba3ac9420891b847e4dc7120692fb8c08c9e82220ebddc0bb8d96103", [:mix], [{:erlexec, "~> 1.10", [hex: :erlexec, repo: "hexpm", optional: false]}], "hexpm"},
+}
diff --git a/src/apprentice.c b/src/apprentice.c
new file mode 100644
index 0000000..04f08aa
--- /dev/null
+++ b/src/apprentice.c
@@ -0,0 +1,216 @@
+#include <errno.h>
+#include <getopt.h>
+#include <libgen.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <magic.h>
+
+// Usage text printed after the program name when option parsing fails.
+#define USAGE "--file <path/to/magic.mgc> [--file <path/to/custom.mgc> ...]"
+// Separator placed between the three fields of a successful response.
+#define DELIMINTER "\t"
+
+// ANSI escape sequences, emitted only when the target stream is a terminal.
+#define ANSI_INFO "\x1b[37m" // gray
+#define ANSI_OK "\x1b[32m" // green
+#define ANSI_ERROR "\x1b[31m" // red
+#define ANSI_IGNORE "\x1b[90m" // dark gray (bright black)
+#define ANSI_RESET "\x1b[0m"
+
+// Flags shared by every libmagic cookie opened by this program.
+#define MAGIC_FLAGS_COMMON (MAGIC_CHECK|MAGIC_ERROR)
+magic_t magic_setup(int flags);
+
+void setup_environment();
+void setup_options(int argc, char **argv);
+void setup_options_file(char *optarg);
+void setup_system();
+void process_line(char *line);
+void process_file(char *path);
+void print_info(const char *format, ...);
+void print_ok(const char *format, ...);
+void print_error(const char *format, ...);
+
+// Singly linked list node holding one magic database path.
+struct file {
+ char *path;
+ struct file *next;
+};
+
+// Head of the database list; setup_options_file prepends to it.
+static struct file* magic_database;
+static magic_t magic_mime_type; // MAGIC_MIME_TYPE
+static magic_t magic_mime_encoding; // MAGIC_MIME_ENCODING
+static magic_t magic_type_name; // MAGIC_NONE
+
+// Entry point: configures the process, announces readiness with a single
+// "ok" line on stdout, then handles one request per line read from stdin
+// until end of input.
+int main (int argc, char **argv) {
+  char line[4096];
+
+  setup_environment();
+  setup_options(argc, argv);
+  setup_system();
+
+  printf("ok\n");
+  fflush(stdout);
+
+  for (;;) {
+    if (NULL == fgets(line, sizeof(line), stdin)) {
+      break;
+    }
+    process_line(line);
+  }
+
+  return 0;
+}
+
+// Applies process-wide settings; main calls this before option parsing.
+void setup_environment() {
+ // setbuf(stdout, NULL);
+ // Suppress getopt's own diagnostics; setup_options prints the usage text
+ // itself when it meets an unknown option.
+ opterr = 0;
+}
+
+// Parses the command line. Each -f/--file argument is registered as a magic
+// database; any other option prints the usage text and exits with status 1.
+void setup_options(int argc, char **argv) {
+  static struct option long_options[] = {
+    {"file", required_argument, 0, 'f'},
+    {0, 0, 0, 0}
+  };
+  int option_index = 0;
+  int option_character;
+
+  while (-1 != (option_character = getopt_long(argc, argv, "f:", long_options, &option_index))) {
+    if ('f' == option_character) {
+      setup_options_file(optarg);
+      continue;
+    }
+
+    // '?' and every other value: unknown option or missing argument.
+    print_info("%s %s\n", basename(argv[0]), USAGE);
+    exit(1);
+  }
+}
+
+// Registers one magic database given on the command line.
+//
+// optarg: path of the database file. The process exits with status 1 when the
+// file is not readable ("no_database") or when memory for the list entry
+// cannot be allocated ("out_of_memory").
+void setup_options_file(char *optarg) {
+  print_info("Magic Database: %s", optarg);
+  if (0 != access(optarg, R_OK)) {
+    print_error("no_database");
+    exit(1);
+  }
+
+  struct file *next = malloc(sizeof(struct file));
+  char *path = strdup(optarg);
+  if (NULL == next || NULL == path) {
+    // Without this check the assignments below would dereference NULL.
+    print_error("out_of_memory");
+    exit(1);
+  }
+
+  // Prepend: the list ends up in reverse command-line order.
+  next->path = path;
+  next->next = magic_database;
+  magic_database = next;
+}
+
+// Opens the three libmagic cookies used to answer requests, one per output
+// field: MIME encoding, MIME type and type name.
+void setup_system() {
+ print_info("Starting System");
+ magic_mime_encoding = magic_setup(MAGIC_FLAGS_COMMON|MAGIC_MIME_ENCODING);
+ magic_mime_type = magic_setup(MAGIC_FLAGS_COMMON|MAGIC_MIME_TYPE);
+ magic_type_name = magic_setup(MAGIC_FLAGS_COMMON|MAGIC_NONE);
+}
+
+// Opens a libmagic cookie with the given flags and loads every registered
+// database into it.
+//
+// magic_load replaces whatever the cookie had loaded before, so calling it
+// once per database would leave only the last one active. The paths are
+// therefore joined into the colon-separated list magic_load accepts and
+// loaded with a single call. When no database was registered nothing is
+// loaded. A path that itself contains ':' cannot be expressed in this list.
+//
+// Returns the cookie produced by magic_open.
+magic_t magic_setup(int flags) {
+  magic_t magic = magic_open(flags);
+  struct file *current_database;
+  size_t total_length = 0;
+
+  // Each path is followed by one byte: a ':' separator or the terminator.
+  for (current_database = magic_database; current_database; current_database = current_database->next) {
+    total_length += strlen(current_database->path) + 1;
+  }
+  if (0 == total_length) {
+    return magic;
+  }
+
+  char *database_list = malloc(total_length);
+  if (NULL == database_list) {
+    print_error("out_of_memory");
+    exit(1);
+  }
+
+  char *cursor = database_list;
+  for (current_database = magic_database; current_database; current_database = current_database->next) {
+    size_t path_length = strlen(current_database->path);
+    memcpy(cursor, current_database->path, path_length);
+    cursor += path_length;
+    *cursor++ = current_database->next ? ':' : '\0';
+  }
+
+  // Dim whatever is written to stderr during loading when it is a terminal.
+  if (isatty(STDERR_FILENO)) {
+    fprintf(stderr, ANSI_IGNORE);
+  }
+  magic_load(magic, database_list);
+  if (isatty(STDERR_FILENO)) {
+    fprintf(stderr, ANSI_RESET);
+  }
+
+  free(database_list);
+  return magic;
+}
+
+// Handles one request line.
+//
+// "exit\n" terminates the process with status 0. "file; <path>" inspects the
+// file at <path>. Anything else reports "bad_path"; a path that is not
+// readable reports "no_file".
+void process_line(char *line) {
+  char path[4096];
+
+  if (0 == strcmp(line, "exit\n")) {
+    exit(0);
+  }
+
+  // The field width keeps the scan inside `path` whatever the length of
+  // `line`; one byte is reserved for the terminator.
+  if (1 != sscanf(line, "file; %4095[^\n]", path)) {
+    print_error("bad_path");
+    return;
+  }
+
+  if (0 != access(path, R_OK)) {
+    print_error("no_file");
+    return;
+  }
+
+  process_file(path);
+}
+
+// Runs one libmagic query. On failure the error is reported with print_error
+// and NULL is returned; otherwise the description owned by the cookie is
+// returned.
+static const char *inspect_with(magic_t cookie, const char *path) {
+  const char *result = magic_file(cookie, path);
+  const char *error = magic_error(cookie);
+
+  if (error || NULL == result) {
+    // The error text comes from libmagic and may contain '%', so it is
+    // passed as an argument and never used as the format string.
+    print_error("%s", error ? error : "no_result");
+    return NULL;
+  }
+
+  return result;
+}
+
+// Inspects the file at `path` and prints either one "ok" response carrying
+// the MIME type, MIME encoding and type name separated by DELIMINTER, or the
+// first error encountered.
+void process_file(char *path) {
+  const char *mime_type_result = inspect_with(magic_mime_type, path);
+  if (NULL == mime_type_result) {
+    return;
+  }
+
+  const char *mime_encoding_result = inspect_with(magic_mime_encoding, path);
+  if (NULL == mime_encoding_result) {
+    return;
+  }
+
+  const char *type_name_result = inspect_with(magic_type_name, path);
+  if (NULL == type_name_result) {
+    return;
+  }
+
+  print_ok("%s%s%s%s%s", mime_type_result, DELIMINTER, mime_encoding_result, DELIMINTER, type_name_result);
+}
+
+// Prints an informational line to stdout, but only when stdout is a
+// terminal; otherwise nothing is written.
+void print_info(const char *format, ...) {
+  va_list arguments;
+
+  if (isatty(STDOUT_FILENO)) {
+    printf(ANSI_INFO "[INFO] " ANSI_RESET);
+    va_start(arguments, format);
+    vprintf(format, arguments);
+    va_end(arguments);
+    printf("\n");
+  }
+}
+
+// Writes a success line to stdout and flushes it. The line starts with a
+// coloured "[OK] " tag on a terminal and with "ok; " otherwise.
+void print_ok(const char *format, ...) {
+  const int interactive = isatty(STDOUT_FILENO);
+  va_list arguments;
+
+  fputs(interactive ? ANSI_OK "[OK] " ANSI_RESET : "ok; ", stdout);
+
+  va_start(arguments, format);
+  vprintf(format, arguments);
+  va_end(arguments);
+
+  putchar('\n');
+  fflush(stdout);
+}
+
+void print_error(const char *format, ...) {
+ if (isatty(STDERR_FILENO)) {
+ fprintf(stderr, ANSI_ERROR "[ERROR] " ANSI_RESET);
+ } else {
+ fprintf(stderr, "error; ");
+ }
+
+ va_list arguments;
+ va_start(arguments, format);
+ vfprintf(stderr, format, arguments);
+ va_end(arguments);
+ fprintf(stderr, "\n");
+ fflush(stderr);
+}
diff --git a/test/gen_magic_test.exs b/test/gen_magic_test.exs
new file mode 100644
index 0000000..675dd81
--- /dev/null
+++ b/test/gen_magic_test.exs
@@ -0,0 +1,8 @@
+defmodule GenMagicTest do
+  use ExUnit.Case
+  doctest GenMagic
+
+  # The generated placeholder test called GenMagic.hello/0, which the library
+  # does not define. This check covers the public entry point instead and
+  # needs neither the worker binary nor any configuration.
+  test "exports perform/1" do
+    assert Code.ensure_loaded?(GenMagic)
+    assert function_exported?(GenMagic, :perform, 1)
+  end
+end
diff --git a/test/test_helper.exs b/test/test_helper.exs
new file mode 100644
index 0000000..869559e
--- /dev/null
+++ b/test/test_helper.exs
@@ -0,0 +1 @@
+ExUnit.start()