diff options
Diffstat (limited to 'priv/python/pyerlai/genservers')
-rw-r--r-- | priv/python/pyerlai/genservers/__init__.py | 0
-rw-r--r-- | priv/python/pyerlai/genservers/clip_ask.py | 65
-rw-r--r-- | priv/python/pyerlai/genservers/image_to_text.py | 43
-rw-r--r-- | priv/python/pyerlai/genservers/pil.py | 21
4 files changed, 129 insertions, 0 deletions
# ── priv/python/pyerlai/genservers/__init__.py (new, empty) ──────────────────

# ── priv/python/pyerlai/genservers/clip_ask.py ───────────────────────────────
"""Pyrlang GenServer that zero-shot classifies an image against a fixed
prompt list with OpenAI CLIP (openai/clip-vit-base-patch32)."""
from term import Atom
from pyrlang.gen.server import GenServer
from pyrlang.gen.decorators import call, cast, info
from PIL import Image
import io
import sys

from transformers import CLIPProcessor, CLIPModel

# Candidate labels scored against the incoming image; results are returned
# sorted by descending probability.
PROMPTS = [
    "photo",
    "dog photo",
    "cat photo",
    "food photo",
    "meme",
    "painting",
    "drawing",
    "selfie",
    "portrait photography",
    "tv capture",
    "screenshot",
    "terminal/ssh/console screenshot",
    "twitter screenshot",
    "chat log",
    "4chan screenshot",
    "scanned document",
    "book picture",
]


class ClipAsk(GenServer):
    """GenServer registered as the Erlang atom ``clip_ask``.

    Model loading is deferred: __init__ sends itself a ``"register"``
    message so the heavy CLIP download/load happens in ``setup`` after the
    process is running, not while the node is constructing it.
    """

    def __init__(self, node, logger) -> None:
        super().__init__()
        node.register_name(self, Atom('clip_ask'))
        self.logger = logger
        self.model = None       # CLIPModel, populated by setup()
        self.processor = None   # CLIPProcessor, populated by setup()
        self.ready = False      # guards run() until setup() has finished
        print("clipask: starting")
        # Self-send triggers the @info handler below once the mailbox is live.
        mypid = self.pid_
        node.send_nowait(mypid, mypid, "register")
        self.logger.info("initialized process: clip_ask.")

    @info(0, lambda msg: msg == 'register')
    def setup(self, msg):
        """Load the CLIP model/processor, then mark the server ready."""
        print("clipask: doing setup")
        # FIX: was logging "image_to_text_vit_gpt2: setup..." — copy-paste
        # from the captioning module; this is the clip_ask server.
        self.logger.info("clip_ask: setup...")
        self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
        self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
        self.logger.info("clip_ask: setup finished.")
        self.ready = True
        print("clipask: ready")

    @call(1, lambda msg: isinstance(msg, tuple) and msg[0] == Atom("run"))
    def run(self, msg):
        """Classify the image bytes in msg[1] against PROMPTS.

        Returns ``(ok, {label: probability, ...})`` sorted by descending
        probability, or ``(error, not_ready)`` before setup completes.
        """
        if not self.ready:
            return (Atom('error'), Atom('not_ready'))
        self.logger.info("clip_ask: inference")
        image = Image.open(io.BytesIO(msg[1]))
        inputs = self.processor(text=PROMPTS, images=image,
                                return_tensors="pt", padding=True)
        outputs = self.model(**inputs)
        # One image vs. len(PROMPTS) texts → softmax over the text axis.
        probs = outputs.logits_per_image.softmax(dim=1)
        labels_with_probs = dict(zip(PROMPTS, probs.detach().numpy()[0]))
        results = dict(sorted(labels_with_probs.items(),
                              key=lambda item: item[1], reverse=True))
        # .item() converts numpy scalars to plain floats for Erlang transport.
        return (Atom('ok'), {k: v.item() for k, v in results.items()})


# ── priv/python/pyerlai/genservers/image_to_text.py ──────────────────────────
"""Pyrlang GenServer that captions images with ViT-GPT2
(nlpconnect/vit-gpt2-image-captioning)."""
from term import Atom
from pyrlang.gen.server import GenServer
from pyrlang.gen.decorators import call, cast, info
from PIL import Image
from transformers import GPT2TokenizerFast, ViTImageProcessor, VisionEncoderDecoderModel
import io


class ImageToTextViTGPT2(GenServer):
    """GenServer registered as the Erlang atom ``image_to_text_vit_gpt2``.

    Uses the same deferred-setup pattern as ClipAsk: __init__ self-sends
    ``"register"`` so the model download/load runs inside ``setup``.
    """

    def __init__(self, node, logger) -> None:
        super().__init__()
        node.register_name(self, Atom('image_to_text_vit_gpt2'))
        self.logger = logger
        self.model = None            # VisionEncoderDecoderModel
        self.tokenizer = None        # GPT2TokenizerFast
        self.image_processor = None  # ViTImageProcessor
        self.ready = False           # guards run() until setup() completes
        print("image_to_text_vit_gpt2: starting")
        mypid = self.pid_
        node.send_nowait(mypid, mypid, "register")
        # FIX: was "text_to_image_vit_gpt2" — transposed name; the registered
        # process is image_to_text_vit_gpt2.
        self.logger.info("initialized process: image_to_text_vit_gpt2.")

    @info(0, lambda msg: msg == 'register')
    def setup(self, msg):
        """Load the captioning model, tokenizer and image processor."""
        print("image_to_text_vit_gpt2: doing setup")
        self.logger.info("image_to_text_vit_gpt2: setup...")
        self.model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
        self.tokenizer = GPT2TokenizerFast.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
        self.image_processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
        # FIX: was "text_to_image_vit_gpt2: setup finished." — same transposed
        # name as above.
        self.logger.info("image_to_text_vit_gpt2: setup finished.")
        self.ready = True
        print("image_to_text_vit_gpt2: ready")

    @call(1, lambda msg: isinstance(msg, tuple) and msg[0] == Atom("run"))
    def run(self, msg):
        """Caption the image bytes in msg[1].

        Returns ``(ok, caption_string)`` or ``(error, not_ready)`` before
        setup completes.
        """
        if not self.ready:
            return (Atom('error'), Atom('not_ready'))
        self.logger.info("image_to_text_vit_gpt2: inference")
        # ViT expects 3-channel input; convert up-front so RGBA/palette
        # images don't fail inside the processor.
        image = Image.open(io.BytesIO(msg[1])).convert('RGB')
        pixel_values = self.image_processor(image, return_tensors="pt").pixel_values
        generated_ids = self.model.generate(pixel_values, max_new_tokens=40)
        generated_text = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        return (Atom('ok'), generated_text)


# ── priv/python/pyerlai/genservers/pil.py ────────────────────────────────────
"""Pyrlang GenServer exposing basic Pillow image metadata (no ML model)."""
from term import Atom
from pyrlang.gen.server import GenServer
from pyrlang.gen.decorators import call, cast, info
from PIL import Image
import io
import sys


class Pil(GenServer):
    """GenServer registered as the Erlang atom ``pil``.

    Stateless apart from the logger; no deferred setup is needed because
    Pillow is already imported and there is no model to load.
    """

    def __init__(self, node, logger) -> None:
        super().__init__()
        node.register_name(self, Atom('pil'))
        self.logger = logger
        # FIX: was print("clipask: starting") — copy-paste from clip_ask.py.
        # Also dropped the unused `mypid = self.pid_` local: unlike the other
        # servers, Pil never self-sends a "register" message.
        print("pil: starting")
        self.logger.info("initialized process: pil.")

    @call(1, lambda msg: isinstance(msg, tuple) and msg[0] == Atom("run"))
    def run(self, msg):
        """Return ``(ok, %{width, height, animated})`` for the image bytes
        in msg[1]; ``animated`` is False for formats without the attribute."""
        # FIX: was logging "clip_ask: inference" — copy-paste from clip_ask.py.
        self.logger.info("pil: inference")
        image = Image.open(io.BytesIO(msg[1]))
        return (Atom('ok'), {
            "width": image.width,
            "height": image.height,
            "animated": getattr(image, "is_animated", False),
        })