212 lines
8.9 KiB
Python
212 lines
8.9 KiB
Python
import sys
|
|
import cv2
|
|
import numpy as np
|
|
from PIL import Image
|
|
import objc
|
|
import threading
|
|
import base64
|
|
import os
|
|
from dotenv import load_dotenv
|
|
from openai import OpenAI
|
|
from AppKit import (
|
|
NSApplication, NSApp, NSWindow, NSView, NSImageView, NSButton,
|
|
NSStackView, NSImage, NSBitmapImageRep, NSBackingStoreBuffered,
|
|
NSWindowStyleMaskTitled, NSWindowStyleMaskClosable,
|
|
NSWindowStyleMaskResizable, NSWindowStyleMaskMiniaturizable,
|
|
NSTimer, NSMakeSize, NSMakeRect, NSObject, NSLog,
|
|
NSUserInterfaceLayoutOrientationVertical, NSLayoutAttributeCenterX,
|
|
NSLayoutAttributeCenterY, NSLayoutAttributeWidth, NSLayoutAttributeHeight,
|
|
NSLayoutAttributeTop, NSLayoutAttributeBottom, NSLayoutAttributeLeading,
|
|
NSLayoutAttributeTrailing, NSScrollView, NSTextView,
|
|
NSApplicationActivationPolicyRegular
|
|
)
|
|
from Foundation import NSObject, NSTimer, NSDate
|
|
|
|
# Load variables from a local .env file into the process environment so that
# os.getenv("OPENAI_API_KEY") further down in this file can find the API key.
load_dotenv()
|
|
|
|
class ItemSenseApp(NSObject):
    """Application delegate that shows a live camera preview and describes a captured item.

    The window holds three vertically stacked views: an image view fed by an
    OpenCV capture device, a read-only scrollable text view for results, and a
    single button whose title and action change with the current state
    ("Capture" -> "Processing..." -> "Scan Another" / "Error - Try Again").

    Instance attributes created in ``applicationDidFinishLaunching_``:
        window, stack_view, image_view, scroll_view, text_view, capture_button:
            the AppKit views making up the UI.
        cap: the ``cv2.VideoCapture`` for device 0.
        is_capturing: True while the preview is live; False while a captured
            frame is being analysed or its result is displayed.
        current_frame: the most recent BGR frame read from the camera, or None.
        timer: the repeating NSTimer that drives ``updateFrame_``.
    """

    def applicationDidFinishLaunching_(self, notification):
        """Build the window and its views, open the camera and start the preview timer."""
        # Raw AppKit enum values used below; the AppKit names are noted so the
        # numbers can be checked against the SDK headers.
        gravity_top = 1           # NSStackViewGravityTop
        gravity_center = 2        # NSStackViewGravityCenter
        gravity_bottom = 3        # NSStackViewGravityBottom
        scale_proportionally_down = 0   # NSImageScaleProportionallyDown
        bezel_border = 2          # NSBezelBorder
        width_and_height_sizable = 18   # NSViewWidthSizable | NSViewHeightSizable
        min_result_height = 120.0  # points reserved for the result text area

        style_mask = (
            NSWindowStyleMaskTitled
            | NSWindowStyleMaskClosable
            | NSWindowStyleMaskResizable
            | NSWindowStyleMaskMiniaturizable
        )
        self.window = NSWindow.alloc().initWithContentRect_styleMask_backing_defer_(
            NSMakeRect(0, 0, 800, 700),  # tall enough for preview + text view
            style_mask,
            NSBackingStoreBuffered,
            False,
        )
        self.window.setTitle_("ItemSense")
        self.window.center()

        # Main content view: a vertical stack view handles the layout.
        self.stack_view = NSStackView.alloc().init()
        self.stack_view.setOrientation_(NSUserInterfaceLayoutOrientationVertical)
        self.stack_view.setSpacing_(10)
        self.stack_view.setEdgeInsets_((10, 10, 10, 10))
        self.window.setContentView_(self.stack_view)

        # Image view for the camera feed.
        self.image_view = NSImageView.alloc().init()
        self.image_view.setImageScaling_(scale_proportionally_down)
        self.stack_view.addView_inGravity_(self.image_view, gravity_top)

        # Result view (scrollable text).
        self.scroll_view = NSScrollView.alloc().init()
        self.scroll_view.setHasVerticalScroller_(True)
        self.scroll_view.setBorderType_(bezel_border)
        # The scroll view has no intrinsic height, so the stack view could
        # collapse it.  A minimum-height constraint keeps the result area
        # visible.  (NSView only exposes heightAdjustLimit as a read-only
        # pagination property, so it cannot be used for this.)
        self.scroll_view.heightAnchor().constraintGreaterThanOrEqualToConstant_(
            min_result_height
        ).setActive_(True)

        # Text view: fixed to the scroll view's width, grows vertically.
        content_size = self.scroll_view.contentSize()
        self.text_view = NSTextView.alloc().initWithFrame_(
            NSMakeRect(0, 0, content_size.width, content_size.height)
        )
        self.text_view.setMinSize_(NSMakeSize(0.0, content_size.height))
        self.text_view.setMaxSize_(NSMakeSize(float("inf"), float("inf")))
        self.text_view.setVerticallyResizable_(True)
        self.text_view.setHorizontallyResizable_(False)
        self.text_view.setAutoresizingMask_(width_and_height_sizable)
        text_container = self.text_view.textContainer()
        text_container.setContainerSize_(NSMakeSize(content_size.width, float("inf")))
        text_container.setWidthTracksTextView_(True)
        self.text_view.setEditable_(False)
        self.text_view.setRichText_(False)

        self.scroll_view.setDocumentView_(self.text_view)
        self.stack_view.addView_inGravity_(self.scroll_view, gravity_center)

        # Start empty but visible so the layout is stable from the first frame.
        self.text_view.setString_("")

        # Capture button.
        self.capture_button = NSButton.buttonWithTitle_target_action_(
            "Capture", self, "captureClicked:"
        )
        self.stack_view.addView_inGravity_(self.capture_button, gravity_bottom)

        self.window.makeKeyAndOrderFront_(None)

        # Initialize camera (device index 0).
        self.cap = cv2.VideoCapture(0)
        if not self.cap.isOpened():
            NSLog("Error: Could not open camera")
            self.text_view.setString_("Error: Could not open camera.")

        # State.
        self.is_capturing = True
        self.current_frame = None

        # Drive the preview at roughly 30 frames per second.
        self.timer = NSTimer.scheduledTimerWithTimeInterval_target_selector_userInfo_repeats_(
            1.0 / 30.0, self, "updateFrame:", None, True
        )

    def applicationShouldTerminateAfterLastWindowClosed_(self, sender):
        """Tell AppKit to quit the application once its last window is closed."""
        return True

    def applicationWillTerminate_(self, notification):
        """Stop the preview timer and release the camera before the app exits."""
        # getattr guards against termination before launch setup completed.
        timer = getattr(self, "timer", None)
        if timer is not None:
            timer.invalidate()

        cap = getattr(self, "cap", None)
        if cap is not None and cap.isOpened():
            cap.release()

    def updateFrame_(self, timer):
        """Timer callback: read one camera frame and show it in the image view.

        Does nothing while ``is_capturing`` is False, so the last captured
        frame stays on screen (and in ``current_frame``) during analysis.
        """
        if not self.is_capturing:
            return

        cap = getattr(self, "cap", None)
        if cap is None or not cap.isOpened():
            return

        ok, frame = cap.read()
        if not ok or frame is None:
            return

        self.current_frame = frame  # kept in BGR order for cv2.imencode later

        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        height, width = rgb_frame.shape[:2]

        # Wrap the raw RGB bytes in a binary PPM ("P6") container so NSImage
        # can decode them from an NSData without an intermediate file.
        header = f"P6 {width} {height} 255 ".encode()
        payload = header + rgb_frame.tobytes()
        ns_data = objc.lookUpClass("NSData").dataWithBytes_length_(payload, len(payload))
        ns_image = NSImage.alloc().initWithData_(ns_data)

        # Keep the previous image if decoding failed rather than blanking the preview.
        if ns_image is not None:
            self.image_view.setImage_(ns_image)

    def captureClicked_(self, sender):
        """Button action: freeze the preview and analyse the current frame in the background."""
        if not self.is_capturing:
            return

        print("Capture clicked - Processing...")
        self.is_capturing = False
        self.capture_button.setTitle_("Processing...")
        self.capture_button.setEnabled_(False)
        self.text_view.setString_("Analyzing image...")

        # Run the network request off the main thread so the UI stays
        # responsive.  daemon=True keeps a pending request from blocking
        # process exit when the user quits.
        threading.Thread(target=self.processImage, daemon=True).start()

    def resetScan_(self, sender):
        """Button action after a result: clear the text and resume the live preview."""
        print("Resetting...")
        self.text_view.setString_("")
        self.capture_button.setTitle_("Capture")
        self.capture_button.setAction_("captureClicked:")
        # The timer was never invalidated, only gated on is_capturing, so
        # flipping the flag resumes the preview on the next tick.
        self.is_capturing = True

    def processImage(self):
        """Send the captured frame to the OpenAI chat API and report the outcome.

        Runs on a background thread.  All UI updates are marshalled back to
        the main thread through ``handleResponse:`` / ``handleError:``.
        """
        try:
            # Take one reference up front so the None check and the encode
            # operate on the same frame.
            frame = self.current_frame
            if frame is None:
                self.performSelectorOnMainThread_withObject_waitUntilDone_(
                    "handleError:", "No frame captured", False
                )
                return

            # Encode the image as JPEG, then base64 for a data: URL.
            encoded_ok, buffer = cv2.imencode('.jpg', frame)
            if not encoded_ok:
                self.performSelectorOnMainThread_withObject_waitUntilDone_(
                    "handleError:", "Failed to encode frame as JPEG", False
                )
                return
            base64_image = base64.b64encode(buffer).decode('utf-8')

            client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

            response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": "What is this item? Please provide a brief description.",
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}"
                                },
                            },
                        ],
                    }
                ],
                max_tokens=300,
            )

            result_text = response.choices[0].message.content
            if not result_text:
                # content may be None or empty; the text view needs a real string.
                self.performSelectorOnMainThread_withObject_waitUntilDone_(
                    "handleError:", "Empty response from model", False
                )
                return

            self.performSelectorOnMainThread_withObject_waitUntilDone_(
                "handleResponse:", result_text, False
            )

        except Exception as e:
            # Thread boundary: surface any failure in the UI instead of
            # letting the worker thread die silently.
            self.performSelectorOnMainThread_withObject_waitUntilDone_(
                "handleError:", str(e), False
            )

    def handleResponse_(self, result):
        """Main-thread callback: show the model's answer and offer another scan."""
        print("OpenAI Response received")
        self.text_view.setString_(result)
        self.capture_button.setTitle_("Scan Another")
        self.capture_button.setEnabled_(True)
        self.capture_button.setAction_("resetScan:")

    def handleError_(self, error_msg):
        """Main-thread callback: show an error and return to the live preview."""
        print(f"Error: {error_msg}")
        self.text_view.setString_(f"Error: {error_msg}")
        self.capture_button.setTitle_("Error - Try Again")
        self.capture_button.setEnabled_(True)
        # Point the button back at the capture action in case it was changed.
        self.capture_button.setAction_("captureClicked:")
        self.is_capturing = True
|
|
|
|
def main():
    """Create the shared NSApplication, attach an ItemSenseApp delegate and run the event loop."""
    application = NSApplication.sharedApplication()
    # Regular activation policy, set before the delegate is attached.
    application.setActivationPolicy_(NSApplicationActivationPolicyRegular)

    # Held in a local for the lifetime of run() so the delegate stays referenced.
    app_delegate = ItemSenseApp.alloc().init()
    application.setDelegate_(app_delegate)

    # Bring the app to the foreground, then hand control to the AppKit run loop.
    application.activateIgnoringOtherApps_(True)
    application.run()


if __name__ == "__main__":
    main()
|