"""ItemSense: a small macOS app that identifies an item seen by the webcam.

The window is split in two. The left pane shows the live camera feed, a text
area and a capture button; the right pane is a web view. Pressing "Capture"
freezes the feed, sends the current frame to an OpenAI vision model, shows the
returned description and loads an Amazon search for the returned keywords in
the web view.
"""

import base64
import json
import os
import sys
import threading
import traceback
import urllib.parse

import cv2
import numpy as np
from PIL import Image
import objc
from dotenv import load_dotenv
from openai import OpenAI
from AppKit import (
    NSApplication,
    NSApp,
    NSWindow,
    NSView,
    NSImageView,
    NSButton,
    NSStackView,
    NSImage,
    NSBitmapImageRep,
    NSBackingStoreBuffered,
    NSWindowStyleMaskTitled,
    NSWindowStyleMaskClosable,
    NSWindowStyleMaskResizable,
    NSWindowStyleMaskMiniaturizable,
    NSTimer,
    NSMakeSize,
    NSMakeRect,
    NSObject,
    NSLog,
    NSUserInterfaceLayoutOrientationVertical,
    NSSplitView,
    NSLayoutAttributeCenterX,
    NSLayoutAttributeCenterY,
    NSLayoutAttributeWidth,
    NSLayoutAttributeHeight,
    NSLayoutAttributeTop,
    NSLayoutAttributeBottom,
    NSLayoutAttributeLeading,
    NSLayoutAttributeTrailing,
    NSScrollView,
    NSTextView,
    NSApplicationActivationPolicyRegular,
    NSFont,
    NSBezelBorder,
    NSImageScaleProportionallyDown,
    NSSplitViewDividerStyleThin,
    NSStackViewGravityBottom,
    NSStackViewGravityCenter,
    NSStackViewGravityTop,
    NSViewHeightSizable,
    NSViewWidthSizable,
)
from WebKit import WKWebView, WKWebViewConfiguration
from Foundation import NSObject, NSTimer, NSDate, NSURL, NSURLRequest
from PyObjCTools import AppHelper

# Load variables from a local .env file; OPENAI_API_KEY is read from the
# environment in ItemSenseApp.processImage.
load_dotenv()

# Camera settings.
CAMERA_INDEX = 0
FRAME_WIDTH = 640
FRAME_HEIGHT = 480
FRAMES_PER_SECOND = 30.0

# Vision request settings.
VISION_MODEL = "gpt-4o-mini"
MAX_RESPONSE_TOKENS = 300
ITEM_PROMPT = (
    "Identify the main item in the foreground, including the brand name if visible. Ignore the background and any people present. "
    "Return a JSON object with two keys: 'description' (a brief description of the item including brand) "
    "and 'search_term' (keywords to search for this item on Amazon, including brand). "
    "Return ONLY the JSON. Do not wrap in markdown code blocks."
)

AMAZON_SEARCH_URL = "https://www.amazon.com/s?k="


def _ppm_from_rgb(rgb_frame):
    """Return *rgb_frame* serialised as a binary PPM (``P6``) image.

    Args:
        rgb_frame: array whose first two dimensions are (height, width) and
            whose ``tobytes()`` yields the raw pixel data. Assumed to be an
            8-bit, 3-channel RGB frame as produced by ``cv2.cvtColor``.

    Returns:
        The PPM header followed by the raw pixel bytes.
    """
    height, width = rgb_frame.shape[:2]
    header = f"P6 {width} {height} 255 ".encode()
    return header + rgb_frame.tobytes()


def _parse_item_reply(reply):
    """Extract ``(description, search_term)`` from the model's JSON reply.

    Markdown code fences are stripped before decoding because the model may
    add them despite the prompt asking it not to.

    Args:
        reply: the raw text returned by the model.

    Returns:
        A ``(description, search_term)`` tuple of strings. ``description``
        falls back to "No description found." and ``search_term`` to "" when
        the corresponding key is missing or empty.

    Raises:
        ValueError: if *reply* is not a string, is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError``), or does not
            decode to a JSON object.
    """
    if not isinstance(reply, str):
        raise ValueError("model reply is empty")

    cleaned = reply.replace("```json", "").replace("```", "").strip()
    payload = json.loads(cleaned)
    if not isinstance(payload, dict):
        raise ValueError("model reply is not a JSON object")

    description = payload.get("description") or "No description found."
    search_term = payload.get("search_term") or ""
    return str(description), str(search_term).strip()


def _amazon_search_url(search_term):
    """Return the Amazon search URL for *search_term*.

    The term is percent-encoded with spaces mapped to ``+``, so characters
    such as ``&``, ``#`` or non-ASCII text cannot corrupt the query string.
    """
    return AMAZON_SEARCH_URL + urllib.parse.quote_plus(search_term)


class ItemSenseApp(NSObject):
    """Application delegate that owns the window, the camera and the scan flow.

    Instance attributes set at runtime:
        window, split_view, left_pane, image_view, scroll_view, text_view,
        capture_button, web_view: the UI objects built at launch.
        is_capturing: True while the live feed should keep updating.
        current_frame: the most recent BGR frame read from the camera, or
            None before the first successful read.
        cap: the ``cv2.VideoCapture`` (set by ``initCamera_``).
        timer: the repeating frame timer (set by ``initCamera_`` on success).
    """

    def applicationDidFinishLaunching_(self, notification):
        """Build the window and schedule camera start-up.

        Any exception raised while building the UI is printed with its
        traceback instead of propagating into the Cocoa run loop.
        """
        try:
            print("Application did finish launching...")

            # Increased width for split view (1200px width)
            style_mask = (
                NSWindowStyleMaskTitled
                | NSWindowStyleMaskClosable
                | NSWindowStyleMaskResizable
                | NSWindowStyleMaskMiniaturizable
            )
            self.window = NSWindow.alloc().initWithContentRect_styleMask_backing_defer_(
                NSMakeRect(0, 0, 1200, 600),
                style_mask,
                NSBackingStoreBuffered,
                False,
            )
            self.window.setTitle_("ItemSense")
            self.window.center()

            # Main split view: panes side by side, separated by a thin divider.
            self.split_view = NSSplitView.alloc().initWithFrame_(
                self.window.contentView().bounds()
            )
            self.split_view.setVertical_(True)
            self.split_view.setDividerStyle_(NSSplitViewDividerStyleThin)
            self.window.setContentView_(self.split_view)

            # Left pane (camera + description + controls)
            self.left_pane = NSStackView.alloc().init()
            self.left_pane.setOrientation_(NSUserInterfaceLayoutOrientationVertical)
            self.left_pane.setSpacing_(10)
            self.left_pane.setEdgeInsets_((10, 10, 10, 10))
            # Set a minimum width for the left pane so it doesn't disappear
            self.left_pane.setTranslatesAutoresizingMaskIntoConstraints_(False)
            self.left_pane.widthAnchor().constraintGreaterThanOrEqualToConstant_(
                400.0
            ).setActive_(True)
            self.split_view.addArrangedSubview_(self.left_pane)

            # Image view for the camera feed
            self.image_view = NSImageView.alloc().init()
            self.image_view.setImageScaling_(NSImageScaleProportionallyDown)
            self.left_pane.addView_inGravity_(self.image_view, NSStackViewGravityTop)

            # Result view (scrollable text)
            self.scroll_view = NSScrollView.alloc().init()
            self.scroll_view.setHasVerticalScroller_(True)
            self.scroll_view.setBorderType_(NSBezelBorder)

            # Text view: grows vertically, wraps to the scroll view's width.
            content_size = self.scroll_view.contentSize()
            self.text_view = NSTextView.alloc().initWithFrame_(
                NSMakeRect(0, 0, content_size.width, content_size.height)
            )
            self.text_view.setMinSize_(NSMakeSize(0.0, content_size.height))
            self.text_view.setMaxSize_(NSMakeSize(float("inf"), float("inf")))
            self.text_view.setVerticallyResizable_(True)
            self.text_view.setHorizontallyResizable_(False)
            self.text_view.setAutoresizingMask_(
                NSViewWidthSizable | NSViewHeightSizable
            )
            self.text_view.textContainer().setContainerSize_(
                NSMakeSize(content_size.width, float("inf"))
            )
            self.text_view.textContainer().setWidthTracksTextView_(True)
            self.text_view.setEditable_(False)
            self.text_view.setRichText_(False)
            self.text_view.setFont_(NSFont.systemFontOfSize_(18.0))

            self.scroll_view.setDocumentView_(self.text_view)
            self.left_pane.addView_inGravity_(
                self.scroll_view, NSStackViewGravityCenter
            )

            # Constraint: give the scroll view a minimum height, and make it
            # 20pt narrower than the pane (matching the 10pt edge insets).
            self.scroll_view.setTranslatesAutoresizingMaskIntoConstraints_(False)
            self.scroll_view.heightAnchor().constraintGreaterThanOrEqualToConstant_(
                150.0
            ).setActive_(True)
            self.scroll_view.widthAnchor().constraintEqualToAnchor_constant_(
                self.left_pane.widthAnchor(), -20.0
            ).setActive_(True)

            self.text_view.setString_("Initializing camera...")

            # Capture button
            self.capture_button = NSButton.buttonWithTitle_target_action_(
                "Capture", self, "captureClicked:"
            )
            self.left_pane.addView_inGravity_(
                self.capture_button, NSStackViewGravityBottom
            )

            # Right pane (web view)
            config = WKWebViewConfiguration.alloc().init()
            self.web_view = WKWebView.alloc().initWithFrame_configuration_(
                NSMakeRect(0, 0, 500, 600), config
            )
            self.split_view.addArrangedSubview_(self.web_view)

            self.window.makeKeyAndOrderFront_(None)
            self.window.orderFrontRegardless()

            # Set split view divider position and priority.
            # Priority 251 > 250 (default), so left pane resists resizing.
            self.split_view.setHoldingPriority_forSubviewAtIndex_(251.0, 0)
            self.split_view.setHoldingPriority_forSubviewAtIndex_(249.0, 1)
            self.split_view.setPosition_ofDividerAtIndex_(660.0, 0)

            print("Window ordered front.")

            # State
            self.is_capturing = True
            self.current_frame = None

            # Initialize camera with a delay to allow UI to render first
            self.performSelector_withObject_afterDelay_("initCamera:", None, 0.5)
        except Exception as e:
            traceback.print_exc()
            print(f"Error in applicationDidFinishLaunching: {e}")

    def initCamera_(self, sender):
        """Open the camera and start the repeating frame timer.

        If the camera cannot be opened, the error is logged and shown in the
        text view, and no timer is started.
        """
        print("Initializing camera...")
        self.cap = cv2.VideoCapture(CAMERA_INDEX)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, FRAME_WIDTH)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, FRAME_HEIGHT)

        if not self.cap.isOpened():
            NSLog("Error: Could not open camera")
            self.text_view.setString_("Error: Could not open camera.")
            return

        print("Camera opened.")
        self.text_view.setString_("Ready to capture")

        # Start timer for 30 FPS
        self.timer = NSTimer.scheduledTimerWithTimeInterval_target_selector_userInfo_repeats_(
            1.0 / FRAMES_PER_SECOND, self, "updateFrame:", None, True
        )

    def applicationShouldTerminateAfterLastWindowClosed_(self, sender):
        """Quit the application when its window is closed."""
        return True

    def applicationWillTerminate_(self, notification):
        """Stop the frame timer and release the camera before exiting."""
        timer = getattr(self, "timer", None)
        if timer is not None:
            # Stop frame callbacks before the capture device is released.
            timer.invalidate()
            self.timer = None

        if hasattr(self, 'cap') and self.cap.isOpened():
            self.cap.release()

    def updateFrame_(self, timer):
        """Timer callback: read one frame and display it in the image view.

        Does nothing while a capture is being processed (``is_capturing`` is
        False), when the camera is not open, or when the read fails.
        """
        if not self.is_capturing:
            return
        if not (hasattr(self, 'cap') and self.cap.isOpened()):
            return

        ret, frame = self.cap.read()
        if not ret:
            return

        self.current_frame = frame  # Store BGR frame

        # The frame is handed to NSImage as an in-memory PPM file.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        data = _ppm_from_rgb(rgb_frame)

        # NSData creation from bytes
        ns_data = objc.lookUpClass("NSData").dataWithBytes_length_(data, len(data))
        ns_image = NSImage.alloc().initWithData_(ns_data)
        self.image_view.setImage_(ns_image)

    def captureClicked_(self, sender):
        """Button action: freeze the feed and analyse the current frame.

        The analysis runs on a background thread so the UI stays responsive;
        the button is disabled until a result or an error comes back.
        """
        if not self.is_capturing:
            return

        print("Capture clicked - Processing...")
        self.is_capturing = False
        self.capture_button.setTitle_("Processing...")
        self.capture_button.setEnabled_(False)
        self.text_view.setString_("Analyzing image...")

        # Start background processing
        threading.Thread(target=self.processImage).start()

    def resetScan_(self, sender):
        """Button action: clear the previous result and resume the live feed."""
        print("Resetting...")
        self.text_view.setString_("")
        self.capture_button.setTitle_("Capture")
        self.capture_button.setAction_("captureClicked:")
        self.is_capturing = True

        # Clear web view (optional, or load about:blank)
        url = NSURL.URLWithString_("about:blank")
        request = NSURLRequest.requestWithURL_(url)
        self.web_view.loadRequest_(request)

    def processImage(self):
        """Send the frozen frame to the vision model (runs on a worker thread).

        The outcome is delivered to ``handleResponse:`` or ``handleError:``
        via ``performSelectorOnMainThread``, so those handlers, which touch
        the UI, run on the main thread. No exception escapes this method.
        """
        try:
            frame = self.current_frame
            if frame is None:
                self.performSelectorOnMainThread_withObject_waitUntilDone_(
                    "handleError:", "No frame captured", False
                )
                return

            # Encode image to base64
            encoded_ok, buffer = cv2.imencode('.jpg', frame)
            if not encoded_ok:
                self.performSelectorOnMainThread_withObject_waitUntilDone_(
                    "handleError:", "Could not encode frame as JPEG", False
                )
                return
            base64_image = base64.b64encode(buffer).decode('utf-8')

            client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

            response = client.chat.completions.create(
                model=VISION_MODEL,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": ITEM_PROMPT},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}"
                                },
                            },
                        ],
                    }
                ],
                max_tokens=MAX_RESPONSE_TOKENS,
            )

            result_text = response.choices[0].message.content
            self.performSelectorOnMainThread_withObject_waitUntilDone_(
                "handleResponse:", result_text, False
            )
        except Exception as e:
            # Boundary of the worker thread: report every failure to the UI.
            self.performSelectorOnMainThread_withObject_waitUntilDone_(
                "handleError:", str(e), False
            )

    def handleResponse_(self, result):
        """Show the model's description and load the matching Amazon search.

        Args:
            result: the raw text returned by the model.

        Whatever happens while handling the reply, the button ends up enabled
        and titled "Scan Another", wired to ``resetScan:``.
        """
        print(f"OpenAI Response received: {result}")

        try:
            description, search_term = _parse_item_reply(result)
            self.text_view.setString_(description)

            if search_term:
                amazon_url = _amazon_search_url(search_term)
                print(f"Loading Amazon URL: {amazon_url}")

                url = NSURL.URLWithString_(amazon_url)
                request = NSURLRequest.requestWithURL_(url)
                self.web_view.loadRequest_(request)
            else:
                print("No search term found.")
        except ValueError:
            # Covers json.JSONDecodeError and replies of the wrong shape.
            print("Failed to parse JSON response")
            self.text_view.setString_(f"Error parsing response: {result}")
        except Exception as e:
            print(f"Error handling response: {e}")
            self.text_view.setString_(f"Error: {e}")

        self.capture_button.setTitle_("Scan Another")
        self.capture_button.setEnabled_(True)
        self.capture_button.setAction_("resetScan:")

    def handleError_(self, error_msg):
        """Show *error_msg* and put the UI back into capture mode."""
        print(f"Error: {error_msg}")
        self.text_view.setString_(f"Error: {error_msg}")
        self.capture_button.setTitle_("Error - Try Again")
        self.capture_button.setEnabled_(True)
        # Ensure it resets to capture logic
        self.capture_button.setAction_("captureClicked:")
        self.is_capturing = True


def main():
    """Create the application, install the delegate and run the event loop."""
    app = NSApplication.sharedApplication()
    app.setActivationPolicy_(NSApplicationActivationPolicyRegular)

    # Bound to a local for the whole run so the delegate stays referenced
    # while the event loop is active.
    delegate = ItemSenseApp.alloc().init()
    app.setDelegate_(delegate)

    app.activateIgnoringOtherApps_(True)
    AppHelper.runEventLoop()


if __name__ == "__main__":
    main()