diff --git a/main.py b/main.py index 922a15d..b9441f3 100644 --- a/main.py +++ b/main.py @@ -6,6 +6,7 @@ import objc import threading import base64 import os +import json from dotenv import load_dotenv from openai import OpenAI from AppKit import ( @@ -14,13 +15,15 @@ from AppKit import ( NSWindowStyleMaskTitled, NSWindowStyleMaskClosable, NSWindowStyleMaskResizable, NSWindowStyleMaskMiniaturizable, NSTimer, NSMakeSize, NSMakeRect, NSObject, NSLog, - NSUserInterfaceLayoutOrientationVertical, NSLayoutAttributeCenterX, - NSLayoutAttributeCenterY, NSLayoutAttributeWidth, NSLayoutAttributeHeight, + NSUserInterfaceLayoutOrientationVertical, NSSplitView, + NSLayoutAttributeCenterX, NSLayoutAttributeCenterY, + NSLayoutAttributeWidth, NSLayoutAttributeHeight, NSLayoutAttributeTop, NSLayoutAttributeBottom, NSLayoutAttributeLeading, NSLayoutAttributeTrailing, NSScrollView, NSTextView, - NSApplicationActivationPolicyRegular + NSApplicationActivationPolicyRegular, NSFont ) -from Foundation import NSObject, NSTimer, NSDate +from WebKit import WKWebView, WKWebViewConfiguration +from Foundation import NSObject, NSTimer, NSDate, NSURL, NSURLRequest load_dotenv() @@ -30,8 +33,9 @@ class ItemSenseApp(NSObject): def applicationDidFinishLaunching_(self, notification): try: print("Application did finish launching...") + # Increased width for split view (1200px width) self.window = NSWindow.alloc().initWithContentRect_styleMask_backing_defer_( - NSMakeRect(0, 0, 800, 700), + NSMakeRect(0, 0, 1200, 600), NSWindowStyleMaskTitled | NSWindowStyleMaskClosable | NSWindowStyleMaskResizable | NSWindowStyleMaskMiniaturizable, NSBackingStoreBuffered, False @@ -39,18 +43,27 @@ class ItemSenseApp(NSObject): self.window.setTitle_("ItemSense") self.window.center() - # Main content view (StackView for layout) - self.stack_view = NSStackView.alloc().init() - self.stack_view.setOrientation_(NSUserInterfaceLayoutOrientationVertical) - self.stack_view.setSpacing_(10) - self.stack_view.setEdgeInsets_((10, 10, 10, 10)) - self.window.setContentView_(self.stack_view) + # Main Split View (Horizontal) + self.split_view = NSSplitView.alloc().initWithFrame_(self.window.contentView().bounds()) + self.split_view.setVertical_(True) + self.split_view.setDividerStyle_(1) # NSSplitViewDividerStyleThin + self.window.setContentView_(self.split_view) + + # Left Pane (Camera + Controls + Description) + self.left_pane = NSStackView.alloc().init() + self.left_pane.setOrientation_(NSUserInterfaceLayoutOrientationVertical) + self.left_pane.setSpacing_(10) + self.left_pane.setEdgeInsets_((10, 10, 10, 10)) + # Set a minimum width for the left pane so it doesn't disappear + self.left_pane.setTranslatesAutoresizingMaskIntoConstraints_(False) + self.left_pane.widthAnchor().constraintGreaterThanOrEqualToConstant_(400.0).setActive_(True) + self.split_view.addArrangedSubview_(self.left_pane) # Image View for Camera Feed self.image_view = NSImageView.alloc().init() self.image_view.setImageScaling_(0) # NSImageScaleProportionallyDown - self.stack_view.addView_inGravity_(self.image_view, 1) # Top gravity - + self.left_pane.addView_inGravity_(self.image_view, 1) # Top gravity + # Result View (Scrollable Text) self.scroll_view = NSScrollView.alloc().init() self.scroll_view.setHasVerticalScroller_(True) @@ -68,23 +81,36 @@ class ItemSenseApp(NSObject): self.text_view.textContainer().setWidthTracksTextView_(True) self.text_view.setEditable_(False) self.text_view.setRichText_(False) + self.text_view.setFont_(NSFont.systemFontOfSize_(18.0)) self.scroll_view.setDocumentView_(self.text_view) - self.stack_view.addView_inGravity_(self.scroll_view, 2) + self.left_pane.addView_inGravity_(self.scroll_view, 2) - # Constraint: Give the scroll view a minimum height so it doesn't collapse + # Constraint: Give the scroll view a minimum height self.scroll_view.setTranslatesAutoresizingMaskIntoConstraints_(False) self.scroll_view.heightAnchor().constraintGreaterThanOrEqualToConstant_(150.0).setActive_(True) - self.scroll_view.widthAnchor().constraintEqualToAnchor_constant_(self.window.contentView().widthAnchor(), -20.0).setActive_(True) + self.scroll_view.widthAnchor().constraintEqualToAnchor_constant_(self.left_pane.widthAnchor(), -20.0).setActive_(True) self.text_view.setString_("Initializing camera...") # Capture Button self.capture_button = NSButton.buttonWithTitle_target_action_("Capture", self, "captureClicked:") - self.stack_view.addView_inGravity_(self.capture_button, 3) # Bottom gravity + self.left_pane.addView_inGravity_(self.capture_button, 3) # Bottom gravity + # Right Pane (WebView) + config = WKWebViewConfiguration.alloc().init() + self.web_view = WKWebView.alloc().initWithFrame_configuration_(NSMakeRect(0, 0, 500, 600), config) + self.split_view.addArrangedSubview_(self.web_view) + self.window.makeKeyAndOrderFront_(None) self.window.orderFrontRegardless() + + # Set Split View Divider Position and Priority + # Priority 251 > 250 (default), so left pane resists resizing. + self.split_view.setHoldingPriority_forSubviewAtIndex_(251.0, 0) + self.split_view.setHoldingPriority_forSubviewAtIndex_(249.0, 1) + self.split_view.setPosition_ofDividerAtIndex_(660.0, 0) + print("Window ordered front.") # State @@ -101,12 +127,15 @@ class ItemSenseApp(NSObject): def initCamera_(self, sender): print("Initializing camera...") self.cap = cv2.VideoCapture(0) + self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) + self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480) if not self.cap.isOpened(): NSLog("Error: Could not open camera") self.text_view.setString_("Error: Could not open camera.") return print("Camera opened.") + self.text_view.setString_("Ready to capture") # Start Timer for 30 FPS self.timer = NSTimer.scheduledTimerWithTimeInterval_target_selector_userInfo_repeats_( 1.0/30.0, self, "updateFrame:", None, True @@ -133,6 +162,7 @@ class ItemSenseApp(NSObject): header = f"P6 {width} {height} 255 ".encode() data = header + rgb_frame.tobytes() + # NSData creation from bytes ns_data = objc.lookUpClass("NSData").dataWithBytes_length_(data, len(data)) ns_image = NSImage.alloc().initWithData_(ns_data) @@ -156,8 +186,10 @@ class ItemSenseApp(NSObject): self.capture_button.setAction_("captureClicked:") self.is_capturing = True - # Restart timer if it was invalidated? No, we didn't invalidate it, just guarded with is_capturing. - # So it should resume immediately. + # Clear Web View (optional, or load about:blank) + url = NSURL.URLWithString_("about:blank") + request = NSURLRequest.requestWithURL_(url) + self.web_view.loadRequest_(request) def processImage(self): try: @@ -171,13 +203,20 @@ class ItemSenseApp(NSObject): client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + prompt_text = ( + "Identify the main item in the foreground, including the brand name if visible. Ignore the background and any people present. " + "Return a JSON object with two keys: 'description' (a brief description of the item including brand) " + "and 'search_term' (keywords to search for this item on Amazon, including brand). " + "Return ONLY the JSON. Do not wrap in markdown code blocks." + ) + response = client.chat.completions.create( model="gpt-4o-mini", messages=[ { "role": "user", "content": [ - {"type": "text", "text": "What is this item? Please provide a brief description."}, + {"type": "text", "text": prompt_text}, { "type": "image_url", "image_url": { @@ -198,7 +237,34 @@ class ItemSenseApp(NSObject): def handleResponse_(self, result): print(f"OpenAI Response received: {result}") - self.text_view.setString_(result) + try: + # Clean up result if it contains markdown formatting + clean_result = result.replace("```json", "").replace("```", "").strip() + data = json.loads(clean_result) + + description = data.get("description", "No description found.") + search_term = data.get("search_term", "") + + self.text_view.setString_(description) + + if search_term: + search_query = search_term.replace(" ", "+") + amazon_url = f"https://www.amazon.com/s?k={search_query}" + print(f"Loading Amazon URL: {amazon_url}") + + url = NSURL.URLWithString_(amazon_url) + request = NSURLRequest.requestWithURL_(url) + self.web_view.loadRequest_(request) + else: + print("No search term found.") + + except json.JSONDecodeError: + print("Failed to parse JSON response") + self.text_view.setString_(f"Error parsing response: {result}") + except Exception as e: + print(f"Error handling response: {e}") + self.text_view.setString_(f"Error: {e}") + self.capture_button.setTitle_("Scan Another") self.capture_button.setEnabled_(True) self.capture_button.setAction_("resetScan:") @@ -217,6 +283,5 @@ if __name__ == "__main__": delegate = ItemSenseApp.alloc().init() app.setDelegate_(delegate) - # Allow time for policy to take effect? Usually acceptable immediately. app.activateIgnoringOtherApps_(True) AppHelper.runEventLoop() diff --git a/requirements.txt b/requirements.txt index 5ebdf86..5bf0a96 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ opencv-python pillow openai python-dotenv +pyobjc-framework-WebKit