feat: Add Amazon search, split view UI, and refine prompt

2026-01-21 13:52:04 -05:00
parent 34df717a75
commit 3a98051de1
2 changed files with 88 additions and 22 deletions
--- a/main.py
+++ b/main.py
@@ -6,6 +6,7 @@ import objc
 import threading
 import base64
 import os
 import json
 from dotenv import load_dotenv
 from openai import OpenAI
 from AppKit import (
@@ -14,13 +15,15 @@ from AppKit import (
    NSWindowStyleMaskTitled, NSWindowStyleMaskClosable,
    NSWindowStyleMaskResizable, NSWindowStyleMaskMiniaturizable,
    NSTimer, NSMakeSize, NSMakeRect, NSObject, NSLog,
-    NSUserInterfaceLayoutOrientationVertical, NSLayoutAttributeCenterX,
+    NSUserInterfaceLayoutOrientationVertical, NSSplitView,
-    NSLayoutAttributeCenterY, NSLayoutAttributeWidth, NSLayoutAttributeHeight,
+    NSLayoutAttributeCenterX, NSLayoutAttributeCenterY, 
    NSLayoutAttributeWidth, NSLayoutAttributeHeight,
    NSLayoutAttributeTop, NSLayoutAttributeBottom, NSLayoutAttributeLeading,
    NSLayoutAttributeTrailing, NSScrollView, NSTextView,
-    NSApplicationActivationPolicyRegular
+    NSApplicationActivationPolicyRegular, NSFont
 )
-from Foundation import NSObject, NSTimer, NSDate
+from WebKit import WKWebView, WKWebViewConfiguration
 from Foundation import NSObject, NSTimer, NSDate, NSURL, NSURLRequest
 load_dotenv()
@@ -30,8 +33,9 @@ class ItemSenseApp(NSObject):
    def applicationDidFinishLaunching_(self, notification):
        try:
            print("Application did finish launching...")
            # Increased width for split view (1200px width)
            self.window = NSWindow.alloc().initWithContentRect_styleMask_backing_defer_(
-                NSMakeRect(0, 0, 800, 700),
+                NSMakeRect(0, 0, 1200, 600),
                NSWindowStyleMaskTitled | NSWindowStyleMaskClosable | NSWindowStyleMaskResizable | NSWindowStyleMaskMiniaturizable,
                NSBackingStoreBuffered,
                False
@@ -39,17 +43,26 @@ class ItemSenseApp(NSObject):
            self.window.setTitle_("ItemSense")
            self.window.center()
-            # Main content view (StackView for layout)
+            # Main Split View (Horizontal)
-            self.stack_view = NSStackView.alloc().init()
+            self.split_view = NSSplitView.alloc().initWithFrame_(self.window.contentView().bounds())
-            self.stack_view.setOrientation_(NSUserInterfaceLayoutOrientationVertical)
+            self.split_view.setVertical_(True)
-            self.stack_view.setSpacing_(10)
+            self.split_view.setDividerStyle_(1) # 1 == NSSplitViewDividerStyleThick (NSSplitViewDividerStyleThin is 2)
-            self.stack_view.setEdgeInsets_((10, 10, 10, 10))
+            self.window.setContentView_(self.split_view)
-            self.window.setContentView_(self.stack_view)
+
            # Left Pane (Camera + Controls + Description)
            self.left_pane = NSStackView.alloc().init()
            self.left_pane.setOrientation_(NSUserInterfaceLayoutOrientationVertical)
            self.left_pane.setSpacing_(10)
            self.left_pane.setEdgeInsets_((10, 10, 10, 10))
            # Set a minimum width for the left pane so it doesn't disappear
            self.left_pane.setTranslatesAutoresizingMaskIntoConstraints_(False)
            self.left_pane.widthAnchor().constraintGreaterThanOrEqualToConstant_(400.0).setActive_(True)
            self.split_view.addArrangedSubview_(self.left_pane)
            # Image View for Camera Feed
            self.image_view = NSImageView.alloc().init()
            self.image_view.setImageScaling_(0) # NSImageScaleProportionallyDown
-            self.stack_view.addView_inGravity_(self.image_view, 1) # Top gravity
+            self.left_pane.addView_inGravity_(self.image_view, 1) # Top gravity
            # Result View (Scrollable Text)
            self.scroll_view = NSScrollView.alloc().init()
@@ -68,23 +81,36 @@ class ItemSenseApp(NSObject):
            self.text_view.textContainer().setWidthTracksTextView_(True)
            self.text_view.setEditable_(False)
            self.text_view.setRichText_(False)
            self.text_view.setFont_(NSFont.systemFontOfSize_(18.0))
            self.scroll_view.setDocumentView_(self.text_view)
-            self.stack_view.addView_inGravity_(self.scroll_view, 2)
+            self.left_pane.addView_inGravity_(self.scroll_view, 2)
-            # Constraint: Give the scroll view a minimum height so it doesn't collapse
+            # Constraint: Give the scroll view a minimum height
            self.scroll_view.setTranslatesAutoresizingMaskIntoConstraints_(False)
            self.scroll_view.heightAnchor().constraintGreaterThanOrEqualToConstant_(150.0).setActive_(True)
-            self.scroll_view.widthAnchor().constraintEqualToAnchor_constant_(self.window.contentView().widthAnchor(), -20.0).setActive_(True)
+            self.scroll_view.widthAnchor().constraintEqualToAnchor_constant_(self.left_pane.widthAnchor(), -20.0).setActive_(True)
            self.text_view.setString_("Initializing camera...")
            # Capture Button
            self.capture_button = NSButton.buttonWithTitle_target_action_("Capture", self, "captureClicked:")
-            self.stack_view.addView_inGravity_(self.capture_button, 3) # Bottom gravity
+            self.left_pane.addView_inGravity_(self.capture_button, 3) # Bottom gravity
            # Right Pane (WebView)
            config = WKWebViewConfiguration.alloc().init()
            self.web_view = WKWebView.alloc().initWithFrame_configuration_(NSMakeRect(0, 0, 500, 600), config)
            self.split_view.addArrangedSubview_(self.web_view)
            self.window.makeKeyAndOrderFront_(None)
            self.window.orderFrontRegardless()
            # Set Split View Divider Position and Priority
            # Priority 251 > 250 (default), so left pane resists resizing.
            self.split_view.setHoldingPriority_forSubviewAtIndex_(251.0, 0)
            self.split_view.setHoldingPriority_forSubviewAtIndex_(249.0, 1)
            self.split_view.setPosition_ofDividerAtIndex_(660.0, 0)
            print("Window ordered front.")
            # State
@@ -101,12 +127,15 @@ class ItemSenseApp(NSObject):
    def initCamera_(self, sender):
        print("Initializing camera...")
        self.cap = cv2.VideoCapture(0)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
        if not self.cap.isOpened():
            NSLog("Error: Could not open camera")
            self.text_view.setString_("Error: Could not open camera.")
            return
        print("Camera opened.")
        self.text_view.setString_("Ready to capture")
        # Start Timer for 30 FPS
        self.timer = NSTimer.scheduledTimerWithTimeInterval_target_selector_userInfo_repeats_(
            1.0/30.0, self, "updateFrame:", None, True
@@ -133,6 +162,7 @@ class ItemSenseApp(NSObject):
                header = f"P6 {width} {height} 255 ".encode()
                data = header + rgb_frame.tobytes()
                # NSData creation from bytes
                ns_data = objc.lookUpClass("NSData").dataWithBytes_length_(data, len(data))
                ns_image = NSImage.alloc().initWithData_(ns_data)
@@ -156,8 +186,10 @@ class ItemSenseApp(NSObject):
        self.capture_button.setAction_("captureClicked:")
        self.is_capturing = True
-        # Restart timer if it was invalidated? No, we didn't invalidate it, just guarded with is_capturing.
+        # Clear the web view by loading about:blank
-        # So it should resume immediately.
+        url = NSURL.URLWithString_("about:blank")
        request = NSURLRequest.requestWithURL_(url)
        self.web_view.loadRequest_(request)
    def processImage(self):
        try:
@@ -171,13 +203,20 @@ class ItemSenseApp(NSObject):
            client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
            prompt_text = (
                "Identify the main item in the foreground, including the brand name if visible. Ignore the background and any people present. "
                "Return a JSON object with two keys: 'description' (a brief description of the item including brand) "
                "and 'search_term' (keywords to search for this item on Amazon, including brand). "
                "Return ONLY the JSON. Do not wrap in markdown code blocks."
            )
            response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {
                        "role": "user",
                        "content": [
-                            {"type": "text", "text": "What is this item? Please provide a brief description."},
+                            {"type": "text", "text": prompt_text},
                            {
                                "type": "image_url",
                                "image_url": {
@@ -198,7 +237,34 @@ class ItemSenseApp(NSObject):
    def handleResponse_(self, result):
        print(f"OpenAI Response received: {result}")
-        self.text_view.setString_(result)
+        try:
            # Clean up result if it contains markdown formatting
            clean_result = result.replace("```json", "").replace("```", "").strip()
            data = json.loads(clean_result)
            description = data.get("description", "No description found.")
            search_term = data.get("search_term", "")
            self.text_view.setString_(description)
            if search_term:
                search_query = search_term.replace(" ", "+")
                amazon_url = f"https://www.amazon.com/s?k={search_query}"
                print(f"Loading Amazon URL: {amazon_url}")
                url = NSURL.URLWithString_(amazon_url)
                request = NSURLRequest.requestWithURL_(url)
                self.web_view.loadRequest_(request)
            else:
                 print("No search term found.")
        except json.JSONDecodeError:
            print("Failed to parse JSON response")
            self.text_view.setString_(f"Error parsing response: {result}")
        except Exception as e:
            print(f"Error handling response: {e}")
            self.text_view.setString_(f"Error: {e}")
        self.capture_button.setTitle_("Scan Another")
        self.capture_button.setEnabled_(True)
        self.capture_button.setAction_("resetScan:")
@@ -217,6 +283,5 @@ if __name__ == "__main__":
    delegate = ItemSenseApp.alloc().init()
    app.setDelegate_(delegate)
    # No delay is added before activating: the activation policy is expected to take effect immediately.
    app.activateIgnoringOtherApps_(True)
    AppHelper.runEventLoop()
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ opencv-python
 pillow
 openai
 python-dotenv
 pyobjc-framework-WebKit