feat: Add Amazon search, split view UI, and refine prompt
This commit is contained in:
107
main.py
107
main.py
@@ -6,6 +6,7 @@ import objc
|
|||||||
import threading
|
import threading
|
||||||
import base64
|
import base64
|
||||||
import os
|
import os
|
||||||
|
import json
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
from AppKit import (
|
from AppKit import (
|
||||||
@@ -14,13 +15,15 @@ from AppKit import (
|
|||||||
NSWindowStyleMaskTitled, NSWindowStyleMaskClosable,
|
NSWindowStyleMaskTitled, NSWindowStyleMaskClosable,
|
||||||
NSWindowStyleMaskResizable, NSWindowStyleMaskMiniaturizable,
|
NSWindowStyleMaskResizable, NSWindowStyleMaskMiniaturizable,
|
||||||
NSTimer, NSMakeSize, NSMakeRect, NSObject, NSLog,
|
NSTimer, NSMakeSize, NSMakeRect, NSObject, NSLog,
|
||||||
NSUserInterfaceLayoutOrientationVertical, NSLayoutAttributeCenterX,
|
NSUserInterfaceLayoutOrientationVertical, NSSplitView,
|
||||||
NSLayoutAttributeCenterY, NSLayoutAttributeWidth, NSLayoutAttributeHeight,
|
NSLayoutAttributeCenterX, NSLayoutAttributeCenterY,
|
||||||
|
NSLayoutAttributeWidth, NSLayoutAttributeHeight,
|
||||||
NSLayoutAttributeTop, NSLayoutAttributeBottom, NSLayoutAttributeLeading,
|
NSLayoutAttributeTop, NSLayoutAttributeBottom, NSLayoutAttributeLeading,
|
||||||
NSLayoutAttributeTrailing, NSScrollView, NSTextView,
|
NSLayoutAttributeTrailing, NSScrollView, NSTextView,
|
||||||
NSApplicationActivationPolicyRegular
|
NSApplicationActivationPolicyRegular, NSFont
|
||||||
)
|
)
|
||||||
from Foundation import NSObject, NSTimer, NSDate
|
from WebKit import WKWebView, WKWebViewConfiguration
|
||||||
|
from Foundation import NSObject, NSTimer, NSDate, NSURL, NSURLRequest
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
@@ -30,8 +33,9 @@ class ItemSenseApp(NSObject):
|
|||||||
def applicationDidFinishLaunching_(self, notification):
|
def applicationDidFinishLaunching_(self, notification):
|
||||||
try:
|
try:
|
||||||
print("Application did finish launching...")
|
print("Application did finish launching...")
|
||||||
|
# Increased width for split view (1200px width)
|
||||||
self.window = NSWindow.alloc().initWithContentRect_styleMask_backing_defer_(
|
self.window = NSWindow.alloc().initWithContentRect_styleMask_backing_defer_(
|
||||||
NSMakeRect(0, 0, 800, 700),
|
NSMakeRect(0, 0, 1200, 600),
|
||||||
NSWindowStyleMaskTitled | NSWindowStyleMaskClosable | NSWindowStyleMaskResizable | NSWindowStyleMaskMiniaturizable,
|
NSWindowStyleMaskTitled | NSWindowStyleMaskClosable | NSWindowStyleMaskResizable | NSWindowStyleMaskMiniaturizable,
|
||||||
NSBackingStoreBuffered,
|
NSBackingStoreBuffered,
|
||||||
False
|
False
|
||||||
@@ -39,17 +43,26 @@ class ItemSenseApp(NSObject):
|
|||||||
self.window.setTitle_("ItemSense")
|
self.window.setTitle_("ItemSense")
|
||||||
self.window.center()
|
self.window.center()
|
||||||
|
|
||||||
# Main content view (StackView for layout)
|
# Main Split View (Horizontal)
|
||||||
self.stack_view = NSStackView.alloc().init()
|
self.split_view = NSSplitView.alloc().initWithFrame_(self.window.contentView().bounds())
|
||||||
self.stack_view.setOrientation_(NSUserInterfaceLayoutOrientationVertical)
|
self.split_view.setVertical_(True)
|
||||||
self.stack_view.setSpacing_(10)
|
self.split_view.setDividerStyle_(1) # NSSplitViewDividerStyleThin
|
||||||
self.stack_view.setEdgeInsets_((10, 10, 10, 10))
|
self.window.setContentView_(self.split_view)
|
||||||
self.window.setContentView_(self.stack_view)
|
|
||||||
|
# Left Pane (Camera + Controls + Description)
|
||||||
|
self.left_pane = NSStackView.alloc().init()
|
||||||
|
self.left_pane.setOrientation_(NSUserInterfaceLayoutOrientationVertical)
|
||||||
|
self.left_pane.setSpacing_(10)
|
||||||
|
self.left_pane.setEdgeInsets_((10, 10, 10, 10))
|
||||||
|
# Set a minimum width for the left pane so it doesn't disappear
|
||||||
|
self.left_pane.setTranslatesAutoresizingMaskIntoConstraints_(False)
|
||||||
|
self.left_pane.widthAnchor().constraintGreaterThanOrEqualToConstant_(400.0).setActive_(True)
|
||||||
|
self.split_view.addArrangedSubview_(self.left_pane)
|
||||||
|
|
||||||
# Image View for Camera Feed
|
# Image View for Camera Feed
|
||||||
self.image_view = NSImageView.alloc().init()
|
self.image_view = NSImageView.alloc().init()
|
||||||
self.image_view.setImageScaling_(0) # NSImageScaleProportionallyDown
|
self.image_view.setImageScaling_(0) # NSImageScaleProportionallyDown
|
||||||
self.stack_view.addView_inGravity_(self.image_view, 1) # Top gravity
|
self.left_pane.addView_inGravity_(self.image_view, 1) # Top gravity
|
||||||
|
|
||||||
# Result View (Scrollable Text)
|
# Result View (Scrollable Text)
|
||||||
self.scroll_view = NSScrollView.alloc().init()
|
self.scroll_view = NSScrollView.alloc().init()
|
||||||
@@ -68,23 +81,36 @@ class ItemSenseApp(NSObject):
|
|||||||
self.text_view.textContainer().setWidthTracksTextView_(True)
|
self.text_view.textContainer().setWidthTracksTextView_(True)
|
||||||
self.text_view.setEditable_(False)
|
self.text_view.setEditable_(False)
|
||||||
self.text_view.setRichText_(False)
|
self.text_view.setRichText_(False)
|
||||||
|
self.text_view.setFont_(NSFont.systemFontOfSize_(18.0))
|
||||||
|
|
||||||
self.scroll_view.setDocumentView_(self.text_view)
|
self.scroll_view.setDocumentView_(self.text_view)
|
||||||
self.stack_view.addView_inGravity_(self.scroll_view, 2)
|
self.left_pane.addView_inGravity_(self.scroll_view, 2)
|
||||||
|
|
||||||
# Constraint: Give the scroll view a minimum height so it doesn't collapse
|
# Constraint: Give the scroll view a minimum height
|
||||||
self.scroll_view.setTranslatesAutoresizingMaskIntoConstraints_(False)
|
self.scroll_view.setTranslatesAutoresizingMaskIntoConstraints_(False)
|
||||||
self.scroll_view.heightAnchor().constraintGreaterThanOrEqualToConstant_(150.0).setActive_(True)
|
self.scroll_view.heightAnchor().constraintGreaterThanOrEqualToConstant_(150.0).setActive_(True)
|
||||||
self.scroll_view.widthAnchor().constraintEqualToAnchor_constant_(self.window.contentView().widthAnchor(), -20.0).setActive_(True)
|
self.scroll_view.widthAnchor().constraintEqualToAnchor_constant_(self.left_pane.widthAnchor(), -20.0).setActive_(True)
|
||||||
|
|
||||||
self.text_view.setString_("Initializing camera...")
|
self.text_view.setString_("Initializing camera...")
|
||||||
|
|
||||||
# Capture Button
|
# Capture Button
|
||||||
self.capture_button = NSButton.buttonWithTitle_target_action_("Capture", self, "captureClicked:")
|
self.capture_button = NSButton.buttonWithTitle_target_action_("Capture", self, "captureClicked:")
|
||||||
self.stack_view.addView_inGravity_(self.capture_button, 3) # Bottom gravity
|
self.left_pane.addView_inGravity_(self.capture_button, 3) # Bottom gravity
|
||||||
|
|
||||||
|
# Right Pane (WebView)
|
||||||
|
config = WKWebViewConfiguration.alloc().init()
|
||||||
|
self.web_view = WKWebView.alloc().initWithFrame_configuration_(NSMakeRect(0, 0, 500, 600), config)
|
||||||
|
self.split_view.addArrangedSubview_(self.web_view)
|
||||||
|
|
||||||
self.window.makeKeyAndOrderFront_(None)
|
self.window.makeKeyAndOrderFront_(None)
|
||||||
self.window.orderFrontRegardless()
|
self.window.orderFrontRegardless()
|
||||||
|
|
||||||
|
# Set Split View Divider Position and Priority
|
||||||
|
# Priority 251 > 250 (default), so left pane resists resizing.
|
||||||
|
self.split_view.setHoldingPriority_forSubviewAtIndex_(251.0, 0)
|
||||||
|
self.split_view.setHoldingPriority_forSubviewAtIndex_(249.0, 1)
|
||||||
|
self.split_view.setPosition_ofDividerAtIndex_(660.0, 0)
|
||||||
|
|
||||||
print("Window ordered front.")
|
print("Window ordered front.")
|
||||||
|
|
||||||
# State
|
# State
|
||||||
@@ -101,12 +127,15 @@ class ItemSenseApp(NSObject):
|
|||||||
def initCamera_(self, sender):
|
def initCamera_(self, sender):
|
||||||
print("Initializing camera...")
|
print("Initializing camera...")
|
||||||
self.cap = cv2.VideoCapture(0)
|
self.cap = cv2.VideoCapture(0)
|
||||||
|
self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
|
||||||
|
self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
|
||||||
if not self.cap.isOpened():
|
if not self.cap.isOpened():
|
||||||
NSLog("Error: Could not open camera")
|
NSLog("Error: Could not open camera")
|
||||||
self.text_view.setString_("Error: Could not open camera.")
|
self.text_view.setString_("Error: Could not open camera.")
|
||||||
return
|
return
|
||||||
|
|
||||||
print("Camera opened.")
|
print("Camera opened.")
|
||||||
|
self.text_view.setString_("Ready to capture")
|
||||||
# Start Timer for 30 FPS
|
# Start Timer for 30 FPS
|
||||||
self.timer = NSTimer.scheduledTimerWithTimeInterval_target_selector_userInfo_repeats_(
|
self.timer = NSTimer.scheduledTimerWithTimeInterval_target_selector_userInfo_repeats_(
|
||||||
1.0/30.0, self, "updateFrame:", None, True
|
1.0/30.0, self, "updateFrame:", None, True
|
||||||
@@ -133,6 +162,7 @@ class ItemSenseApp(NSObject):
|
|||||||
|
|
||||||
header = f"P6 {width} {height} 255 ".encode()
|
header = f"P6 {width} {height} 255 ".encode()
|
||||||
data = header + rgb_frame.tobytes()
|
data = header + rgb_frame.tobytes()
|
||||||
|
# NSData creation from bytes
|
||||||
ns_data = objc.lookUpClass("NSData").dataWithBytes_length_(data, len(data))
|
ns_data = objc.lookUpClass("NSData").dataWithBytes_length_(data, len(data))
|
||||||
ns_image = NSImage.alloc().initWithData_(ns_data)
|
ns_image = NSImage.alloc().initWithData_(ns_data)
|
||||||
|
|
||||||
@@ -156,8 +186,10 @@ class ItemSenseApp(NSObject):
|
|||||||
self.capture_button.setAction_("captureClicked:")
|
self.capture_button.setAction_("captureClicked:")
|
||||||
self.is_capturing = True
|
self.is_capturing = True
|
||||||
|
|
||||||
# Restart timer if it was invalidated? No, we didn't invalidate it, just guarded with is_capturing.
|
# Clear Web View (optional, or load about:blank)
|
||||||
# So it should resume immediately.
|
url = NSURL.URLWithString_("about:blank")
|
||||||
|
request = NSURLRequest.requestWithURL_(url)
|
||||||
|
self.web_view.loadRequest_(request)
|
||||||
|
|
||||||
def processImage(self):
|
def processImage(self):
|
||||||
try:
|
try:
|
||||||
@@ -171,13 +203,20 @@ class ItemSenseApp(NSObject):
|
|||||||
|
|
||||||
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
|
|
||||||
|
prompt_text = (
|
||||||
|
"Identify the main item in the foreground, including the brand name if visible. Ignore the background and any people present. "
|
||||||
|
"Return a JSON object with two keys: 'description' (a brief description of the item including brand) "
|
||||||
|
"and 'search_term' (keywords to search for this item on Amazon, including brand). "
|
||||||
|
"Return ONLY the JSON. Do not wrap in markdown code blocks."
|
||||||
|
)
|
||||||
|
|
||||||
response = client.chat.completions.create(
|
response = client.chat.completions.create(
|
||||||
model="gpt-4o-mini",
|
model="gpt-4o-mini",
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": [
|
"content": [
|
||||||
{"type": "text", "text": "What is this item? Please provide a brief description."},
|
{"type": "text", "text": prompt_text},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "image_url",
|
||||||
"image_url": {
|
"image_url": {
|
||||||
@@ -198,7 +237,34 @@ class ItemSenseApp(NSObject):
|
|||||||
|
|
||||||
def handleResponse_(self, result):
|
def handleResponse_(self, result):
|
||||||
print(f"OpenAI Response received: {result}")
|
print(f"OpenAI Response received: {result}")
|
||||||
self.text_view.setString_(result)
|
try:
|
||||||
|
# Clean up result if it contains markdown formatting
|
||||||
|
clean_result = result.replace("```json", "").replace("```", "").strip()
|
||||||
|
data = json.loads(clean_result)
|
||||||
|
|
||||||
|
description = data.get("description", "No description found.")
|
||||||
|
search_term = data.get("search_term", "")
|
||||||
|
|
||||||
|
self.text_view.setString_(description)
|
||||||
|
|
||||||
|
if search_term:
|
||||||
|
search_query = search_term.replace(" ", "+")
|
||||||
|
amazon_url = f"https://www.amazon.com/s?k={search_query}"
|
||||||
|
print(f"Loading Amazon URL: {amazon_url}")
|
||||||
|
|
||||||
|
url = NSURL.URLWithString_(amazon_url)
|
||||||
|
request = NSURLRequest.requestWithURL_(url)
|
||||||
|
self.web_view.loadRequest_(request)
|
||||||
|
else:
|
||||||
|
print("No search term found.")
|
||||||
|
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
print("Failed to parse JSON response")
|
||||||
|
self.text_view.setString_(f"Error parsing response: {result}")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error handling response: {e}")
|
||||||
|
self.text_view.setString_(f"Error: {e}")
|
||||||
|
|
||||||
self.capture_button.setTitle_("Scan Another")
|
self.capture_button.setTitle_("Scan Another")
|
||||||
self.capture_button.setEnabled_(True)
|
self.capture_button.setEnabled_(True)
|
||||||
self.capture_button.setAction_("resetScan:")
|
self.capture_button.setAction_("resetScan:")
|
||||||
@@ -217,6 +283,5 @@ if __name__ == "__main__":
|
|||||||
delegate = ItemSenseApp.alloc().init()
|
delegate = ItemSenseApp.alloc().init()
|
||||||
app.setDelegate_(delegate)
|
app.setDelegate_(delegate)
|
||||||
|
|
||||||
# Allow time for policy to take effect? Usually acceptable immediately.
|
|
||||||
app.activateIgnoringOtherApps_(True)
|
app.activateIgnoringOtherApps_(True)
|
||||||
AppHelper.runEventLoop()
|
AppHelper.runEventLoop()
|
||||||
|
|||||||
@@ -3,3 +3,4 @@ opencv-python
|
|||||||
pillow
|
pillow
|
||||||
openai
|
openai
|
||||||
python-dotenv
|
python-dotenv
|
||||||
|
pyobjc-framework-WebKit
|
||||||
|
|||||||
Reference in New Issue
Block a user