
This tutorial walks you through building a Python GUI app using Tkinter where users can paste an image URL and get a description using Google’s Gemini AI model.
import tkinter as tk
from tkinter import messagebox, scrolledtext
import requests
from PIL import Image
import io
import os
import google.generativeai as genai
from dotenv import load_dotenv
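More about the Python Imaging Library (PIL) here. The script also imports requests and PIL (Pillow), so install those as well if they are missing. Install the Gemini SDK and dotenv support with:
pip install google-generativeai python-dotenv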
# Pull variables from a local .env file into the process environment,
# then read the Gemini key from there so it never appears in the source.
load_dotenv()
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Hand the key to the Gemini SDK
genai.configure(api_key=GOOGLE_API_KEY)

# Model handle that describe_image() sends its prompt to
model = genai.GenerativeModel(model_name="gemini-1.5-flash")
def describe_image():
    """Describe the image at the URL typed into ``url_entry``.

    Reads the URL from the module-level ``url_entry`` widget, downloads the
    image, re-encodes it as JPEG, sends it to ``model`` together with a text
    prompt, and replaces the contents of ``output_box`` with the reply.

    Returns:
        None. An empty URL shows a warning dialog. Any failure while
        downloading, decoding, converting, or calling the model is caught
        and shown in an error dialog instead of being raised.
    """
    image_url = url_entry.get().strip()
    if not image_url:
        messagebox.showwarning("Input Error", "Please enter an image URL.")
        return

    try:
        # Download the image. Without a timeout, requests waits indefinitely
        # on a host that never answers.
        response = requests.get(image_url, timeout=15)
        if response.status_code != 200:
            raise Exception("Failed to fetch image.")

        img = Image.open(io.BytesIO(response.content))

        # JPEG cannot store alpha or palette data, so sources in modes such
        # as RGBA, LA or P (typical for PNG/GIF) are converted before saving.
        if img.mode not in ("RGB", "L"):
            img = img.convert("RGB")

        # Convert image to JPEG bytes
        img_byte_arr = io.BytesIO()
        img.save(img_byte_arr, format='JPEG')
        img_bytes = img_byte_arr.getvalue()

        # The image is passed to Gemini as a dict of MIME type plus raw bytes
        gemini_image = {
            "mime_type": "image/jpeg",
            "data": img_bytes,
        }

        prompt = ["Describe the image", gemini_image]
        result = model.generate_content(prompt)

        # Replace any previous description with the new one
        output_box.delete('1.0', tk.END)
        output_box.insert(tk.END, result.text)
    except Exception as e:
        # GUI callback boundary: report the failure to the user
        messagebox.showerror("Error", f"An error occurred:\n{e}")
# Create and size the main window
root = tk.Tk()
root.title("Image Description with Gemini")
root.geometry("600x400")

# Prompt label above the URL field
url_label = tk.Label(root, text="Enter Image URL:")
url_label.pack(pady=10)

# Single-line entry that describe_image() reads the URL from
url_entry = tk.Entry(root, width=70)
url_entry.pack(pady=5)

# Clicking the button runs describe_image
describe_button = tk.Button(
    root,
    text="Describe Image",
    command=describe_image,
    bg="blue",
    fg="white",
)
describe_button.pack(pady=10)

# Heading for the result area
result_label = tk.Label(root, text="Gemini Description:")
result_label.pack(pady=5)

# Scrollable, word-wrapped text area that receives Gemini's response
output_box = scrolledtext.ScrolledText(root, wrap=tk.WORD, height=10, width=70)
output_box.pack(padx=10, pady=10)

# Start the Tk event loop
root.mainloop()
URL: https://upload.wikimedia.org/wikipedia/commons/thumb/5/59/Dog_in_field.jpg/800px-Dog_in_field.jpg
Gemini: "This is an image of a happy dog standing on green grass in an open field, looking toward the camera."
Use a direct image URL that points to a .jpg file. Keep your API key in a .env file. Try gemini-1.5-pro for richer responses.
import tkinter as tk
from tkinter import messagebox, scrolledtext
import requests
from PIL import Image
import io
import os
import google.generativeai as genai
from dotenv import load_dotenv
# Read the .env file, then fetch the Gemini key from the environment
load_dotenv()
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Register the key with the Gemini SDK
genai.configure(api_key=GOOGLE_API_KEY)

# Model object used by describe_image()
model = genai.GenerativeModel(model_name="gemini-1.5-flash")
def describe_image():
    """Describe the image at the URL typed into ``url_entry``.

    Reads the URL from the module-level ``url_entry`` widget, downloads the
    image, re-encodes it as JPEG, sends it to ``model`` together with a text
    prompt, and replaces the contents of ``output_box`` with the reply.

    Returns:
        None. An empty URL shows a warning dialog. Any failure while
        downloading, decoding, converting, or calling the model is caught
        and shown in an error dialog instead of being raised.
    """
    image_url = url_entry.get().strip()
    if not image_url:
        messagebox.showwarning("Input Error", "Enter an image URL.")
        return

    try:
        # Download image. Without a timeout, requests waits indefinitely on
        # a host that never answers.
        response = requests.get(image_url, timeout=15)
        if response.status_code != 200:
            raise Exception("Failed to fetch image.")

        img = Image.open(io.BytesIO(response.content))

        # JPEG cannot store alpha or palette data, so sources in modes such
        # as RGBA, LA or P (typical for PNG/GIF) are converted before saving.
        if img.mode not in ("RGB", "L"):
            img = img.convert("RGB")

        # Convert image to bytes in JPEG format
        img_byte_arr = io.BytesIO()
        img.save(img_byte_arr, format='JPEG')
        img_bytes = img_byte_arr.getvalue()

        # The image part is passed as a dict of MIME type plus raw bytes
        gemini_image = {
            "mime_type": "image/jpeg",
            "data": img_bytes,
        }

        # Send to Gemini
        prompt = ["Describe the image", gemini_image]
        result = model.generate_content(prompt)

        # Replace any previous description with the new one
        output_box.delete('1.0', tk.END)
        output_box.insert(tk.END, result.text)
    except Exception as e:
        # GUI callback boundary: report the failure to the user
        messagebox.showerror("Error", f"An error occurred:\n{e}")
# GUI setup: main window
root = tk.Tk()
root.title("Image Description with Gemini")
root.geometry("600x400")

# URL prompt and entry field
url_label = tk.Label(root, text="Enter Image URL:")
url_label.pack(pady=10)
url_entry = tk.Entry(root, width=70)
url_entry.pack(pady=5)

# Button wired to describe_image
button_options = {"text": "Describe Image", "command": describe_image, "bg": "blue", "fg": "white"}
describe_button = tk.Button(root, **button_options)
describe_button.pack(pady=10)

# Result heading and scrollable output area
result_label = tk.Label(root, text="Gemini Description:")
result_label.pack(pady=5)
output_box = scrolledtext.ScrolledText(root, wrap=tk.WORD, height=10, width=70)
output_box.pack(padx=10, pady=10)

# Enter the Tk event loop
root.mainloop()
The application uses a user-provided image URL, fetches the image using Python’s requests module, and sends it to Google's Gemini Generative AI model for visual content analysis and description.
Every image is re-encoded as JPEG (image/jpeg) before it is sent to the Gemini API, so JPEG sources work most reliably. Other formats also work when PIL can open them and convert them to JPEG.
If the image cannot be retrieved (e.g., due to an invalid URL or a failed server response), the application shows an error using Tkinter’s messagebox. Always ensure the URL is accessible and publicly hosted.
This version supports only URLs. However, with minor code changes, you can allow file selection using tkinter.filedialog and load images from your local system.
Author
🎥 Join me live on YouTube.
Passionate about coding and teaching, I publish practical tutorials on PHP, Python, JavaScript, SQL, and web development. My goal is to make learning simple, engaging, and project‑oriented with real examples and source code.