Spaces:

kalhar
/

ComputerVision

Sleeping

ComputerVision / app.py

Kalhar.Pandya

cache

1b4e3b5 11 months ago

16.8 kB

	from huggingface_hub import hf_hub_download, login
	import cv2
	import numpy as np
	import pickle # for loading tile features and raw images
	from skimage.feature import local_binary_pattern, graycomatrix, graycoprops, hog
	from skimage.metrics import structural_similarity as ssim, peak_signal_noise_ratio as psnr
	from PIL import Image
	import gradio as gr
	import time
	import os

	# ---------------------------------------------------------------------
	# Feature Extraction Functions
	# ---------------------------------------------------------------------
	def get_average_color(image):
	    """Return the mean of ``image`` over its first two axes (one value per channel).

	    The callers in this file pass BGR images, so for an H x W x 3 input the
	    result is a length-3 vector ordered (B, G, R).
	    """
	    pixels = np.asanyarray(image)
	    return pixels.mean(axis=(0, 1))

	def get_color_histogram(image, bins=(8, 8, 8)):
	    """Return a flattened, normalized 3-D histogram of ``image`` in HSV space.

	    ``image`` is converted from BGR to HSV, binned over all three channels
	    with ``bins`` bins per channel (upper range bound 180 for the first
	    channel and 256 for the other two), normalized in place with
	    ``cv2.normalize``'s default settings, and flattened to 1-D.
	    """
	    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
	    channel_ids = [0, 1, 2]
	    channel_ranges = [0, 180, 0, 256, 0, 256]
	    histogram = cv2.calcHist([hsv_image], channel_ids, None, bins, channel_ranges)
	    # Source and destination are the same array, so this normalizes in place.
	    cv2.normalize(histogram, histogram)
	    return histogram.flatten()

	def get_lbp_histogram(image, numPoints=24, radius=8, bins=59):
	    """Return a normalized histogram of uniform Local Binary Pattern codes.

	    The BGR ``image`` is converted to grayscale, LBP codes are computed with
	    ``numPoints`` samples at distance ``radius`` (method "uniform"), and the
	    codes are counted into ``bins`` equal-width bins over ``[0, bins)``.
	    The counts are divided by their sum (plus a small epsilon so an empty
	    histogram does not divide by zero).
	    """
	    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    codes = local_binary_pattern(gray_image, numPoints, radius, method="uniform")
	    counts, _edges = np.histogram(codes.ravel(), bins=bins, range=(0, bins))
	    counts = counts.astype("float")
	    total = counts.sum() + 1e-7
	    return counts / total

	def get_glcm_features(image, distances=(1, 2, 4), angles=(0, np.pi/4, np.pi/2, 3*np.pi/4),
	                      properties=('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM')):
	    """
	    Compute GLCM (Gray Level Co-occurrence Matrix) texture features.

	    Args:
	        image: BGR image; it is converted to grayscale before the GLCM is built.
	        distances: Pixel-pair distances for the co-occurrence matrix.
	        angles: Pixel-pair angles in radians.
	        properties: Names of the ``graycoprops`` properties to extract.

	    Returns:
	        1-D array holding, for each property in order, that property's values
	        for every (distance, angle) pair. With the defaults this is
	        6 properties x 3 distances x 4 angles = 72 values.

	    The defaults are tuples rather than lists so they cannot be mutated and
	    shared between calls; lists are still accepted.
	    """
	    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    glcm = graycomatrix(gray, distances=list(distances), angles=list(angles),
	                        levels=256, symmetric=True, normed=True)
	    # graycoprops returns one value per (distance, angle); flatten each
	    # property's grid and lay the properties end to end.
	    return np.hstack([graycoprops(glcm, prop).ravel() for prop in properties])

	def get_hog_features(image, orientations=9, pixels_per_cell=(8, 8),
	                     cells_per_block=(2, 2), block_norm='L2-Hys'):
	    """
	    Compute a Histogram of Oriented Gradients (HOG) descriptor for a BGR image.

	    The image is converted to grayscale and passed to ``hog`` at its current
	    size; this function does not resize. The callers in this file resize the
	    image to 16x16 before calling, which this file's feature layout records
	    as a 36-value descriptor with the default parameters.

	    Args:
	        image: BGR image.
	        orientations: Number of orientation bins.
	        pixels_per_cell: Cell size in pixels.
	        cells_per_block: Cells per normalization block. Previously this
	            argument was ignored and (2, 2) was always used; it is now
	            honored. The default is unchanged.
	        block_norm: Block normalization method.

	    Returns:
	        The descriptor returned by ``hog``.
	    """
	    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    return hog(gray, orientations=orientations, pixels_per_cell=pixels_per_cell,
	               cells_per_block=cells_per_block, block_norm=block_norm)

	def get_combined_features(image):
	    """
	    Build the full feature vector for ``image`` by concatenating, in order:

	    - Average Color (3)
	    - HSV Color Histogram (512)
	    - LBP Histogram (59)
	    - GLCM Features (72)
	    - HOG Features (36), computed on a 16x16 resize of the image

	    Total length = 682.
	    """
	    parts = (
	        get_average_color(image),
	        get_color_histogram(image),
	        get_lbp_histogram(image),
	        get_glcm_features(image),
	        # HOG runs on a fixed 16x16 copy so its length does not depend on
	        # the input size.
	        get_hog_features(cv2.resize(image, (16, 16), interpolation=cv2.INTER_LINEAR)),
	    )
	    return np.concatenate(parts)

	# ---------------------------------------------------------------------
	# Feature Dictionary and Order
	# ---------------------------------------------------------------------
	# Registry of feature extractors. Each entry maps a display name to the
	# function that computes the feature ("func") and the half-open column
	# range ("range") that feature occupies in the 682-wide combined vector.
	FEATURES = {
	    name: {"func": extractor, "range": columns}
	    for name, extractor, columns in (
	        ("Average Color (Color, Fast)", get_average_color, (0, 3)),
	        ("HSV Histogram (Color Dist., Slow)", get_color_histogram, (3, 515)),
	        ("LBP Histogram (Texture, Normal)", get_lbp_histogram, (515, 574)),
	        ("GLCM Features (Texture Stats, Very Slow)", get_glcm_features, (574, 646)),
	        (
	            "HOG Features (Edges/Shapes, Normal)",
	            # HOG is always computed on a 16x16 resize of the input.
	            lambda image: get_hog_features(
	                cv2.resize(image, (16, 16), interpolation=cv2.INTER_LINEAR)
	            ),
	            (646, 682),
	        ),
	    )
	}
	# Canonical feature order: the insertion order of the registry above.
	FEATURE_ORDER = list(FEATURES)

	def get_selected_features(image, selected_features):
	    """
	    Compute the features named in ``selected_features`` for ``image`` and
	    return them concatenated as one float32 vector.

	    Features are always emitted in FEATURE_ORDER, regardless of the order in
	    ``selected_features``. If none of the known features is selected, an
	    empty float32 array is returned.
	    """
	    chosen = [name for name in FEATURE_ORDER if name in selected_features]
	    if not chosen:
	        return np.array([], dtype=np.float32)
	    vectors = [FEATURES[name]["func"](image) for name in chosen]
	    return np.concatenate(vectors).astype(np.float32)

	# ---------------------------------------------------------------------
	# Load Precomputed Tile Features & Raw Images
	# ---------------------------------------------------------------------
	# Load the precomputed tile feature matrix and the matching tile paths.
	# On any failure both globals are set to None so the app can still start.
	# NOTE(review): pickle.load can execute arbitrary code; this assumes
	# tile_features.pkl is a trusted file shipped with the app - confirm.
	try:
	    with open("tile_features.pkl", "rb") as f:
	        data = pickle.load(f)
	    # Expected: "features" is (num_tiles, 682); "paths" holds entries such
	    # as "image_dataset/21837.jpg".
	    tile_features, tile_paths = data["features"], data["paths"]
	    print(f"Loaded {len(tile_paths)} tile features from tile_features.pkl")
	except Exception as e:
	    print("Error loading tile features from local file:", e)
	    tile_features = tile_paths = None

	# Load the mapping used by get_tile_image to look up encoded tile images.
	# On any failure it falls back to an empty dict so lookups simply miss.
	# NOTE(review): pickle.load can execute arbitrary code; this assumes
	# tile_images_raw.pkl is a trusted file shipped with the app - confirm.
	try:
	    with open("tile_images_raw.pkl", "rb") as f:
	        raw_images_dict = pickle.load(f)
	    print(f"Loaded raw images dictionary with {len(raw_images_dict)} entries.")
	except Exception as e:
	    print("Error loading raw images dictionary:", e)
	    raw_images_dict = {}

	def get_tile_image(tile_path):
	    """
	    Decode and return the tile image stored under ``tile_path``.

	    The path (e.g. "image_dataset\\21837.jpg") is normalized by turning
	    backslashes into forward slashes and stripping surrounding whitespace,
	    then looked up in raw_images_dict. The stored bytes are decoded as a
	    color image. Returns None, after printing a message, when the path is
	    not in the dictionary or the bytes cannot be decoded.
	    """
	    fixed_path = tile_path.replace("\\", "/").strip()
	    if fixed_path not in raw_images_dict:
	        print(f"Tile image '{fixed_path}' not found.")
	        return None

	    encoded = np.frombuffer(raw_images_dict[fixed_path], np.uint8)
	    tile = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
	    if tile is None:
	        print(f"cv2.imdecode failed for: {fixed_path}")
	    return tile

	# ---------------------------------------------------------------------
	# Mosaic Generation Function (No tile scaling, with Output Scale)
	# ---------------------------------------------------------------------
	def mosaic_generator(user_img, block_size, output_scale=1.0,
	                     weight_avg_color=1.0, weight_hsv_hist=1.0,
	                     weight_lbp=1.0, weight_glcm=1.0, weight_hog=1.0):
	    """
	    Create a photomosaic from 150x150 tiles, with no tile scaling.

	    The input is cropped to a multiple of ``block_size``. For every
	    block_size x block_size block, the features with a positive weight are
	    computed and the tile with the smallest weighted Euclidean distance in
	    ``tile_features`` is chosen (linear search). Each block becomes one
	    150x150 tile, so the mosaic is (grid_rows * 150) x (grid_cols * 150)
	    before the optional rescale by ``output_scale``.

	    Args:
	        user_img: Input RGB image (anything ``np.array`` turns into H x W x 3).
	        block_size: Side length in pixels of each source block; coerced to int.
	        output_scale: Factor applied to the finished mosaic (1.0 = no rescale).
	        weight_avg_color, weight_hsv_hist, weight_lbp, weight_glcm, weight_hog:
	            Per-feature weights; a feature with weight <= 0 is skipped entirely.

	    Returns:
	        ``(mosaic_rgb, metrics)`` on success, where ``metrics`` is a text
	        report (time, grid size, MSE, SSIM, PSNR against the cropped input).
	        On failure, ``(None, "Error: ...")``: the message is placed in the
	        second slot because the interface in this file maps the first return
	        value to an Image output and the second to a Textbox.
	    """
	    start_time = time.time()
	    tile_size = 150

	    if user_img is None:
	        return None, "Error: No input image was provided."

	    # Slider values may arrive as floats; slicing below needs an int.
	    block_size = int(block_size)
	    if block_size < 1:
	        return None, "Error: Block size must be at least 1."

	    # Build a dictionary of feature weights.
	    feature_weights = {
	        "Average Color (Color, Fast)": weight_avg_color,
	        "HSV Histogram (Color Dist., Slow)": weight_hsv_hist,
	        "LBP Histogram (Texture, Normal)": weight_lbp,
	        "GLCM Features (Texture Stats, Very Slow)": weight_glcm,
	        "HOG Features (Edges/Shapes, Normal)": weight_hog
	    }
	    effective_features = [f for f in FEATURE_ORDER if feature_weights.get(f, 0) > 0]
	    if not effective_features:
	        return None, "Error: All features have weight = 0. Please enable at least one feature."

	    if tile_features is None or tile_paths is None:
	        return None, "Error: Tile features are not loaded or incompatible."

	    # Column indices and per-column weights for the enabled features.
	    # effective_features is already in FEATURE_ORDER, matching the order in
	    # which get_selected_features concatenates the query features.
	    selected_indices = []
	    weights_list = []
	    for feat in effective_features:
	        start_idx, end_idx = FEATURES[feat]["range"]
	        selected_indices.extend(range(start_idx, end_idx))
	        weights_list.extend([feature_weights[feat]] * (end_idx - start_idx))
	    weights_vector = np.array(weights_list, dtype=np.float32)

	    # The weighted tile matrix is the same for every block, so compute it
	    # once here instead of once per block.
	    tile_subset = tile_features[:, selected_indices].astype(np.float32)
	    tile_subset_weighted = tile_subset * weights_vector

	    # Crop the user image to multiples of block_size.
	    user_img_bgr = cv2.cvtColor(np.array(user_img), cv2.COLOR_RGB2BGR)
	    img_h, img_w, _ = user_img_bgr.shape
	    grid_rows = img_h // block_size
	    grid_cols = img_w // block_size
	    if grid_rows == 0 or grid_cols == 0:
	        # Without this guard np.vstack/np.hstack would fail on an empty grid.
	        return None, (
	            f"Error: Image ({img_w}x{img_h}) is smaller than the block size "
	            f"({block_size}px). Use a larger image or a smaller block size."
	        )
	    user_img_bgr = user_img_bgr[:grid_rows * block_size, :grid_cols * block_size]

	    # Save a copy in RGB for final metrics.
	    original_cropped_rgb = cv2.cvtColor(user_img_bgr, cv2.COLOR_BGR2RGB)

	    mosaic_grid = []
	    # NOTE(review): Gradio documents progress tracking via a
	    # `progress=gr.Progress()` default argument; confirm that an instance
	    # created inside the function is actually tracked.
	    progress = gr.Progress()  # Row-by-row progress bar

	    for row in range(grid_rows):
	        row_tiles = []
	        y = row * block_size
	        for col in range(grid_cols):
	            x = col * block_size
	            block = user_img_bgr[y:y + block_size, x:x + block_size]

	            # Compute only the selected features from this block.
	            query_feats = get_selected_features(block, effective_features)
	            query_weighted = query_feats.reshape(1, -1) * weights_vector

	            dists = np.linalg.norm(tile_subset_weighted - query_weighted, axis=1)
	            best_idx = np.argmin(dists)

	            best_tile = get_tile_image(tile_paths[best_idx])
	            if best_tile is None:
	                # Missing or undecodable tile: fall back to a black tile.
	                best_tile = np.zeros((tile_size, tile_size, 3), dtype=np.uint8)
	            elif best_tile.shape[:2] != (tile_size, tile_size):
	                best_tile = cv2.resize(best_tile, (tile_size, tile_size),
	                                       interpolation=cv2.INTER_AREA)
	            row_tiles.append(best_tile)

	        mosaic_grid.append(np.hstack(row_tiles))
	        progress((row + 1) / grid_rows, desc=f"Processed row {row+1}/{grid_rows}")

	    mosaic_bgr = np.vstack(mosaic_grid)
	    mosaic_rgb = cv2.cvtColor(mosaic_bgr, cv2.COLOR_BGR2RGB)

	    # Rescale mosaic output if output_scale is not 1.0.
	    if output_scale != 1.0:
	        out_w = int(mosaic_rgb.shape[1] * output_scale)
	        out_h = int(mosaic_rgb.shape[0] * output_scale)
	        mosaic_rgb = cv2.resize(mosaic_rgb, (out_w, out_h), interpolation=cv2.INTER_LINEAR)

	    processing_time = time.time() - start_time
	    total_blocks = grid_rows * grid_cols

	    # For performance metrics, resize the mosaic to match original cropped dimensions.
	    orig_h, orig_w, _ = original_cropped_rgb.shape
	    mosaic_resized_for_metrics = cv2.resize(mosaic_rgb, (orig_w, orig_h), interpolation=cv2.INTER_AREA)

	    mse_val = np.mean((original_cropped_rgb.astype(np.float32) - mosaic_resized_for_metrics.astype(np.float32)) ** 2)
	    ssim_val = ssim(original_cropped_rgb, mosaic_resized_for_metrics, channel_axis=-1, win_size=3)
	    psnr_val = psnr(original_cropped_rgb, mosaic_resized_for_metrics)

	    metrics = (
	        f"Processing Time: {processing_time:.2f} seconds\n"
	        f"Grid Dimensions: {grid_rows} rows x {grid_cols} columns\n"
	        f"Total Blocks Processed: {total_blocks}\n"
	        f"MSE: {mse_val:.2f}\n"
	        f"SSIM: {ssim_val:.4f}\n"
	        f"PSNR: {psnr_val:.2f} dB\n"
	    )

	    return mosaic_rgb, metrics

	# ---------------------------------------------------------------------
	# Gradio Interface
	# ---------------------------------------------------------------------
	# Sample images shown as examples in the UI.
	_SAMPLE_IMAGES = (
	    "samples/sample (1).png",
	    "samples/sample (2).jpg",
	    "samples/sample (3).jpg",
	    "samples/sample (4).webp",
	    "samples/sample (5).jpg",
	)

	# Feature-priority presets applied to every sample, in slider order:
	# (average color, HSV histogram, LBP, GLCM, HOG).
	_WEIGHT_PRESETS = (
	    (5.0, 0.0, 0.0, 0.0, 0.0),  # average color only
	    (0.0, 5.0, 0.0, 0.0, 0.0),  # HSV histogram only
	    (3.5, 5.0, 0.2, 0.2, 0.2),  # combination: avg=3.5, hsv=5, rest=0.2
	)

	# One example per (sample, preset) pair, grouped by sample. Every example
	# uses block size 20 and output scale 0.1.
	_EXAMPLES = [
	    [sample_path, 20, 0.1, *preset]
	    for sample_path in _SAMPLE_IMAGES
	    for preset in _WEIGHT_PRESETS
	]


	def _priority_slider(default, label):
	    """Build one 0.0-5.0 feature-priority slider with the given default and label."""
	    return gr.Slider(minimum=0.0, maximum=5.0, step=0.1, value=default, label=label)


	iface = gr.Interface(
	    fn=mosaic_generator,
	    cache_examples=True,
	    inputs=[
	        gr.Image(type="pil", label="Upload Your Image"),
	        gr.Slider(minimum=1, maximum=32, step=1, value=20,
	                  label="Block Size (px) for Feature Extraction"),
	        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=1.0,
	                  label="Output Scale (0.1 to 1.0)"),
	        # Feature priority sliders, in the order mosaic_generator takes them.
	        _priority_slider(3.5, "Priority for Average Color (Fast)"),
	        _priority_slider(5.0, "Priority for HSV Histogram (Slow)"),
	        _priority_slider(0.2, "Priority for LBP Histogram (Normal)"),
	        _priority_slider(0.2, "Priority for GLCM Features (Very Slow)"),
	        _priority_slider(0.2, "Priority for HOG Features (Normal)"),
	    ],
	    outputs=[
	        gr.Image(type="numpy", label="Mosaic Image", format="png"),
	        gr.Textbox(label="Performance Metrics"),
	    ],
	    title="Photomosaic Generator",
	    description=(
	        "Turn your image into a mesmerizing photomosaic, crafted from carefully selected 150×150 tiles. Each block is replaced with the best-matching tile, preserving the essence of your original picture. Customize the look by adjusting feature priorities and output scale. The final mosaic captures intricate details while maintaining artistic harmony, creating a unique visual story."
	    ),
	    examples=_EXAMPLES,
	)

	# Start the web app only when this file is run as a script.
	if __name__ == "__main__":
	    iface.launch()