Hacker News new | ask | show | jobs
by mbil 1476 days ago
Good question. I thought of an algorithm to find the hex word colors that most closely match the color they represent...

Basically: get the color of the _word_ by doing an image search for it and extracting the dominant color from the first image result. Then compare that color to the word's hex color (both converted to the CIELAB color space) to get the color difference. Track the hex words with the lowest difference.

Here's a rough implementation in Python:

  import sys
  import os
  import heapq
  
  import requests
  from google.cloud import vision
  
  from colormath.color_objects import sRGBColor, LabColor
  from colormath.color_conversions import convert_color
  from colormath.color_diff import delta_e_cie2000
  
  from bs4 import BeautifulSoup
  
  def download_image_for_query(query):
      """Return the raw bytes of the first image-search result for ``query``.

      The search goes through the Google Custom Search JSON API. Credentials
      are read from the ``GOOGLE_CUSTOM_SEARCH_ENGINE_KEY`` and
      ``GOOGLE_CUSTOM_SEARCH_ENGINE_ID`` environment variables.

      Args:
          query: search text; it is percent-encoded before being sent.

      Returns:
          The body (bytes) of the first result's ``link`` URL.

      Raises:
          requests.HTTPError: the search call or the image download returned
              an error status.
          requests.Timeout: either request exceeded the timeout.
          KeyError, IndexError: the search response contained no ``items``.
      """
      timeout_seconds = 30  # without a timeout, requests can block forever

      search_key = os.getenv("GOOGLE_CUSTOM_SEARCH_ENGINE_KEY")
      search_engine_id = os.getenv("GOOGLE_CUSTOM_SEARCH_ENGINE_ID")

      # Hand the query to `params` so requests percent-encodes it; building the
      # URL by hand breaks on words containing "&", "#", "+" and similar.
      resp = requests.get(
          "https://www.googleapis.com/customsearch/v1",
          params={
              "key": search_key,
              "cx": search_engine_id,
              "q": query,
              "searchType": "image",
          },
          timeout=timeout_seconds,
      )
      # Surface quota/auth failures as HTTP errors instead of a confusing
      # KeyError on the missing "items" field.
      resp.raise_for_status()
      img_url = resp.json()["items"][0]["link"]

      img_resp = requests.get(img_url, timeout=timeout_seconds)
      # Do not pass an HTML error page on as if it were image data.
      img_resp.raise_for_status()
      return img_resp.content
  
  def dominant_rgb_colors(image_content, num_colors=1):
      """Return up to ``num_colors`` (red, green, blue) int tuples for an image.

      The image bytes are sent to the Cloud Vision client, and the colors are
      read from the ``dominant_colors`` entry of the response's image
      properties annotation, in the order the response lists them.
      """
      client = vision.ImageAnnotatorClient()
      annotation = client.annotate_image(
          {"image": vision.Image(content=image_content)}
      )
      palette = annotation.image_properties_annotation.dominant_colors.colors

      rgb_triples = []
      for entry in palette:
          channels = entry.color
          # Channel values are truncated to plain ints.
          rgb_triples.append(
              (int(channels.red), int(channels.green), int(channels.blue))
          )
      return rgb_triples[:num_colors]
  
  # class to store hex word and calculate the difference from "true" image, based on image search
  class HeapibleHexWord:
      """A hex-word color and its English reading, ordered by color difference.

      Instances compare with ``<`` on ``delta_from_true`` so they can be pushed
      onto a ``heapq`` min-heap; the smallest delta pops first.

      Attributes (set in ``__init__``):
          hex_version: the hex color string; a leading "#" is optional.
          english_version: the word the hex string spells.
          rgb_version: ``hex_version`` as an (r, g, b) tuple of 0-255 ints.
          delta_from_true: color difference from the image-search color;
              ``inf`` until ``calculate_delta_from_true`` has run.
      """

      def __init__(self, hex_version, english_version):
          self.hex_version = hex_version
          self.english_version = english_version
          self.rgb_version = self.generate_rgb_version()
          # Unscored words sort after every scored word.
          self.delta_from_true = float("inf")

      def __repr__(self):
          return (
              f"{type(self).__name__}({self.hex_version!r}, "
              f"{self.english_version!r}, delta_from_true={self.delta_from_true!r})"
          )

      def generate_rgb_version(self):
          """Return ``hex_version`` parsed as an (r, g, b) tuple of ints.

          Reads the first six hex digits after any leading "#" characters.
          """
          digits = self.hex_version.lstrip("#")
          return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))

      def calculate_delta_from_true(self):
          """Set ``delta_from_true`` to the CIEDE2000 distance from the image color.

          Downloads the first image-search hit for ``english_version``, takes
          its first dominant color, and compares it with ``rgb_version`` after
          converting both to Lab.

          Raises:
              IndexError: no dominant color was returned for the image.
          """
          # Both colors are 0-255 channel values, so they must be flagged as
          # upscaled; colormath otherwise reads them as 0.0-1.0 floats and the
          # Lab conversion (and therefore the delta) comes out wrong.
          true_rgb = sRGBColor(*self.rgb_version, is_upscaled=True)
          query = self.english_version
          img_content = download_image_for_query(query)
          test_rgb = sRGBColor(
              *dominant_rgb_colors(img_content)[0], is_upscaled=True
          )

          # via http://hanzratech.in/2015/01/16/color-difference-between-2-colors-using-python.html
          delta = delta_e_cie2000(
              convert_color(true_rgb, LabColor),
              convert_color(test_rgb, LabColor),
          )
          self.delta_from_true = delta

      def __lt__(self, other):
          # heapq only needs "<"; order purely by color difference.
          return self.delta_from_true < other.delta_from_true
  
  # Fetch the hexwords page and collect (hex, english) pairs from its buttons.
  hexwords_url = 'https://hexwords.netlify.app/'
  hexwords_page = requests.get(hexwords_url)
  soup = BeautifulSoup(hexwords_page.text, 'html.parser')
  buttons = soup.find_all('button', class_='svelte-1m9ptdb')

  # Every non-empty button label must split into exactly three newline-separated
  # parts (the unpacking below raises otherwise): the hex string, a part that is
  # ignored, and the English word, whose parentheses are removed.
  strip_parens = str.maketrans("", "", "()")
  hex_words = []
  for button in buttons:
      label = button.text
      if not label:
          continue
      hex_version, _, english_version = label.split("\n")
      hex_words.append((hex_version, english_version.translate(strip_parens)))
  
  # Iterate over the hex words, calculating the color difference between each
  # word's hex color and its "true" color based on image search, and keep the
  # scored words in a min-heap ordered by that difference.

  hex_words_heap = []  # an empty list is already a valid heap; no heapify needed

  NUM_WORDS = 10 # looks like throttled at some point

  for i, hex_word in enumerate(hex_words[:NUM_WORDS]):
      print(f"working on {i}: {hex_word}")
      heapible_hex_word = HeapibleHexWord(*hex_word)
      heapible_hex_word.calculate_delta_from_true()
      heapq.heappush(hex_words_heap, heapible_hex_word)

  # Popping from the min-heap yields the hex word with the smallest difference
  # from its true color. Print it explicitly: a bare expression is discarded
  # when this runs as a script. Guard the pop, since heappop raises IndexError
  # on an empty heap (e.g. when the scrape found no buttons).
  if hex_words_heap:
      print(heapq.heappop(hex_words_heap).english_version)
  else:
      print("no hex words were scored")
3 comments

I sometimes wish HN had the equivalent of Reddit Gold. Well done.
Alas, I'm gonna bet a lot of images show an object in the center with plenty of white or other background around it.
awesome! Agree with previous comment, this is cool