Files
house-design/scripts/import-ikea-hf.js
m ceea42ac1d Add IKEA furniture catalog with 41 items and tabbed browse UI
- Create data/ikea-catalog.json with 41 curated IKEA items across 23 series
  (KALLAX, BILLY, MALM, PAX, HEMNES, LACK, etc.) with verified dimensions
- Add source tabs (All/Standard/IKEA) to catalog panel for filtering
- Add IKEA series filter bar when viewing IKEA items
- Add IKEA badge and series label on item cards
- Add mergeCatalog() to renderer for loading additional catalog files
- Add scripts/import-ikea-hf.js for importing from HuggingFace dataset
2026-02-07 12:58:52 +01:00

362 lines
11 KiB
JavaScript

#!/usr/bin/env node
/**
* IKEA HuggingFace Dataset Importer
*
* Fetches product data from the tsazan/ikea-us-commercetxt dataset on HuggingFace
* and converts items with valid dimensions into our catalog JSON format.
*
* Usage:
* node scripts/import-ikea-hf.js [--limit N] [--output path]
*
* The HuggingFace dataset stores products in CommerceTXT format where each row
* is a line of text. Products are spread across multiple rows with sections like
* @PRODUCT, @SPECS, @IMAGES. This script streams through rows, reassembles
* product records, extracts dimensions, and generates procedural box meshes.
*/
// Hugging Face dataset identifier; fetchRows sends it as the `dataset` query parameter.
const DATASET = 'tsazan/ikea-us-commercetxt';
// Base URL that fetchRows prefixes to the `/rows` endpoint path.
const API_BASE = 'https://datasets-server.huggingface.co';
// Rows requested per fetchRows call; importDataset advances its offset by the same amount.
const BATCH_SIZE = 100;
// Category mapping from IKEA categories to our catalog categories.
// Keys are lowercase IKEA category names; values are the catalog category ids
// that also key ROOM_MAP. guessCategory consults this table only when the
// product name itself gives no hint, testing keys as substrings of the
// lowercased context category.
const CATEGORY_MAP = {
'sofas': 'seating',
'armchairs': 'seating',
'chairs': 'seating',
'dining chairs': 'seating',
'office chairs': 'office',
'desk chairs': 'office',
'desks': 'tables',
'dining tables': 'tables',
'coffee tables': 'tables',
'side tables': 'tables',
'console tables': 'tables',
'nightstands': 'tables',
'bedside tables': 'tables',
'bookcases': 'storage',
'shelving units': 'storage',
'shelf units': 'storage',
'dressers': 'storage',
'chests of drawers': 'storage',
'wardrobes': 'storage',
'tv stands': 'storage',
'tv benches': 'storage',
'sideboards': 'storage',
'cabinets': 'storage',
'beds': 'beds',
'bed frames': 'beds',
'kitchen cabinets': 'kitchen',
'kitchen islands': 'kitchen',
'base cabinets': 'kitchen',
'wall cabinets': 'kitchen',
};
// Room mapping based on category: for each catalog category, the room ids
// that importDataset assigns to an imported item's `rooms` field (categories
// missing from this table get an empty list there).
// The ids are German room names: wohnzimmer = living room, esszimmer = dining
// room, arbeitszimmer = study, schlafzimmer = bedroom, kueche = kitchen.
const ROOM_MAP = {
'seating': ['wohnzimmer'],
'tables': ['wohnzimmer', 'esszimmer'],
'storage': ['wohnzimmer', 'arbeitszimmer'],
'beds': ['schlafzimmer'],
'kitchen': ['kueche'],
'office': ['arbeitszimmer'],
};
/**
 * Convert an imperial length string (inches) to meters.
 *
 * The fractional part may be written as a Unicode vulgar fraction (`23⅝"`)
 * or as an ASCII fraction (`23 5/8"`). Double and single quote characters are
 * stripped before parsing; the remaining number is always read as inches.
 *
 * @param {string} str - Raw dimension text, e.g. `50¾"`.
 * @returns {number|null} Length in meters rounded to three decimals
 *   (millimeters), or `null` when the input is not a string, is empty, or
 *   does not yield a positive value.
 */
function parseInchDim(str) {
  // Non-string input used to throw on .trim(); treat it like a missing value.
  if (typeof str !== 'string') return null;

  const fractions = { '⅛': 0.125, '¼': 0.25, '⅜': 0.375, '½': 0.5, '⅝': 0.625, '¾': 0.75, '⅞': 0.875 };
  let text = str.trim().replace(/"/g, '').replace(/'/g, '');
  let inches = 0;

  // Unicode vulgar fractions: add their value and remove the glyph.
  for (const [glyph, amount] of Object.entries(fractions)) {
    if (text.includes(glyph)) {
      text = text.replace(glyph, '');
      inches += amount;
    }
  }

  // ASCII fractions such as "5/8". Without this step parseFloat would stop at
  // the slash (or at the space before it) and silently drop the fraction.
  const ascii = text.match(/(\d+)\s*\/\s*(\d+)/);
  if (ascii && Number(ascii[2]) !== 0) {
    inches += Number(ascii[1]) / Number(ascii[2]);
    text = text.replace(ascii[0], ' ');
  }

  const whole = Number.parseFloat(text);
  if (!Number.isNaN(whole)) inches += whole;

  // Convert inches to meters, rounded to the nearest millimeter.
  return inches > 0 ? Math.round(inches * 0.0254 * 1000) / 1000 : null;
}
/**
 * Extract width/depth/height (in meters) from the lines of a spec section.
 *
 * Two notations are recognised on each line:
 *   - labelled values: `Width: 23⅝" and 50¾".`, `Height: 29½"`, `Depth: 15.5"`
 *   - a triple: `30"x15"x40"`, read as width x depth x height; it only fills
 *     axes that no labelled value has set so far.
 *
 * When a label lists several alternatives joined by "and", the largest one is
 * used. A labelled value that cannot be parsed leaves an earlier value for
 * that axis untouched.
 *
 * @param {string[]} specsLines - Raw text lines of the spec section.
 * @returns {{width: number, depth: number, height: number}|null} Dimensions
 *   in meters. Requires a width plus at least one of height/depth; the missing
 *   one is estimated from the width (depth = 0.5 x width, height = 0.8 x width).
 *   Returns `null` otherwise.
 */
function parseDimensions(specsLines) {
  // The value runs up to a comma, newline or sentence-ending period. A period
  // directly followed by a digit is a decimal point and stays in the value.
  const labelPatterns = {
    width: /Width:\s*((?:[^,.\n]|\.(?=\d))+)/i,
    height: /Height:\s*((?:[^,.\n]|\.(?=\d))+)/i,
    depth: /Depth:\s*((?:[^,.\n]|\.(?=\d))+)/i,
  };
  const triplePattern = /(\d+[⅛¼⅜½⅝¾⅞]?)"?\s*x\s*(\d+[⅛¼⅜½⅝¾⅞]?)"?\s*x\s*(\d+[⅛¼⅜½⅝¾⅞]?)"/i;

  // Largest parseable alternative of an `A and B` list, or null if none parses.
  const largestOf = (raw) => {
    const sizes = raw
      .split(/\s+and\s+/)
      .map((part) => parseInchDim(part))
      .filter((size) => typeof size === 'number');
    return sizes.length > 0 ? Math.max(...sizes) : null;
  };

  const found = { width: null, height: null, depth: null };
  for (const line of specsLines) {
    for (const [axis, pattern] of Object.entries(labelPatterns)) {
      const hit = line.match(pattern);
      if (!hit) continue;
      const size = largestOf(hit[1]);
      if (size !== null) found[axis] = size;
    }

    const triple = line.match(triplePattern);
    if (triple) {
      found.width = found.width || parseInchDim(triple[1]);
      found.depth = found.depth || parseInchDim(triple[2]);
      found.height = found.height || parseInchDim(triple[3]);
    }
  }

  const { width, height, depth } = found;
  if (width && height && depth) {
    return { width, depth, height };
  }
  // At minimum need width and one other; estimate the missing axis from width.
  if (width && (height || depth)) {
    return {
      width,
      depth: depth || Math.round(width * 0.5 * 1000) / 1000,
      height: height || Math.round(width * 0.8 * 1000) / 1000,
    };
  }
  return null;
}
/**
 * Describe an item as a single coloured box.
 *
 * @param {{width: number, depth: number, height: number}} dims - Box extents.
 * @param {string} category - Catalog category; selects the box colour, with
 *   '#aaaaaa' used when the category has no entry in the palette.
 * @returns {object} A `group` mesh with one `body` part whose size is
 *   [width, height, depth] and whose position lifts it by half its height.
 */
function generateMesh(dims, category) {
  const palette = {
    seating: '#7a8a9a',
    tables: '#b09870',
    storage: '#f0ece4',
    beds: '#f5f0eb',
    kitchen: '#e0dcd4',
    office: '#cccccc',
  };
  const fill = palette[category] || '#aaaaaa';

  const body = {
    name: 'body',
    geometry: 'box',
    size: [dims.width, dims.height, dims.depth],
    position: [0, dims.height / 2, 0],
    color: fill,
  };

  return { type: 'group', parts: [body] };
}
/**
 * Generate a catalog id from a product name.
 *
 * The name is lowercased, ä/ö/ü/å are transliterated to ae/oe/ue/a, every
 * other run of characters outside [a-z0-9] becomes a single hyphen, and the
 * result is limited to 50 characters before the `ikea-hf-` prefix is added.
 *
 * @param {string} name - Product name, e.g. `BESTÅ TV unit`.
 * @returns {string} Id of the form `ikea-hf-<slug>`; the slug never starts or
 *   ends with a hyphen.
 */
function slugify(name) {
  const transliterations = { 'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'å': 'a' };
  const slug = name
    .toLowerCase()
    // å is included so names like BESTÅ keep their last letter instead of
    // collapsing to "best-".
    .replace(/[äöüå]/g, (c) => transliterations[c])
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-/, '')
    .slice(0, 50)
    // Trim the trailing hyphen only after truncating: cutting at 50
    // characters can itself leave a hyphen at the end.
    .replace(/-$/, '');
  return `ikea-hf-${slug}`;
}
/**
 * Guess the catalog category of a product.
 *
 * Keyword rules on the product name are tried first, in order; the first rule
 * that matches wins. If none matches, the context category is looked up in
 * CATEGORY_MAP: every key contained in the lowercased context is a candidate
 * and the longest one wins, so "kitchen cabinets" is not shadowed by the
 * shorter "cabinets".
 *
 * @param {string} name - Product name.
 * @param {string} [contextCategory] - Category heading the product was found
 *   under, if any.
 * @returns {string} Catalog category; 'storage' when nothing matches.
 */
function guessCategory(name, contextCategory) {
  const lower = name.toLowerCase();

  // Must precede the generic chair rule: CATEGORY_MAP files office and desk
  // chairs under 'office', and the generic rule would claim them as seating.
  if (/\b(?:office|desk)\s+chair/.test(lower)) return 'office';
  if (/sofa|couch|loveseat/.test(lower)) return 'seating';
  if (/chair|armchair|stool/.test(lower)) return 'seating';
  if (/desk|table/.test(lower)) return 'tables';
  if (/shelf|bookcase|shelving|kallax|billy/.test(lower)) return 'storage';
  if (/dresser|drawer|wardrobe|pax|malm.*drawer/.test(lower)) return 'storage';
  if (/tv.*bench|tv.*stand|besta|bestå/.test(lower)) return 'storage';
  if (/bed|mattress/.test(lower)) return 'beds';
  if (/cabinet|kitchen|metod|knoxhult/.test(lower)) return 'kitchen';
  if (/office/.test(lower)) return 'office';

  // Try context category, preferring the most specific (longest) key.
  if (contextCategory) {
    const context = contextCategory.toLowerCase();
    let bestKey = null;
    for (const key of Object.keys(CATEGORY_MAP)) {
      if (context.includes(key) && (bestKey === null || key.length > bestKey.length)) {
        bestKey = key;
      }
    }
    if (bestKey !== null) return CATEGORY_MAP[bestKey];
  }

  return 'storage'; // default
}
/**
 * Pull the series name off the front of a product name.
 *
 * The series is taken to be the run of two or more capital letters
 * (A-Z, Å, Ä, Ö) that the name starts with.
 *
 * @param {string} name - Product name, e.g. `KALLAX Shelf unit`.
 * @returns {string|null} The leading capitals, or `null` if the name does not
 *   start with at least two of them.
 */
function extractSeries(name) {
  const [, series = null] = name.match(/^([A-ZÅÄÖ]{2,})/) || [];
  return series;
}
/**
 * Fetch one page of rows from the dataset's `/rows` endpoint.
 *
 * @param {number} offset - Index of the first row to request.
 * @param {number} length - Number of rows to request.
 * @returns {Promise<string[]>} The `row.text` value of each returned row, with
 *   '' substituted where it is missing or empty; an empty array when the
 *   response carries no `rows`.
 * @throws {Error} `API error: <status>` when the response is not ok.
 */
async function fetchRows(offset, length) {
  const query = `dataset=${DATASET}&config=default&split=train&offset=${offset}&length=${length}`;
  const response = await fetch(`${API_BASE}/rows?${query}`);
  if (!response.ok) {
    throw new Error(`API error: ${response.status}`);
  }

  const { rows } = await response.json();
  if (rows == null) {
    return [];
  }
  return rows.map((entry) => entry.row?.text || '');
}
/**
 * Scan the dataset page by page and collect catalog items.
 *
 * Rows are read as lines of a sectioned text format. A line starting with one
 * of the `# @...` markers switches the current section; `# @PRODUCT` also
 * starts a fresh product record. A `---` line or a `# DISCLAIMER` line closes
 * the current product, which becomes an item when it has a name, parseable
 * dimensions with width and height above 0.1, and an id not seen before.
 *
 * Scanning stops when `maxItems` items were collected, a fetch fails (the
 * error is logged and the items gathered so far are returned), a page comes
 * back empty, or the offset passes 100000.
 *
 * @param {number} [maxItems=50] - Maximum number of items to collect.
 * @returns {Promise<object[]>} The collected catalog items.
 */
async function importDataset(maxItems = 50) {
  console.error(`Fetching IKEA products from HuggingFace (limit: ${maxItems})...`);

  // Line prefix -> section it opens.
  const sectionMarkers = [
    ['# @CATEGORY', 'category'],
    ['# @PRODUCT', 'product'],
    ['# @SPECS', 'specs'],
    ['# @FILTERS', 'filters'],
    ['# @ITEMS', 'items'],
    ['# @IMAGES', 'images'],
  ];

  const collected = [];
  const usedIds = new Set();
  const state = { product: null, section: null, category: null, specs: [] };
  let rowsScanned = 0;
  let offset = 0;

  // Turn the buffered product into a catalog item if it qualifies.
  const emitProduct = () => {
    const { product } = state;
    if (!product || !product.name) return;

    const dims = parseDimensions(state.specs);
    if (!dims || !(dims.width > 0.1) || !(dims.height > 0.1)) return;

    const category = guessCategory(product.name, state.category);
    const id = slugify(product.name);
    if (usedIds.has(id)) return;

    usedIds.add(id);
    collected.push({
      id,
      name: product.name,
      ikeaSeries: extractSeries(product.name),
      sku: product.sku || null,
      category,
      rooms: ROOM_MAP[category] || [],
      dimensions: dims,
      mesh: generateMesh(dims, category),
    });
  };

  // Process in batches
  scan: while (collected.length < maxItems) {
    let batch;
    try {
      batch = await fetchRows(offset, BATCH_SIZE);
    } catch (err) {
      console.error(` Fetch error at offset ${offset}: ${err.message}`);
      break;
    }
    if (!batch || batch.length === 0) break;
    rowsScanned += batch.length;

    for (const line of batch) {
      // Section headers only switch state; they carry no content themselves.
      const marker = sectionMarkers.find(([prefix]) => line.startsWith(prefix));
      if (marker) {
        state.section = marker[1];
        if (state.section === 'product') {
          state.product = {};
          state.specs = [];
        }
        continue;
      }

      // End of product: emit it, then reset the per-product state.
      const isDisclaimer = line.startsWith('# DISCLAIMER');
      if (isDisclaimer || line === '---') {
        emitProduct();
        if (collected.length >= maxItems) break scan;
        state.product = null;
        state.section = isDisclaimer ? 'disclaimer' : null;
        state.specs = [];
        continue;
      }

      // Ordinary content line: interpret it according to the open section.
      switch (state.section) {
        case 'category': {
          const label = line.match(/^Name:\s*(.+)/);
          if (label) state.category = label[1].trim();
          break;
        }
        case 'product': {
          if (!state.product) break;
          const label = line.match(/^Name:\s*(.+)/);
          if (label) state.product.name = label[1].trim();
          const code = line.match(/^SKU:\s*(.+)/);
          if (code) state.product.sku = code[1].trim();
          break;
        }
        case 'specs':
          if (line.trim()) state.specs.push(line);
          break;
        default:
          break;
      }
    }

    offset += BATCH_SIZE;
    // Safety limit: don't scan more than 100k rows
    if (offset > 100000) {
      console.error(` Reached scan limit at ${offset} rows`);
      break;
    }
  }

  console.error(` Scanned ${rowsScanned} rows, extracted ${collected.length} items with dimensions`);
  return collected;
}
/**
 * Command-line entry point.
 *
 * Reads `--limit N` (default 100) and `--output path` from the process
 * arguments, imports up to N items, wraps them in a catalog object and writes
 * it as pretty-printed JSON to the output path, or to stdout when no path is
 * given. A flag without a following value is ignored.
 *
 * @returns {Promise<void>}
 * @throws {Error} When `--limit` is not a positive integer. Previously such a
 *   value became NaN and silently produced an empty catalog.
 */
async function main() {
  const args = process.argv.slice(2);
  let limit = 100;
  let outputPath = null;
  for (let i = 0; i < args.length; i++) {
    const flag = args[i];
    const value = args[i + 1];
    if (!value) continue;
    if (flag === '--limit') {
      const parsed = Number.parseInt(value, 10);
      if (!Number.isInteger(parsed) || parsed < 1) {
        throw new Error(`Invalid --limit value: ${value} (expected a positive integer)`);
      }
      limit = parsed;
    } else if (flag === '--output') {
      outputPath = value;
    }
  }

  const items = await importDataset(limit);
  const catalog = {
    version: '1.0',
    source: 'huggingface-ikea-us-commercetxt',
    units: 'meters',
    description: `Imported from HuggingFace dataset tsazan/ikea-us-commercetxt (${items.length} items)`,
    // Distinct categories that actually occur in the imported items, sorted.
    categories: [...new Set(items.map(i => i.category))].sort(),
    items
  };
  const json = JSON.stringify(catalog, null, 2);

  if (outputPath) {
    const fs = await import('fs');
    fs.writeFileSync(outputPath, json);
    console.error(`Wrote ${items.length} items to ${outputPath}`);
  } else {
    process.stdout.write(json);
  }
}
// Script entry point: run main() and report any failure with exit code 1.
function exitWithError(err) {
  console.error('Error:', err.message);
  process.exit(1);
}

main().catch(exitWithError);