Add IKEA furniture catalog with 41 items and tabbed browse UI
- Create data/ikea-catalog.json with 41 curated IKEA items across 23 series (KALLAX, BILLY, MALM, PAX, HEMNES, LACK, etc.) with verified dimensions
- Add source tabs (All/Standard/IKEA) to the catalog panel for filtering
- Add an IKEA series filter bar when viewing IKEA items
- Add an IKEA badge and series label on item cards
- Add mergeCatalog() to the renderer for loading additional catalog files
- Add scripts/import-ikea-hf.js for importing from the HuggingFace dataset
This commit is contained in:
361
scripts/import-ikea-hf.js
Normal file
361
scripts/import-ikea-hf.js
Normal file
@@ -0,0 +1,361 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* IKEA HuggingFace Dataset Importer
|
||||
*
|
||||
* Fetches product data from the tsazan/ikea-us-commercetxt dataset on HuggingFace
|
||||
* and converts items with valid dimensions into our catalog JSON format.
|
||||
*
|
||||
* Usage:
|
||||
* node scripts/import-ikea-hf.js [--limit N] [--output path]
|
||||
*
|
||||
* The HuggingFace dataset stores products in CommerceTXT format where each row
|
||||
* is a line of text. Products are spread across multiple rows with sections like
|
||||
* @PRODUCT, @SPECS, @IMAGES. This script streams through rows, reassembles
|
||||
* product records, extracts dimensions, and generates procedural box meshes.
|
||||
*/
|
||||
|
||||
// HuggingFace dataset identifier (owner/name) this importer reads from.
const DATASET = 'tsazan/ikea-us-commercetxt';

// Base URL of the HuggingFace datasets-server API used to page through rows.
const API_BASE = 'https://datasets-server.huggingface.co';

// Number of rows requested per API call.
const BATCH_SIZE = 100;
|
||||
|
||||
// Category mapping from (lower-case) IKEA categories to our catalog categories.
//
// ORDER MATTERS: entries are arranged so that no key comes after a shorter key
// it contains (e.g. 'office chairs' precedes 'chairs', 'kitchen cabinets'
// precedes 'cabinets', 'bedside tables' precedes 'side tables'). That keeps a
// first-match substring scan in insertion order from letting a generic entry
// shadow the more specific one.
const CATEGORY_MAP = {
  // office — listed before the generic 'chairs' entry
  'office chairs': 'office',
  'desk chairs': 'office',

  // seating
  'sofas': 'seating',
  'armchairs': 'seating',
  'dining chairs': 'seating',
  'chairs': 'seating',

  // tables
  'desks': 'tables',
  'dining tables': 'tables',
  'coffee tables': 'tables',
  'bedside tables': 'tables',
  'side tables': 'tables',
  'console tables': 'tables',
  'nightstands': 'tables',

  // kitchen — listed before the generic 'cabinets' entry
  'kitchen cabinets': 'kitchen',
  'kitchen islands': 'kitchen',
  'base cabinets': 'kitchen',
  'wall cabinets': 'kitchen',

  // storage
  'bookcases': 'storage',
  'shelving units': 'storage',
  'shelf units': 'storage',
  'dressers': 'storage',
  'chests of drawers': 'storage',
  'wardrobes': 'storage',
  'tv stands': 'storage',
  'tv benches': 'storage',
  'sideboards': 'storage',
  'cabinets': 'storage',

  // beds
  'bed frames': 'beds',
  'beds': 'beds',
};
|
||||
|
||||
// Room identifiers assigned to an item, keyed by its catalog category.
// Categories missing from this table get no rooms.
const ROOM_MAP = {
  seating: ['wohnzimmer'],
  tables: ['wohnzimmer', 'esszimmer'],
  storage: ['wohnzimmer', 'arbeitszimmer'],
  beds: ['schlafzimmer'],
  kitchen: ['kueche'],
  office: ['arbeitszimmer'],
};
|
||||
|
||||
// Values of the single-glyph (vulgar) fractions that may follow the whole inches.
const INCH_FRACTION_GLYPHS = {
  '⅛': 0.125,
  '¼': 0.25,
  '⅜': 0.375,
  '½': 0.5,
  '⅝': 0.625,
  '¾': 0.75,
  '⅞': 0.875,
};

/**
 * Parse an imperial length such as `23⅝"`, `50¾"`, `23 5/8"` or `7' 10"` and
 * convert it to meters, rounded to the millimeter.
 *
 * Supported pieces (all optional, but the total must be positive):
 *   - a leading feet component terminated by `'`
 *   - whole (or decimal) inches
 *   - one fraction, either a single glyph (⅝) or ASCII (`5/8`)
 *
 * @param {string} str - Raw dimension text.
 * @returns {number|null} Length in meters, or null when the input is empty,
 *   unparsable or not greater than zero.
 */
function parseInchDim(str) {
  if (!str) return null;

  // Normalise typographic primes/quotes so the feet and inch marks below match.
  let text = String(str)
    .trim()
    .replace(/[\u2033\u201C\u201D]/g, '"')
    .replace(/[\u2032\u2018\u2019]/g, "'");

  let inches = 0;

  // A number directly followed by ' is a feet component, not inches.
  const feet = text.match(/^(\d+(?:\.\d+)?)\s*'/);
  if (feet) {
    inches += Number(feet[1]) * 12;
    text = text.slice(feet[0].length);
  }

  // Any remaining unit marks carry no further information.
  text = text.replace(/["']/g, '');

  // Single-glyph fractions, e.g. "23⅝".
  for (const [glyph, amount] of Object.entries(INCH_FRACTION_GLYPHS)) {
    if (text.includes(glyph)) {
      text = text.replace(glyph, '');
      inches += amount;
    }
  }

  // ASCII fractions, e.g. "23 5/8". Removed from the text afterwards so the
  // numerator is not mistaken for (part of) the whole-inch value.
  const ascii = text.match(/(\d+)\s*\/\s*(\d+)/);
  if (ascii && Number(ascii[2]) !== 0) {
    inches += Number(ascii[1]) / Number(ascii[2]);
    text = text.replace(ascii[0], ' ');
  }

  const whole = parseFloat(text);
  if (!Number.isNaN(whole)) inches += whole;

  // Convert inches to meters (1 in = 0.0254 m), rounded to 3 decimals.
  return inches > 0 ? Math.round(inches * 0.0254 * 1000) / 1000 : null;
}
|
||||
|
||||
/**
 * Extract width/depth/height (in meters) from the lines of a @SPECS section.
 *
 * Recognised forms:
 *   - labelled values: `Width: 23⅝" and 50¾".`, `Height: 29½"`, `Depth: 15⅜"`
 *     (when several values are joined by "and", the largest one is used)
 *   - a combined `W"xD"xH"` triple, used only for dimensions that no labelled
 *     line has provided so far
 *
 * A labelled line whose value cannot be parsed leaves any previously found
 * value untouched.
 *
 * @param {string[]} specsLines - Raw text lines of the specs section.
 * @returns {{width: number, depth: number, height: number}|null} All three
 *   dimensions; if only width plus one other is known, the missing one is
 *   estimated from the width (depth = 0.5 × width, height = 0.8 × width).
 *   Returns null when the width, or both height and depth, are unknown.
 */
function parseDimensions(specsLines) {
  // The value runs up to the first comma, newline or sentence-ending period;
  // a period followed by a digit is kept so decimals like 23.5" stay intact.
  const labelled = {
    width: /Width:\s*((?:[^,.\n]|\.(?=\d))+)/i,
    height: /Height:\s*((?:[^,.\n]|\.(?=\d))+)/i,
    depth: /Depth:\s*((?:[^,.\n]|\.(?=\d))+)/i,
  };
  const triple = /(\d+[⅛¼⅜½⅝¾⅞]?)"?\s*x\s*(\d+[⅛¼⅜½⅝¾⅞]?)"?\s*x\s*(\d+[⅛¼⅜½⅝¾⅞]?)"/i;

  const found = { width: null, height: null, depth: null };

  for (const line of specsLines ?? []) {
    for (const [key, pattern] of Object.entries(labelled)) {
      const match = line.match(pattern);
      if (!match) continue;

      const values = match[1]
        .split(/\s+and\s+/)
        .map((part) => parseInchDim(part))
        .filter((value) => value !== null);
      if (values.length > 0) found[key] = Math.max(...values);
    }

    // "WxDxH" only fills gaps; labelled values take precedence.
    const combined = line.match(triple);
    if (combined) {
      found.width = found.width || parseInchDim(combined[1]);
      found.depth = found.depth || parseInchDim(combined[2]);
      found.height = found.height || parseInchDim(combined[3]);
    }
  }

  const { width, height, depth } = found;

  if (width && height && depth) {
    return { width, depth, height };
  }

  // At minimum we need the width and one other dimension; estimate the rest.
  if (width && (height || depth)) {
    return {
      width,
      depth: depth || Math.round(width * 0.5 * 1000) / 1000,
      height: height || Math.round(width * 0.8 * 1000) / 1000,
    };
  }

  return null;
}
|
||||
|
||||
/**
 * Generate a simple procedural mesh: a single box with the item's outer
 * dimensions, resting on the ground plane and tinted by category.
 *
 * @param {{width: number, depth: number, height: number}} dims - Item size.
 * @param {string} category - Catalog category used to pick the colour.
 * @returns {{type: string, parts: Array<object>}} Mesh group with one 'body' box.
 */
function generateMesh(dims, category) {
  const { width, depth, height } = dims;

  const palette = {
    seating: '#7a8a9a',
    tables: '#b09870',
    storage: '#f0ece4',
    beds: '#f5f0eb',
    kitchen: '#e0dcd4',
    office: '#cccccc',
  };

  // Own-property check: a plain lookup would return inherited members such as
  // `constructor` or `toString` instead of falling back to the default colour.
  const color = Object.hasOwn(palette, category) ? palette[category] : '#aaaaaa';

  return {
    type: 'group',
    parts: [
      {
        name: 'body',
        geometry: 'box',
        size: [width, height, depth],
        // Raise the box by half its height so its base sits at y = 0.
        position: [0, height / 2, 0],
        color,
      },
    ],
  };
}
|
||||
|
||||
/**
 * Generate a catalog ID from a product name: lower-case, German umlauts
 * transliterated (ä→ae, ö→oe, ü→ue), every other run of non [a-z0-9]
 * characters collapsed to a single '-', limited to 50 characters and
 * prefixed with 'ikea-hf-'.
 *
 * @param {string} name - Product name.
 * @returns {string} Slug such as 'ikea-hf-kallax-shelf-unit'.
 */
function slugify(name) {
  const slug = name
    .toLowerCase()
    .replace(/[äöü]/g, (c) => ({ 'ä': 'ae', 'ö': 'oe', 'ü': 'ue' }[c]))
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/(^-|-$)/g, '')
    .slice(0, 50)
    // Truncation can cut right after a separator; never end the ID with '-'.
    .replace(/-$/, '');

  return `ikea-hf-${slug}`;
}
|
||||
|
||||
/**
 * Guess the catalog category of a product.
 *
 * The product name is checked against keyword rules first (first matching
 * rule wins). If no rule matches, the surrounding dataset category is looked
 * up in CATEGORY_MAP by substring; when several keys match, the longest (most
 * specific) key wins, so 'office chairs' beats 'chairs' whatever the map's
 * key order. Falls back to 'storage'.
 *
 * @param {string} name - Product name.
 * @param {string|null|undefined} contextCategory - Name of the dataset
 *   category the product appeared under, if known.
 * @returns {string} One of the catalog categories.
 */
function guessCategory(name, contextCategory) {
  const lower = name.toLowerCase();

  // Ordered keyword rules, tested against the lower-cased name.
  const nameRules = [
    // Must precede the generic chair rule, otherwise office/desk chairs are
    // filed under 'seating', contradicting CATEGORY_MAP.
    [/(office|desk)\s+chair/, 'office'],
    [/sofa|couch|loveseat/, 'seating'],
    [/chair|armchair|stool/, 'seating'],
    [/desk|table/, 'tables'],
    [/shelf|bookcase|shelving|kallax|billy/, 'storage'],
    [/dresser|drawer|wardrobe|pax|malm.*drawer/, 'storage'],
    [/tv.*bench|tv.*stand|besta|bestå/, 'storage'],
    [/bed|mattress/, 'beds'],
    [/cabinet|kitchen|metod|knoxhult/, 'kitchen'],
    [/office/, 'office'],
  ];

  for (const [pattern, category] of nameRules) {
    if (pattern.test(lower)) return category;
  }

  // Fall back to the context category, preferring the most specific key.
  if (contextCategory) {
    const context = contextCategory.toLowerCase();
    let bestKey = null;
    let bestCategory = null;

    for (const [key, category] of Object.entries(CATEGORY_MAP)) {
      if (context.includes(key) && (bestKey === null || key.length > bestKey.length)) {
        bestKey = key;
        bestCategory = category;
      }
    }

    if (bestKey !== null) return bestCategory;
  }

  return 'storage'; // default
}
|
||||
|
||||
/**
 * Extract the IKEA series name from a product name.
 *
 * The series is taken to be the run of two or more capital letters
 * (A-Z, Å, Ä, Ö) at the very start of the name.
 *
 * @param {string} name - Product name, e.g. 'KALLAX Shelf unit'.
 * @returns {string|null} The leading capitals, or null if there are fewer than two.
 */
function extractSeries(name) {
  const [, series = null] = name.match(/^([A-ZÅÄÖ]{2,})/) ?? [];
  return series;
}
|
||||
|
||||
/**
 * Fetch one page of dataset rows from the datasets-server `/rows` endpoint.
 *
 * @param {number} offset - Index of the first row to fetch.
 * @param {number} length - Number of rows to request.
 * @returns {Promise<string[]>} The `text` field of every returned row, with ''
 *   for rows that have no string text; an empty array when the response
 *   carries no row list.
 * @throws {Error} `API error: <status>` when the HTTP response is not OK.
 */
async function fetchRows(offset, length) {
  // Build the query with URLSearchParams so every value is properly encoded.
  const url = new URL(`${API_BASE}/rows`);
  url.search = new URLSearchParams({
    dataset: DATASET,
    config: 'default',
    split: 'train',
    offset: String(offset),
    length: String(length),
  }).toString();

  const resp = await fetch(url);
  if (!resp.ok) throw new Error(`API error: ${resp.status}`);

  const data = await resp.json();
  const rows = Array.isArray(data?.rows) ? data.rows : [];

  // Callers treat every entry as a line of text, so never hand back non-strings.
  return rows.map((entry) => {
    const text = entry?.row?.text;
    return typeof text === 'string' ? text : '';
  });
}
|
||||
|
||||
/**
 * Stream the dataset through fetchRows(), reassemble product records from the
 * line-oriented CommerceTXT rows and convert every product with usable
 * dimensions into a catalog item.
 *
 * Parsing is a small state machine: `# @...` header lines switch the current
 * section, `---` or a `# DISCLAIMER` line terminates the current product.
 * A product that is still pending when the dataset runs out of rows is
 * finalised as well, so the last record is kept even without a terminator.
 *
 * Scanning stops when `maxItems` items were collected, the dataset is
 * exhausted, a fetch fails (partial results are returned) or the scan limit
 * of 100k rows is exceeded. Progress is reported on stderr.
 *
 * @param {number} [maxItems=50] - Maximum number of items to collect.
 * @returns {Promise<object[]>} Catalog items in the order they were found;
 *   products whose generated ID was already seen are skipped.
 */
async function importDataset(maxItems = 50) {
  console.error(`Fetching IKEA products from HuggingFace (limit: ${maxItems})...`);

  // Section header prefix -> parser state.
  const sectionMarkers = [
    ['# @CATEGORY', 'category'],
    ['# @PRODUCT', 'product'],
    ['# @SPECS', 'specs'],
    ['# @FILTERS', 'filters'],
    ['# @ITEMS', 'items'],
    ['# @IMAGES', 'images'],
  ];
  // Safety limit: don't scan more than 100k rows.
  const maxScanOffset = 100000;

  const items = [];
  const seenIds = new Set();
  let offset = 0;
  let currentProduct = null;
  let currentSection = null;
  let currentCategory = null;
  let specsLines = [];
  let totalRows = 0;
  let reachedEndOfData = false;

  // Turn the pending product into a catalog item if it has a name, plausible
  // dimensions (width and height above 10 cm) and an ID not emitted before.
  const flushProduct = () => {
    if (!currentProduct || !currentProduct.name) return;

    const dims = parseDimensions(specsLines);
    if (!dims || !(dims.width > 0.1 && dims.height > 0.1)) return;

    const category = guessCategory(currentProduct.name, currentCategory);
    const id = slugify(currentProduct.name);
    if (seenIds.has(id)) return;

    seenIds.add(id);
    items.push({
      id,
      name: currentProduct.name,
      ikeaSeries: extractSeries(currentProduct.name),
      sku: currentProduct.sku || null,
      category,
      rooms: ROOM_MAP[category] || [],
      dimensions: dims,
      mesh: generateMesh(dims, category),
    });
  };

  // Process in batches.
  while (items.length < maxItems) {
    let rows;
    try {
      rows = await fetchRows(offset, BATCH_SIZE);
    } catch (e) {
      // Best effort: keep what was collected so far.
      console.error(`  Fetch error at offset ${offset}: ${e.message}`);
      break;
    }

    if (!rows || rows.length === 0) {
      reachedEndOfData = true;
      break;
    }
    totalRows += rows.length;

    for (const line of rows) {
      // Section headers only switch state; a new product resets the record.
      const marker = sectionMarkers.find(([prefix]) => line.startsWith(prefix));
      if (marker) {
        currentSection = marker[1];
        if (currentSection === 'product') {
          currentProduct = {};
          specsLines = [];
        }
        continue;
      }

      // End of product: emit it (if usable) and reset the record.
      const isDisclaimer = line.startsWith('# DISCLAIMER');
      if (line === '---' || isDisclaimer) {
        flushProduct();
        currentProduct = null;
        currentSection = isDisclaimer ? 'disclaimer' : null;
        specsLines = [];
        if (items.length >= maxItems) break;
        continue;
      }

      // Parse line content based on the current section.
      if (currentSection === 'category') {
        const nameMatch = line.match(/^Name:\s*(.+)/);
        if (nameMatch) currentCategory = nameMatch[1].trim();
      }

      if (currentSection === 'product' && currentProduct) {
        const nameMatch = line.match(/^Name:\s*(.+)/);
        if (nameMatch) currentProduct.name = nameMatch[1].trim();
        const skuMatch = line.match(/^SKU:\s*(.+)/);
        if (skuMatch) currentProduct.sku = skuMatch[1].trim();
      }

      if (currentSection === 'specs') {
        if (line.trim()) specsLines.push(line);
      }
    }

    if (items.length >= maxItems) break;
    offset += BATCH_SIZE;

    if (offset > maxScanOffset) {
      console.error(`  Reached scan limit at ${offset} rows`);
      break;
    }
  }

  // The dataset may end without a closing '---'; don't lose the last product.
  // Only done on a clean end of data, where the record is known to be complete.
  if (reachedEndOfData && items.length < maxItems) flushProduct();

  console.error(`  Scanned ${totalRows} rows, extracted ${items.length} items with dimensions`);
  return items;
}
|
||||
|
||||
/**
 * Command-line entry point.
 *
 * Options:
 *   --limit N      maximum number of items to import (default 100)
 *   --output path  write the catalog JSON to this file instead of stdout
 *
 * Status messages go to stderr so that stdout carries only the JSON.
 *
 * @returns {Promise<void>}
 * @throws {Error} When --limit is not a non-negative integer.
 */
async function main() {
  const args = process.argv.slice(2);
  let limit = 100;
  let outputPath = null;

  for (let i = 0; i < args.length; i++) {
    const flag = args[i];
    const value = args[i + 1];

    if (flag === '--limit' && value) {
      const parsed = Number.parseInt(value, 10);
      // A NaN or negative limit would silently produce an empty catalog.
      if (!Number.isInteger(parsed) || parsed < 0) {
        throw new Error(`Invalid --limit value "${value}": expected a non-negative integer`);
      }
      limit = parsed;
    }
    if (flag === '--output' && value) outputPath = value;
  }

  const items = await importDataset(limit);

  const catalog = {
    version: '1.0',
    source: 'huggingface-ikea-us-commercetxt',
    units: 'meters',
    description: `Imported from HuggingFace dataset tsazan/ikea-us-commercetxt (${items.length} items)`,
    // Distinct categories that actually occur, in alphabetical order.
    categories: [...new Set(items.map((item) => item.category))].sort(),
    items,
  };

  const json = JSON.stringify(catalog, null, 2);

  if (outputPath) {
    // Loaded lazily: only needed when writing to a file.
    const fs = await import('fs');
    fs.writeFileSync(outputPath, json);
    console.error(`Wrote ${items.length} items to ${outputPath}`);
  } else {
    process.stdout.write(json);
  }
}
|
||||
|
||||
// Run the importer; on any failure print the error message to stderr and
// exit with status 1.
main().catch((err) => {
  console.error('Error:', err.message);
  process.exit(1);
});
|
||||
Reference in New Issue
Block a user