The OCR pipeline leverages the GPT-4 Vision API to automate the extraction of chart data from scanned images. This process is orchestrated via the ocr_chart_gpt4o.php
and ocr_chart_upload.php
modules.
POST https://api.openai.com/v1/chat/completions Content-Type: application/json Authorization: Bearer <API_KEY> { "model": "gpt-4-vision-preview", "messages": [ {"role": "user", "content": [ {"type": "text", "text": "Extract the chart table as CSV: position, artist, track."}, {"type": "image_url", "image_url": "data:image/png;base64,..."} ]} ] }
Duplicate or variant artist entries are resolved via the data_quality_artists.php
admin tool. This tool provides:
The data_quality_tracks.php
tool identifies and merges duplicate tracks (e.g., typos, alternate spellings) for the same artist. Features include:
Component | Requirement |
---|---|
OS | Windows (tested), Linux compatible |
Web Server | Apache, Nginx, or IIS |
PHP | 7.4 or newer, with curl and fileinfo extensions |
Database | MySQL/MariaDB (InnoDB) |
External APIs | OpenAI GPT-4 Vision, YouTube Data API v3 |
Filesystem | Writable img/ , video/ , db_baks/ directories |
db_baks/
.-- -- Database: `groovecharts` -- -- -------------------------------------------------------- -- -- Table structure for table `artist` -- CREATE TABLE `artist` ( `id` int(11) NOT NULL, `artist_name` varchar(255) NOT NULL ) ENGINE=InnoDB DEFAULT CHARSET=latin1; -- -------------------------------------------------------- -- -- Table structure for table `chart` -- CREATE TABLE `chart` ( `id` int(11) NOT NULL, `chart_date` date NOT NULL ) ENGINE=InnoDB DEFAULT CHARSET=latin1; -- -------------------------------------------------------- -- -- Table structure for table `chartentry` -- CREATE TABLE `chartentry` ( `id` int(11) NOT NULL, `chart_id` int(11) NOT NULL, `chart_position` int(11) NOT NULL, `track_id` int(11) NOT NULL ) ENGINE=InnoDB DEFAULT CHARSET=latin1; -- -------------------------------------------------------- -- -- Table structure for table `track` -- CREATE TABLE `track` ( `id` int(11) NOT NULL, `artist_id` int(11) NOT NULL, `track_name` varchar(255) NOT NULL, `video` varchar(1024) DEFAULT NULL ) ENGINE=InnoDB DEFAULT CHARSET=latin1; -- -- Indexes for dumped tables -- -- -- Indexes for table `artist` -- ALTER TABLE `artist` ADD PRIMARY KEY (`id`); -- -- Indexes for table `chart` -- ALTER TABLE `chart` ADD PRIMARY KEY (`id`); -- -- Indexes for table `chartentry` -- ALTER TABLE `chartentry` ADD PRIMARY KEY (`id`), ADD KEY `chart_id` (`chart_id`), ADD KEY `track_id` (`track_id`); -- -- Indexes for table `track` -- ALTER TABLE `track` ADD PRIMARY KEY (`id`), ADD KEY `artist_id` (`artist_id`); -- -- AUTO_INCREMENT for dumped tables -- -- -- AUTO_INCREMENT for table `artist` -- ALTER TABLE `artist` MODIFY `id` int(11) NOT NULL AUTO_INCREMENT; -- -- AUTO_INCREMENT for table `chart` -- ALTER TABLE `chart` MODIFY `id` int(11) NOT NULL AUTO_INCREMENT; -- -- AUTO_INCREMENT for table `chartentry` -- ALTER TABLE `chartentry` MODIFY `id` int(11) NOT NULL AUTO_INCREMENT; -- -- AUTO_INCREMENT for table `track` -- ALTER TABLE `track` MODIFY `id` int(11) NOT NULL AUTO_INCREMENT; -- -- Constraints for dumped tables -- -- -- Constraints for table `chartentry` -- ALTER TABLE `chartentry` ADD CONSTRAINT `chartentry_ibfk_1` FOREIGN KEY (`chart_id`) REFERENCES `chart` (`id`), ADD CONSTRAINT `chartentry_ibfk_3` FOREIGN KEY (`track_id`) REFERENCES `track` (`id`); -- -- Constraints for table `track` -- ALTER TABLE `track` ADD CONSTRAINT `track_ibfk_1` FOREIGN KEY (`artist_id`) REFERENCES `artist` (`id`); COMMIT;
// OCR: Sending image to GPT-4 Vision $response = openai_api_call([ 'model' => 'gpt-4-vision-preview', 'messages' => [...], 'max_tokens' => 2048 ]); // Artist merge (transactional) $pdo->beginTransaction(); // ...reassign tracks and chartentries... $pdo->commit(); // Video upload if (move_uploaded_file($_FILES['video']['tmp_name'], $targetPath)) { $stmt = $pdo->prepare('UPDATE track SET video = ? WHERE id = ?'); $stmt->execute([$relPath, $track_id]); }