diff --git a/client/src/lib/integration/youtubemusic/YoutubeMusicWebScraper.tsx b/client/src/lib/integration/youtubemusic/YoutubeMusicWebScraper.tsx index 4184ddb..3decd46 100644 --- a/client/src/lib/integration/youtubemusic/YoutubeMusicWebScraper.tsx +++ b/client/src/lib/integration/youtubemusic/YoutubeMusicWebScraper.tsx @@ -4,6 +4,9 @@ import StoreLinkIcon from '../../../components/common/StoreLinkIcon'; import { IntegrationWith } from '../../../api/api'; import { runInNewContext } from 'vm'; import { TextRotateVertical } from '@material-ui/icons'; +import AlbumWindow from '../../../components/windows/album/AlbumWindow'; +import { isUndefined } from 'util'; +let _ = require('lodash'); enum SearchType { Track = 'track', @@ -66,161 +69,130 @@ export function extractInitialData(text: string): any | undefined { return result; } -export function parseTracks(initialData: any): IntegrationTrack[] { +export function parseItems(initialData: any): { + tracks: IntegrationTrack[], + albums: IntegrationAlbum[], + artists: IntegrationArtist[], +} { try { - var musicResponsiveListItemRenderers: any[] = []; - - // Scrape for any "Song"-type items. - initialData.contents.sectionListRenderer.contents.forEach((c: any) => { - if (c.musicShelfRenderer) { - c.musicShelfRenderer.contents.forEach((cc: any) => { - if (cc.musicResponsiveListItemRenderer && - cc.musicResponsiveListItemRenderer.flexColumns && - cc.musicResponsiveListItemRenderer.flexColumns[1] - .musicResponsiveListItemFlexColumnRenderer.text.runs[0].text === "Song") { - musicResponsiveListItemRenderers.push(cc.musicResponsiveListItemRenderer); - } - }) - } - }) + var musicResponsiveListItemRenderers: { type: 'track' | 'album' | 'artist', content: any }[] = []; - console.log("Found song itemrenderers:", musicResponsiveListItemRenderers); + console.log('initialData', initialData) - return musicResponsiveListItemRenderers.map((s: any) => { - // There are some options that were encountered in the field. - // let videoId: string | undefined = undefined; - // if('doubleTapCommand' in s) s = s || s.doubleTapCommand.watchEndpoint.videoId; - // if('playlistItemData' in s) s = s || s.playlistItemData.videoId; + let retval: any = { + tracks: [], + albums: [], + artists: [], + }; - let runs: any[] = []; - // Gather all 'runs' fields together from all columns. - s.flexColumns.forEach((column: any) => { - runs.push(...column.musicResponsiveListItemFlexColumnRenderer.text.runs); - }) + let parseTrack: (...args: any) => IntegrationTrack | undefined = (renderer: any, runs: any[]) => { + let track: IntegrationTrack = {}; - // Find the runs that hold the title, artist or album. - let title: string | undefined = undefined; - let album: IntegrationAlbum = {}; - let artist: IntegrationArtist = {}; - let videoId: string | undefined = undefined; runs.forEach((run: any) => { - if ('navigationEndpoint' in run && - 'watchEndpoint' in run.navigationEndpoint && - 'videoId' in run.navigationEndpoint.watchEndpoint) { - videoId = run.navigationEndpoint.watchEndpoint.videoId; - title = run.text; - } else if ('navigationEndpoint' in run && - 'browseEndpoint' in run.navigationEndpoint && - 'browseEndpointContextSupportedConfigs' in run.navigationEndpoint.browseEndpoint && - 'browseEndpointContextMusicConfig' in run.navigationEndpoint.browseEndpoint.browseEndpointContextSupportedConfigs && - 'pageType' in run.navigationEndpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig && - run.navigationEndpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig.pageType === 'MUSIC_PAGE_TYPE_ALBUM') { - album = { - url: `https://music.youtube.com/browse/${run.navigationEndpoint.browseEndpoint.browseId}`, - name: run.text, + let maybeVideoId = _.get(run, 'navigationEndpoint.watchEndpoint.videoId'); + if (maybeVideoId) { + track.url = `https://music.youtube.com/watch?v=${maybeVideoId}`; + track.title = _.get(run, 'text'); + } else if (_.get(run, + 'navigationEndpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig.pageType') + === 'MUSIC_PAGE_TYPE_ALBUM') { + track.album = { + url: `https://music.youtube.com/browse/${_.get(run, 'navigationEndpoint.browseEndpoint.browseId')}`, + name: _.get(run, 'text'), } - } else if ('navigationEndpoint' in run && - 'browseEndpoint' in run.navigationEndpoint && - 'browseEndpointContextSupportedConfigs' in run.navigationEndpoint.browseEndpoint && - 'browseEndpointContextMusicConfig' in run.navigationEndpoint.browseEndpoint.browseEndpointContextSupportedConfigs && - 'pageType' in run.navigationEndpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig && - run.navigationEndpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig.pageType === 'MUSIC_PAGE_TYPE_ARTIST') { - artist = { - url: `https://music.youtube.com/browse/${run.navigationEndpoint.browseEndpoint.browseId}`, - name: run.text, + } else if (_.get(run, + 'navigationEndpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig.pageType') + === 'MUSIC_PAGE_TYPE_ARTIST') { + track.artist = { + url: `https://music.youtube.com/browse/${_.get(run, 'navigationEndpoint.browseEndpoint.browseId')}`, + name: _.get(run, 'text'), } } - }); + }) - if(album.name && artist.name) { - album.artist = artist; + if (track.artist && track.album) { + track.album.artist = track.artist; } - return { - title: title, - url: `https://music.youtube.com/watch?v=${videoId}`, - artist: artist, - album: album, - } - }) - } catch (e) { - console.log("Error parsing tracks:", e.message); - return []; - } -} -export function parseArtists(initialData: any): IntegrationArtist[] { - try { - var musicResponsiveListItemRenderers: any[] = []; - - // Scrape for any "Artist"-type items. - initialData.contents.sectionListRenderer.contents.forEach((c: any) => { - if (c.musicShelfRenderer) { - c.musicShelfRenderer.contents.forEach((cc: any) => { - if (cc.musicResponsiveListItemRenderer && - cc.musicResponsiveListItemRenderer.flexColumns && - cc.musicResponsiveListItemRenderer.flexColumns[1] - .musicResponsiveListItemFlexColumnRenderer.text.runs[0].text === "Artist") { - musicResponsiveListItemRenderers.push(cc.musicResponsiveListItemRenderer); - } - }) + if (track.title && track.album && track.url && track.artist) { + return track; } - }) + return undefined; + } - return musicResponsiveListItemRenderers.map((s: any) => { - let browseId = s.navigationEndpoint.browseEndpoint.browseId; - let columns = s.flexColumns; + let parseAlbum: (...args: any) => IntegrationAlbum | undefined = (renderer: any, runs: any[]) => { + let album: IntegrationAlbum = {}; - if (columns[1].musicResponsiveListItemFlexColumnRenderer.text.runs[0].text !== "Artist") { - throw new Error('artist item doesnt match scraper expectation'); + let maybeBrowseId = _.get(renderer, 'navigationEndpoint.browseEndpoint.browseId') + if (maybeBrowseId) { + album.url = `https://music.youtube.com/browse/${maybeBrowseId}`; } - let name = columns[0].musicResponsiveListItemFlexColumnRenderer.text.runs[0].text; - - return { - name: name, - url: `https://music.youtube.com/browse/${browseId}`, + let maybeName = _.get(runs[0], 'text'); + if (maybeName) { + album.name = maybeName; } - }) - } catch (e) { - console.log("Error parsing artists:", e.message); - return []; - } -} -export function parseAlbums(initialData: any): IntegrationTrack[] { - try { - var musicResponsiveListItemRenderers: any[] = []; - - // Scrape for any "Artist"-type items. - initialData.contents.sectionListRenderer.contents.forEach((c: any) => { - if (c.musicShelfRenderer) { - c.musicShelfRenderer.contents.forEach((cc: any) => { - if (cc.musicResponsiveListItemRenderer && - cc.musicResponsiveListItemRenderer.flexColumns && - ["Album", "Single"].includes(cc.musicResponsiveListItemRenderer.flexColumns[1] - .musicResponsiveListItemFlexColumnRenderer.text.runs[0].text)) { - musicResponsiveListItemRenderers.push(cc.musicResponsiveListItemRenderer); + runs.forEach((run: any) => { + if (_.get(run, + 'navigationEndpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig.pageType') + === 'MUSIC_PAGE_TYPE_ARTIST') { + album.artist = { + url: `https://music.youtube.com/browse/${_.get(run, 'navigationEndpoint.browseEndpoint.browseId')}`, + name: _.get(run, 'text'), } - }) - } - }) + } + }) - return musicResponsiveListItemRenderers.map((s: any) => { - let browseId = s.navigationEndpoint.browseEndpoint.browseId; - let columns = s.flexColumns; + return album; + } - if (!["Album", "Single"].includes(columns[1].musicResponsiveListItemFlexColumnRenderer.text.runs[0].text)) { - throw new Error('album item doesnt match scraper expectation'); - } - let name = columns[0].musicResponsiveListItemFlexColumnRenderer.text.runs[0].text; + let parseArtist: (...args: any) => IntegrationArtist | undefined = (renderer: any, runs: any[]) => { + let artist: IntegrationArtist = {}; - return { - name: name, - url: `https://music.youtube.com/browse/${browseId}`, + let maybeBrowseId = _.get(renderer, 'navigationEndpoint.browseEndpoint.browseId') + if (maybeBrowseId) { + artist.url = `https://music.youtube.com/browse/${maybeBrowseId}`; } - }) + let maybeName = _.get(runs[0], 'text'); + if (maybeName) { + artist.name = maybeName; + } + + return artist; + } + + // Scrape for songs, artists and albums. + _.get(initialData, 'contents.sectionListRenderer.contents', []).forEach((contents: any) => { + _.get(contents, 'musicShelfRenderer.contents', []).forEach((_contents: any) => { + let runs = _.get(_contents, 'musicResponsiveListItemRenderer.flexColumns').map((column: any) => { + return _.get(column, 'musicResponsiveListItemFlexColumnRenderer.text.runs'); + }).flat(); + + switch (_.get(_contents, 'musicResponsiveListItemRenderer.flexColumns[1].musicResponsiveListItemFlexColumnRenderer.text.runs[0].text', '')) { + case "Song": { + retval.tracks.push(parseTrack(_.get(_contents, 'musicResponsiveListItemRenderer'), runs)); + break; + } + case "Artist": { + retval.artists.push(parseArtist(_.get(_contents, 'musicResponsiveListItemRenderer'), runs)); + break; + } + case "Album": + case "Single": { + retval.albums.push(parseAlbum(_.get(_contents, 'musicResponsiveListItemRenderer'), runs)); + break; + } + default: { + break; + } + } + }) + }); + + return retval; } catch (e) { - console.log("Error parsing albums:", e.message); - return []; + console.log("Error parsing items:", e.message); + return { tracks: [], albums: [], artists: [] } } } @@ -250,16 +222,41 @@ export default class YoutubeMusicWebScraper extends Integration { } async test(testParams: {}) { - const response = await fetch( + // Test songs + let response = await fetch( (process.env.REACT_APP_BACKEND || "") + `/integrations/${this.integrationId}/search?q=${encodeURIComponent('No One Knows Queens Of The Stone Age')}`); let text = await response.text(); - let songs = parseTracks(extractInitialData(text)); + let results: any = parseItems(extractInitialData(text)).tracks; - if (!Array.isArray(songs) || songs.length === 0 || songs[0].title !== "No One Knows") { + if (!Array.isArray(results) || results.length === 0 || !results[0] || results[0].title !== "No One Knows") { throw new Error("Test failed; No One Knows was not correctly identified."); } + + // Test albums + response = await fetch( + (process.env.REACT_APP_BACKEND || "") + + `/integrations/${this.integrationId}/search?q=${encodeURIComponent('Songs For The Deaf Queens Of The Stone Age')}`); + + text = await response.text(); + results = parseItems(extractInitialData(text)).albums; + + if (!Array.isArray(results) || results.length === 0 || !results[0] || results[0].name !== "Songs For The Deaf") { + throw new Error("Test failed; Songs For The Deaf was not correctly identified."); + } + + // Test artists + response = await fetch( + (process.env.REACT_APP_BACKEND || "") + + `/integrations/${this.integrationId}/search?q=${encodeURIComponent('Queens Of The Stone Age')}`); + + text = await response.text(); + results = parseItems(extractInitialData(text)).artists; + + if (!Array.isArray(results) || results.length === 0 || !results[0] || results[0].name !== "Queens Of The Stone Age") { + throw new Error("Test failed; Queens Of The Stone Age was not correctly identified."); + } } async searchTrack(query: string, limit: number): Promise { @@ -268,7 +265,7 @@ export default class YoutubeMusicWebScraper extends Integration { `/integrations/${this.integrationId}/search?q=${encodeURIComponent(query)}`); let text = await response.text(); - return parseTracks(extractInitialData(text)); + return parseItems(extractInitialData(text)).tracks; } async searchAlbum(query: string, limit: number): Promise { const response = await fetch( @@ -276,7 +273,7 @@ export default class YoutubeMusicWebScraper extends Integration { `/integrations/${this.integrationId}/search?q=${encodeURIComponent(query)}`); let text = await response.text(); - return parseAlbums(extractInitialData(text)); + return parseItems(extractInitialData(text)).albums; } async searchArtist(query: string, limit: number): Promise { const response = await fetch( @@ -284,6 +281,6 @@ export default class YoutubeMusicWebScraper extends Integration { `/integrations/${this.integrationId}/search?q=${encodeURIComponent(query)}`); let text = await response.text(); - return parseArtists(extractInitialData(text)); + return parseItems(extractInitialData(text)).artists; } } \ No newline at end of file