Fixed YouTube integration again.

editsong
Sander Vocke 5 years ago
parent 600a8ced5d
commit d1ad2a0bdf
  1. 271
      client/src/lib/integration/youtubemusic/YoutubeMusicWebScraper.tsx

@ -4,6 +4,9 @@ import StoreLinkIcon from '../../../components/common/StoreLinkIcon';
import { IntegrationWith } from '../../../api/api'; import { IntegrationWith } from '../../../api/api';
import { runInNewContext } from 'vm'; import { runInNewContext } from 'vm';
import { TextRotateVertical } from '@material-ui/icons'; import { TextRotateVertical } from '@material-ui/icons';
import AlbumWindow from '../../../components/windows/album/AlbumWindow';
import { isUndefined } from 'util';
let _ = require('lodash');
enum SearchType { enum SearchType {
Track = 'track', Track = 'track',
@ -66,161 +69,130 @@ export function extractInitialData(text: string): any | undefined {
return result; return result;
} }
export function parseTracks(initialData: any): IntegrationTrack[] { export function parseItems(initialData: any): {
tracks: IntegrationTrack[],
albums: IntegrationAlbum[],
artists: IntegrationArtist[],
} {
try { try {
var musicResponsiveListItemRenderers: any[] = []; var musicResponsiveListItemRenderers: { type: 'track' | 'album' | 'artist', content: any }[] = [];
// Scrape for any "Song"-type items.
initialData.contents.sectionListRenderer.contents.forEach((c: any) => {
if (c.musicShelfRenderer) {
c.musicShelfRenderer.contents.forEach((cc: any) => {
if (cc.musicResponsiveListItemRenderer &&
cc.musicResponsiveListItemRenderer.flexColumns &&
cc.musicResponsiveListItemRenderer.flexColumns[1]
.musicResponsiveListItemFlexColumnRenderer.text.runs[0].text === "Song") {
musicResponsiveListItemRenderers.push(cc.musicResponsiveListItemRenderer);
}
})
}
})
console.log("Found song itemrenderers:", musicResponsiveListItemRenderers); console.log('initialData', initialData)
return musicResponsiveListItemRenderers.map((s: any) => { let retval: any = {
// There are some options that were encountered in the field. tracks: [],
// let videoId: string | undefined = undefined; albums: [],
// if('doubleTapCommand' in s) s = s || s.doubleTapCommand.watchEndpoint.videoId; artists: [],
// if('playlistItemData' in s) s = s || s.playlistItemData.videoId; };
let runs: any[] = []; let parseTrack: (...args: any) => IntegrationTrack | undefined = (renderer: any, runs: any[]) => {
// Gather all 'runs' fields together from all columns. let track: IntegrationTrack = {};
s.flexColumns.forEach((column: any) => {
runs.push(...column.musicResponsiveListItemFlexColumnRenderer.text.runs);
})
// Find the runs that hold the title, artist or album.
let title: string | undefined = undefined;
let album: IntegrationAlbum = {};
let artist: IntegrationArtist = {};
let videoId: string | undefined = undefined;
runs.forEach((run: any) => { runs.forEach((run: any) => {
if ('navigationEndpoint' in run && let maybeVideoId = _.get(run, 'navigationEndpoint.watchEndpoint.videoId');
'watchEndpoint' in run.navigationEndpoint && if (maybeVideoId) {
'videoId' in run.navigationEndpoint.watchEndpoint) { track.url = `https://music.youtube.com/watch?v=${maybeVideoId}`;
videoId = run.navigationEndpoint.watchEndpoint.videoId; track.title = _.get(run, 'text');
title = run.text; } else if (_.get(run,
} else if ('navigationEndpoint' in run && 'navigationEndpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig.pageType')
'browseEndpoint' in run.navigationEndpoint && === 'MUSIC_PAGE_TYPE_ALBUM') {
'browseEndpointContextSupportedConfigs' in run.navigationEndpoint.browseEndpoint && track.album = {
'browseEndpointContextMusicConfig' in run.navigationEndpoint.browseEndpoint.browseEndpointContextSupportedConfigs && url: `https://music.youtube.com/browse/${_.get(run, 'navigationEndpoint.browseEndpoint.browseId')}`,
'pageType' in run.navigationEndpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig && name: _.get(run, 'text'),
run.navigationEndpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig.pageType === 'MUSIC_PAGE_TYPE_ALBUM') {
album = {
url: `https://music.youtube.com/browse/${run.navigationEndpoint.browseEndpoint.browseId}`,
name: run.text,
} }
} else if ('navigationEndpoint' in run && } else if (_.get(run,
'browseEndpoint' in run.navigationEndpoint && 'navigationEndpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig.pageType')
'browseEndpointContextSupportedConfigs' in run.navigationEndpoint.browseEndpoint && === 'MUSIC_PAGE_TYPE_ARTIST') {
'browseEndpointContextMusicConfig' in run.navigationEndpoint.browseEndpoint.browseEndpointContextSupportedConfigs && track.artist = {
'pageType' in run.navigationEndpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig && url: `https://music.youtube.com/browse/${_.get(run, 'navigationEndpoint.browseEndpoint.browseId')}`,
run.navigationEndpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig.pageType === 'MUSIC_PAGE_TYPE_ARTIST') { name: _.get(run, 'text'),
artist = {
url: `https://music.youtube.com/browse/${run.navigationEndpoint.browseEndpoint.browseId}`,
name: run.text,
} }
} }
}); })
if(album.name && artist.name) { if (track.artist && track.album) {
album.artist = artist; track.album.artist = track.artist;
} }
return {
title: title,
url: `https://music.youtube.com/watch?v=${videoId}`,
artist: artist,
album: album,
}
})
} catch (e) {
console.log("Error parsing tracks:", e.message);
return [];
}
}
export function parseArtists(initialData: any): IntegrationArtist[] { if (track.title && track.album && track.url && track.artist) {
try { return track;
var musicResponsiveListItemRenderers: any[] = [];
// Scrape for any "Artist"-type items.
initialData.contents.sectionListRenderer.contents.forEach((c: any) => {
if (c.musicShelfRenderer) {
c.musicShelfRenderer.contents.forEach((cc: any) => {
if (cc.musicResponsiveListItemRenderer &&
cc.musicResponsiveListItemRenderer.flexColumns &&
cc.musicResponsiveListItemRenderer.flexColumns[1]
.musicResponsiveListItemFlexColumnRenderer.text.runs[0].text === "Artist") {
musicResponsiveListItemRenderers.push(cc.musicResponsiveListItemRenderer);
}
})
} }
}) return undefined;
}
return musicResponsiveListItemRenderers.map((s: any) => { let parseAlbum: (...args: any) => IntegrationAlbum | undefined = (renderer: any, runs: any[]) => {
let browseId = s.navigationEndpoint.browseEndpoint.browseId; let album: IntegrationAlbum = {};
let columns = s.flexColumns;
if (columns[1].musicResponsiveListItemFlexColumnRenderer.text.runs[0].text !== "Artist") { let maybeBrowseId = _.get(renderer, 'navigationEndpoint.browseEndpoint.browseId')
throw new Error('artist item doesnt match scraper expectation'); if (maybeBrowseId) {
album.url = `https://music.youtube.com/browse/${maybeBrowseId}`;
} }
let name = columns[0].musicResponsiveListItemFlexColumnRenderer.text.runs[0].text; let maybeName = _.get(runs[0], 'text');
if (maybeName) {
return { album.name = maybeName;
name: name,
url: `https://music.youtube.com/browse/${browseId}`,
} }
})
} catch (e) {
console.log("Error parsing artists:", e.message);
return [];
}
}
export function parseAlbums(initialData: any): IntegrationTrack[] { runs.forEach((run: any) => {
try { if (_.get(run,
var musicResponsiveListItemRenderers: any[] = []; 'navigationEndpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig.pageType')
=== 'MUSIC_PAGE_TYPE_ARTIST') {
// Scrape for any "Artist"-type items. album.artist = {
initialData.contents.sectionListRenderer.contents.forEach((c: any) => { url: `https://music.youtube.com/browse/${_.get(run, 'navigationEndpoint.browseEndpoint.browseId')}`,
if (c.musicShelfRenderer) { name: _.get(run, 'text'),
c.musicShelfRenderer.contents.forEach((cc: any) => {
if (cc.musicResponsiveListItemRenderer &&
cc.musicResponsiveListItemRenderer.flexColumns &&
["Album", "Single"].includes(cc.musicResponsiveListItemRenderer.flexColumns[1]
.musicResponsiveListItemFlexColumnRenderer.text.runs[0].text)) {
musicResponsiveListItemRenderers.push(cc.musicResponsiveListItemRenderer);
} }
}) }
} })
})
return musicResponsiveListItemRenderers.map((s: any) => { return album;
let browseId = s.navigationEndpoint.browseEndpoint.browseId; }
let columns = s.flexColumns;
if (!["Album", "Single"].includes(columns[1].musicResponsiveListItemFlexColumnRenderer.text.runs[0].text)) { let parseArtist: (...args: any) => IntegrationArtist | undefined = (renderer: any, runs: any[]) => {
throw new Error('album item doesnt match scraper expectation'); let artist: IntegrationArtist = {};
}
let name = columns[0].musicResponsiveListItemFlexColumnRenderer.text.runs[0].text;
return { let maybeBrowseId = _.get(renderer, 'navigationEndpoint.browseEndpoint.browseId')
name: name, if (maybeBrowseId) {
url: `https://music.youtube.com/browse/${browseId}`, artist.url = `https://music.youtube.com/browse/${maybeBrowseId}`;
} }
}) let maybeName = _.get(runs[0], 'text');
if (maybeName) {
artist.name = maybeName;
}
return artist;
}
// Scrape for songs, artists and albums.
_.get(initialData, 'contents.sectionListRenderer.contents', []).forEach((contents: any) => {
_.get(contents, 'musicShelfRenderer.contents', []).forEach((_contents: any) => {
let runs = _.get(_contents, 'musicResponsiveListItemRenderer.flexColumns').map((column: any) => {
return _.get(column, 'musicResponsiveListItemFlexColumnRenderer.text.runs');
}).flat();
switch (_.get(_contents, 'musicResponsiveListItemRenderer.flexColumns[1].musicResponsiveListItemFlexColumnRenderer.text.runs[0].text', '')) {
case "Song": {
retval.tracks.push(parseTrack(_.get(_contents, 'musicResponsiveListItemRenderer'), runs));
break;
}
case "Artist": {
retval.artists.push(parseArtist(_.get(_contents, 'musicResponsiveListItemRenderer'), runs));
break;
}
case "Album":
case "Single": {
retval.albums.push(parseAlbum(_.get(_contents, 'musicResponsiveListItemRenderer'), runs));
break;
}
default: {
break;
}
}
})
});
return retval;
} catch (e) { } catch (e) {
console.log("Error parsing albums:", e.message); console.log("Error parsing items:", e.message);
return []; return { tracks: [], albums: [], artists: [] }
} }
} }
@ -250,16 +222,41 @@ export default class YoutubeMusicWebScraper extends Integration {
} }
async test(testParams: {}) { async test(testParams: {}) {
const response = await fetch( // Test songs
let response = await fetch(
(process.env.REACT_APP_BACKEND || "") + (process.env.REACT_APP_BACKEND || "") +
`/integrations/${this.integrationId}/search?q=${encodeURIComponent('No One Knows Queens Of The Stone Age')}`); `/integrations/${this.integrationId}/search?q=${encodeURIComponent('No One Knows Queens Of The Stone Age')}`);
let text = await response.text(); let text = await response.text();
let songs = parseTracks(extractInitialData(text)); let results: any = parseItems(extractInitialData(text)).tracks;
if (!Array.isArray(songs) || songs.length === 0 || songs[0].title !== "No One Knows") { if (!Array.isArray(results) || results.length === 0 || !results[0] || results[0].title !== "No One Knows") {
throw new Error("Test failed; No One Knows was not correctly identified."); throw new Error("Test failed; No One Knows was not correctly identified.");
} }
// Test albums
response = await fetch(
(process.env.REACT_APP_BACKEND || "") +
`/integrations/${this.integrationId}/search?q=${encodeURIComponent('Songs For The Deaf Queens Of The Stone Age')}`);
text = await response.text();
results = parseItems(extractInitialData(text)).albums;
if (!Array.isArray(results) || results.length === 0 || !results[0] || results[0].name !== "Songs For The Deaf") {
throw new Error("Test failed; Songs For The Deaf was not correctly identified.");
}
// Test artists
response = await fetch(
(process.env.REACT_APP_BACKEND || "") +
`/integrations/${this.integrationId}/search?q=${encodeURIComponent('Queens Of The Stone Age')}`);
text = await response.text();
results = parseItems(extractInitialData(text)).artists;
if (!Array.isArray(results) || results.length === 0 || !results[0] || results[0].name !== "Queens Of The Stone Age") {
throw new Error("Test failed; Queens Of The Stone Age was not correctly identified.");
}
} }
async searchTrack(query: string, limit: number): Promise<IntegrationTrack[]> { async searchTrack(query: string, limit: number): Promise<IntegrationTrack[]> {
@ -268,7 +265,7 @@ export default class YoutubeMusicWebScraper extends Integration {
`/integrations/${this.integrationId}/search?q=${encodeURIComponent(query)}`); `/integrations/${this.integrationId}/search?q=${encodeURIComponent(query)}`);
let text = await response.text(); let text = await response.text();
return parseTracks(extractInitialData(text)); return parseItems(extractInitialData(text)).tracks;
} }
async searchAlbum(query: string, limit: number): Promise<IntegrationAlbum[]> { async searchAlbum(query: string, limit: number): Promise<IntegrationAlbum[]> {
const response = await fetch( const response = await fetch(
@ -276,7 +273,7 @@ export default class YoutubeMusicWebScraper extends Integration {
`/integrations/${this.integrationId}/search?q=${encodeURIComponent(query)}`); `/integrations/${this.integrationId}/search?q=${encodeURIComponent(query)}`);
let text = await response.text(); let text = await response.text();
return parseAlbums(extractInitialData(text)); return parseItems(extractInitialData(text)).albums;
} }
async searchArtist(query: string, limit: number): Promise<IntegrationArtist[]> { async searchArtist(query: string, limit: number): Promise<IntegrationArtist[]> {
const response = await fetch( const response = await fetch(
@ -284,6 +281,6 @@ export default class YoutubeMusicWebScraper extends Integration {
`/integrations/${this.integrationId}/search?q=${encodeURIComponent(query)}`); `/integrations/${this.integrationId}/search?q=${encodeURIComponent(query)}`);
let text = await response.text(); let text = await response.text();
return parseArtists(extractInitialData(text)); return parseItems(extractInitialData(text)).artists;
} }
} }
Loading…
Cancel
Save