Add a YouTube Music web scraper.

editsong
Sander Vocke 5 years ago
parent 9c661d67ee
commit cd9ee9bbb1
  1. 9
      client/src/api.ts
  2. 16
      client/src/assets/youtubemusic_icon.svg
  3. 10
      client/src/components/common/StoreLinkIcon.tsx
  4. 34
      client/src/components/windows/settings/IntegrationSettings.tsx
  5. 2
      client/src/components/windows/song/EditSongDialog.tsx
  6. 15
      client/src/lib/integration/useIntegrations.tsx
  7. 203
      client/src/lib/integration/youtubemusic/YoutubeMusicWebScraper.tsx
  8. 15
      server/integrations/integrations.ts

@ -360,18 +360,21 @@ export const LogoutEndpoint = "/logout";
// Identifies which kind of third-party integration an integration record uses.
// Each member's value is the member name itself, as a string.
export enum IntegrationType {
// Spotify, accessed using client credentials.
SpotifyClientCredentials = "SpotifyClientCredentials",
// YouTube Music, accessed through the web scraper.
YoutubeWebScraper = "YoutubeWebScraper",
}
// Non-secret settings of a Spotify client-credentials integration.
export interface SpotifyClientCredentialsDetails {
// Client ID of the Spotify application.
clientId: string,
}
// Secret settings of a Spotify client-credentials integration.
export interface SpotifyClientCredentialsSecretDetails {
// Client secret of the Spotify application.
clientSecret: string,
}
export type IntegrationDetails = SpotifyClientCredentialsDetails;
export type IntegrationSecretDetails = SpotifyClientCredentialsSecretDetails;
// Non-secret settings of a YouTube Music web scraper integration.
// Declared without any fields.
export interface YoutubeMusicWebScraperDetails {}
// Secret settings of a YouTube Music web scraper integration.
// Declared without any fields.
export interface YoutubeMusicWebScraperSecretDetails {}
export type IntegrationDetails = SpotifyClientCredentialsDetails | YoutubeMusicWebScraperDetails;
export type IntegrationSecretDetails = SpotifyClientCredentialsSecretDetails | YoutubeMusicWebScraperSecretDetails;
// Create a new integration (POST).
export const CreateIntegrationEndpoint = '/integration';

@ -0,0 +1,16 @@
<svg version="1.1" id="Layer_1"
xmlns="http://www.w3.org/2000/svg" x="0px" y="0px" viewBox="0 0 176 176"
enable-background="new 0 0 176 176">
<metadata>
<sfw xmlns="&ns_sfw;">
<slices></slices>
<sliceSourceBounds bottomLeftOrigin="true" height="176" width="176" x="8" y="-184"></sliceSourceBounds>
</sfw>
</metadata>
<g id="XMLID_167_">
<circle id="XMLID_791_" fill="#FF0000" cx="88" cy="88" r="88"/>
<path id="XMLID_42_" fill="#FFFFFF" d="M88,46c23.1,0,42,18.8,42,42s-18.8,42-42,42s-42-18.8-42-42S64.9,46,88,46 M88,42
c-25.4,0-46,20.6-46,46s20.6,46,46,46s46-20.6,46-46S113.4,42,88,42L88,42z"/>
<polygon id="XMLID_274_" fill="#FFFFFF" points="72,111 111,87 72,65 "/>
</g>
</svg>

After

Width:  |  Height:  |  Size: 696 B

@ -1,10 +1,12 @@
import React from 'react';
import { ReactComponent as GPMIcon } from '../../assets/googleplaymusic_icon.svg';
import { ReactComponent as SpotifyIcon } from '../../assets/spotify_icon.svg';
import { ReactComponent as YoutubeMusicIcon } from '../../assets/youtubemusic_icon.svg';
// External music stores / streaming services that a link can point to.
// Each member's value is the store's human-readable name.
export enum ExternalStore {
GooglePlayMusic = "Google Play Music",
Spotify = "Spotify",
YoutubeMusic = "Youtube Music",
}
export interface IProps {
@ -16,6 +18,8 @@ export function whichStore(url: string) {
return ExternalStore.GooglePlayMusic;
} else if (url.includes('spotify.com')) {
return ExternalStore.Spotify;
} else if (url.includes('music.youtube.com')) {
return ExternalStore.YoutubeMusic;
}
return undefined;
}
@ -28,9 +32,11 @@ export default function StoreLinkIcon(props: any) {
switch (whichStore) {
case ExternalStore.GooglePlayMusic:
return <GPMIcon {...restProps} style={realStyle}/>;
return <GPMIcon {...restProps} style={realStyle} />;
case ExternalStore.Spotify:
return <SpotifyIcon {...restProps} style={realStyle}/>;
return <SpotifyIcon {...restProps} style={realStyle} />;
case ExternalStore.YoutubeMusic:
return <YoutubeMusicIcon {...restProps} style={realStyle} />;
default:
throw new Error("Unknown external store: " + whichStore)
}

@ -84,7 +84,16 @@ function EditIntegration(props: {
})}
</Box>
<Typography>Spotify (using Client Credentials)</Typography>
</Box>
</Box>,
[serverApi.IntegrationType.YoutubeWebScraper]:
<Box display="flex" alignItems="center">
<Box mr={1}>
{new IntegrationClasses[serverApi.IntegrationType.YoutubeWebScraper](-1).getIcon({
style: { height: '40px', width: '40px' }
})}
</Box>
<Typography>Youtube Music (using experimental web scraper)</Typography>
</Box>,
}
let IntegrationDescription: Record<any, any> = {
[serverApi.IntegrationType.SpotifyClientCredentials]:
@ -95,7 +104,15 @@ function EditIntegration(props: {
Please see the Spotify API documentation on how to generate a client ID
and client secret. Once set, you will only be able to overwrite the secret
here, not read it.
</Typography>
</Typography>,
[serverApi.IntegrationType.YoutubeWebScraper]:
<Typography>
This integration allows using the public Youtube Music search page to scrape
for music metadata. <br />
Because it relies on reverse-engineering of a web page that may change in the
future, this is considered to be experimental and unstable. However, the music links acquired
using this method are expected to remain reasonably stable.
</Typography>,
}
return <Card variant="outlined">
@ -122,8 +139,9 @@ function EditIntegration(props: {
</Box>
{props.integration.type === serverApi.IntegrationType.SpotifyClientCredentials &&
<EditSpotifyClientCredentialsDetails
clientId={props.integration.details.clientId}
clientSecret={props.integration.secretDetails ?
clientId={'clientId' in props.integration.details &&
props.integration.details.clientId || ""}
clientSecret={props.integration.secretDetails && 'clientSecret' in props.integration.secretDetails ?
props.integration.secretDetails.clientSecret :
(props.isNew ? "" : null)}
editing={props.editing || false}
@ -207,7 +225,13 @@ function AddIntegrationMenu(props: {
props.onAdd && props.onAdd(serverApi.IntegrationType.SpotifyClientCredentials);
props.onClose && props.onClose();
}}
>Spotify</MenuItem>
>Spotify via Client Credentials</MenuItem>
<MenuItem
onClick={() => {
props.onAdd && props.onAdd(serverApi.IntegrationType.YoutubeWebScraper);
props.onClose && props.onClose();
}}
>Youtube Music Web Scraper</MenuItem>
</Menu>
}

@ -23,7 +23,7 @@ export function ProvideLinksWidget(props: {
props.providers.length > 0 ? 0 : undefined
);
let [query, setQuery] = useState<string>(
`${props.metadata.title} ${props.metadata.artists && props.metadata.artists[0].name}`
`${props.metadata.title}${props.metadata.artists && ` ${props.metadata.artists[0].name}`}${props.metadata.albums && ` ${props.metadata.albums[0].name}`}`
)
let [results, setResults] = useState<IntegrationSong[]>([]);

@ -5,6 +5,7 @@ import SpotifyClientCreds from "./spotify/SpotifyClientCreds";
import * as backend from "../backend/integrations";
import { handleNotLoggedIn, NotLoggedInError } from "../backend/request";
import { useAuth } from "../useAuth";
import YoutubeMusicWebScraper from "./youtubemusic/YoutubeMusicWebScraper";
export type IntegrationState = {
id: number,
@ -27,6 +28,7 @@ export interface Integrations {
// Maps each integration type to the client-side class that implements it.
// The values are constructors (instantiated with an integration id), not instances.
export const IntegrationClasses: Record<any, any> = {
[serverApi.IntegrationType.SpotifyClientCredentials]: SpotifyClientCreds,
[serverApi.IntegrationType.YoutubeWebScraper]: YoutubeMusicWebScraper,
}
export function makeDefaultIntegrationProperties(type: serverApi.IntegrationType):
@ -34,12 +36,20 @@ export function makeDefaultIntegrationProperties(type: serverApi.IntegrationType
switch(type) {
case serverApi.IntegrationType.SpotifyClientCredentials: {
return {
name: "Spotify",
name: "Spotify App",
type: type,
details: { clientId: "" },
secretDetails: { clientSecret: "" },
}
}
case serverApi.IntegrationType.YoutubeWebScraper: {
return {
name: "Youtube Music Web Scraper",
type: type,
details: {},
secretDetails: {},
}
}
default: {
throw new Error("Unimplemented default integration.")
}
@ -51,6 +61,9 @@ export function makeIntegration(p: serverApi.CreateIntegrationRequest, id: numbe
case serverApi.IntegrationType.SpotifyClientCredentials: {
return new SpotifyClientCreds(id);
}
case serverApi.IntegrationType.YoutubeWebScraper: {
return new YoutubeMusicWebScraper(id);
}
default: {
throw new Error("Unimplemented integration type.")
}

@ -0,0 +1,203 @@
import React from 'react';
import Integration, { IntegrationFeature, IntegrationAlbum, IntegrationArtist, IntegrationSong } from '../Integration';
import StoreLinkIcon, { ExternalStore } from '../../../components/common/StoreLinkIcon';
// Kind of entity a generic `search` call should look for.
// NOTE(review): the string values ('track', 'artist', 'album') look like a
// carry-over from the Spotify integration's search API; the only consumer in
// this file is `YoutubeMusicWebScraper.search`, which currently ignores the
// type and returns an empty list. Confirm before relying on these values.
enum SearchType {
Song = 'track',
Artist = 'artist',
Album = 'album',
};
/**
 * Extracts and parses the JSON payload that a YouTube Music search page embeds
 * in one of its `initialData.push({...})` script blocks.
 *
 * @param text Raw text of the search result page.
 * @returns The parsed payload, or `undefined` when the block cannot be located
 *          or its contents are not valid JSON after unescaping.
 */
export function extractInitialData(text: string): any | undefined {
    // At the time of writing this, the scraper is trying to capture from the following block:
    //
    // initialData.push({
    //     path: ...,
    //     params: {"query":"something"},
    //     data: "THIS",
    // });
    //
    // the THIS part.
    // NOTE(review): the pattern requires only whitespace between the closing
    // quote of the data value and the end of the line, so a trailing comma as
    // sketched above would not match — confirm against a real page.
    const pattern = /initialData\.push\({[\n\r\s]*path:.*[\n\r\s]+params:\s*{\s*['"]query['"].*[\n\r\s]+data:\s*['"](.*)['"]\s*[\n\r]/
    const match = text.match(pattern);
    const dataLine = match ? match[1] : undefined;
    if (!dataLine) { return undefined; }

    // The payload is embedded as an escaped string literal; undo the quote
    // escaping so that what remains is plain JSON text.
    const unescaped = dataLine.replace(/\\"/g, '"').replace(/\\\\"/g, '\\"');

    // The page format is reverse-engineered and may change at any time, so a
    // payload that no longer parses is reported as "nothing found" instead of
    // surfacing a SyntaxError to every caller.
    try {
        return JSON.parse(unescaped);
    } catch (e) {
        console.log("Error parsing initial data:", e instanceof Error ? e.message : String(e));
        return undefined;
    }
}
/**
 * Collects all "Song"-type search results from the payload returned by
 * `extractInitialData`.
 *
 * The payload layout is reverse-engineered, so parsing is best-effort:
 * - a list item that does not match the expected layout is logged and skipped,
 *   leaving the remaining results intact;
 * - if the overall structure cannot be walked at all (including an
 *   `undefined` payload), the error is logged and an empty list is returned.
 *
 * @param initialData Parsed search page payload (or `undefined`).
 * @returns The songs found, in page order. `artist` / `album` are `undefined`
 *          when the item carries no linked artist / album.
 */
export function parseSongs(initialData: any): IntegrationSong[] {
    const describeError = (e: unknown): string => e instanceof Error ? e.message : String(e);

    // Text runs of the index-th column of a list item.
    const columnRuns = (renderer: any, index: number): any[] =>
        renderer.flexColumns[index].musicResponsiveListItemFlexColumnRenderer.text.runs;

    // Only runs that carry a navigation endpoint refer to a browsable entity;
    // the others are plain separator text.
    const linkedRuns = (runs: any[]): { url: string, name: string }[] =>
        runs
            .filter((run: any) => 'navigationEndpoint' in run)
            .map((run: any) => ({
                url: `https://music.youtube.com/browse/${run.navigationEndpoint.browseEndpoint.browseId}`,
                name: run.text,
            }));

    // Column layout of a song item: 0 = title, 1 = item type, 2 = artists, 3 = albums.
    const toSong = (renderer: any): IntegrationSong => {
        const videoId = renderer.doubleTapCommand.watchEndpoint.videoId;
        const title = columnRuns(renderer, 0)[0].text;
        const artists = linkedRuns(columnRuns(renderer, 2));
        const albums = linkedRuns(columnRuns(renderer, 3))
            .map((album) => ({ ...album, artist: artists[0] }));
        return {
            title: title,
            url: `https://music.youtube.com/watch?v=${videoId}`,
            artist: artists[0],
            album: albums[0],
        };
    };

    const songs: IntegrationSong[] = [];
    try {
        for (const section of initialData.contents.sectionListRenderer.contents) {
            if (!section.musicShelfRenderer) { continue; }
            for (const entry of section.musicShelfRenderer.contents) {
                try {
                    const renderer = entry.musicResponsiveListItemRenderer;
                    if (!renderer || !renderer.flexColumns) { continue; }
                    if (columnRuns(renderer, 1)[0].text !== "Song") { continue; }
                    songs.push(toSong(renderer));
                } catch (e) {
                    // One unexpected item must not throw away the other results.
                    console.log("Error parsing song item:", describeError(e));
                }
            }
        }
    } catch (e) {
        console.log("Error parsing songs:", describeError(e));
        return [];
    }
    return songs;
}
/**
 * Integration that looks up songs on YouTube Music by requesting the search
 * page through the backend's `/integrations/<id>/search` route and scraping
 * the data embedded in the returned page.
 */
export default class YoutubeMusicWebScraper extends Integration {
    integrationId: number;

    constructor(integrationId: number) {
        super(integrationId);
        this.integrationId = integrationId;
    }

    /**
     * Features advertised to the rest of the application.
     * NOTE(review): album and artist search are advertised, but `searchAlbum`
     * and `searchArtist` below always return an empty list.
     */
    getFeatures(): IntegrationFeature[] {
        return [
            IntegrationFeature.Test,
            IntegrationFeature.SearchSong,
            IntegrationFeature.SearchAlbum,
            IntegrationFeature.SearchArtist,
        ]
    }

    /** Renders the YouTube Music store icon; `props` are forwarded to it. */
    getIcon(props: any) {
        return <StoreLinkIcon whichStore={ExternalStore.YoutubeMusic} {...props} />
    }

    /** The external store that links produced by this integration point to. */
    providesStoreLink() {
        return ExternalStore.YoutubeMusic;
    }

    /**
     * Self-test: searches for a well-known song and checks that it is the
     * first result.
     *
     * @throws Error when the request fails or the expected song is not the
     *         first result.
     */
    async test(testParams: {}) {
        const songs = await this.fetchSongs('No One Knows Queens Of The Stone Age');
        if (songs.length === 0 || songs[0].title !== "No One Knows") {
            throw new Error("Test failed; No One Knows was not correctly identified.");
        }
    }

    /**
     * Searches YouTube Music for songs matching `query`.
     *
     * @param query Free-text search query.
     * @param limit Maximum number of songs to return. Values that are not a
     *              positive number are treated as "no limit".
     * @throws Error when the search request does not succeed.
     */
    async searchSong(query: string, limit: number): Promise<IntegrationSong[]> {
        const songs = await this.fetchSongs(query);
        return limit > 0 ? songs.slice(0, limit) : songs;
    }

    /** Not implemented for the web scraper; always resolves to an empty list. */
    async searchAlbum(query: string, limit: number): Promise<IntegrationAlbum[]> { return []; }

    /** Not implemented for the web scraper; always resolves to an empty list. */
    async searchArtist(query: string, limit: number): Promise<IntegrationArtist[]> { return []; }

    /** Not implemented for the web scraper; always resolves to an empty list. */
    async search(query: string, type: SearchType, limit: number):
        Promise<IntegrationSong[] | IntegrationAlbum[] | IntegrationArtist[]> {
        return [];
    }

    /**
     * Requests the search page for `query` via the backend and scrapes the
     * songs out of it.
     */
    private async fetchSongs(query: string): Promise<IntegrationSong[]> {
        const response = await fetch(
            (process.env.REACT_APP_BACKEND || "") +
            `/integrations/${this.integrationId}/search?q=${encodeURIComponent(query)}`);
        // Without this check an error page would be scraped like a result page
        // and silently yield "no results".
        if (!response.ok) {
            throw new Error(
                `Youtube Music search request failed: ${response.status} ${response.statusText}`);
        }
        const text = await response.text();
        return parseSongs(extractInitialData(text));
    }
}

@ -47,6 +47,17 @@ export function createIntegrations(knex: Knex) {
}
});
// Proxy middleware that forwards requests to the public YouTube Music site.
let proxyYoutubeMusic = createProxyMiddleware({
    target: 'https://music.youtube.com/',
    changeOrigin: true,
    logLevel: 'debug',
    pathRewrite: (path: string, req: any) => {
        console.log("Rewrite URL:", path);
        // Strip the leading integration prefix, e.g. "/integrations/5", so
        // that the remainder of the path is what gets requested upstream.
        const integrationPrefix = /^\/integrations\/[0-9]+/;
        return path.replace(integrationPrefix, '');
    }
})
// In the first layer, retrieve integration details and save details
// in the request.
return async (req: any, res: any, next: any) => {
@ -82,6 +93,10 @@ export function createIntegrations(knex: Knex) {
req.headers["Authorization"] = "Bearer " + req._access_token;
return proxySpotifyCC(req, res, next);
}
case IntegrationType.YoutubeWebScraper: {
console.log("Integration: ", req._integration)
return proxyYoutubeMusic(req, res, next);
}
default: {
res.status(500).send({ reason: "Unsupported integration type " + req._integration.type })
}

Loading…
Cancel
Save